Switch FFTS to linkotec branch for cross-arch support

Timothy Pearson 8 years ago
parent d8856bdf08
commit e4e92bf2b0

@ -0,0 +1,462 @@
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
project(ffts C ASM)
# TODO: to support AutoConfigure building, this should come from a "template" file
set(FFTS_MAJOR 0)
set(FFTS_MINOR 9)
set(FFTS_MICRO 0)
set(FFTS_VERSION "ffts-${FFTS_MAJOR}.${FFTS_MINOR}.${FFTS_MICRO}")
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
# default build type is Debug which means no optimization
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release")
endif(NOT CMAKE_BUILD_TYPE)
# common options
option(ENABLE_NEON
"Enables the use of NEON instructions." OFF
)
option(ENABLE_VFP
"Enables the use of VFP instructions." OFF
)
option(DISABLE_DYNAMIC_CODE
"Disables the use of dynamic machine code generation." OFF
)
option(GENERATE_POSITION_INDEPENDENT_CODE
"Generate position independent code" OFF
)
option(ENABLE_SHARED
"Enable building a shared library." OFF
)
option(ENABLE_STATIC
"Enable building a static library." ON
)
include(CheckCSourceCompiles)
include(CheckCSourceRuns)
include(CheckIncludeFile)
# Ensure defined when building FFTS (as opposed to using it from
# another project). Used to export functions from Windows DLL.
add_definitions(-DFFTS_BUILD)
# check existence of various headers
check_include_file(malloc.h HAVE_MALLOC_H)
check_include_file(stdint.h HAVE_STDINT_H)
check_include_file(stdlib.h HAVE_STDLIB_H)
check_include_file(string.h HAVE_STRING_H)
check_include_file(sys/mman.h HAVE_SYS_MMAN_H)
check_include_file(unistd.h HAVE_UNISTD_H)
if(HAVE_MALLOC_H)
add_definitions(-DHAVE_MALLOC_H)
endif(HAVE_MALLOC_H)
if(HAVE_STDINT_H)
add_definitions(-DHAVE_STDINT_H)
endif(HAVE_STDINT_H)
if(HAVE_STDLIB_H)
add_definitions(-DHAVE_STDLIB_H)
endif(HAVE_STDLIB_H)
if(HAVE_STRING_H)
add_definitions(-DHAVE_STRING_H)
endif(HAVE_STRING_H)
if(HAVE_SYS_MMAN_H)
add_definitions(-DHAVE_SYS_MMAN_H)
endif(HAVE_SYS_MMAN_H)
if(HAVE_UNISTD_H)
add_definitions(-DHAVE_UNISTD_H)
endif(HAVE_UNISTD_H)
# backup flags
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
# Determine whether we are cross-compiling
if(NOT CMAKE_CROSSCOMPILING)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
# Determine the ARM architecture
# Try to execute quietly without messages
set(CMAKE_REQUIRED_QUIET 1)
# The test for ARM architecture
set(TEST_SOURCE_CODE "int main() { return 0; }")
# GCC documentation says "native" is only supported on Linux, but let's try
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=native")
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_NATIVE_FLAG_SUPPORTED)
if(NOT GCC_MARCH_NATIVE_FLAG_SUPPORTED)
# Fallback trying generic ARMv7
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=armv7-a")
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
if(NOT GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
# Fallback trying generic ARMv6
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=armv6")
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_ARMV6_FLAG_SUPPORTED)
if(NOT GCC_MARCH_ARMV6_FLAG_SUPPORTED)
message(WARNING "FFTS failed to determinate ARM architecture")
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
else()
message("FFTS is build using 'march=armv6'")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv6")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv6")
endif(NOT GCC_MARCH_ARMV6_FLAG_SUPPORTED)
else()
message("FFTS is build using 'march=armv7-a'")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv7-a")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a")
endif(NOT GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
else()
message("FFTS is build using 'march=native'")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=native")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
endif(NOT GCC_MARCH_NATIVE_FLAG_SUPPORTED)
# Determine what floating-point hardware (or hardware emulation) is available
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
# The test for ARM NEON support
set(TEST_SOURCE_CODE "
#include <arm_neon.h>
int main()
{
float32x4_t v;
float zeros[4] = {0.0f, 0.0f, 0.0f, 0.0f};
v = vld1q_f32(zeros);
return 0;
}"
)
# Test running with -mfpu=neon and -mfloat-abi=hard
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=neon -mfloat-abi=hard")
check_c_source_runs("${TEST_SOURCE_CODE}" NEON_HARDFP_SUPPORTED)
if(NOT NEON_HARDFP_SUPPORTED)
# Test running with -mfpu=neon and -mfloat-abi=softfp
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=neon -mfloat-abi=softfp")
check_c_source_runs("${TEST_SOURCE_CODE}" NEON_SOFTFP_SUPPORTED)
if(NOT NEON_SOFTFP_SUPPORTED)
if(ENABLE_NEON)
message(FATAL_ERROR "FFTS cannot enable NEON on this platform")
endif(ENABLE_NEON)
else()
message("FFTS is using 'neon' FPU and 'softfp' float ABI")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon -mfloat-abi=softfp")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mfloat-abi=softfp")
set(ENABLE_NEON ON)
endif(NOT NEON_SOFTFP_SUPPORTED)
else()
message("FFTS is using 'neon' FPU and 'hard' float ABI")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon -mfloat-abi=hard")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mfloat-abi=hard")
set(ENABLE_NEON ON)
endif(NOT NEON_HARDFP_SUPPORTED)
# Fallback using VFP if NEON is not supported
if(NOT NEON_HARDFP_SUPPORTED AND NOT NEON_SOFTFP_SUPPORTED)
# Test for ARM VFP support
set(TEST_SOURCE_CODE "
double sum(double a, double b)
{
return a + b;
}
int main()
{
double s1, s2, v1 = 1.0, v2 = 2.0, v3 = 1.0e-322;
s1 = sum(v1, v2);
s2 = sum(v3, v3);
return 0;
}"
)
# Test running with -mfpu=vfp
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=vfp")
check_c_source_runs("${TEST_SOURCE_CODE}" VFP_SUPPORTED)
if(NOT VFP_SUPPORTED)
# Fallback using emulation if VFP is not supported
if(ENABLE_VFP)
message(FATAL_ERROR "FFTS cannot enable VFP on this platform")
endif(ENABLE_VFP)
message(WARNING "FFTS is using 'soft' FPU")
else()
message("FFTS is using 'vfp' FPU")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=vfp")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp")
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
set(ENABLE_VFP ON)
endif(NOT VFP_SUPPORTED)
# Test running with -mfloat-abi=hard
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfloat-abi=hard")
# Use the same test as before
check_c_source_runs("${TEST_SOURCE_CODE}" HARDFP_SUPPORTED)
if(NOT HARDFP_SUPPORTED)
# Test running with -mfloat-abi=softfp
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfloat-abi=softfp")
check_c_source_runs("${TEST_SOURCE_CODE}" SOFTFP_SUPPORTED)
if(NOT SOFTFP_SUPPORTED)
# Most likely development libraries are missing
message(WARNING "FFTS is using 'soft' float ABI")
else()
message("FFTS is using 'softfp' float ABI")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfloat-abi=softfp")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=softfp")
endif(NOT SOFTFP_SUPPORTED)
else()
message("FFTS is using 'hard' float ABI")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfloat-abi=hard")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=hard")
endif(NOT HARDFP_SUPPORTED)
endif(NOT NEON_HARDFP_SUPPORTED AND NOT NEON_SOFTFP_SUPPORTED)
else()
# enable SSE code generation
if(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse")
endif(CMAKE_COMPILER_IS_GNUCC)
# check if the platform has support for SSE intrinsics
check_include_file(xmmintrin.h HAVE_XMMINTRIN_H)
if(HAVE_XMMINTRIN_H)
add_definitions(-DHAVE_SSE)
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
endif(HAVE_XMMINTRIN_H)
# enable SSE2 code generation
if(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse2")
endif(CMAKE_COMPILER_IS_GNUCC)
# check if the platform has support for SSE2 intrinsics
check_include_file(emmintrin.h HAVE_EMMINTRIN_H)
if(HAVE_EMMINTRIN_H)
add_definitions(-DHAVE_SSE2)
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
endif(HAVE_EMMINTRIN_H)
# enable SSE3 code generation
if(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse3")
endif(CMAKE_COMPILER_IS_GNUCC)
# check if the platform has support for SSE3 intrinsics
check_include_file(pmmintrin.h HAVE_PMMINTRIN_H)
if(HAVE_PMMINTRIN_H)
add_definitions(-DHAVE_PMMINTRIN_H)
add_definitions(-DHAVE_SSE3)
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
else()
# check if the platform has specific intrinsics
check_include_file(intrin.h HAVE_INTRIN_H)
if(HAVE_INTRIN_H)
add_definitions(-DHAVE_INTRIN_H)
check_c_source_compiles("
#include<intrin.h>
int main(int argc, char** argv)
{
(void) argv;
(void) argc;
return _mm_movemask_ps(_mm_moveldup_ps(_mm_set_ss(1.0f)));
}" HAVE__MM_MOVELDUP_PS
)
if(HAVE__MM_MOVELDUP_PS)
# assume that we have all SSE3 intrinsics
add_definitions(-DHAVE_SSE3)
endif(HAVE__MM_MOVELDUP_PS)
endif(HAVE_INTRIN_H)
endif(HAVE_PMMINTRIN_H)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
else()
# TODO: Add detection of compiler support and headers
endif(NOT CMAKE_CROSSCOMPILING)
# restore flags
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
# compiler settings
if(MSVC)
# enable all warnings but also disable some..
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4127")
# mark debug versions
set(CMAKE_DEBUG_POSTFIX "d")
add_definitions(-D_USE_MATH_DEFINES)
elseif(CMAKE_COMPILER_IS_GNUCC)
include(CheckCCompilerFlag)
include(CheckLibraryExists)
# enable all warnings
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra")
# check if we can control visibility of symbols
check_c_compiler_flag(-fvisibility=hidden HAVE_GCC_VISIBILITY)
if(HAVE_GCC_VISIBILITY)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
add_definitions(-DHAVE_GCC_VISIBILITY)
endif(HAVE_GCC_VISIBILITY)
# some systems need libm for the math functions to work
check_library_exists(m pow "" HAVE_LIBM)
if(HAVE_LIBM)
list(APPEND CMAKE_REQUIRED_LIBRARIES m)
list(APPEND FFTS_EXTRA_LIBRARIES m)
endif(HAVE_LIBM)
if(HAVE_PMMINTRIN_H)
add_definitions(-msse3)
elseif(HAVE_EMMINTRIN_H)
add_definitions(-msse2)
elseif(HAVE_XMMINTRIN_H)
add_definitions(-msse)
endif(HAVE_PMMINTRIN_H)
endif(MSVC)
include_directories(include)
include_directories(src)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(FFTS_HEADERS
include/ffts.h
)
set(FFTS_SOURCES
src/ffts_attributes.h
src/ffts.c
src/ffts_internal.h
src/ffts_nd.c
src/ffts_nd.h
src/ffts_real.h
src/ffts_real.c
src/ffts_real_nd.c
src/ffts_real_nd.h
src/ffts_transpose.c
src/ffts_transpose.h
src/ffts_trig.c
src/ffts_trig.h
src/ffts_static.c
src/ffts_static.h
src/macros.h
src/patterns.h
src/types.h
)
if(ENABLE_NEON)
list(APPEND FFTS_SOURCES
src/neon.s
)
if(DISABLE_DYNAMIC_CODE)
list(APPEND FFTS_SOURCES
src/neon_static.s
)
endif(DISABLE_DYNAMIC_CODE)
add_definitions(-DHAVE_NEON)
elseif(ENABLE_VFP)
if(NOT DISABLE_DYNAMIC_CODE)
list(APPEND FFTS_SOURCES
src/vfp.s
)
endif(NOT DISABLE_DYNAMIC_CODE)
add_definitions(-DHAVE_VFP)
elseif(HAVE_XMMINTRIN_H)
add_definitions(-DHAVE_SSE)
list(APPEND FFTS_SOURCES
src/macros-sse.h
)
if(NOT DISABLE_DYNAMIC_CODE)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND FFTS_SOURCES
src/codegen_sse.h
)
else()
message(WARNING "Dynamic code is only supported with x64, disabling dynamic code.")
set(DISABLE_DYNAMIC_CODE ON)
endif(CMAKE_SIZEOF_VOID_P EQUAL 8)
endif(NOT DISABLE_DYNAMIC_CODE)
endif(ENABLE_NEON)
if(DISABLE_DYNAMIC_CODE)
add_definitions(-DDYNAMIC_DISABLED)
else()
list(APPEND FFTS_SOURCES
src/codegen.c
src/codegen.h
)
endif(DISABLE_DYNAMIC_CODE)
if(GENERATE_POSITION_INDEPENDENT_CODE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif(GENERATE_POSITION_INDEPENDENT_CODE)
if(ENABLE_SHARED)
add_library(ffts_shared SHARED
${FFTS_HEADERS}
${FFTS_SOURCES}
)
# On unix-like platforms the library is called "libffts.so" and on Windows "ffts.dll"
set_target_properties(ffts_shared PROPERTIES
DEFINE_SYMBOL FFTS_SHARED
OUTPUT_NAME ffts
VERSION ${FFTS_MAJOR}.${FFTS_MINOR}.${FFTS_MICRO}
)
endif(ENABLE_SHARED)
if(ENABLE_STATIC)
add_library(ffts_static STATIC
${FFTS_HEADERS}
${FFTS_SOURCES}
)
if(UNIX)
# On unix-like platforms the library is called "libffts.a"
set_target_properties(ffts_static PROPERTIES OUTPUT_NAME ffts)
endif(UNIX)
endif(ENABLE_STATIC)
if(ENABLE_STATIC OR ENABLE_SHARED)
add_executable(ffts_test
tests/test.c
)
# link with static library by default
if(ENABLE_STATIC)
add_library(ffts ALIAS ffts_static)
else()
add_library(ffts ALIAS ffts_shared)
endif(ENABLE_STATIC)
target_link_libraries(ffts_test
ffts
${FFTS_EXTRA_LIBRARIES}
)
endif(ENABLE_STATIC OR ENABLE_SHARED)

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -15,23 +15,51 @@
@SET_MAKE@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
test $$am__dry = yes; \
}
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@ -52,10 +80,11 @@ build_triplet = @build@
host_triplet = @host@
@ENABLE_JNI_TRUE@am__append_1 = java
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
$(srcdir)/ffts.pc.in $(top_srcdir)/configure AUTHORS \
config.guess config.sub depcomp install-sh ltmain.sh missing
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/configure $(am__configure_deps) \
$(srcdir)/config.h.in $(srcdir)/ffts.pc.in AUTHORS README \
compile config.guess config.sub depcomp install-sh missing \
ltmain.sh
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
@ -73,15 +102,28 @@ mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = config.h
CONFIG_CLEAN_FILES = ffts.pc
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
html-recursive info-recursive install-data-recursive \
install-dvi-recursive install-exec-recursive \
install-html-recursive install-info-recursive \
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
ctags-recursive dvi-recursive html-recursive info-recursive \
install-data-recursive install-dvi-recursive \
install-exec-recursive install-html-recursive \
install-info-recursive install-pdf-recursive \
install-ps-recursive install-recursive installcheck-recursive \
installdirs-recursive pdf-recursive ps-recursive \
tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@ -118,9 +160,30 @@ am__installdirs = "$(DESTDIR)$(pkgconfigdir)"
DATA = $(pkgconfig_DATA)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
$(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
am__recursive_targets = \
$(RECURSIVE_TARGETS) \
$(RECURSIVE_CLEAN_TARGETS) \
$(am__extra_recursive_targets)
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
cscope distdir dist dist-all distcheck
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
$(LISP)config.h.in
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
CSCOPE = cscope
@ -169,6 +232,7 @@ am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
@ -343,8 +407,8 @@ $(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__aclocal_m4_deps):
config.h: stamp-h1
@if test ! -f $@; then rm -f stamp-h1; else :; fi
@if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
@test -f $@ || rm -f stamp-h1
@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
@ -395,14 +459,13 @@ uninstall-pkgconfigDATA:
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS):
@fail= failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
$(am__recursive_targets):
@fail=; \
if $(am__make_keepgoing); then \
failcom='fail=yes'; \
else \
failcom='exit 1'; \
fi; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
@ -423,31 +486,13 @@ $(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS):
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
tags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
done
ctags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
done
cscopelist-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \
done
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
@ -463,12 +508,7 @@ TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
@ -480,15 +520,11 @@ TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
@ -497,18 +533,16 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscope: cscope.files
test ! -s cscope.files \
|| $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
clean-cscope:
-rm -f cscope.files
cscope.files: clean-cscope cscopelist
cscopelist: cscopelist-recursive
cscope.files: clean-cscope cscopelist-recursive cscopelist
cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
@ -606,10 +640,16 @@ dist-xz: distdir
$(am__post_remove_distdir)
dist-tarZ: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
dist-shar: distdir
@echo WARNING: "Support for distribution archives compressed with" \
"legacy program 'compress' is deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir)
@ -814,27 +854,24 @@ ps-am:
uninstall-am: uninstall-pkgconfigDATA
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all \
cscopelist-recursive ctags-recursive install-am install-strip \
tags-recursive
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am am--refresh check check-am clean clean-cscope \
clean-generic clean-libtool cscope cscopelist \
cscopelist-recursive ctags ctags-recursive dist dist-all \
dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ dist-xz \
dist-zip distcheck distclean distclean-generic distclean-hdr \
distclean-libtool distclean-tags distcleancheck distdir \
distuninstallcheck dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-pkgconfigDATA install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs installdirs-am maintainer-clean \
.MAKE: $(am__recursive_targets) all install-am install-strip
.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am \
am--refresh check check-am clean clean-cscope clean-generic \
clean-libtool cscope cscopelist-am ctags ctags-am dist \
dist-all dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ \
dist-xz dist-zip distcheck distclean distclean-generic \
distclean-hdr distclean-libtool distclean-tags distcleancheck \
distdir distuninstallcheck dvi dvi-am html html-am info \
info-am install install-am install-data install-data-am \
install-dvi install-dvi-am install-exec install-exec-am \
install-html install-html-am install-info install-info-am \
install-man install-pdf install-pdf-am install-pkgconfigDATA \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
uninstall uninstall-am uninstall-pkgconfigDATA
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
uninstall-am uninstall-pkgconfigDATA
# Tell versions [3.59,3.63) of GNU make to not export all variables.

@ -1,27 +0,0 @@
FFTS -- The Fastest Fourier Transform in the South
by Anthony Blake <anthonix@me.com>
To build for Android, edit and run build_android.sh
To build for iOS, edit and run build_iphone.sh
To build for Linux or OS X on x86, run
./configure --enable-sse --enable-single --prefix=/usr/local
make
make install
FFTS dynamically generates code at runtime. This can be disabled with
--disable-dynamic-code
For JNI targets: --enable-jni will build the jni stuff automatically for
the host target, and --enable-shared must also be added manually for it to
work.
If you like FFTS, please show your support by sending a postcard to:
Anthony Blake
Department of Computer Science
The University of Waikato
Private Bag 3105
Hamilton 3240
NEW ZEALAND

@ -0,0 +1,35 @@
# FFTS -- The Fastest Fourier Transform in the South
[![Build Status](https://travis-ci.org/linkotec/ffts.svg?branch=master)](https://travis-ci.org/linkotec/ffts)
To build for Android, edit and run build_android.sh
To build for iOS, edit and run build_iphone.sh
To build for Linux or OS X on x86, run
./configure --enable-sse --enable-single --prefix=/usr/local
make
make install
Optionally, build for Windows and Linux with CMake by running
mkdir build
cd build
cmake ..
FFTS dynamically generates code at runtime. This can be disabled with
--disable-dynamic-code
Note that 32 bit x86 dynamic machine code generation is not supported at the moment.
For JNI targets: --enable-jni will build the jni stuff automatically for
the host target, and --enable-shared must also be added manually for it to
work.
If you like FFTS, please show your support by sending a postcard to:
Anthony Blake<br>
Department of Computer Science<br>
The University of Waikato<br>
Private Bag 3105<br>
Hamilton 3240<br>
NEW ZEALAND

lib/ffts/aclocal.m4

@ -1,6 +1,6 @@
# generated automatically by aclocal 1.12.4 -*- Autoconf -*-
# generated automatically by aclocal 1.14 -*- Autoconf -*-
# Copyright (C) 1996-2012 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -11,6 +11,7 @@
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
@ -8606,7 +8607,7 @@ m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
# Copyright (C) 2002-2012 Free Software Foundation, Inc.
# Copyright (C) 2002-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8618,10 +8619,10 @@ m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.12'
[am__api_version='1.14'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.12.4], [],
m4_if([$1], [1.14], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@ -8637,14 +8638,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.12.4])dnl
[AM_AUTOMAKE_VERSION([1.14])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# Figure out how to run the assembler. -*- Autoconf -*-
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8664,7 +8665,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8717,7 +8718,7 @@ am_aux_dir=`cd $ac_aux_dir && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2012 Free Software Foundation, Inc.
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8748,7 +8749,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999-2012 Free Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8939,7 +8940,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2012 Free Software Foundation, Inc.
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -8950,7 +8951,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# ------------------------------
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
[{
# Autoconf 2.62 quotes --file arguments for eval, but not when files
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
case $CONFIG_FILES in
@ -8979,7 +8980,7 @@ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
test -z "$DEPDIR" && continue
am__include=`sed -n 's/^am__include = //p' < "$mf"`
test -z "am__include" && continue
test -z "$am__include" && continue
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
# Find all dependency output files, they are included files with
# $(DEPDIR) in their names. We invoke sed twice because it is the
@ -9015,7 +9016,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2012 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9024,6 +9025,12 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# This macro actually does too much. Some checks are only needed if
# your package does certain things. But this isn't really a big deal.
dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O.
m4_define([AC_PROG_CC],
m4_defn([AC_PROG_CC])
[_AM_PROG_CC_C_O
])
# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
# AM_INIT_AUTOMAKE([OPTIONS])
# -----------------------------------------------
@ -9036,7 +9043,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# arguments mandatory, and then we can depend on a new Autoconf
# release and drop the old call support.
AC_DEFUN([AM_INIT_AUTOMAKE],
[AC_PREREQ([2.62])dnl
[AC_PREREQ([2.65])dnl
dnl Autoconf wants to disallow AM_ names. We explicitly allow
dnl the ones we care about.
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
@ -9066,8 +9073,7 @@ AC_SUBST([CYGPATH_W])
dnl Distinguish between old-style and new-style calls.
m4_ifval([$2],
[AC_DIAGNOSE([obsolete],
[$0: two- and three-arguments forms are deprecated. For more info, see:
http://www.gnu.org/software/automake/manual/automake.html#Modernize-AM_INIT_AUTOMAKE-invocation])
[$0: two- and three-arguments forms are deprecated.])
m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
AC_SUBST([PACKAGE], [$1])dnl
AC_SUBST([VERSION], [$2])],
@ -9121,22 +9127,60 @@ AC_PROVIDE_IFELSE([AC_PROG_OBJC],
[_AM_DEPENDENCIES([OBJC])],
[m4_define([AC_PROG_OBJC],
m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl
dnl Support for Objective C++ was only introduced in Autoconf 2.65,
dnl but we still cater to Autoconf 2.62.
m4_ifdef([AC_PROG_OBJCXX],
[AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
AC_PROVIDE_IFELSE([AC_PROG_OBJCXX],
[_AM_DEPENDENCIES([OBJCXX])],
[m4_define([AC_PROG_OBJCXX],
m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])])dnl
m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl
])
_AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl
dnl The 'parallel-tests' driver may need to know about EXEEXT, so add the
dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro
dnl is hooked onto _AC_COMPILER_EXEEXT early, see below.
AC_REQUIRE([AM_SILENT_RULES])dnl
dnl The testsuite driver may need to know about EXEEXT, so add the
dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This
dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below.
AC_CONFIG_COMMANDS_PRE(dnl
[m4_provide_if([_AM_COMPILER_EXEEXT],
[AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl
])
# POSIX will say in a future version that running "rm -f" with no argument
# is OK; and we want to be able to make that assumption in our Makefile
# recipes. So use an aggressive probe to check that the usage we want is
# actually supported "in the wild" to an acceptable degree.
# See automake bug#10828.
# To make any issue more visible, cause the running configure to be aborted
# by default if the 'rm' program in use doesn't match our expectations; the
# user can still override this though.
if rm -f && rm -fr && rm -rf; then : OK; else
cat >&2 <<'END'
Oops!
Your 'rm' program seems unable to run without file operands specified
on the command line, even when the '-f' option is present. This is contrary
to the behaviour of most rm programs out there, and not conforming with
the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
Please tell bug-automake@gnu.org about your system, including the value
of your $PATH and any error possibly output before this message. This
can help us improve future automake versions.
END
if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
echo 'Configuration will proceed anyway, since you have set the' >&2
echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
echo >&2
else
cat >&2 <<'END'
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
to "yes", and re-run configure.
END
AC_MSG_ERROR([Your 'rm' program is bad, sorry.])
fi
fi])
dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not
dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further
@ -9144,7 +9188,6 @@ dnl mangled by Autoconf and run in a shell conditional statement.
m4_define([_AC_COMPILER_EXEEXT],
m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])])
# When config.status generates a header, we must update the stamp-h file.
# This file resides in the same directory as the config header
# that is generated. The stamp files are numbered to have different names.
@ -9166,7 +9209,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9187,7 +9230,7 @@ if test x"${install_sh}" != xset; then
fi
AC_SUBST([install_sh])])
# Copyright (C) 2003-2012 Free Software Foundation, Inc.
# Copyright (C) 2003-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9208,7 +9251,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9258,7 +9301,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2012 Free Software Foundation, Inc.
# Copyright (C) 1997-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9273,8 +9316,8 @@ AC_SUBST($1)])
# AM_MISSING_HAS_RUN
# ------------------
# Define MISSING if not defined so far and test if it supports --run.
# If it does, set am_missing_run to use it, otherwise, to nothing.
# Define MISSING if not defined so far and test if it is modern enough.
# If it is, set am_missing_run to use it, otherwise, to nothing.
AC_DEFUN([AM_MISSING_HAS_RUN],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([missing])dnl
@ -9287,8 +9330,8 @@ if test x"${MISSING+set}" != xset; then
esac
fi
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
am_missing_run="$MISSING --run "
if eval "$MISSING --is-lightweight"; then
am_missing_run="$MISSING "
else
am_missing_run=
AC_MSG_WARN(['missing' script is too old or missing])
@ -9297,7 +9340,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9326,9 +9369,73 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# _AM_PROG_CC_C_O
# ---------------
# Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC
# to automatically call this.
AC_DEFUN([_AM_PROG_CC_C_O],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([compile])dnl
AC_LANG_PUSH([C])dnl
AC_CACHE_CHECK(
[whether $CC understands -c and -o together],
[am_cv_prog_cc_c_o],
[AC_LANG_CONFTEST([AC_LANG_PROGRAM([])])
# Make sure it works both with $CC and with simple cc.
# Following AC_PROG_CC_C_O, we do the test twice because some
# compilers refuse to overwrite an existing .o file with -o,
# though they will create one.
am_cv_prog_cc_c_o=yes
for am_i in 1 2; do
if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \
&& test -f conftest2.$ac_objext; then
: OK
else
am_cv_prog_cc_c_o=no
break
fi
done
rm -f core conftest*
unset am_i])
if test "$am_cv_prog_cc_c_o" != yes; then
# Losing compiler, so override with the script.
# FIXME: It is wrong to rewrite CC.
# But if we don't then we get into trouble of one sort or another.
# A longer-term fix would be to have automake use am__CC in this case,
# and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
CC="$am_aux_dir/compile $CC"
fi
AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_RUN_LOG(COMMAND)
# -------------------
# Run COMMAND, save the exit status in ac_status, and log it.
# (This has been adapted from Autoconf's _AC_RUN_LOG macro.)
AC_DEFUN([AM_RUN_LOG],
[{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD
($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
(exit $ac_status); }])
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2012 Free Software Foundation, Inc.
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9409,7 +9516,67 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
# Copyright (C) 2001-2012 Free Software Foundation, Inc.
# Copyright (C) 2009-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# AM_SILENT_RULES([DEFAULT])
# --------------------------
# Enable less verbose build rules; with the default set to DEFAULT
# ("yes" being less verbose, "no" or empty being verbose).
AC_DEFUN([AM_SILENT_RULES],
[AC_ARG_ENABLE([silent-rules], [dnl
AS_HELP_STRING(
[--enable-silent-rules],
[less verbose build output (undo: "make V=1")])
AS_HELP_STRING(
[--disable-silent-rules],
[verbose build output (undo: "make V=0")])dnl
])
case $enable_silent_rules in @%:@ (((
yes) AM_DEFAULT_VERBOSITY=0;;
no) AM_DEFAULT_VERBOSITY=1;;
*) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);;
esac
dnl
dnl A few 'make' implementations (e.g., NonStop OS and NextStep)
dnl do not support nested variable expansions.
dnl See automake bug#9928 and bug#10237.
am_make=${MAKE-make}
AC_CACHE_CHECK([whether $am_make supports nested variables],
[am_cv_make_support_nested_variables],
[if AS_ECHO([['TRUE=$(BAR$(V))
BAR0=false
BAR1=true
V=1
am__doit:
@$(TRUE)
.PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then
am_cv_make_support_nested_variables=yes
else
am_cv_make_support_nested_variables=no
fi])
if test $am_cv_make_support_nested_variables = yes; then
dnl Using '$V' instead of '$(V)' breaks IRIX make.
AM_V='$(V)'
AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
else
AM_V=$AM_DEFAULT_VERBOSITY
AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
fi
AC_SUBST([AM_V])dnl
AM_SUBST_NOTMAKE([AM_V])dnl
AC_SUBST([AM_DEFAULT_V])dnl
AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl
AC_SUBST([AM_DEFAULT_VERBOSITY])dnl
AM_BACKSLASH='\'
AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
# Copyright (C) 2001-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9437,7 +9604,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2012 Free Software Foundation, Inc.
# Copyright (C) 2006-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9456,7 +9623,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2012 Free Software Foundation, Inc.
# Copyright (C) 2004-2013 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -9475,76 +9642,114 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Substitute a variable $(am__untar) that extract such
# a tarball read from stdin.
# $(am__untar) < result.tar
#
AC_DEFUN([_AM_PROG_TAR],
[# Always define AMTAR for backward compatibility. Yes, it's still used
# in the wild :-( We should find a proper way to deprecate it ...
AC_SUBST([AMTAR], ['$${TAR-tar}'])
m4_if([$1], [v7],
[am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
[m4_case([$1], [ustar],, [pax],,
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
# Loop over all known methods to create a tar archive until one works.
# We'll loop over all known methods to create a tar archive until one works.
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
# Do not fold the above two line into one, because Tru64 sh and
# Solaris sh will not grok spaces in the rhs of '-'.
for _am_tool in $_am_tools
do
case $_am_tool in
gnutar)
for _am_tar in tar gnutar gtar;
do
AM_RUN_LOG([$_am_tar --version]) && break
done
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
am__untar="$_am_tar -xf -"
;;
plaintar)
# Must skip GNU tar: if it does not support --format= it doesn't create
# ustar tarball either.
(tar --version) >/dev/null 2>&1 && continue
am__tar='tar chf - "$$tardir"'
am__tar_='tar chf - "$tardir"'
am__untar='tar xf -'
;;
pax)
am__tar='pax -L -x $1 -w "$$tardir"'
am__tar_='pax -L -x $1 -w "$tardir"'
am__untar='pax -r'
;;
cpio)
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
am__untar='cpio -i -H $1 -d'
;;
none)
am__tar=false
am__tar_=false
am__untar=false
;;
esac
# If the value was cached, stop now. We just wanted to have am__tar
# and am__untar set.
test -n "${am_cv_prog_tar_$1}" && break
m4_if([$1], [v7],
[am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
[m4_case([$1],
[ustar],
[# The POSIX 1988 'ustar' format is defined with fixed-size fields.
# There is notably a 21 bits limit for the UID and the GID. In fact,
# the 'pax' utility can hang on bigger UID/GID (see automake bug#8343
# and bug#13588).
am_max_uid=2097151 # 2^21 - 1
am_max_gid=$am_max_uid
# The $UID and $GID variables are not portable, so we need to resort
# to the POSIX-mandated id(1) utility. Errors in the 'id' calls
# below are definitely unexpected, so allow the users to see them
# (that is, avoid stderr redirection).
am_uid=`id -u || echo unknown`
am_gid=`id -g || echo unknown`
AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format])
if test $am_uid -le $am_max_uid; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
_am_tools=none
fi
AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format])
if test $am_gid -le $am_max_gid; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
_am_tools=none
fi],
[pax],
[],
# tar/untar a dummy directory, and stop if the command works
rm -rf conftest.dir
mkdir conftest.dir
echo GrepMe > conftest.dir/file
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
# Go ahead even if we have the value already cached. We do so because we
# need to set the values for the 'am__tar' and 'am__untar' variables.
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
for _am_tool in $_am_tools; do
case $_am_tool in
gnutar)
for _am_tar in tar gnutar gtar; do
AM_RUN_LOG([$_am_tar --version]) && break
done
am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
am__untar="$_am_tar -xf -"
;;
plaintar)
# Must skip GNU tar: if it does not support --format= it doesn't create
# ustar tarball either.
(tar --version) >/dev/null 2>&1 && continue
am__tar='tar chf - "$$tardir"'
am__tar_='tar chf - "$tardir"'
am__untar='tar xf -'
;;
pax)
am__tar='pax -L -x $1 -w "$$tardir"'
am__tar_='pax -L -x $1 -w "$tardir"'
am__untar='pax -r'
;;
cpio)
am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
am__untar='cpio -i -H $1 -d'
;;
none)
am__tar=false
am__tar_=false
am__untar=false
;;
esac
# If the value was cached, stop now. We just wanted to have am__tar
# and am__untar set.
test -n "${am_cv_prog_tar_$1}" && break
# tar/untar a dummy directory, and stop if the command works.
rm -rf conftest.dir
mkdir conftest.dir
echo GrepMe > conftest.dir/file
AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
rm -rf conftest.dir
if test -s conftest.tar; then
AM_RUN_LOG([$am__untar <conftest.tar])
AM_RUN_LOG([cat conftest.dir/file])
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
fi
done
rm -rf conftest.dir
if test -s conftest.tar; then
AM_RUN_LOG([$am__untar <conftest.tar])
grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
fi
done
rm -rf conftest.dir
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR

@ -9,9 +9,6 @@
/* Define to FFT in single precision. */
#undef FFTS_PREC_SINGLE
/* Set ARM float abi. */
#undef FLOAT_ABI
/* Define to 1 if you have the declaration of `memalign', and to 0 if you
don't. */
#undef HAVE_DECL_MEMALIGN
@ -146,3 +143,5 @@
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef uint64_t
// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:

lib/ffts/configure

@ -713,6 +713,10 @@ build_os
build_vendor
build_cpu
build
AM_BACKSLASH
AM_DEFAULT_VERBOSITY
AM_DEFAULT_V
AM_V
am__untar
am__tar
AMTAR
@ -777,6 +781,7 @@ SHELL'
ac_subst_files=''
ac_user_opts='
enable_option_checking
enable_silent_rules
enable_dependency_tracking
enable_shared
enable_static
@ -1429,6 +1434,8 @@ Optional Features:
--disable-option-checking ignore unrecognized --enable/--with options
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-silent-rules less verbose build output (undo: "make V=1")
--disable-silent-rules verbose build output (undo: "make V=0")
--enable-dependency-tracking
do not reject slow dependency extractors
--disable-dependency-tracking
@ -2608,7 +2615,7 @@ ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $
ac_compiler_gnu=$ac_cv_c_compiler_gnu
am__api_version='1.12'
am__api_version='1.14'
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
@ -2821,8 +2828,8 @@ if test x"${MISSING+set}" != xset; then
esac
fi
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
am_missing_run="$MISSING --run "
if eval "$MISSING --is-lightweight"; then
am_missing_run="$MISSING "
else
am_missing_run=
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is too old or missing" >&5
@ -3062,6 +3069,45 @@ else
fi
rmdir .tst 2>/dev/null
# Check whether --enable-silent-rules was given.
if test "${enable_silent_rules+set}" = set; then :
enableval=$enable_silent_rules;
fi
case $enable_silent_rules in # (((
yes) AM_DEFAULT_VERBOSITY=0;;
no) AM_DEFAULT_VERBOSITY=1;;
*) AM_DEFAULT_VERBOSITY=1;;
esac
am_make=${MAKE-make}
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $am_make supports nested variables" >&5
$as_echo_n "checking whether $am_make supports nested variables... " >&6; }
if ${am_cv_make_support_nested_variables+:} false; then :
$as_echo_n "(cached) " >&6
else
if $as_echo 'TRUE=$(BAR$(V))
BAR0=false
BAR1=true
V=1
am__doit:
@$(TRUE)
.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
am_cv_make_support_nested_variables=yes
else
am_cv_make_support_nested_variables=no
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_make_support_nested_variables" >&5
$as_echo "$am_cv_make_support_nested_variables" >&6; }
if test $am_cv_make_support_nested_variables = yes; then
AM_V='$(V)'
AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
else
AM_V=$AM_DEFAULT_VERBOSITY
AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
fi
AM_BACKSLASH='\'
if test "`cd $srcdir && pwd`" != "`pwd`"; then
# Use -I$(srcdir) only when $(srcdir) != ., so that make's output
# is not polluted with repeated "-I."
@ -3125,6 +3171,10 @@ mkdir_p='$(MKDIR_P)'
# in the wild :-( We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}'
# We'll loop over all known methods to create a tar archive until one works.
_am_tools='gnutar pax cpio none'
am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
@ -3132,6 +3182,48 @@ am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
# POSIX will say in a future version that running "rm -f" with no argument
# is OK; and we want to be able to make that assumption in our Makefile
# recipes. So use an aggressive probe to check that the usage we want is
# actually supported "in the wild" to an acceptable degree.
# See automake bug#10828.
# To make any issue more visible, cause the running configure to be aborted
# by default if the 'rm' program in use doesn't match our expectations; the
# user can still override this though.
if rm -f && rm -fr && rm -rf; then : OK; else
cat >&2 <<'END'
Oops!
Your 'rm' program seems unable to run without file operands specified
on the command line, even when the '-f' option is present. This is contrary
to the behaviour of most rm programs out there, and not conforming with
the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
Please tell bug-automake@gnu.org about your system, including the value
of your $PATH and any error possibly output before this message. This
can help us improve future automake versions.
END
if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
echo 'Configuration will proceed anyway, since you have set the' >&2
echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
echo >&2
else
cat >&2 <<'END'
Aborting the configuration process, to ensure you take notice of the issue.
You can download and install GNU coreutils to get an 'rm' implementation
that behaves properly: <http://www.gnu.org/software/coreutils/>.
If you want to complete the configuration process using your problematic
'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
to "yes", and re-run configure.
END
as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
fi
fi
# AC_CONFIG_SRCDIR([include/common.h])
@ -4448,6 +4540,65 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC understands -c and -o together" >&5
$as_echo_n "checking whether $CC understands -c and -o together... " >&6; }
if ${am_cv_prog_cc_c_o+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
int
main ()
{
;
return 0;
}
_ACEOF
# Make sure it works both with $CC and with simple cc.
# Following AC_PROG_CC_C_O, we do the test twice because some
# compilers refuse to overwrite an existing .o file with -o,
# though they will create one.
am_cv_prog_cc_c_o=yes
for am_i in 1 2; do
if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o conftest2.$ac_objext" >&5
($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); } \
&& test -f conftest2.$ac_objext; then
: OK
else
am_cv_prog_cc_c_o=no
break
fi
done
rm -f core conftest*
unset am_i
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
$as_echo "$am_cv_prog_cc_c_o" >&6; }
if test "$am_cv_prog_cc_c_o" != yes; then
# Losing compiler, so override with the script.
# FIXME: It is wrong to rewrite CC.
# But if we don't then we get into trouble of one sort or another.
# A longer-term fix would be to have automake use am__CC in this case,
# and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
CC="$am_aux_dir/compile $CC"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
depcc="$CC" am_compiler_list=
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5
@ -15533,9 +15684,6 @@ else
fi
$as_echo "#define FLOAT_ABI \$float_abi" >>confdefs.h
# Check whether --enable-jni was given.
if test "${enable_jni+set}" = set; then :
enableval=$enable_jni; have_jni=$enableval
@ -15747,7 +15895,7 @@ else
JAVA_TEST=Test.java
CLASS_TEST=Test.class
cat << \EOF > $JAVA_TEST
/* #line 15750 "configure" */
/* #line 15898 "configure" */
public class Test {
}
EOF
@ -18377,7 +18525,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;}
case $ac_file$ac_mode in
"depfiles":C) test x"$AMDEP_TRUE" != x"" || {
# Autoconf 2.62 quotes --file arguments for eval, but not when files
# Older Autoconf quotes --file arguments for eval, but not when files
# are listed without --file. Let's play safe and only enable the eval
# if we detect the quoting.
case $CONFIG_FILES in
@ -18428,7 +18576,7 @@ $as_echo X"$mf" |
DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
test -z "$DEPDIR" && continue
am__include=`sed -n 's/^am__include = //p' < "$mf"`
test -z "am__include" && continue
test -z "$am__include" && continue
am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
# Find all dependency output files, they are included files with
# $(DEPDIR) in their names. We invoke sed twice because it is the

@ -1,7 +1,7 @@
/*
This file is part of FFTS.
Copyright (c) 2012, Anthony M. Blake
All rights reserved.
@ -29,40 +29,82 @@
*/
#ifndef __FFTS_H__
#define __FFTS_H__
#ifndef FFTS_H
#define FFTS_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif /* __cplusplus */
extern "C" {
#endif
#if (defined(_WIN32) || defined(WIN32)) && defined(FFTS_SHARED)
# ifdef FFTS_BUILD
# define FFTS_API __declspec(dllexport)
# else
# define FFTS_API __declspec(dllimport)
# endif
#else
# if (__GNUC__ >= 4) || defined(HAVE_GCC_VISIBILITY)
# define FFTS_API __attribute__ ((visibility("default")))
# else
# define FFTS_API
# endif
#endif
/* The direction of the transform
(i.e, the sign of the exponent in the transform.)
*/
#define FFTS_FORWARD (-1)
#define FFTS_BACKWARD (+1)
struct _ffts_plan_t;
typedef struct _ffts_plan_t ffts_plan_t;
ffts_plan_t *ffts_init_1d(size_t N, int sign);
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);
/* Complex data is stored in the interleaved format
(i.e, the real and imaginary parts composing each
element of complex data are stored adjacently in memory)
The multi-dimensional arrays passed are expected to be
stored as a single contiguous block in row-major order
*/
FFTS_API ffts_plan_t*
ffts_init_1d(size_t N, int sign);
// For real transforms, sign == -1 implies a real-to-complex forwards tranform,
// and sign == 1 implies a complex-to-real backwards transform
// The output of a real-to-complex transform is N/2+1 complex numbers, where the
// redundant outputs have been omitted.
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign);
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign);
FFTS_API ffts_plan_t*
ffts_init_2d(size_t N1, size_t N2, int sign);
void ffts_execute(ffts_plan_t * , const void *input, void *output);
void ffts_free(ffts_plan_t *);
FFTS_API ffts_plan_t*
ffts_init_nd(int rank, size_t *Ns, int sign);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
/* For real transforms, sign == FFTS_FORWARD implies a real-to-complex
forwards transform, and sign == FFTS_BACKWARD implies a complex-to-real
backwards transform.
The output of a real-to-complex transform is N/2+1 complex numbers,
where the redundant outputs have been omitted.
*/
FFTS_API ffts_plan_t*
ffts_init_1d_real(size_t N, int sign);
FFTS_API ffts_plan_t*
ffts_init_2d_real(size_t N1, size_t N2, int sign);
FFTS_API ffts_plan_t*
ffts_init_nd_real(int rank, size_t *Ns, int sign);
FFTS_API void
ffts_execute(ffts_plan_t *p, const void *input, void *output);
FFTS_API void
ffts_free(ffts_plan_t *p);
#ifdef __cplusplus
}
#endif
#endif /* FFTS_H */
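As a usage note for the API declared above: the following is a minimal sketch of a 16-point forward complex transform driven through this header. The transform size, the input values and the plain malloc calls are illustrative assumptions only; FFTS generally expects SIMD-friendly aligned buffers, so real code would use an aligned allocator instead.

#include <stdio.h>
#include <stdlib.h>
#include "ffts.h"

int main(void)
{
    /* Complex data is interleaved (re, im), so a 1-D transform of N points
       needs 2 * N floats for both input and output. */
    size_t N = 16;
    float *in  = malloc(2 * N * sizeof(float));
    float *out = malloc(2 * N * sizeof(float));

    ffts_plan_t *p = ffts_init_1d(N, FFTS_FORWARD);
    if (!p) {
        /* Assumed failure convention: a NULL plan means this size/sign
           combination could not be planned. */
        return 1;
    }

    for (size_t i = 0; i < N; i++) {
        in[2 * i]     = (float) i; /* real part */
        in[2 * i + 1] = 0.0f;      /* imaginary part */
    }

    ffts_execute(p, in, out);
    printf("bin 0 = %f + %fi\n", out[0], out[1]);

    ffts_free(p);
    free(in);
    free(out);
    return 0;
}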

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -20,23 +20,51 @@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
test $$am__dry = yes; \
}
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@ -56,7 +84,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = java
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
@ -109,23 +137,49 @@ am__libffts_jni_la_SOURCES_DIST = jni/ffts_jni.c
@ENABLE_JNI_TRUE@am_libffts_jni_la_OBJECTS = \
@ENABLE_JNI_TRUE@ libffts_jni_la-ffts_jni.lo
libffts_jni_la_OBJECTS = $(am_libffts_jni_la_OBJECTS)
libffts_jni_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(libffts_jni_la_CFLAGS) \
$(CFLAGS) $(libffts_jni_la_LDFLAGS) $(LDFLAGS) -o $@
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
libffts_jni_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
$(libffts_jni_la_CFLAGS) $(CFLAGS) $(libffts_jni_la_LDFLAGS) \
$(LDFLAGS) -o $@
@ENABLE_JNI_TRUE@am_libffts_jni_la_rpath = -rpath $(libdir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(libffts_jni_la_SOURCES)
DIST_SOURCES = $(am__libffts_jni_la_SOURCES_DIST)
am__can_run_installinfo = \
@ -135,11 +189,29 @@ am__can_run_installinfo = \
esac
DATA = $(pkgdata_DATA)
HEADERS = $(nodist_include_HEADERS)
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
@ -313,6 +385,7 @@ $(top_srcdir)/configure: $(am__configure_deps)
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
@ -347,8 +420,9 @@ clean-libLTLIBRARIES:
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libffts_jni.la: $(libffts_jni_la_OBJECTS) $(libffts_jni_la_DEPENDENCIES) $(EXTRA_libffts_jni_la_DEPENDENCIES)
$(libffts_jni_la_LINK) $(am_libffts_jni_la_rpath) $(libffts_jni_la_OBJECTS) $(libffts_jni_la_LIBADD) $(LIBS)
$(AM_V_CCLD)$(libffts_jni_la_LINK) $(am_libffts_jni_la_rpath) $(libffts_jni_la_OBJECTS) $(libffts_jni_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@ -359,32 +433,32 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libffts_jni_la-ffts_jni.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
libffts_jni_la-ffts_jni.lo: jni/ffts_jni.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -MT libffts_jni_la-ffts_jni.lo -MD -MP -MF $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo $(DEPDIR)/libffts_jni_la-ffts_jni.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='jni/ffts_jni.c' object='libffts_jni_la-ffts_jni.lo' libtool=yes @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -MT libffts_jni_la-ffts_jni.lo -MD -MP -MF $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo $(DEPDIR)/libffts_jni_la-ffts_jni.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='jni/ffts_jni.c' object='libffts_jni_la-ffts_jni.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
mostlyclean-libtool:
-rm -f *.lo
@ -434,26 +508,15 @@ uninstall-nodist_includeHEADERS:
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
@ -465,15 +528,11 @@ TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
@ -482,9 +541,10 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
@ -646,9 +706,9 @@ uninstall-am: uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS \
.MAKE: all check install install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-local cscopelist \
ctags distclean distclean-compile distclean-generic \
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-local cscopelist-am \
ctags ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
@ -659,7 +719,7 @@ uninstall-am: uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags uninstall uninstall-am uninstall-libLTLIBRARIES \
tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES \
uninstall-nodist_includeHEADERS uninstall-pkgdataDATA

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="gen"/>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
<classpathentry kind="output" path="bin/classes"/>
</classpath>

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>ffts-android</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>com.android.ide.eclipse.adt.ApkBuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
<linkedResources>
<link>
<name>src</name>
<type>2</type>
<locationURI>PARENT-1-PROJECT_LOC/src</locationURI>
</link>
</linkedResources>
</projectDescription>

@ -0,0 +1,4 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
org.eclipse.jdt.core.compiler.compliance=1.6
org.eclipse.jdt.core.compiler.source=1.6

@ -0,0 +1,2 @@
eclipse.preferences.version=1
org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="nz.waikato.ffts"
android:versionCode="1"
android:versionName="1.0">
<uses-sdk android:minSdkVersion="8" />
</manifest>

@ -0,0 +1,18 @@
# This file is used to override default values used by the Ant build system.
#
# This file must be checked into Version Control Systems, as it is
# integral to the build system of your project.
# This file is only used by the Ant script.
# You can use this to override default values such as
# 'source.dir' for the location of your java source folder and
# 'out.dir' for the location of your output folder.
source.dir=../src
# You can also use it define how the release builds are signed by declaring
# the following properties:
# 'key.store' for the location of your keystore and
# 'key.alias' for the name of the key to use.
# The password will be asked during the build when you use the 'release' target.

@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<project name="ffts" default="help">
<!-- The local.properties file is created and updated by the 'android' tool.
It contains the path to the SDK. It should *NOT* be checked into
Version Control Systems. -->
<property file="local.properties" />
<!-- The ant.properties file can be created by you. It is only edited by the
'android' tool to add properties to it.
This is the place to change some Ant specific build properties.
Here are some properties you may want to change/update:
source.dir
The name of the source directory. Default is 'src'.
out.dir
The name of the output directory. Default is 'bin'.
For other overridable properties, look at the beginning of the rules
files in the SDK, at tools/ant/build.xml
Properties related to the SDK location or the project target should
be updated using the 'android' tool with the 'update' action.
This file is an integral part of the build system for your
application and should be checked into Version Control Systems.
-->
<property file="ant.properties" />
    <!-- if sdk.dir was not set from one of the property files, then
get it from the ANDROID_HOME env var.
This must be done before we load project.properties since
the proguard config can use sdk.dir -->
<property environment="env" />
<condition property="sdk.dir" value="${env.ANDROID_HOME}">
<isset property="env.ANDROID_HOME" />
</condition>
<!-- The project.properties file is created and updated by the 'android'
tool, as well as ADT.
This contains project specific properties such as project target, and library
dependencies. Lower level build properties are stored in ant.properties
(or in .classpath for Eclipse projects).
This file is an integral part of the build system for your
application and should be checked into Version Control Systems. -->
<loadproperties srcFile="project.properties" />
<!-- quick check on sdk.dir -->
<fail
message="sdk.dir is missing. Make sure to generate local.properties using 'android update project' or to inject it through the ANDROID_HOME environment variable."
unless="sdk.dir"
/>
<!--
Import per project custom build rules if present at the root of the project.
This is the place to put custom intermediary targets such as:
-pre-build
-pre-compile
-post-compile (This is typically used for code obfuscation.
Compiled code location: ${out.classes.absolute.dir}
If this is not done in place, override ${out.dex.input.absolute.dir})
-post-package
-post-build
-pre-clean
-->
<import file="custom_rules.xml" optional="true" />
<!-- Import the actual build file.
To customize existing targets, there are two options:
- Customize only one target:
- copy/paste the target into this file, *before* the
<import> task.
- customize it to your needs.
- Customize the whole content of build.xml
- copy/paste the content of the rules files (minus the top node)
into this file, replacing the <import> task.
- customize to your needs.
***********************
****** IMPORTANT ******
***********************
In all cases you must update the value of version-tag below to read 'custom' instead of an integer,
in order to avoid having your file be overridden by tools such as "android update project"
-->
<!-- version-tag: 1 -->
<import file="${sdk.dir}/tools/ant/build.xml" />
</project>

@ -0,0 +1,25 @@
LOCAL_PATH := $(call my-dir)
TOP=../../..
# Include the shared library
#include $(CLEAR_VARS)
#LOCAL_MODULE := ffts
#LOCAL_SRC_FILES := ../../../src/.libs/libffts.so
#include $(PREBUILT_SHARED_LIBRARY)
# Include the static library in shared lib
include $(CLEAR_VARS)
LOCAL_MODULE := ffts
LOCAL_SRC_FILES := $(TOP)/java/android/bin/lib/libffts.a
LOCAL_EXPORT_C_INCLUDES := $(TOP)/include
include $(PREBUILT_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := ffts_jni
LOCAL_CFLAGS := -I$(TOP)/include -I$(TOP)/java/jni -I$(TOP) -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast
LOCAL_SRC_FILES := $(TOP)/java/jni/ffts_jni.c
LOCAL_LDLIBS := -L$(SYSROOT)/usr/lib -llog
LOCAL_STATIC_LIBRARIES := ffts
include $(BUILD_SHARED_LIBRARY)

@ -0,0 +1,2 @@
# requires NEON atm
APP_ABI := armeabi-v7a

@ -0,0 +1,20 @@
# To enable ProGuard in your project, edit project.properties
# to define the proguard.config property as described in that file.
#
# Add project specific ProGuard rules here.
# By default, the flags in this file are appended to flags specified
# in ${sdk.dir}/tools/proguard/proguard-android.txt
# You can edit the include path and order by changing the ProGuard
# include property in project.properties.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# Add any project specific keep options here:
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}

@ -0,0 +1,15 @@
# This file is automatically generated by Android Tools.
# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
#
# This file must be checked in Version Control Systems.
#
# To customize properties used by the Ant build system edit
# "ant.properties", and override values to adapt the script to your
# project structure.
#
# To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
#proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
android.library=true
# Project target.
target=android-10

@ -38,6 +38,8 @@
// the classes ... but we can't build the project without the jni.
#ifdef ANDROID
#include <jni.h>
#define NEEDS_ALIGNED
#undef HAVE_DECL_POSIX_MEMALIGN
#else
#include "nz_ac_waikato_ffts_FFTS.h"
#endif
@ -231,3 +233,5 @@ JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_free
ffts_free(plan);
}
// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:

@ -0,0 +1,203 @@
/*
* This file is part of FFTS -- The Fastest Fourier Transform in the South
*
* Copyright (c) 2013, Michael Zucchi <notzed@gmail.com>
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the organization nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package nz.ac.waikato.ffts;
import java.nio.FloatBuffer;
/**
* A java wrapper for ffts plans.
*
* Plans must currently be freed explicitly.
*
* @author notzed
*/
public class FFTS {
/**
* C pointer
*/
private long p;
/**
* Minimum size of input
*/
final protected long inSize;
/**
* Minimum size of output
*/
final protected long outSize;
private FFTS(long p, long inSize) {
this(p, inSize, inSize);
}
private FFTS(long p, long inSize, long outSize) {
this.p = p;
this.inSize = inSize;
this.outSize = outSize;
}
/**
* The sign to use for a forward transform.
*/
public static final int FORWARD = -1;
/**
* The sign to use for a backward transform.
*/
public static final int BACKWARD = 1;
/**
* Create a FFT plan for a 1-dimensional complex transform.
*
* The src and dst parameters to execute() use complex data.
*
* @param sign The direction of the transform.
* @param N The size of the transform.
* @return
*/
public static FFTS complex(int sign, int N) {
return new FFTS(complex_1d(N, sign), N * 2);
}
/**
* Create a FFT plan for a 2-dimensional complex transform.
* @param sign The direction of the transform.
* @param N1 The size of the transform.
* @param N2 The size of the transform.
* @return
*/
public static FFTS complex(int sign, int N1, int N2) {
return new FFTS(complex_2d(N1, N2, sign), N1 * N2 * 2);
}
public static FFTS complex(int sign, int... Ns) {
return new FFTS(complex_nd(Ns, sign), size(Ns) * 2);
}
public static FFTS real(int sign, int N) {
return new FFTS(real_1d(N, sign), sign == FORWARD ? N : (N / 2 + 1) * 2, sign == FORWARD ? (N / 2 + 1) * 2 : N);
}
public static FFTS real(int sign, int N1, int N2) {
return new FFTS(real_2d(N1, N2, sign), sign == FORWARD ? N1 * N2 : (N1 * N2 / 2 + 1) * 2, sign == FORWARD ? (N1 * N2 / 2 + 1) * 2 : N1 * N2);
}
public static FFTS real(int sign, int... Ns) {
return new FFTS(real_nd(Ns, sign), sign == FORWARD ? size(Ns) : (size(Ns) / 2 + 1) * 2, sign == FORWARD ? (size(Ns) / 2 + 1) * 2 : size(Ns));
}
/**
* Execute this plan with the given array data.
*
* @param src
* @param dst
*/
public void execute(float[] src, float[] dst) {
execute(src, 0, dst, 0);
}
/**
* Execute this plan with the given array data.
* @param src
* @param soff Start offset into src array.
* @param dst
* @param doff Start offset into dst array.
*/
public void execute(float[] src, int soff, float[] dst, int doff) {
if (src.length - soff < inSize || dst.length - doff < outSize)
throw new ArrayIndexOutOfBoundsException();
if (p == 0)
throw new NullPointerException();
execute(p, inSize, src, soff, dst, doff);
}
/**
* Execute this plan with the given nio buffers. The buffers
* must be derived from direct buffers.
*
* The buffer position and limits are ignored.
*
* @param src
* @param dst
*/
public void execute(FloatBuffer src, FloatBuffer dst) {
if (src.capacity() < inSize || dst.capacity() < outSize)
throw new ArrayIndexOutOfBoundsException();
if (p == 0)
throw new NullPointerException();
execute(p, inSize, src, dst);
}
/**
* Free the plan.
*/
public void free() {
if (p == 0)
throw new NullPointerException();
free(p);
}
/*
* Calculate the number of elements required to store one
* set of n-dimensional data.
*/
protected static long size(int[] Ns) {
long s = Ns[0];
for (int i = 1; i < Ns.length; i++)
s *= Ns[i];
return s;
}
static {
System.loadLibrary("ffts_jni");
}
/*
* Native interface
*/
protected static native long complex_1d(int N, int sign);
protected static native long complex_2d(int N1, int N2, int sign);
protected static native long complex_nd(int[] Ns, int sign);
protected static native long real_1d(int N, int sign);
protected static native long real_2d(int N1, int N2, int sign);
protected static native long real_nd(int[] Ns, int sign);
protected static native void execute(long p, long size, float[] src, int soff, float[] dst, int doff);
protected static native void execute(long p, long size, FloatBuffer src, FloatBuffer dst);
protected static native void free(long p);
}
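The inSize/outSize arithmetic in the real() factories above mirrors the real-transform layout of the C API: a forward real transform of N samples produces N/2 + 1 complex values, i.e. (N/2 + 1) * 2 floats. A rough sketch of the same sizing done directly against ffts.h follows; the concrete N and the plain malloc calls are assumptions for illustration (an aligned allocator would normally be preferred).

#include <stdlib.h>
#include "ffts.h"

/* Forward real-to-complex transform of N real samples:
 *   input  = N floats,
 *   output = (N / 2 + 1) * 2 floats,
 * which is exactly the outSize computed by FFTS.real(FORWARD, N) above. */
static int real_forward_example(void)
{
    size_t N = 8; /* 8 real inputs -> 5 complex outputs = 10 floats */
    float *in  = malloc(N * sizeof(float));
    float *out = malloc((N / 2 + 1) * 2 * sizeof(float));

    ffts_plan_t *p = ffts_init_1d_real(N, FFTS_FORWARD);
    if (!p)
        return 1;

    for (size_t i = 0; i < N; i++)
        in[i] = (float) i;

    ffts_execute(p, in, out);

    ffts_free(p);
    free(in);
    free(out);
    return 0;
}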

@ -0,0 +1,144 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_class.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_CLASS
#
# DESCRIPTION
#
# AX_CHECK_CLASS tests the existence of a given Java class, either in a
# jar or in a '.class' file.
#
# *Warning*: its success or failure can depend on a proper setting of the
# CLASSPATH env. variable.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 7
AU_ALIAS([AC_CHECK_CLASS], [AX_CHECK_CLASS])
AC_DEFUN([AX_CHECK_CLASS],[
AC_REQUIRE([AX_PROG_JAVA])
ac_var_name=`echo $1 | sed 's/\./_/g'`
dnl Normally I'd use an AC_CACHE_CHECK here but since the variable name is
dnl dynamic I need an extra level of extraction
AC_MSG_CHECKING([for $1 class])
AC_CACHE_VAL(ax_cv_class_$ac_var_name, [
if test x$ac_cv_prog_uudecode_base64 = xyes; then
dnl /**
dnl * Test.java: used to test dynamically if a class exists.
dnl */
dnl public class Test
dnl {
dnl
dnl public static void
dnl main( String[] argv )
dnl {
dnl Class lib;
dnl if (argv.length < 1)
dnl {
dnl System.err.println ("Missing argument");
dnl System.exit (77);
dnl }
dnl try
dnl {
dnl lib = Class.forName (argv[0]);
dnl }
dnl catch (ClassNotFoundException e)
dnl {
dnl System.exit (1);
dnl }
dnl lib = null;
dnl System.exit (0);
dnl }
dnl
dnl }
cat << \EOF > Test.uue
begin-base64 644 Test.class
yv66vgADAC0AKQcAAgEABFRlc3QHAAQBABBqYXZhL2xhbmcvT2JqZWN0AQAE
bWFpbgEAFihbTGphdmEvbGFuZy9TdHJpbmc7KVYBAARDb2RlAQAPTGluZU51
bWJlclRhYmxlDAAKAAsBAANlcnIBABVMamF2YS9pby9QcmludFN0cmVhbTsJ
AA0ACQcADgEAEGphdmEvbGFuZy9TeXN0ZW0IABABABBNaXNzaW5nIGFyZ3Vt
ZW50DAASABMBAAdwcmludGxuAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWCgAV
ABEHABYBABNqYXZhL2lvL1ByaW50U3RyZWFtDAAYABkBAARleGl0AQAEKEkp
VgoADQAXDAAcAB0BAAdmb3JOYW1lAQAlKExqYXZhL2xhbmcvU3RyaW5nOylM
amF2YS9sYW5nL0NsYXNzOwoAHwAbBwAgAQAPamF2YS9sYW5nL0NsYXNzBwAi
AQAgamF2YS9sYW5nL0NsYXNzTm90Rm91bmRFeGNlcHRpb24BAAY8aW5pdD4B
AAMoKVYMACMAJAoAAwAlAQAKU291cmNlRmlsZQEACVRlc3QuamF2YQAhAAEA
AwAAAAAAAgAJAAUABgABAAcAAABtAAMAAwAAACkqvgSiABCyAAwSD7YAFBBN
uAAaKgMyuAAeTKcACE0EuAAaAUwDuAAasQABABMAGgAdACEAAQAIAAAAKgAK
AAAACgAAAAsABgANAA4ADgATABAAEwASAB4AFgAiABgAJAAZACgAGgABACMA
JAABAAcAAAAhAAEAAQAAAAUqtwAmsQAAAAEACAAAAAoAAgAAAAQABAAEAAEA
JwAAAAIAKA==
====
EOF
if $UUDECODE Test.uue; then
:
else
echo "configure: __oline__: uudecode had trouble decoding base 64 file 'Test.uue'" >&AS_MESSAGE_LOG_FD
echo "configure: failed file was:" >&AS_MESSAGE_LOG_FD
cat Test.uue >&AS_MESSAGE_LOG_FD
ac_cv_prog_uudecode_base64=no
fi
rm -f Test.uue
if AC_TRY_COMMAND($JAVA $JAVAFLAGS Test $1) >/dev/null 2>&1; then
eval "ac_cv_class_$ac_var_name=yes"
else
eval "ac_cv_class_$ac_var_name=no"
fi
rm -f Test.class
else
AX_TRY_COMPILE_JAVA([$1], , [eval "ac_cv_class_$ac_var_name=yes"],
[eval "ac_cv_class_$ac_var_name=no"])
fi
eval "ac_var_val=$`eval echo ac_cv_class_$ac_var_name`"
eval "HAVE_$ac_var_name=$`echo ac_cv_class_$ac_var_val`"
HAVE_LAST_CLASS=$ac_var_val
if test x$ac_var_val = xyes; then
ifelse([$2], , :, [$2])
else
ifelse([$3], , :, [$3])
fi
])
dnl for some reason the above statement didn't fall through here?
dnl do scripts have variable scoping?
eval "ac_var_val=$`eval echo ac_cv_class_$ac_var_name`"
AC_MSG_RESULT($ac_var_val)
])

@ -0,0 +1,101 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_java_plugin.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_JAVA_PLUGIN(<shell-variable>)
#
# DESCRIPTION
#
# This macro sets <shell-variable> to empty on failure and to a compatible
# version of plugin.jar otherwise. Directories searched are /usr/java/*
# and /usr/local/java/*, which are assumed to be j{dk,re} installations.
# Apply the shell variable as you see fit. If sun changes things so
# <jre>/lib/plugin.jar is not the magic file it will stop working.
#
# This macro assumes that unzip, zipinfo or pkzipc is available (and can
# list the contents of the jar archive). The first two are assumed to work
# similarly enough to the infozip versions. The pkzipc version is
# assumed to work if I understand the documentation on pkware's site but
# YMMV. I do not have access to pkware's version to test it.
#
# LICENSE
#
# Copyright (c) 2008 Duncan Simpson <dps@simpson.demon.co.uk>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 6
AU_ALIAS([DPS_CHECK_PLUGIN], [AX_CHECK_JAVA_PLUGIN])
AC_DEFUN([AX_CHECK_JAVA_PLUGIN],
[AC_REQUIRE([AC_PROG_AWK])
AC_REQUIRE([AC_PROG_FGREP])
AC_CHECK_PROG(ZIPINFO,[zipinfo unzip pkzipc])
AC_MSG_CHECKING([for the java plugin])
case "x$ZIPINFO" in
[*/zipinfo)]
zipinf="zipinfo -1" ;;
[*/unzip)]
zipinf="unzip -l";;
[*/pkzipc)]
zipinf="pkzipc -view";;
[x*)]
AC_MSG_RESULT([skipped, none of zipinfo, unzip and pkzipc found])
AC_SUBST($1,[])
zipinf="";;
esac
if test "x$zipinf" != "x"; then
jplugin=""
for jhome in `ls -dr /usr/java/* /usr/local/java/* 2> /dev/null`; do
for jfile in lib/plugin.jar jre/lib/plugin.jar; do
if test "x$jplugin" = "x" && test -f "$jhome/$jfile"; then
eval "$zipinf $jhome/$jfile | $AWK '{ print \$NF; }' | $FGREP netscape/javascript/JSObject" >/dev/null 2>/dev/null
if test $? -eq 0; then
dnl Some version of gcj (and javac) refuse to work with some files
dnl that pass this test. To stop this problem make sure that the compiler
dnl still works with this jar file in the classpath
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
public class Test {
}
EOF
if eval "$JAVAC -classpath $jhome/$jfile Test.java 2>/dev/null >/dev/null" && test -f Test.class; then
jplugin="$jhome/$jfile"
fi
rm -f Test.java Test.class
fi; fi; done; done
if test "x$jplugin" != "x"; then
AC_SUBST($1,$jplugin)
AC_MSG_RESULT($jplugin)
else
AC_MSG_RESULT([java plugin not found])
AC_SUBST($1,[])
fi
fi
])

@ -0,0 +1,85 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_java_check_class.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_JAVA_CHECK_CLASS(<class>,<action-if-found>,<action-if-not-found>)
#
# DESCRIPTION
#
# Test if a Java class is available. Based on AX_PROG_JAVAC_WORKS. This
# version uses a cache variable which is both compiler, options and
# classpath dependent (so if you switch from javac to gcj it correctly
# notices and redoes the test).
#
# The macro tries to compile a minimal program importing <class>. Some
# newer compilers moan about the failure to use this but fail or produce a
# class file anyway. All moaning is sunk to /dev/null since I only wanted
# to know if the class could be imported. This is a recommended followup
# to AX_CHECK_JAVA_PLUGIN with classpath appropriately adjusted.
#
# LICENSE
#
# Copyright (c) 2008 Duncan Simpson <dps@simpson.demon.co.uk>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
AU_ALIAS([DPS_JAVA_CHECK_CLASS], [AX_JAVA_CHECK_CLASS])
AC_DEFUN([AX_JAVA_CHECK_CLASS],[
m4_define([cache_val],[m4_translit(ax_cv_have_java_class_$1, " ." ,"__")])
if test "x$CLASSPATH" != "x"; then
xtra=" with classpath ${CLASSPATH}"
xopts=`echo ${CLASSPATH} | ${SED} 's/^ *://'`
xopts="-classpath $xopts"
else xtra=""; xopts=""; fi
cache_var="cache_val"AS_TR_SH([_Jc_${JAVAC}_Cp_${CLASSPATH}])
AC_CACHE_CHECK([if the $1 class is available$xtra], [$cache_var], [
JAVA_TEST=Test.java
CLASS_TEST=Test.class
cat << \EOF > $JAVA_TEST
/* [#]xline __oline__ "configure" */
import $1;
public class Test {
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $xopts $JAVA_TEST) >/dev/null 2>&1; then
eval "${cache_var}=yes"
else
eval "${cache_var}=no"
echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
fi
rm -f $JAVA_TEST $CLASS_TEST
])
if eval 'test "x$'${cache_var}'" = "xyes"'; then
$2
true; else
$3
false; fi])

@ -0,0 +1,115 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_java.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVA
#
# DESCRIPTION
#
# Here is a summary of the main macros:
#
# AX_PROG_JAVAC: finds a Java compiler.
#
# AX_PROG_JAVA: finds a Java virtual machine.
#
# AX_CHECK_CLASS: finds if we have the given class (beware of CLASSPATH!).
#
# AX_CHECK_RQRD_CLASS: finds if we have the given class and stops
# otherwise.
#
# AX_TRY_COMPILE_JAVA: attempt to compile user given source.
#
# AX_TRY_RUN_JAVA: attempt to compile and run user given source.
#
# AX_JAVA_OPTIONS: adds Java configure options.
#
# AX_PROG_JAVA tests an existing Java virtual machine. It uses the
# environment variable JAVA then tests in sequence various common Java
# virtual machines. For political reasons, it starts with the free ones.
# You *must* call [AX_PROG_JAVAC] before.
#
# If you want to force a specific VM:
#
# - at the configure.in level, set JAVA=yourvm before calling AX_PROG_JAVA
#
# (but after AC_INIT)
#
# - at the configure level, setenv JAVA
#
# You can use the JAVA variable in your Makefile.in, with @JAVA@.
#
# *Warning*: its success or failure can depend on a proper setting of the
# CLASSPATH env. variable.
#
# TODO: allow to exclude virtual machines (rationale: most Java programs
# cannot run with some VM like kaffe).
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission.
#
# A Web page, with a link to the latest CVS snapshot is at
# <http://www.internatif.org/bortzmeyer/autoconf-Java/>.
#
# This is a sample configure.in Process this file with autoconf to produce
# a configure script.
#
# AC_INIT(UnTag.java)
#
# dnl Checks for programs.
# AC_CHECK_CLASSPATH
# AX_PROG_JAVAC
# AX_PROG_JAVA
#
# dnl Checks for classes
# AX_CHECK_RQRD_CLASS(org.xml.sax.Parser)
# AX_CHECK_RQRD_CLASS(com.jclark.xml.sax.Driver)
#
# AC_OUTPUT(Makefile)
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
AU_ALIAS([AC_PROG_JAVA], [AX_PROG_JAVA])
AC_DEFUN([AX_PROG_JAVA],[
if test x$JAVAPREFIX = x; then
test x$JAVA = x && AC_CHECK_PROGS(JAVA, kaffe java)
else
test x$JAVA = x && AC_CHECK_PROGS(JAVA, kaffe java, $JAVAPREFIX)
fi
test x$JAVA = x && AC_MSG_ERROR([no acceptable Java virtual machine found in \$PATH])
AX_PROG_JAVA_WORKS
AC_PROVIDE([$0])dnl
])

@ -0,0 +1,104 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_java_cc.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVA_CC
#
# DESCRIPTION
#
# Finds the appropriate java compiler on your path. By preference the java
# compiler is gcj, then jikes then javac.
#
# The macro can take one argument specifying a space separated list of
# java compiler names.
#
# For example:
#
# AX_PROG_JAVA_CC(javac, gcj)
#
# The macro also sets the compiler options variable: JAVA_CC_OPTS to
# something sensible:
#
# - for GCJ it sets it to: @GCJ_OPTS@
# (if GCJ_OPTS is not yet defined then it is set to "-C")
#
# - no other compiler has applicable options yet
#
# Here's an example configure.in:
#
# AC_INIT(Makefile.in)
# AX_PROG_JAVA_CC()
# AC_OUTPUT(Makefile)
# dnl End.
#
# And here's the start of the Makefile.in:
#
# PROJECT_ROOT := @srcdir@
# # Tool definitions.
# JAVAC := @JAVA_CC@
# JAVAC_OPTS := @JAVA_CC_OPTS@
# JAR_TOOL := @jar_tool@
#
# LICENSE
#
# Copyright (c) 2008 Nic Ferrier <nferrier@tapsellferrier.co.uk>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 4
# AX_PROG_JAVA_CC([COMPILER ...])
# --------------------------
# COMPILER ... is a space separated list of java compilers to search for.
# This just gives the user an opportunity to specify an alternative
# search list for the java compiler.
AU_ALIAS([AC_PROG_JAVA_CC], [AX_PROG_JAVA_CC])
AC_DEFUN([AX_PROG_JAVA_CC],
[AC_ARG_VAR([JAVA_CC], [java compiler command])dnl
AC_ARG_VAR([JAVA_CC_FLAGS], [java compiler flags])dnl
m4_ifval([$1],
[AC_CHECK_TOOLS(JAVA_CC, [$1])],
[AC_CHECK_TOOL(JAVA_CC, gcj)
if test -z "$JAVA_CC"; then
AC_CHECK_TOOL(JAVA_CC, javac)
fi
if test -z "$JAVA_CC"; then
AC_CHECK_TOOL(JAVA_CC, jikes)
fi
])
if test "$JAVA_CC" = "gcj"; then
if test "$GCJ_OPTS" = ""; then
AC_SUBST(GCJ_OPTS,-C)
fi
AC_SUBST(JAVA_CC_OPTS, @GCJ_OPTS@,
[Define the compilation options for GCJ])
fi
test -z "$JAVA_CC" && AC_MSG_ERROR([no acceptable java compiler found in \$PATH])
])# AX_PROG_JAVA_CC

@ -0,0 +1,134 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_java_works.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVA_WORKS
#
# DESCRIPTION
#
# Internal use ONLY.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 8
AU_ALIAS([AC_PROG_JAVA_WORKS], [AX_PROG_JAVA_WORKS])
AC_DEFUN([AX_PROG_JAVA_WORKS], [
AC_PATH_PROG(UUDECODE, uudecode, [no])
if test x$UUDECODE != xno; then
AC_CACHE_CHECK([if uudecode can decode base 64 file], ac_cv_prog_uudecode_base64, [
dnl /**
dnl * Test.java: used to test if java compiler works.
dnl */
dnl public class Test
dnl {
dnl
dnl public static void
dnl main( String[] argv )
dnl {
dnl System.exit (0);
dnl }
dnl
dnl }
cat << \EOF > Test.uue
begin-base64 644 Test.class
yv66vgADAC0AFQcAAgEABFRlc3QHAAQBABBqYXZhL2xhbmcvT2JqZWN0AQAE
bWFpbgEAFihbTGphdmEvbGFuZy9TdHJpbmc7KVYBAARDb2RlAQAPTGluZU51
bWJlclRhYmxlDAAKAAsBAARleGl0AQAEKEkpVgoADQAJBwAOAQAQamF2YS9s
YW5nL1N5c3RlbQEABjxpbml0PgEAAygpVgwADwAQCgADABEBAApTb3VyY2VG
aWxlAQAJVGVzdC5qYXZhACEAAQADAAAAAAACAAkABQAGAAEABwAAACEAAQAB
AAAABQO4AAyxAAAAAQAIAAAACgACAAAACgAEAAsAAQAPABAAAQAHAAAAIQAB
AAEAAAAFKrcAErEAAAABAAgAAAAKAAIAAAAEAAQABAABABMAAAACABQ=
====
EOF
if $UUDECODE Test.uue; then
ac_cv_prog_uudecode_base64=yes
else
echo "configure: __oline__: uudecode had trouble decoding base 64 file 'Test.uue'" >&AS_MESSAGE_LOG_FD
echo "configure: failed file was:" >&AS_MESSAGE_LOG_FD
cat Test.uue >&AS_MESSAGE_LOG_FD
ac_cv_prog_uudecode_base64=no
fi
rm -f Test.uue])
fi
if test x$ac_cv_prog_uudecode_base64 != xyes; then
rm -f Test.class
AC_MSG_WARN([I have to compile Test.class from scratch])
if test x$ac_cv_prog_javac_works = xno; then
AC_MSG_ERROR([Cannot compile java source. $JAVAC does not work properly])
fi
if test x$ac_cv_prog_javac_works = x; then
AX_PROG_JAVAC
fi
fi
AC_CACHE_CHECK(if $JAVA works, ac_cv_prog_java_works, [
JAVA_TEST=Test.java
CLASS_TEST=Test.class
TEST=Test
changequote(, )dnl
cat << \EOF > $JAVA_TEST
/* [#]line __oline__ "configure" */
public class Test {
public static void main (String args[]) {
System.exit (0);
} }
EOF
changequote([, ])dnl
if test x$ac_cv_prog_uudecode_base64 != xyes; then
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $JAVA_TEST) && test -s $CLASS_TEST; then
:
else
echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
AC_MSG_ERROR(The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?))
fi
fi
if AC_TRY_COMMAND($JAVA $JAVAFLAGS $TEST) >/dev/null 2>&1; then
ac_cv_prog_java_works=yes
else
echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
AC_MSG_ERROR(The Java VM $JAVA failed (see config.log, check the CLASSPATH?))
fi
rm -fr $JAVA_TEST $CLASS_TEST Test.uue
])
AC_PROVIDE([$0])dnl
]
)

@ -0,0 +1,52 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_javadoc.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVADOC
#
# DESCRIPTION
#
# AX_PROG_JAVADOC tests for an existing javadoc generator. It uses the
# environment variable JAVADOC, then tests in sequence various common
# javadoc generators.
#
# If you want to force a specific javadoc generator:
#
# - at the configure.in level, set JAVADOC=yourgenerator before calling
# AX_PROG_JAVADOC
#
# - at the configure level, setenv JAVADOC
#
# You can use the JAVADOC variable in your Makefile.in, with @JAVADOC@.
#
# Note: This macro depends on the autoconf M4 macros for Java programs. It
# is VERY IMPORTANT that you download that whole set, as some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of a set of macros, so I had to break it up for submission.
#
# The general documentation of those macros, as well as the sample
# configure.in, is included in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Egon Willighagen <e.willighagen@science.ru.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
AU_ALIAS([AC_PROG_JAVADOC], [AX_PROG_JAVADOC])
AC_DEFUN([AX_PROG_JAVADOC],[
if test "x$JAVAPREFIX" = x; then
test "x$JAVADOC" = x && AC_CHECK_PROGS(JAVADOC, javadoc)
else
test "x$JAVADOC" = x && AC_CHECK_PROGS(JAVADOC, javadoc, $JAVAPREFIX)
fi
test "x$JAVADOC" = x && AC_MSG_ERROR([no acceptable javadoc generator found in \$PATH])
AC_PROVIDE([$0])dnl
])

@ -0,0 +1,43 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_javah.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVAH
#
# DESCRIPTION
#
# AX_PROG_JAVAH tests the availability of the javah header generator and
# looks for the jni.h header file. If available, JAVAH is set to the full
# path of javah and CPPFLAGS is updated accordingly.
#
# LICENSE
#
# Copyright (c) 2008 Luc Maisonobe <luc@spaceroots.org>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 5
AU_ALIAS([AC_PROG_JAVAH], [AX_PROG_JAVAH])
AC_DEFUN([AX_PROG_JAVAH],[
AC_REQUIRE([AC_CANONICAL_SYSTEM])dnl
AC_REQUIRE([AC_PROG_CPP])dnl
AC_PATH_PROG(JAVAH,javah)
if test x"`eval 'echo $ac_cv_path_JAVAH'`" != x ; then
AC_TRY_CPP([#include <jni.h>],,[
ac_save_CPPFLAGS="$CPPFLAGS"
changequote(, )dnl
ac_dir=`echo $ac_cv_path_JAVAH | sed 's,\(.*\)/[^/]*/[^/]*$,\1/include,'`
ac_machdep=`echo $build_os | sed 's,[-0-9].*,,' | sed 's,cygwin,win32,'`
changequote([, ])dnl
CPPFLAGS="$ac_save_CPPFLAGS -I$ac_dir -I$ac_dir/$ac_machdep"
AC_TRY_CPP([#include <jni.h>],
ac_save_CPPFLAGS="$CPPFLAGS",
AC_MSG_WARN([unable to include <jni.h>]))
CPPFLAGS="$ac_save_CPPFLAGS"])
fi])

@ -0,0 +1,55 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_try_compile_java.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_TRY_COMPILE_JAVA
#
# DESCRIPTION
#
# AX_TRY_COMPILE_JAVA attempts to compile user-given source.
#
# *Warning*: its success or failure can depend on a proper setting of the
# CLASSPATH env. variable.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, as some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of a set of macros, so I had to break it up for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 7
AU_ALIAS([AC_TRY_COMPILE_JAVA], [AX_TRY_COMPILE_JAVA])
AC_DEFUN([AX_TRY_COMPILE_JAVA],[
AC_REQUIRE([AX_PROG_JAVAC])dnl
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
ifelse([$1], , , [import $1;])
public class Test {
[$2]
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS Test.java) && test -s Test.class
then
dnl Don't remove the temporary files here, so they can be examined.
ifelse([$3], , :, [$3])
else
echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
cat Test.java >&AS_MESSAGE_LOG_FD
ifelse([$4], , , [ rm -fr Test*
$4
])dnl
fi
rm -fr Test*])

@ -0,0 +1,56 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_try_run_java.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_TRY_RUN_JAVA
#
# DESCRIPTION
#
# AX_TRY_RUN_JAVA attempts to compile and run user-given source.
#
# *Warning*: its success or failure can depend on a proper setting of the
# CLASSPATH env. variable.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, as some macros depend
# on others. Unfortunately, the autoconf archive does not support the
# concept of a set of macros, so I had to break it up for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 1
AU_ALIAS([AC_TRY_RUN_JAVA], [AX_TRY_RUN_JAVA])
AC_DEFUN([AX_TRY_RUN_JAVA],[
AC_REQUIRE([AX_PROG_JAVAC])dnl
AC_REQUIRE([AX_PROG_JAVA])dnl
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
ifelse([$1], , , [import $1;])
public class Test {
[$2]
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS Test.java) && test -s Test.class && ($JAVA $JAVAFLAGS Test; exit) 2>/dev/null
then
dnl Don't remove the temporary files here, so they can be examined.
ifelse([$3], , :, [$3])
else
echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
cat Test.java >&AS_MESSAGE_LOG_FD
ifelse([$4], , , [ rm -fr Test*
$4
])dnl
fi
rm -fr Test*])

@ -20,10 +20,10 @@ libffts_la_SOURCES += vfp.s
else
if HAVE_NEON
libffts_la_SOURCES += neon.s
if DYNAMIC_DISABLED
libffts_la_SOURCES += neon_static_f.s neon_static_i.s
else
libffts_la_SOURCES += neon.s
endif
else

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -16,23 +16,51 @@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
test $$am__dry = yes; \
}
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@ -54,12 +82,12 @@ host_triplet = @host@
@DYNAMIC_DISABLED_TRUE@am__append_1 = ffts_static.c
@DYNAMIC_DISABLED_FALSE@am__append_2 = codegen.c
@HAVE_VFP_TRUE@am__append_3 = vfp.s
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_4 = neon_static_f.s neon_static_i.s
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_5 = neon.s
@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_4 = neon.s
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_5 = neon_static_f.s neon_static_i.s
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__append_6 = sse.s
subdir = src
DIST_COMMON = $(libffts_include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(top_srcdir)/depcomp
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/depcomp $(libffts_include_HEADERS)
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
@ -111,14 +139,14 @@ am__libffts_la_SOURCES_DIST = ffts.c ffts_small.c ffts_nd.c \
codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h \
ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h \
macros-neon.h macros-sse.h macros.h neon.h neon_float.h \
patterns.h types.h vfp.h ffts_static.c codegen.c vfp.s \
neon_static_f.s neon_static_i.s neon.s sse.s
patterns.h types.h vfp.h ffts_static.c codegen.c vfp.s neon.s \
neon_static_f.s neon_static_i.s sse.s
@DYNAMIC_DISABLED_TRUE@am__objects_1 = ffts_static.lo
@DYNAMIC_DISABLED_FALSE@am__objects_2 = codegen.lo
@HAVE_VFP_TRUE@am__objects_3 = vfp.lo
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_4 = neon_static_f.lo \
@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_4 = neon.lo
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_5 = neon_static_f.lo \
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@ neon_static_i.lo
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_5 = neon.lo
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__objects_6 = \
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@ sse.lo
am_libffts_la_OBJECTS = ffts.lo ffts_small.lo ffts_nd.lo ffts_real.lo \
@ -126,22 +154,52 @@ am_libffts_la_OBJECTS = ffts.lo ffts_small.lo ffts_nd.lo ffts_real.lo \
$(am__objects_3) $(am__objects_4) $(am__objects_5) \
$(am__objects_6)
libffts_la_OBJECTS = $(am_libffts_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
LTCCASCOMPILE = $(LIBTOOL) $(AM_V_lt) $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CCAS) $(AM_CCASFLAGS) \
$(CCASFLAGS)
AM_V_CCAS = $(am__v_CCAS_@AM_V@)
am__v_CCAS_ = $(am__v_CCAS_@AM_DEFAULT_V@)
am__v_CCAS_0 = @echo " CCAS " $@;
am__v_CCAS_1 =
SOURCES = $(libffts_la_SOURCES)
DIST_SOURCES = $(am__libffts_la_SOURCES_DIST)
am__can_run_installinfo = \
@ -150,11 +208,29 @@ am__can_run_installinfo = \
*) (install-info --version) >/dev/null 2>&1;; \
esac
HEADERS = $(libffts_include_HEADERS)
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
@ -328,6 +404,7 @@ $(top_srcdir)/configure: $(am__configure_deps)
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
@ -362,8 +439,9 @@ clean-libLTLIBRARIES:
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libffts.la: $(libffts_la_OBJECTS) $(libffts_la_DEPENDENCIES) $(EXTRA_libffts_la_DEPENDENCIES)
$(LINK) -rpath $(libdir) $(libffts_la_OBJECTS) $(libffts_la_LIBADD) $(LIBS)
$(AM_V_CCLD)$(LINK) -rpath $(libdir) $(libffts_la_OBJECTS) $(libffts_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@ -381,34 +459,34 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/patterns.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
.s.o:
$(CCASCOMPILE) -c -o $@ $<
$(AM_V_CCAS)$(CCASCOMPILE) -c -o $@ $<
.s.obj:
$(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
$(AM_V_CCAS)$(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.s.lo:
$(LTCCASCOMPILE) -c -o $@ $<
$(AM_V_CCAS)$(LTCCASCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
@ -437,26 +515,15 @@ uninstall-libffts_includeHEADERS:
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(libffts_includedir)'; $(am__uninstall_files_from_dir)
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
@ -468,15 +535,11 @@ TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
@ -485,9 +548,10 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
@ -644,20 +708,20 @@ uninstall-am: uninstall-libLTLIBRARIES \
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool cscopelist ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am \
install-libLTLIBRARIES install-libffts_includeHEADERS \
install-man install-pdf install-pdf-am install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
uninstall-am uninstall-libLTLIBRARIES \
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-libLTLIBRARIES \
install-libffts_includeHEADERS install-man install-pdf \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am uninstall-libLTLIBRARIES \
uninstall-libffts_includeHEADERS

@ -0,0 +1,6 @@
/Makefile
/Makefile.in
/.deps
/.libs
/*.la
/*.lo

File diff suppressed because it is too large

@ -0,0 +1,21 @@
Copyright (c) 2001, 2002, 2003 Ximian, Inc and the individuals listed
on the ChangeLog entries.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,11 @@
DIST_SUBDIRS = x86 ppc sparc arm arm64 s390x amd64 ia64 mips
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
if ARM
# arm needs to build some stuff even in JIT mode
SUBDIRS = $(arch_target)
endif
EXTRA_DIST = ChangeLog

@ -0,0 +1,7 @@
mono_arch
=========
Part of Mono project, https://github.com/mono
These are C macros that are useful when generating native code on various platforms.
This code is MIT X11 licensed.

@ -0,0 +1 @@
/arm-wmmx.h -crlf

@ -0,0 +1,15 @@
/Makefile
/Makefile.in
/.deps
/.libs
/*.o
/*.la
/*.lo
/*.lib
/*.obj
/*.exe
/*.dll
/arm_dpimacros.h
/arm_fpamacros.h
/arm_vfpmacros.h
/fixeol.sh

@ -0,0 +1,27 @@
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
noinst_LTLIBRARIES = libmonoarch-arm.la
BUILT_SOURCES = arm_dpimacros.h arm_vfpmacros.h
libmonoarch_arm_la_SOURCES = $(BUILT_SOURCES) \
arm-codegen.c \
arm-codegen.h \
arm-dis.c \
arm-dis.h
arm_dpimacros.h: dpiops.sh mov_macros.th dpi_macros.th cmp_macros.th
(cd $(srcdir); bash ./dpiops.sh) > $@t
mv $@t $@
arm_vfpmacros.h: vfpops.sh vfpm_macros.th vfp_macros.th
(cd $(srcdir); bash ./vfpops.sh) > $@t
mv $@t $@
CLEANFILES = $(BUILT_SOURCES)
EXTRA_DIST = dpiops.sh mov_macros.th dpi_macros.th cmp_macros.th \
vfpm_macros.th vfp_macros.th arm-vfp-codegen.h vfpops.sh

@ -0,0 +1,193 @@
/*
* arm-codegen.c
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
*/
#include "arm-codegen.h"
arminstr_t* arm_emit_std_prologue(arminstr_t* p, unsigned int local_size) {
ARM_MOV_REG_REG(p, ARMREG_IP, ARMREG_SP);
/* save args */
ARM_PUSH(p, (1 << ARMREG_A1)
| (1 << ARMREG_A2)
| (1 << ARMREG_A3)
| (1 << ARMREG_A4));
ARM_PUSH(p, (1U << ARMREG_IP) | (1U << ARMREG_LR));
if (local_size != 0) {
if ((local_size & (~0xFF)) == 0) {
ARM_SUB_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
} else {
/* TODO: optimize */
p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
ARM_SUB_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
ARM_ADD_REG_IMM8(p, ARMREG_IP, ARMREG_IP, sizeof(armword_t));
ARM_LDR_REG_REG(p, ARMREG_IP, ARMREG_SP, ARMREG_IP);
}
}
return p;
}
arminstr_t* arm_emit_std_epilogue(arminstr_t* p, unsigned int local_size, int pop_regs) {
if (local_size != 0) {
if ((local_size & (~0xFF)) == 0) {
ARM_ADD_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
} else {
/* TODO: optimize */
p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
ARM_ADD_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
}
}
ARM_POP_NWB(p, (1 << ARMREG_SP) | (1 << ARMREG_PC) | (pop_regs & 0x3FF));
return p;
}
/* do not push A1-A4 */
arminstr_t* arm_emit_lean_prologue(arminstr_t* p, unsigned int local_size, int push_regs) {
ARM_MOV_REG_REG(p, ARMREG_IP, ARMREG_SP);
/* push_regs up to R10 will be saved */
ARM_PUSH(p, (1U << ARMREG_IP) | (1U << ARMREG_LR) | (push_regs & 0x3FF));
if (local_size != 0) {
if ((local_size & (~0xFF)) == 0) {
ARM_SUB_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
} else {
/* TODO: optimize */
p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
ARM_SUB_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
/* restore IP from stack */
ARM_ADD_REG_IMM8(p, ARMREG_IP, ARMREG_IP, sizeof(armword_t));
ARM_LDR_REG_REG(p, ARMREG_IP, ARMREG_SP, ARMREG_IP);
}
}
return p;
}
/* Bit scan forward. */
int arm_bsf(armword_t val) {
int i;
armword_t mask;
if (val == 0) return 0;
for (i=1, mask=1; (i <= 8 * sizeof(armword_t)) && ((val & mask) == 0); ++i, mask<<=1);
return i;
}
int arm_is_power_of_2(armword_t val) {
return ((val & (val-1)) == 0);
}
/*
* returns:
* 1 - unable to represent
* positive even number - MOV-representable
* negative even number - MVN-representable
*/
int calc_arm_mov_const_shift(armword_t val) {
armword_t mask;
int res = 1, shift;
for (shift=0; shift < 32; shift+=2) {
mask = ARM_SCALE(0xFF, shift);
if ((val & (~mask)) == 0) {
res = shift;
break;
}
if (((~val) & (~mask)) == 0) {
res = -shift - 2;
break;
}
}
return res;
}
int is_arm_const(armword_t val) {
int res;
res = arm_is_power_of_2(val);
if (!res) {
res = calc_arm_mov_const_shift(val);
res = !(res < 0 || res == 1);
}
return res;
}
int arm_const_steps(armword_t val) {
int shift, steps = 0;
while (val != 0) {
shift = (arm_bsf(val) - 1) & (~1);
val &= ~(0xFF << shift);
++steps;
}
return steps;
}
/*
* ARM cannot load arbitrary 32-bit constants directly into registers;
* a widely used work-around is to store constants in a PC-addressable
* pool and use an LDR instruction with a PC-relative address to load a
* constant into a register. The easiest way to implement this is to
* embed the constant inside the function, with an unconditional branch
* around it. That method is not used here at the moment.
* This routine always emits a sequence of instructions to synthesize the
* requested constant. In the worst case it takes 4 instructions -
* 1 MOV followed by 3 subsequent ORRs.
*/
arminstr_t* arm_mov_reg_imm32_cond(arminstr_t* p, int reg, armword_t imm32, int cond) {
int mov_op;
int step_op;
int snip;
int shift = calc_arm_mov_const_shift(imm32);
if ((shift & 0x80000001) != 1) {
if (shift >= 0) {
ARM_MOV_REG_IMM_COND(p, reg, imm32 >> ((32 - shift) & 31), shift, cond);
} else {
ARM_MVN_REG_IMM_COND(p, reg, (imm32 ^ (~0)) >> ((32 + 2 + shift) & 31), (-shift - 2), cond);
}
} else {
mov_op = ARMOP_MOV;
step_op = ARMOP_ORR;
if (arm_const_steps(imm32) > arm_const_steps(~imm32)) {
mov_op = ARMOP_MVN;
step_op = ARMOP_SUB;
imm32 = ~imm32;
}
shift = (arm_bsf(imm32) - 1) & (~1);
snip = imm32 & (0xFF << shift);
ARM_EMIT(p, ARM_DEF_DPI_IMM_COND((unsigned)snip >> shift, (32 - shift) >> 1, reg, 0, 0, mov_op, cond));
while ((imm32 ^= snip) != 0) {
shift = (arm_bsf(imm32) - 1) & (~1);
snip = imm32 & (0xFF << shift);
ARM_EMIT(p, ARM_DEF_DPI_IMM_COND((unsigned)snip >> shift, (32 - shift) >> 1, reg, reg, 0, step_op, cond));
}
}
return p;
}
arminstr_t* arm_mov_reg_imm32(arminstr_t* p, int reg, armword_t imm32) {
return arm_mov_reg_imm32_cond(p, reg, imm32, ARMCOND_AL);
}
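A minimal usage sketch for the emitters in this file (illustrative only: the buffer, its size and the helper name are assumptions, and actually running the generated code would additionally need an executable mapping and an instruction-cache flush):
#include "arm-codegen.h"
/* Hypothetical scratch buffer to receive the generated instructions. */
static arminstr_t code_buf[32];
/* Emit a tiny function body: standard prologue, load a 32-bit constant
 * into A1 (first argument / return-value register), standard epilogue. */
arminstr_t* emit_return_constant_example(void)
{
    arminstr_t* p = code_buf;
    p = arm_emit_std_prologue(p, 0);              /* no local stack space */
    /* worst case this expands to 1 MOV plus 3 ORRs, per the comment above */
    p = arm_mov_reg_imm32(p, ARMREG_A1, 0x12345678);
    p = arm_emit_std_epilogue(p, 0, 0);           /* pop no extra registers */
    return p;                                     /* first free slot after the code */
}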

File diff suppressed because it is too large

@ -0,0 +1,509 @@
/*
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
*/
#include <stdarg.h>
#include "arm-dis.h"
#include "arm-codegen.h"
static ARMDis* gdisasm = NULL;
static int use_reg_alias = 1;
const static char* cond[] = {
"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
"hi", "ls", "ge", "lt", "gt", "le", "", "nv"
};
const static char* ops[] = {
"and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc",
"tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn"
};
const static char* shift_types[] = {"lsl", "lsr", "asr", "ror"};
const static char* mul_ops[] = {
"mul", "mla", "?", "?", "umull", "umlal", "smull", "smlal"
};
const static char* reg_alias[] = {
"a1", "a2", "a3", "a4",
"r4", "r5", "r6", "r7", "r8", "r9", "r10",
"fp", "ip", "sp", "lr", "pc"
};
const static char* msr_fld[] = {"f", "c", "x", "?", "s"};
/* private functions prototypes (to keep compiler happy) */
void chk_out(ARMDis* dis);
void dump_reg(ARMDis* dis, int reg);
void dump_creg(ARMDis* dis, int creg);
void dump_reglist(ARMDis* dis, int reg_list);
void init_gdisasm(void);
void dump_br(ARMDis* dis, ARMInstr i);
void dump_cdp(ARMDis* dis, ARMInstr i);
void dump_cdt(ARMDis* dis, ARMInstr i);
void dump_crt(ARMDis* dis, ARMInstr i);
void dump_dpi(ARMDis* dis, ARMInstr i);
void dump_hxfer(ARMDis* dis, ARMInstr i);
void dump_mrs(ARMDis* dis, ARMInstr i);
void dump_mrt(ARMDis* dis, ARMInstr i);
void dump_msr(ARMDis* dis, ARMInstr i);
void dump_mul(ARMDis* dis, ARMInstr i);
void dump_swi(ARMDis* dis, ARMInstr i);
void dump_swp(ARMDis* dis, ARMInstr i);
void dump_wxfer(ARMDis* dis, ARMInstr i);
void dump_clz(ARMDis* dis, ARMInstr i);
/*
void out(ARMDis* dis, const char* format, ...) {
va_list arglist;
va_start(arglist, format);
fprintf(dis->dis_out, format, arglist);
va_end(arglist);
}
*/
void chk_out(ARMDis* dis) {
if (dis != NULL && dis->dis_out == NULL) dis->dis_out = stdout;
}
void armdis_set_output(ARMDis* dis, FILE* f) {
if (dis != NULL) {
dis->dis_out = f;
chk_out(dis);
}
}
FILE* armdis_get_output(ARMDis* dis) {
return (dis != NULL ? dis->dis_out : NULL);
}
void dump_reg(ARMDis* dis, int reg) {
reg &= 0xF;
if (!use_reg_alias || (reg > 3 && reg < 11)) {
fprintf(dis->dis_out, "r%d", reg);
} else {
fprintf(dis->dis_out, "%s", reg_alias[reg]);
}
}
void dump_creg(ARMDis* dis, int creg) {
if (dis != NULL) {
creg &= 0xF;
fprintf(dis->dis_out, "c%d", creg);
}
}
void dump_reglist(ARMDis* dis, int reg_list) {
int i = 0, j, n = 0;
int m1 = 1, m2, rn;
while (i < 16) {
if ((reg_list & m1) != 0) {
if (n != 0) fprintf(dis->dis_out, ", ");
n++;
dump_reg(dis, i);
for (j = i+1, rn = 0, m2 = m1<<1; j < 16; ++j, m2<<=1) {
if ((reg_list & m2) != 0) ++rn;
else break;
}
i+=rn;
if (rn > 1) {
fprintf(dis->dis_out, "-");
dump_reg(dis, i);
} else if (rn == 1) {
fprintf(dis->dis_out, ", ");
dump_reg(dis, i);
}
m1<<=(rn+1);
i++;
} else {
++i;
m1<<=1;
}
}
}
void dump_br(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "b%s%s\t%x\t; %p -> %#x",
(i.br.link == 1) ? "l" : "",
cond[i.br.cond], i.br.offset, dis->pi, (int)dis->pi + 4*2 + ((int)(i.br.offset << 8) >> 6));
}
void dump_dpi(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s", ops[i.dpi.all.opcode], cond[i.dpi.all.cond]);
if ((i.dpi.all.opcode < ARMOP_TST || i.dpi.all.opcode > ARMOP_CMN) && (i.dpi.all.s != 0)) {
fprintf(dis->dis_out, "s");
}
fprintf(dis->dis_out, "\t");
if ((i.dpi.all.opcode < ARMOP_TST) || (i.dpi.all.opcode > ARMOP_CMN)) {
/* for comparison operations Rd is ignored */
dump_reg(dis, i.dpi.all.rd);
fprintf(dis->dis_out, ", ");
}
if ((i.dpi.all.opcode != ARMOP_MOV) && (i.dpi.all.opcode != ARMOP_MVN)) {
/* for MOV/MVN Rn is ignored */
dump_reg(dis, i.dpi.all.rn);
fprintf(dis->dis_out, ", ");
}
if (i.dpi.all.type == 1) {
/* immediate */
if (i.dpi.op2_imm.rot != 0) {
fprintf(dis->dis_out, "#%d, %d\t; 0x%x", i.dpi.op2_imm.imm, i.dpi.op2_imm.rot << 1,
ARM_SCALE(i.dpi.op2_imm.imm, (i.dpi.op2_imm.rot << 1)) );
} else {
fprintf(dis->dis_out, "#%d\t; 0x%x", i.dpi.op2_imm.imm, i.dpi.op2_imm.imm);
}
} else {
/* reg-reg */
if (i.dpi.op2_reg.tag == 0) {
/* op2 is reg shift by imm */
dump_reg(dis, i.dpi.op2_reg_imm.r2.rm);
if (i.dpi.op2_reg_imm.imm.shift != 0) {
fprintf(dis->dis_out, " %s #%d", shift_types[i.dpi.op2_reg_imm.r2.type], i.dpi.op2_reg_imm.imm.shift);
}
} else {
/* op2 is reg shift by reg */
dump_reg(dis, i.dpi.op2_reg_reg.r2.rm);
fprintf(dis->dis_out, " %s ", shift_types[i.dpi.op2_reg_reg.r2.type]);
dump_reg(dis, i.dpi.op2_reg_reg.reg.rs);
}
}
}
void dump_wxfer(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s%s%s\t",
(i.wxfer.all.ls == 0) ? "str" : "ldr",
cond[i.generic.cond],
(i.wxfer.all.b == 0) ? "" : "b",
(i.wxfer.all.ls != 0 && i.wxfer.all.wb != 0) ? "t" : "");
dump_reg(dis, i.wxfer.all.rd);
fprintf(dis->dis_out, ", [");
dump_reg(dis, i.wxfer.all.rn);
fprintf(dis->dis_out, "%s, ", (i.wxfer.all.p == 0) ? "]" : "");
if (i.wxfer.all.type == 0) { /* imm */
fprintf(dis->dis_out, "#%s%d", (i.wxfer.all.u == 0) ? "-" : "", i.wxfer.all.op2_imm);
} else {
dump_reg(dis, i.wxfer.op2_reg_imm.r2.rm);
if (i.wxfer.op2_reg_imm.imm.shift != 0) {
fprintf(dis->dis_out, " %s #%d", shift_types[i.wxfer.op2_reg_imm.r2.type], i.wxfer.op2_reg_imm.imm.shift);
}
}
if (i.wxfer.all.p != 0) {
/* close pre-index instr, also check for write-back */
fprintf(dis->dis_out, "]%s", (i.wxfer.all.wb != 0) ? "!" : "");
}
}
void dump_hxfer(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s%s%s\t",
(i.hxfer.ls == 0) ? "str" : "ldr",
cond[i.generic.cond],
(i.hxfer.s != 0) ? "s" : "",
(i.hxfer.h != 0) ? "h" : "b");
dump_reg(dis, i.hxfer.rd);
fprintf(dis->dis_out, ", [");
dump_reg(dis, i.hxfer.rn);
fprintf(dis->dis_out, "%s, ", (i.hxfer.p == 0) ? "]" : "");
if (i.hxfer.type != 0) { /* imm */
fprintf(dis->dis_out, "#%s%d", (i.hxfer.u == 0) ? "-" : "", (i.hxfer.imm_hi << 4) | i.hxfer.rm);
} else {
dump_reg(dis, i.hxfer.rm);
}
if (i.hxfer.p != 0) {
/* close pre-index instr, also check for write-back */
fprintf(dis->dis_out, "]%s", (i.hxfer.wb != 0) ? "!" : "");
}
}
void dump_mrt(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s%s%s\t", (i.mrt.ls == 0) ? "stm" : "ldm", cond[i.mrt.cond],
(i.mrt.u == 0) ? "d" : "i", (i.mrt.p == 0) ? "a" : "b");
dump_reg(dis, i.mrt.rn);
fprintf(dis->dis_out, "%s, {", (i.mrt.wb != 0) ? "!" : "");
dump_reglist(dis, i.mrt.reg_list);
fprintf(dis->dis_out, "}");
}
void dump_swp(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "swp%s%s ", cond[i.swp.cond], (i.swp.b != 0) ? "b" : "");
dump_reg(dis, i.swp.rd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.swp.rm);
fprintf(dis->dis_out, ", [");
dump_reg(dis, i.swp.rn);
fprintf(dis->dis_out, "]");
}
void dump_mul(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s%s\t", mul_ops[i.mul.opcode], cond[i.mul.cond], (i.mul.s != 0) ? "s" : "");
switch (i.mul.opcode) {
case ARMOP_MUL:
dump_reg(dis, i.mul.rd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rm);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rs);
break;
case ARMOP_MLA:
dump_reg(dis, i.mul.rd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rm);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rs);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rn);
break;
case ARMOP_UMULL:
case ARMOP_UMLAL:
case ARMOP_SMULL:
case ARMOP_SMLAL:
dump_reg(dis, i.mul.rd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rn);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rm);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.mul.rs);
break;
default:
fprintf(dis->dis_out, "DCD 0x%x\t; <unknown>", i.raw);
break;
}
}
void dump_cdp(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "cdp%s\tp%d, %d, ", cond[i.generic.cond], i.cdp.cpn, i.cdp.op);
dump_creg(dis, i.cdp.crd);
fprintf(dis->dis_out, ", ");
dump_creg(dis, i.cdp.crn);
fprintf(dis->dis_out, ", ");
dump_creg(dis, i.cdp.crm);
if (i.cdp.op2 != 0) {
fprintf(dis->dis_out, ", %d", i.cdp.op2);
}
}
void dump_cdt(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s%s\tp%d, ", (i.cdt.ls == 0) ? "stc" : "ldc",
cond[i.generic.cond], (i.cdt.n != 0) ? "l" : "", i.cdt.cpn);
dump_creg(dis, i.cdt.crd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.cdt.rn);
if (i.cdt.p == 0) {
fprintf(dis->dis_out, "]");
}
if (i.cdt.offs != 0) {
fprintf(dis->dis_out, ", #%d", i.cdt.offs);
}
if (i.cdt.p != 0) {
fprintf(dis->dis_out, "]%s", (i.cdt.wb != 0) ? "!" : "");
}
}
void dump_crt(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "%s%s\tp%d, %d, ", (i.crt.ls == 0) ? "mrc" : "mcr",
cond[i.generic.cond], i.crt.cpn, i.crt.op1);
dump_reg(dis, i.crt.rd);
fprintf(dis->dis_out, ", ");
dump_creg(dis, i.crt.crn);
fprintf(dis->dis_out, ", ");
dump_creg(dis, i.crt.crm);
if (i.crt.op2 != 0) {
fprintf(dis->dis_out, ", %d", i.crt.op2);
}
}
void dump_msr(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "msr%s\t%spsr_, ", cond[i.generic.cond],
(i.msr.all.sel == 0) ? "s" : "c");
if (i.msr.all.type == 0) {
/* reg */
fprintf(dis->dis_out, "%s, ", msr_fld[i.msr.all.fld]);
dump_reg(dis, i.msr.all.rm);
} else {
/* imm */
fprintf(dis->dis_out, "f, #%d", i.msr.op2_imm.imm << i.msr.op2_imm.rot);
}
}
void dump_mrs(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "mrs%s\t", cond[i.generic.cond]);
dump_reg(dis, i.mrs.rd);
fprintf(dis->dis_out, ", %spsr", (i.mrs.sel == 0) ? "s" : "c");
}
void dump_swi(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "swi%s\t%d", cond[i.generic.cond], i.swi.num);
}
void dump_clz(ARMDis* dis, ARMInstr i) {
fprintf(dis->dis_out, "clz\t");
dump_reg(dis, i.clz.rd);
fprintf(dis->dis_out, ", ");
dump_reg(dis, i.clz.rm);
fprintf(dis->dis_out, "\n");
}
void armdis_decode(ARMDis* dis, void* p, int size) {
int i;
arminstr_t* pi = (arminstr_t*)p;
ARMInstr instr;
if (dis == NULL) return;
chk_out(dis);
size/=sizeof(arminstr_t);
for (i=0; i<size; ++i) {
fprintf(dis->dis_out, "%p:\t%08x\t", pi, *pi);
dis->pi = pi;
instr.raw = *pi++;
if ((instr.raw & ARM_BR_MASK) == ARM_BR_TAG) {
dump_br(dis, instr);
} else if ((instr.raw & ARM_SWP_MASK) == ARM_SWP_TAG) {
dump_swp(dis, instr);
} else if ((instr.raw & ARM_MUL_MASK) == ARM_MUL_TAG) {
dump_mul(dis, instr);
} else if ((instr.raw & ARM_CLZ_MASK) == ARM_CLZ_TAG) {
dump_clz(dis, instr);
} else if ((instr.raw & ARM_WXFER_MASK) == ARM_WXFER_TAG) {
dump_wxfer(dis, instr);
} else if ((instr.raw & ARM_HXFER_MASK) == ARM_HXFER_TAG) {
dump_hxfer(dis, instr);
} else if ((instr.raw & ARM_DPI_MASK) == ARM_DPI_TAG) {
dump_dpi(dis, instr);
} else if ((instr.raw & ARM_MRT_MASK) == ARM_MRT_TAG) {
dump_mrt(dis, instr);
} else if ((instr.raw & ARM_CDP_MASK) == ARM_CDP_TAG) {
dump_cdp(dis, instr);
} else if ((instr.raw & ARM_CDT_MASK) == ARM_CDT_TAG) {
dump_cdt(dis, instr);
} else if ((instr.raw & ARM_CRT_MASK) == ARM_CRT_TAG) {
dump_crt(dis, instr);
} else if ((instr.raw & ARM_MSR_MASK) == ARM_MSR_TAG) {
dump_msr(dis, instr);
} else if ((instr.raw & ARM_MRS_MASK) == ARM_MRS_TAG) {
dump_mrs(dis, instr);
} else if ((instr.raw & ARM_SWI_MASK) == ARM_SWI_TAG) {
dump_swi(dis, instr);
} else {
fprintf(dis->dis_out, "DCD 0x%x\t; <unknown>", instr.raw);
}
fprintf(dis->dis_out, "\n");
}
}
void armdis_open(ARMDis* dis, const char* dump_name) {
if (dis != NULL && dump_name != NULL) {
armdis_set_output(dis, fopen(dump_name, "w"));
}
}
void armdis_close(ARMDis* dis) {
if (dis->dis_out != NULL && dis->dis_out != stdout && dis->dis_out != stderr) {
fclose(dis->dis_out);
dis->dis_out = NULL;
}
}
void armdis_dump(ARMDis* dis, const char* dump_name, void* p, int size) {
armdis_open(dis, dump_name);
armdis_decode(dis, p, size);
armdis_close(dis);
}
void armdis_init(ARMDis* dis) {
if (dis != NULL) {
/* set to stdout */
armdis_set_output(dis, NULL);
}
}
void init_gdisasm() {
if (gdisasm == NULL) {
gdisasm = (ARMDis*)malloc(sizeof(ARMDis));
armdis_init(gdisasm);
}
}
void _armdis_set_output(FILE* f) {
init_gdisasm();
armdis_set_output(gdisasm, f);
}
FILE* _armdis_get_output() {
init_gdisasm();
return armdis_get_output(gdisasm);
}
void _armdis_decode(void* p, int size) {
init_gdisasm();
armdis_decode(gdisasm, p, size);
}
void _armdis_open(const char* dump_name) {
init_gdisasm();
armdis_open(gdisasm, dump_name);
}
void _armdis_close() {
init_gdisasm();
armdis_close(gdisasm);
}
void _armdis_dump(const char* dump_name, void* p, int size) {
init_gdisasm();
armdis_dump(gdisasm, dump_name, p, size);
}
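A brief usage sketch for the disassembler above (illustrative; the helper name is an assumption). The size argument is in bytes, and armdis_decode() divides it by sizeof(arminstr_t) internally:
#include <stdio.h>
#include "arm-dis.h"
/* Print every instruction in a code buffer to stdout. */
void disassemble_buffer_example(void* code, int size_in_bytes)
{
    _armdis_set_output(stdout);            /* stdout is also the default */
    _armdis_decode(code, size_in_bytes);
}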

@ -0,0 +1,41 @@
/*
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
*/
#ifndef ARM_DIS
#define ARM_DIS
#include <stdlib.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct _ARMDis {
FILE* dis_out;
void* pi;
} ARMDis;
void _armdis_set_output(FILE* f);
FILE* _armdis_get_output(void);
void _armdis_decode(void* p, int size);
void _armdis_open(const char* dump_name);
void _armdis_close(void);
void _armdis_dump(const char* dump_name, void* p, int size);
void armdis_init(ARMDis* dis);
void armdis_set_output(ARMDis* dis, FILE* f);
FILE* armdis_get_output(ARMDis* dis);
void armdis_decode(ARMDis* dis, void* p, int size);
void armdis_open(ARMDis* dis, const char* dump_name);
void armdis_close(ARMDis* dis);
void armdis_dump(ARMDis* dis, const char* dump_name, void* p, int size);
#ifdef __cplusplus
}
#endif
#endif /* ARM_DIS */

@ -0,0 +1,247 @@
//
// Copyright 2011 Xamarin Inc
//
#ifndef __MONO_ARM_VFP_CODEGEN_H__
#define __MONO_ARM_VFP_CODEGEN_H__
#include "arm-codegen.h"
enum {
/* VFP registers */
ARM_VFP_F0,
ARM_VFP_F1,
ARM_VFP_F2,
ARM_VFP_F3,
ARM_VFP_F4,
ARM_VFP_F5,
ARM_VFP_F6,
ARM_VFP_F7,
ARM_VFP_F8,
ARM_VFP_F9,
ARM_VFP_F10,
ARM_VFP_F11,
ARM_VFP_F12,
ARM_VFP_F13,
ARM_VFP_F14,
ARM_VFP_F15,
ARM_VFP_F16,
ARM_VFP_F17,
ARM_VFP_F18,
ARM_VFP_F19,
ARM_VFP_F20,
ARM_VFP_F21,
ARM_VFP_F22,
ARM_VFP_F23,
ARM_VFP_F24,
ARM_VFP_F25,
ARM_VFP_F26,
ARM_VFP_F27,
ARM_VFP_F28,
ARM_VFP_F29,
ARM_VFP_F30,
ARM_VFP_F31,
ARM_VFP_D0 = ARM_VFP_F0,
ARM_VFP_D1 = ARM_VFP_F2,
ARM_VFP_D2 = ARM_VFP_F4,
ARM_VFP_D3 = ARM_VFP_F6,
ARM_VFP_D4 = ARM_VFP_F8,
ARM_VFP_D5 = ARM_VFP_F10,
ARM_VFP_D6 = ARM_VFP_F12,
ARM_VFP_D7 = ARM_VFP_F14,
ARM_VFP_D8 = ARM_VFP_F16,
ARM_VFP_D9 = ARM_VFP_F18,
ARM_VFP_D10 = ARM_VFP_F20,
ARM_VFP_D11 = ARM_VFP_F22,
ARM_VFP_D12 = ARM_VFP_F24,
ARM_VFP_D13 = ARM_VFP_F26,
ARM_VFP_D14 = ARM_VFP_F28,
ARM_VFP_D15 = ARM_VFP_F30,
ARM_VFP_COPROC_SINGLE = 10,
ARM_VFP_COPROC_DOUBLE = 11,
#define ARM_VFP_OP(p,q,r,s) (((p) << 23) | ((q) << 21) | ((r) << 20) | ((s) << 6))
#define ARM_VFP_OP2(Fn,N) (ARM_VFP_OP (1,1,1,1) | ((Fn) << 16) | ((N) << 7))
ARM_VFP_MUL = ARM_VFP_OP (0,1,0,0),
ARM_VFP_NMUL = ARM_VFP_OP (0,1,0,1),
ARM_VFP_ADD = ARM_VFP_OP (0,1,1,0),
ARM_VFP_SUB = ARM_VFP_OP (0,1,1,1),
ARM_VFP_DIV = ARM_VFP_OP (1,0,0,0),
ARM_VFP_CPY = ARM_VFP_OP2 (0,0),
ARM_VFP_ABS = ARM_VFP_OP2 (0,1),
ARM_VFP_NEG = ARM_VFP_OP2 (1,0),
ARM_VFP_SQRT = ARM_VFP_OP2 (1,1),
ARM_VFP_CMP = ARM_VFP_OP2 (4,0),
ARM_VFP_CMPE = ARM_VFP_OP2 (4,1),
ARM_VFP_CMPZ = ARM_VFP_OP2 (5,0),
ARM_VFP_CMPEZ = ARM_VFP_OP2 (5,1),
ARM_VFP_CVT = ARM_VFP_OP2 (7,1),
ARM_VFP_UITO = ARM_VFP_OP2 (8,0),
ARM_VFP_SITO = ARM_VFP_OP2 (8,1),
ARM_VFP_TOUI = ARM_VFP_OP2 (12,0),
ARM_VFP_TOSI = ARM_VFP_OP2 (13,0),
ARM_VFP_TOUIZ = ARM_VFP_OP2 (12,1),
ARM_VFP_TOSIZ = ARM_VFP_OP2 (13,1),
ARM_VFP_SID = 0,
ARM_VFP_SCR = 1 << 1,
ARM_VFP_EXC = 8 << 1
};
#define ARM_DEF_VFP_DYADIC(cond,cp,op,Fd,Fn,Fm) \
(14 << 24) | \
((cp) << 8) | \
(op) | \
(((Fd) >> 1) << 12) | \
(((Fd) & 1) << 22) | \
(((Fn) >> 1) << 16) | \
(((Fn) & 1) << 7) | \
(((Fm) >> 1) << 0) | \
(((Fm) & 1) << 5) | \
ARM_DEF_COND(cond)
#define ARM_DEF_VFP_MONADIC(cond,cp,op,Fd,Fm) \
(14 << 24) | \
((cp) << 8) | \
(op) | \
(((Fd) >> 1) << 12) | \
(((Fd) & 1) << 22) | \
(((Fm) >> 1) << 0) | \
(((Fm) & 1) << 5) | \
ARM_DEF_COND(cond)
#define ARM_DEF_VFP_LSF(cond,cp,post,ls,wback,basereg,Fd,offset) \
((offset) >= 0? (offset)>>2: -(offset)>>2) | \
(6 << 25) | \
((cp) << 8) | \
(((Fd) >> 1) << 12) | \
(((Fd) & 1) << 22) | \
((basereg) << 16) | \
((ls) << 20) | \
((wback) << 21) | \
(((offset) >= 0) << 23) | \
((wback) << 21) | \
((post) << 24) | \
ARM_DEF_COND(cond)
#define ARM_DEF_VFP_CPT(cond,cp,op,L,Fn,Rd) \
(14 << 24) | \
(1 << 4) | \
((cp) << 8) | \
((op) << 21) | \
((L) << 20) | \
((Rd) << 12) | \
(((Fn) >> 1) << 16) | \
(((Fn) & 1) << 7) | \
ARM_DEF_COND(cond)
/* FP load and stores */
#define ARM_FLDS_COND(p,freg,base,offset,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_SINGLE,1,ARMOP_LDR,0,(base),(freg),(offset)))
#define ARM_FLDS(p,freg,base,offset) \
ARM_FLDS_COND(p,freg,base,offset,ARMCOND_AL)
#define ARM_FLDD_COND(p,freg,base,offset,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,1,ARMOP_LDR,0,(base),(freg),(offset)))
#define ARM_FLDD(p,freg,base,offset) \
ARM_FLDD_COND(p,freg,base,offset,ARMCOND_AL)
#define ARM_FSTS_COND(p,freg,base,offset,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_SINGLE,1,ARMOP_STR,0,(base),(freg),(offset)))
#define ARM_FSTS(p,freg,base,offset) \
ARM_FSTS_COND(p,freg,base,offset,ARMCOND_AL)
#define ARM_FSTD_COND(p,freg,base,offset,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,1,ARMOP_STR,0,(base),(freg),(offset)))
#define ARM_FSTD(p,freg,base,offset) \
ARM_FSTD_COND(p,freg,base,offset,ARMCOND_AL)
#define ARM_FLDMD_COND(p,first_reg,nregs,base,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,0,ARMOP_LDR,0,(base),(first_reg),((nregs) * 2) << 2))
#define ARM_FLDMD(p,first_reg,nregs,base) \
ARM_FLDMD_COND(p,first_reg,nregs,base,ARMCOND_AL)
#define ARM_FSTMD_COND(p,first_reg,nregs,base,cond) \
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,0,ARMOP_STR,0,(base),(first_reg),((nregs) * 2) << 2))
#define ARM_FSTMD(p,first_reg,nregs,base) \
ARM_FSTMD_COND(p,first_reg,nregs,base,ARMCOND_AL)
#include <mono/arch/arm/arm_vfpmacros.h>
/* coprocessor register transfer */
#define ARM_FMSR(p,freg,reg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,0,0,(freg),(reg)))
#define ARM_FMRS(p,reg,freg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,0,1,(freg),(reg)))
#define ARM_FMDLR(p,freg,reg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,0,0,(freg),(reg)))
#define ARM_FMRDL(p,reg,freg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,0,1,(freg),(reg)))
#define ARM_FMDHR(p,freg,reg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,1,0,(freg),(reg)))
#define ARM_FMRDH(p,reg,freg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,1,1,(freg),(reg)))
#define ARM_FMXR(p,freg,reg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,7,0,(freg),(reg)))
#define ARM_FMRX(p,reg,fcreg) \
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,7,1,(fcreg),(reg)))
#define ARM_FMSTAT(p) \
ARM_FMRX((p),ARMREG_R15,ARM_VFP_SCR)
#define ARM_DEF_MCRR(cond,cp,rn,rd,Fm,M) \
((Fm) << 0) | \
(1 << 4) | \
((M) << 5) | \
((cp) << 8) | \
((rd) << 12) | \
((rn) << 16) | \
((2) << 21) | \
(12 << 24) | \
ARM_DEF_COND(cond)
#define ARM_FMDRR(p,rd,rn,dm) \
ARM_EMIT((p), ARM_DEF_MCRR(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,(rn),(rd),(dm) >> 1, (dm) & 1))
#define ARM_DEF_FMRRD(cond,cp,rn,rd,Dm,D) \
((Dm) << 0) | \
(1 << 4) | \
((cp) << 8) | \
((rd) << 12) | \
((rn) << 16) | \
((0xc5) << 20) | \
ARM_DEF_COND(cond)
#define ARM_FMRRD(p,rd,rn,dm) \
ARM_EMIT((p), ARM_DEF_FMRRD(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,(rn),(rd),(dm) >> 1, (dm) & 1))
#define ARM_DEF_FUITOS(cond,Dd,D,Fm,M) ((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xa) << 8) | ((1) << 6) | ((M) << 5) | ((Fm) << 0)
#define ARM_FUITOS(p,dreg,sreg) \
ARM_EMIT((p), ARM_DEF_FUITOS (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
#define ARM_DEF_FUITOD(cond,Dd,D,Fm,M) ((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xb) << 8) | ((1) << 6) | ((M) << 5) | ((Fm) << 0)
#define ARM_FUITOD(p,dreg,sreg) \
ARM_EMIT((p), ARM_DEF_FUITOD (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
#define ARM_DEF_FSITOS(cond,Dd,D,Fm,M) ((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xa) << 8) | ((1) << 7) | ((1) << 6) | ((M) << 5) | ((Fm) << 0)
#define ARM_FSITOS(p,dreg,sreg) \
ARM_EMIT((p), ARM_DEF_FSITOS (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
#define ARM_DEF_FSITOD(cond,Dd,D,Fm,M) ((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xb) << 8) | ((1) << 7) | ((1) << 6) | ((M) << 5) | ((Fm) << 0)
#define ARM_FSITOD(p,dreg,sreg) \
ARM_EMIT((p), ARM_DEF_FSITOD (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
#endif /* __MONO_ARM_VFP_CODEGEN_H__ */
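A short sketch of driving the VFP macros above (illustrative; the register and offset choices are arbitrary, and compiling it assumes arm-codegen.h and the generated arm_vfpmacros.h are on the include path, as arranged by the arm Makefile.am rules):
#include "arm-vfp-codegen.h"
/* Load a single from [sp, #0] into s0, copy it to the integer register a1,
 * then store s0 back to [sp, #4]. The macros advance p in place. */
arminstr_t* emit_vfp_example(arminstr_t* p)
{
    ARM_FLDS(p, ARM_VFP_F0, ARMREG_SP, 0);
    ARM_FMRS(p, ARMREG_A1, ARM_VFP_F0);
    ARM_FSTS(p, ARM_VFP_F0, ARMREG_SP, 4);
    return p;
}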

@ -0,0 +1,177 @@
/*
* ARM CodeGen
* XScale WirelessMMX extensions
* Copyright 2002 Wild West Software
*/
#ifndef __WMMX_H__
#define __WMMX_H__ 1
#if 0
#include <arm-codegen.h>
#endif
#if defined(ARM_IASM)
# define WM_ASM(_expr) ARM_IASM(_expr)
#else
# define WM_ASM(_expr) __emit (_expr)
#endif
#if defined(ARM_EMIT)
# define WM_EMIT(p, i) ARM_EMIT(p, i)
#else
# define WM_EMIT(p, i)
#endif
enum {
WM_CC_EQ = 0x0,
WM_CC_NE = 0x1,
WM_CC_CS = 0x2,
WM_CC_HS = WM_CC_CS,
WM_CC_CC = 0x3,
WM_CC_LO = WM_CC_CC,
WM_CC_MI = 0x4,
WM_CC_PL = 0x5,
WM_CC_VS = 0x6,
WM_CC_VC = 0x7,
WM_CC_HI = 0x8,
WM_CC_LS = 0x9,
WM_CC_GE = 0xA,
WM_CC_LT = 0xB,
WM_CC_GT = 0xC,
WM_CC_LE = 0xD,
WM_CC_AL = 0xE,
WM_CC_NV = 0xF,
WM_CC_SHIFT = 28
};
#if defined(ARM_DEF_COND)
# define WM_DEF_CC(_cc) ARM_DEF_COND(_cc)
#else
# define WM_DEF_CC(_cc) ((_cc & 0xF) << WM_CC_SHIFT)
#endif
enum {
WM_R0 = 0x0,
WM_R1 = 0x1,
WM_R2 = 0x2,
WM_R3 = 0x3,
WM_R4 = 0x4,
WM_R5 = 0x5,
WM_R6 = 0x6,
WM_R7 = 0x7,
WM_R8 = 0x8,
WM_R9 = 0x9,
WM_R10 = 0xA,
WM_R11 = 0xB,
WM_R12 = 0xC,
WM_R13 = 0xD,
WM_R14 = 0xE,
WM_R15 = 0xF,
WM_wR0 = 0x0,
WM_wR1 = 0x1,
WM_wR2 = 0x2,
WM_wR3 = 0x3,
WM_wR4 = 0x4,
WM_wR5 = 0x5,
WM_wR6 = 0x6,
WM_wR7 = 0x7,
WM_wR8 = 0x8,
WM_wR9 = 0x9,
WM_wR10 = 0xA,
WM_wR11 = 0xB,
WM_wR12 = 0xC,
WM_wR13 = 0xD,
WM_wR14 = 0xE,
WM_wR15 = 0xF
};
/*
* Qualifiers:
* H - 16-bit (HalfWord) SIMD
* W - 32-bit (Word) SIMD
* D - 64-bit (Double)
*/
enum {
WM_B = 0,
WM_H = 1,
WM_D = 2
};
/*
* B.2.3 Transfers From Coprocessor Register (MRC)
* Table B-5
*/
enum {
WM_TMRC_OP2 = 0,
WM_TMRC_CPNUM = 1,
WM_TMOVMSK_OP2 = 1,
WM_TMOVMSK_CPNUM = 0,
WM_TANDC_OP2 = 1,
WM_TANDC_CPNUM = 1,
WM_TORC_OP2 = 2,
WM_TORC_CPNUM = 1,
WM_TEXTRC_OP2 = 3,
WM_TEXTRC_CPNUM = 1,
WM_TEXTRM_OP2 = 3,
WM_TEXTRM_CPNUM = 0
};
/*
* TANDC<B,H,W>{Cond} R15
* Performs AND across the fields of the SIMD PSR register (wCASF) and sends the result
* to CPSR; can be performed after a Byte, Half-word or Word operation that sets the flags.
* NOTE: R15 is omitted from the macro declaration;
*/
#define DEF_WM_TNADC_CC(_q, _cc) WM_DEF_CC((_cc)) + ((_q) << 0x16) + 0xE13F130
#define _WM_TNADC_CC(_q, _cc) WM_ASM(DEF_WM_TNADC_CC(_q, _cc))
#define ARM_WM_TNADC_CC(_p, _q, _cc) WM_EMIT(_p, DEF_WM_TNADC_CC(_q, _cc))
/* inline assembly */
#define _WM_TNADC(_q) _WM_TNADC_CC((_q), WM_CC_AL)
#define _WM_TNADCB() _WM_TNADC(WM_B)
#define _WM_TNADCH() _WM_TNADC(WM_H)
#define _WM_TNADCD() _WM_TNADC(WM_D)
/* codegen */
#define ARM_WM_TNADC(_p, _q) ARM_WM_TNADC_CC((_p), (_q), WM_CC_AL)
#define ARM_WM_TNADCB(_p) ARM_WM_TNADC(_p, WM_B)
#define ARM_WM_TNADCH(_p) ARM_WM_TNADC(_p, WM_H)
#define ARM_WM_TNADCD(_p) ARM_WM_TNADC(_p, WM_D)
/*
* TBCST<B,H,W>{Cond} wRd, Rn
* Broadcasts a value from the ARM Source reg (Rn) to every SIMD position
* in the WMMX Destination reg (wRd).
*/
#define DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn) \
WM_DEF_CC((_cc)) + ((_q) << 6) + ((_wrd) << 16) + ((_rn) << 12) + 0xE200010
#define _WM_TBCST_CC(_q, _cc, _wrd, _rn) WM_ASM(DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn))
#define ARM_WM_TBCST_CC(_p, _q, _cc, _wrd, _rn) WM_EMIT(_p, DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn))
/* inline */
#define _WM_TBCST(_q, _wrd, _rn) _WM_TBCST_CC(_q, WM_CC_AL, _wrd, _rn)
#define _WM_TBCSTB(_wrd, _rn) _WM_TBCST(WM_B, _wrd, _rn)
#define _WM_TBCSTH(_wrd, _rn) _WM_TBCST(WM_H, _wrd, _rn)
#define _WM_TBCSTD(_wrd, _rn) _WM_TBCST(WM_D, _wrd, _rn)
/* codegen */
#define ARM_WM_TBCST(_p, _q, _wrd, _rn) ARM_WM_TBCST_CC(_p, _q, WM_CC_AL, _wrd, _rn)
#define ARM_WM_TBCSTB(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_B, _wrd, _rn)
#define ARM_WM_TBCSTH(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_H, _wrd, _rn)
#define ARM_WM_TBCSTD(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_D, _wrd, _rn)
#endif /* __WMMX_H__ */
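A tiny sketch of the codegen-side WMMX macros above (illustrative; arm-codegen.h must be included first so that ARM_EMIT exists and WM_EMIT actually stores the instruction instead of expanding to nothing):
#include "arm-codegen.h"
#include "arm-wmmx.h"
/* Broadcast the value in r0 into every 16-bit lane of wR3. */
arminstr_t* emit_wmmx_broadcast_example(arminstr_t* p)
{
    ARM_WM_TBCST(p, WM_H, WM_wR3, WM_R0);
    return p;
}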

@ -0,0 +1,56 @@
/* PSR := <Op> Rn, (imm8 ROR 2*rot) */
#define ARM_<Op>_REG_IMM_COND(p, rn, imm8, rot, cond) \
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, 0, rn, imm8, rot, cond)
#define ARM_<Op>_REG_IMM(p, rn, imm8, rot) \
ARM_<Op>_REG_IMM_COND(p, rn, imm8, rot, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM_COND(rn, imm8, rot, cond) \
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, 0, rn, imm8, rot, cond)
#define _<Op>_REG_IMM(rn, imm8, rot) \
_<Op>_REG_IMM_COND(rn, imm8, rot, ARMCOND_AL)
#endif
/* PSR := <Op> Rn, imm8 */
#define ARM_<Op>_REG_IMM8_COND(p, rn, imm8, cond) \
ARM_<Op>_REG_IMM_COND(p, rn, imm8, 0, cond)
#define ARM_<Op>_REG_IMM8(p, rn, imm8) \
ARM_<Op>_REG_IMM8_COND(p, rn, imm8, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM8_COND(rn, imm8, cond) \
_<Op>_REG_IMM_COND(rn, imm8, 0, cond)
#define _<Op>_REG_IMM8(rn, imm8) \
_<Op>_REG_IMM8_COND(rn, imm8, ARMCOND_AL)
#endif
/* PSR := <Op> Rn, Rm */
#define ARM_<Op>_REG_REG_COND(p, rn, rm, cond) \
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, 0, rn, rm, cond)
#define ARM_<Op>_REG_REG(p, rn, rm) \
ARM_<Op>_REG_REG_COND(p, rn, rm, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_REG_COND(rn, rm, cond) \
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, 0, rn, rm, cond)
#define _<Op>_REG_REG(rn, rm) \
_<Op>_REG_REG_COND(rn, rm, ARMCOND_AL)
#endif
/* PSR := <Op> Rn, (Rm <shift_type> imm8) */
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rn, rm, shift_type, imm_shift, cond) \
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, 0, rn, rm, shift_type, imm_shift, cond)
#define ARM_<Op>_REG_IMMSHIFT(p, rn, rm, shift_type, imm_shift) \
ARM_<Op>_REG_IMMSHIFT_COND(p, rn, rm, shift_type, imm_shift, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMMSHIFT_COND(rn, rm, shift_type, imm_shift, cond) \
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, 0, rn, rm, shift_type, imm_shift, cond)
#define _<Op>_REG_IMMSHIFT(rn, rm, shift_type, imm_shift) \
_<Op>_REG_IMMSHIFT_COND(rn, rm, shift_type, imm_shift, ARMCOND_AL)
#endif
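For reference, after dpiops.sh substitutes a concrete opcode such as CMP for <Op> (the substitution is assumed here to be a plain textual replacement, and ARMOP_CMP is assumed to come from arm-codegen.h; only the template itself is part of this commit), the generated arm_dpimacros.h contains entries along these lines:
/* PSR := CMP Rn, Rm */
#define ARM_CMP_REG_REG_COND(p, rn, rm, cond) \
    ARM_DPIOP_S_REG_REG_COND(p, ARMOP_CMP, 0, rn, rm, cond)
#define ARM_CMP_REG_REG(p, rn, rm) \
    ARM_CMP_REG_REG_COND(p, rn, rm, ARMCOND_AL)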

@ -0,0 +1,112 @@
/* -- <Op> -- */
/* Rd := Rn <Op> (imm8 ROR rot) ; rot is power of 2 */
#define ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, rot, cond) \
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, rd, rn, imm8, rot, cond)
#define ARM_<Op>_REG_IMM(p, rd, rn, imm8, rot) \
ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, rot, ARMCOND_AL)
#define ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, rot, cond) \
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, rd, rn, imm8, rot, cond)
#define ARM_<Op>S_REG_IMM(p, rd, rn, imm8, rot) \
ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, rot, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM_COND(rd, rn, imm8, rot, cond) \
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, rd, rn, imm8, rot, cond)
#define _<Op>_REG_IMM(rd, rn, imm8, rot) \
_<Op>_REG_IMM_COND(rd, rn, imm8, rot, ARMCOND_AL)
#define _<Op>S_REG_IMM_COND(rd, rn, imm8, rot, cond) \
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, rd, rn, imm8, rot, cond)
#define _<Op>S_REG_IMM(rd, rn, imm8, rot) \
_<Op>S_REG_IMM_COND(rd, rn, imm8, rot, ARMCOND_AL)
#endif
/* Rd := Rn <Op> imm8 */
#define ARM_<Op>_REG_IMM8_COND(p, rd, rn, imm8, cond) \
ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, 0, cond)
#define ARM_<Op>_REG_IMM8(p, rd, rn, imm8) \
ARM_<Op>_REG_IMM8_COND(p, rd, rn, imm8, ARMCOND_AL)
#define ARM_<Op>S_REG_IMM8_COND(p, rd, rn, imm8, cond) \
ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, 0, cond)
#define ARM_<Op>S_REG_IMM8(p, rd, rn, imm8) \
ARM_<Op>S_REG_IMM8_COND(p, rd, rn, imm8, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM8_COND(rd, rn, imm8, cond) \
_<Op>_REG_IMM_COND(rd, rn, imm8, 0, cond)
#define _<Op>_REG_IMM8(rd, rn, imm8) \
_<Op>_REG_IMM8_COND(rd, rn, imm8, ARMCOND_AL)
#define _<Op>S_REG_IMM8_COND(rd, rn, imm8, cond) \
_<Op>S_REG_IMM_COND(rd, rn, imm8, 0, cond)
#define _<Op>S_REG_IMM8(rd, rn, imm8) \
_<Op>S_REG_IMM8_COND(rd, rn, imm8, ARMCOND_AL)
#endif
/* Rd := Rn <Op> Rm */
#define ARM_<Op>_REG_REG_COND(p, rd, rn, rm, cond) \
ARM_DPIOP_REG_REG_COND(p, ARMOP_<Op>, rd, rn, rm, cond)
#define ARM_<Op>_REG_REG(p, rd, rn, rm) \
ARM_<Op>_REG_REG_COND(p, rd, rn, rm, ARMCOND_AL)
#define ARM_<Op>S_REG_REG_COND(p, rd, rn, rm, cond) \
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, rd, rn, rm, cond)
#define ARM_<Op>S_REG_REG(p, rd, rn, rm) \
ARM_<Op>S_REG_REG_COND(p, rd, rn, rm, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_REG_COND(rd, rn, rm, cond) \
ARM_IASM_DPIOP_REG_REG_COND(ARMOP_<Op>, rd, rn, rm, cond)
#define _<Op>_REG_REG(rd, rn, rm) \
_<Op>_REG_REG_COND(rd, rn, rm, ARMCOND_AL)
#define _<Op>S_REG_REG_COND(rd, rn, rm, cond) \
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, rd, rn, rm, cond)
#define _<Op>S_REG_REG(rd, rn, rm) \
_<Op>S_REG_REG_COND(rd, rn, rm, ARMCOND_AL)
#endif
/* Rd := Rn <Op> (Rm <shift_type> imm_shift) */
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, cond) \
ARM_DPIOP_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
#define ARM_<Op>_REG_IMMSHIFT(p, rd, rn, rm, shift_type, imm_shift) \
ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
#define ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, cond) \
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
#define ARM_<Op>S_REG_IMMSHIFT(p, rd, rn, rm, shift_type, imm_shift) \
ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, cond) \
ARM_IASM_DPIOP_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
#define _<Op>_REG_IMMSHIFT(rd, rn, rm, shift_type, imm_shift) \
_<Op>_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
#define _<Op>S_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, cond) \
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
#define _<Op>S_REG_IMMSHIFT(rd, rn, rm, shift_type, imm_shift) \
_<Op>S_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
#endif
/* Rd := Rn <Op> (Rm <shift_type> Rs) */
#define ARM_<Op>_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, cond) \
ARM_DPIOP_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define ARM_<Op>_REG_REGSHIFT(p, rd, rn, rm, shift_type, rs) \
ARM_<Op>_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, ARMCOND_AL)
#define ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, cond) \
ARM_DPIOP_S_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define ARM_<Op>S_REG_REGSHIFT(p, rd, rn, rm, shift_type, rs) \
ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, cond) \
ARM_IASM_DPIOP_REG_REGSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define _<Op>_REG_REGSHIFT(rd, rn, rm, shift_type, rs) \
_<Op>_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, ARMCOND_AL)
#define _<Op>S_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, cond) \
ARM_IASM_DPIOP_S_REG_REGSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define _<Op>S_REG_REGSHIFT(rd, rn, rm, shift_type, rs) \
_<Op>S_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, ARMCOND_AL)
#endif
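/*
 * Illustrative note (assumption, not part of the original template): the
 * generator script below runs sed "s/<Op>/NAME/g" over this file for each
 * data-processing opcode, so for ADD the register/register pair above
 * becomes, for example:
 *
 *   #define ARM_ADD_REG_REG_COND(p, rd, rn, rm, cond) \
 *           ARM_DPIOP_REG_REG_COND(p, ARMOP_ADD, rd, rn, rm, cond)
 *   #define ARM_ADD_REG_REG(p, rd, rn, rm) \
 *           ARM_ADD_REG_REG_COND(p, rd, rn, rm, ARMCOND_AL)
 */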

@ -0,0 +1,30 @@
#!/bin/sh
OPCODES="AND EOR SUB RSB ADD ADC SBC RSC ORR BIC"
CMP_OPCODES="TST TEQ CMP CMN"
MOV_OPCODES="MOV MVN"
# $1: opcode list
# $2: template
gen() {
for i in $1; do
sed "s/<Op>/$i/g" $2.th
done
}
echo -e "/* Macros for DPI ops, auto-generated from template */\n"
echo -e "\n/* mov/mvn */\n"
gen "$MOV_OPCODES" mov_macros
echo -e "\n/* DPIs, arithmetic and logical */\n"
gen "$OPCODES" dpi_macros
echo -e "\n\n"
echo -e "\n/* DPIs, comparison */\n"
gen "$CMP_OPCODES" cmp_macros
echo -e "\n/* end generated */\n"

@ -0,0 +1,121 @@
/* Rd := imm8 ROR 2*rot */
#define ARM_<Op>_REG_IMM_COND(p, reg, imm8, rot, cond) \
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, rot, cond)
#define ARM_<Op>_REG_IMM(p, reg, imm8, rot) \
ARM_<Op>_REG_IMM_COND(p, reg, imm8, rot, ARMCOND_AL)
/* S */
#define ARM_<Op>S_REG_IMM_COND(p, reg, imm8, rot, cond) \
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, rot, cond)
#define ARM_<Op>S_REG_IMM(p, reg, imm8, rot) \
ARM_<Op>S_REG_IMM_COND(p, reg, imm8, rot, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM_COND(reg, imm8, rot, cond) \
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, rot, cond)
#define _<Op>_REG_IMM(reg, imm8, rot) \
_<Op>_REG_IMM_COND(reg, imm8, rot, ARMCOND_AL)
/* S */
#define _<Op>S_REG_IMM_COND(reg, imm8, rot, cond) \
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, rot, cond)
#define _<Op>S_REG_IMM(reg, imm8, rot) \
_<Op>S_REG_IMM_COND(reg, imm8, rot, ARMCOND_AL)
#endif
/* Rd := imm8 */
#define ARM_<Op>_REG_IMM8_COND(p, reg, imm8, cond) \
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, 0, cond)
#define ARM_<Op>_REG_IMM8(p, reg, imm8) \
ARM_<Op>_REG_IMM8_COND(p, reg, imm8, ARMCOND_AL)
/* S */
#define ARM_<Op>S_REG_IMM8_COND(p, reg, imm8, cond) \
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, 0, cond)
#define ARM_<Op>S_REG_IMM8(p, reg, imm8) \
ARM_<Op>S_REG_IMM8_COND(p, reg, imm8, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMM8_COND(reg, imm8, cond) \
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, 0, cond)
#define _<Op>_REG_IMM8(reg, imm8) \
_<Op>_REG_IMM8_COND(reg, imm8, ARMCOND_AL)
/* S */
#define _<Op>S_REG_IMM8_COND(reg, imm8, cond) \
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, 0, cond)
#define _<Op>S_REG_IMM8(reg, imm8) \
_<Op>S_REG_IMM8_COND(reg, imm8, ARMCOND_AL)
#endif
/* Rd := Rm */
#define ARM_<Op>_REG_REG_COND(p, rd, rm, cond) \
ARM_DPIOP_REG_REG_COND(p, ARMOP_<Op>, rd, 0, rm, cond)
#define ARM_<Op>_REG_REG(p, rd, rm) \
ARM_<Op>_REG_REG_COND(p, rd, rm, ARMCOND_AL)
/* S */
#define ARM_<Op>S_REG_REG_COND(p, rd, rm, cond) \
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, rd, 0, rm, cond)
#define ARM_<Op>S_REG_REG(p, rd, rm) \
ARM_<Op>S_REG_REG_COND(p, rd, rm, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_REG_COND(rd, rm, cond) \
ARM_IASM_DPIOP_REG_REG_COND(ARMOP_<Op>, rd, 0, rm, cond)
#define _<Op>_REG_REG(rd, rm) \
_<Op>_REG_REG_COND(rd, rm, ARMCOND_AL)
/* S */
#define _<Op>S_REG_REG_COND(rd, rm, cond) \
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, rd, 0, rm, cond)
#define _<Op>S_REG_REG(rd, rm) \
_<Op>S_REG_REG_COND(rd, rm, ARMCOND_AL)
#endif
/* Rd := Rm <shift_type> imm_shift */
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, cond) \
ARM_DPIOP_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
#define ARM_<Op>_REG_IMMSHIFT(p, rd, rm, shift_type, imm_shift) \
ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, ARMCOND_AL)
/* S */
#define ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, cond) \
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
#define ARM_<Op>S_REG_IMMSHIFT(p, rd, rm, shift_type, imm_shift) \
ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, cond) \
ARM_IASM_DPIOP_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
#define _<Op>_REG_IMMSHIFT(rd, rm, shift_type, imm_shift) \
_<Op>_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, ARMCOND_AL)
/* S */
#define _<Op>S_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, cond) \
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
#define _<Op>S_REG_IMMSHIFT(rd, rm, shift_type, imm_shift) \
_<Op>S_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, ARMCOND_AL)
#endif
/* Rd := (Rm <shift_type> Rs) */
#define ARM_<Op>_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, cond) \
ARM_DPIOP_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
#define ARM_<Op>_REG_REGSHIFT(p, rd, rm, shift_type, rs) \
ARM_<Op>_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, ARMCOND_AL)
/* S */
#define ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, cond) \
ARM_DPIOP_S_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
#define ARM_<Op>S_REG_REGSHIFT(p, rd, rm, shift_type, rs) \
ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, ARMCOND_AL)
#ifndef ARM_NOIASM
#define _<Op>_REG_REGSHIFT_COND(rd, rm, shift_type, rs, cond) \
ARM_IASM_DPIOP_REG_REGSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
#define _<Op>_REG_REGSHIFT(rd, rm, shift_type, rs) \
_<Op>_REG_REGSHIFT_COND(rd, rm, shift_type, rs, ARMCOND_AL)
/* S */
#define _<Op>S_REG_REGSHIFT_COND(rd, rm, shift_type, rs, cond) \
ARM_IASM_DPIOP_S_REG_REGSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
#define _<Op>S_REG_REGSHIFT(rd, rm, shift_type, rs) \
_<Op>S_REG_REGSHIFT_COND(rd, rm, shift_type, rs, ARMCOND_AL)
#endif

@ -0,0 +1,710 @@
/*
* Create trampolines to invoke arbitrary functions.
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
*
* Contributions by Malte Hildingson
*/
#include "arm-codegen.h"
#include "arm-dis.h"
#if defined(_WIN32_WCE) || defined (UNDER_CE)
# include <windows.h>
#else
#include <unistd.h>
#include <sys/mman.h>
#endif
#if !defined(PLATFORM_MACOSX)
#include <errno.h>
#include "mono/metadata/class.h"
#include "mono/metadata/tabledefs.h"
#include "mono/interpreter/interp.h"
#include "mono/metadata/appdomain.h"
#if 0
# define ARM_DUMP_DISASM 1
#endif
/* prototypes for private functions (to avoid compiler warnings) */
void flush_icache (void);
void* alloc_code_buff (int num_instr);
/*
* The resulting function takes the form:
* void func (void (*callme)(), void *retval, void *this_obj, stackval *arguments);
* NOTE: all args passed in ARM registers (A1-A4),
* then copied to R4-R7 (see definitions below).
*/
#define REG_FUNC_ADDR ARMREG_R4
#define REG_RETVAL ARMREG_R5
#define REG_THIS ARMREG_R6
#define REG_ARGP ARMREG_R7
#define ARG_SIZE sizeof(stackval)
void flush_icache ()
{
#if defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), NULL, 0);
#else
# if 0
asm ("mov r0, r0");
asm ("mov r0, #0");
asm ("mcr p15, 0, r0, c7, c7, 0");
# else
/* TODO: use (movnv pc, rx) method */
# endif
#endif
}
void* alloc_code_buff (int num_instr)
{
void* code_buff;
int code_size = num_instr * sizeof(arminstr_t);
#if defined(_WIN32) || defined(UNDER_CE)
int old_prot = 0;
code_buff = malloc(code_size);
VirtualProtect(code_buff, code_size, PAGE_EXECUTE_READWRITE, &old_prot);
#else
int page_size = sysconf(_SC_PAGESIZE);
int new_code_size;
new_code_size = code_size + page_size - 1;
code_buff = malloc(new_code_size);
code_buff = (void *) (((int) code_buff + page_size - 1) & ~(page_size - 1));
if (mprotect(code_buff, code_size, PROT_READ|PROT_WRITE|PROT_EXEC) != 0) {
g_critical (G_GNUC_PRETTY_FUNCTION
": mprotect error: %s", g_strerror (errno));
}
#endif
return code_buff;
}
/*
* Refer to ARM Procedure Call Standard (APCS) for more info.
*/
MonoPIFunc mono_arch_create_trampoline (MonoMethodSignature *sig, gboolean string_ctor)
{
MonoType* param;
MonoPIFunc code_buff;
arminstr_t* p;
guint32 code_size, stack_size;
guint32 simple_type;
int i, hasthis, aregs, regc, stack_offs;
int this_loaded;
guchar reg_alloc [ARM_NUM_ARG_REGS];
/* pessimistic estimation for prologue/epilogue size */
code_size = 16 + 16;
/* push/pop work regs */
code_size += 2;
/* call */
code_size += 2;
/* handle retval */
code_size += 2;
stack_size = 0;
hasthis = sig->hasthis ? 1 : 0;
aregs = ARM_NUM_ARG_REGS - hasthis;
for (i = 0, regc = aregs; i < sig->param_count; ++i) {
param = sig->params [i];
/* keep track of argument sizes */
if (i < ARM_NUM_ARG_REGS) reg_alloc [i] = 0;
if (param->byref) {
if (regc > 0) {
code_size += 1;
reg_alloc [i] = regc;
--regc;
} else {
code_size += 2;
stack_size += sizeof(gpointer);
}
} else {
simple_type = param->type;
enum_calc_size:
switch (simple_type) {
case MONO_TYPE_BOOLEAN:
case MONO_TYPE_CHAR:
case MONO_TYPE_I1:
case MONO_TYPE_U1:
case MONO_TYPE_I2:
case MONO_TYPE_U2:
case MONO_TYPE_I4:
case MONO_TYPE_U4:
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_PTR:
case MONO_TYPE_R4:
case MONO_TYPE_SZARRAY:
case MONO_TYPE_CLASS:
case MONO_TYPE_OBJECT:
case MONO_TYPE_STRING:
if (regc > 0) {
/* register arg */
code_size += 1;
reg_alloc [i] = regc;
--regc;
} else {
/* stack arg */
code_size += 2;
stack_size += 4;
}
break;
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
/* keep track of argument sizes */
if (regc > 1) {
/* fits into registers, two LDRs */
code_size += 2;
reg_alloc [i] = regc;
regc -= 2;
} else if (regc > 0) {
/* first half fits into register, one LDR */
code_size += 1;
reg_alloc [i] = regc;
--regc;
/* the rest on the stack, LDR/STR */
code_size += 2;
stack_size += 4;
} else {
/* stack arg, 4 instrs - 2x(LDR/STR) */
code_size += 4;
stack_size += 2 * 4;
}
break;
case MONO_TYPE_VALUETYPE:
if (param->data.klass->enumtype) {
simple_type = param->data.klass->enum_basetype->type;
goto enum_calc_size;
}
if (mono_class_value_size(param->data.klass, NULL) != 4) {
g_error("can only marshal enums, not generic structures (size: %d)", mono_class_value_size(param->data.klass, NULL));
}
if (regc > 0) {
/* register arg */
code_size += 1;
reg_alloc [i] = regc;
--regc;
} else {
/* stack arg */
code_size += 2;
stack_size += 4;
}
break;
default :
break;
}
}
}
code_buff = (MonoPIFunc)alloc_code_buff(code_size);
p = (arminstr_t*)code_buff;
/* prologue */
p = arm_emit_lean_prologue(p, stack_size,
/* save workset (r4-r7) */
(1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));
/* copy args into workset */
/* callme - always present */
ARM_MOV_REG_REG(p, ARMREG_R4, ARMREG_A1);
/* retval */
if (sig->ret->byref || string_ctor || (sig->ret->type != MONO_TYPE_VOID)) {
ARM_MOV_REG_REG(p, ARMREG_R5, ARMREG_A2);
}
/* this_obj */
if (sig->hasthis) {
this_loaded = 0;
if (stack_size == 0) {
ARM_MOV_REG_REG(p, ARMREG_A1, ARMREG_A3);
this_loaded = 1;
} else {
ARM_MOV_REG_REG(p, ARMREG_R6, ARMREG_A3);
}
}
/* args */
if (sig->param_count != 0) {
ARM_MOV_REG_REG(p, ARMREG_R7, ARMREG_A4);
}
stack_offs = stack_size;
/* handle arguments */
/* in reverse order so we could use r0 (arg1) for memory transfers */
for (i = sig->param_count; --i >= 0;) {
param = sig->params [i];
if (param->byref) {
if (i < aregs && reg_alloc[i] > 0) {
ARM_LDR_IMM(p, ARMREG_A1 + i, REG_ARGP, i*ARG_SIZE);
} else {
stack_offs -= sizeof(armword_t);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
}
} else {
simple_type = param->type;
enum_marshal:
switch (simple_type) {
case MONO_TYPE_BOOLEAN:
case MONO_TYPE_CHAR:
case MONO_TYPE_I1:
case MONO_TYPE_U1:
case MONO_TYPE_I2:
case MONO_TYPE_U2:
case MONO_TYPE_I4:
case MONO_TYPE_U4:
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_PTR:
case MONO_TYPE_R4:
case MONO_TYPE_SZARRAY:
case MONO_TYPE_CLASS:
case MONO_TYPE_OBJECT:
case MONO_TYPE_STRING:
if (i < aregs && reg_alloc [i] > 0) {
/* pass in register */
ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
} else {
stack_offs -= sizeof(armword_t);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
}
break;
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
if (i < aregs && reg_alloc [i] > 0) {
if (reg_alloc [i] > 1) {
/* pass in registers */
ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]) + 1, REG_ARGP, i*ARG_SIZE + 4);
} else {
stack_offs -= sizeof(armword_t);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
ARM_LDR_IMM(p, ARMREG_A1 + hasthis + (aregs - reg_alloc [i]), REG_ARGP, i*ARG_SIZE);
}
} else {
/* two words transferred on the stack */
stack_offs -= 2*sizeof(armword_t);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs + 4);
}
break;
case MONO_TYPE_VALUETYPE:
if (param->data.klass->enumtype) {
/* it's an enum value, proceed based on its base type */
simple_type = param->data.klass->enum_basetype->type;
goto enum_marshal;
} else {
if (i < aregs && reg_alloc[i] > 0) {
int vtreg = ARMREG_A1 + hasthis + (aregs - reg_alloc[i]);
ARM_LDR_IMM(p, vtreg, REG_ARGP, i * ARG_SIZE);
ARM_LDR_IMM(p, vtreg, vtreg, 0);
} else {
stack_offs -= sizeof(armword_t);
ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i * ARG_SIZE);
ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R0, 0);
ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
}
}
break;
default:
break;
}
}
}
if (sig->hasthis && !this_loaded) {
/* [this] always passed in A1, regardless of sig->call_convention */
ARM_MOV_REG_REG(p, ARMREG_A1, REG_THIS);
}
/* call [func] */
ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
ARM_MOV_REG_REG(p, ARMREG_PC, REG_FUNC_ADDR);
/* handle retval */
if (sig->ret->byref || string_ctor) {
ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
} else {
simple_type = sig->ret->type;
enum_retvalue:
switch (simple_type) {
case MONO_TYPE_BOOLEAN:
case MONO_TYPE_I1:
case MONO_TYPE_U1:
ARM_STRB_IMM(p, ARMREG_R0, REG_RETVAL, 0);
break;
case MONO_TYPE_CHAR:
case MONO_TYPE_I2:
case MONO_TYPE_U2:
ARM_STRH_IMM(p, ARMREG_R0, REG_RETVAL, 0);
break;
/*
* A 32-bit integer or integer-equivalent return value
* is returned in R0.
* Single-precision floating-point values are returned in R0.
*/
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_I4:
case MONO_TYPE_U4:
case MONO_TYPE_R4:
case MONO_TYPE_OBJECT:
case MONO_TYPE_CLASS:
case MONO_TYPE_ARRAY:
case MONO_TYPE_SZARRAY:
case MONO_TYPE_STRING:
ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
break;
/*
* A 64-bit integer is returned in R0 and R1.
* Double-precision floating-point values are returned in R0 and R1.
*/
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
ARM_STR_IMM(p, ARMREG_R1, REG_RETVAL, 4);
break;
case MONO_TYPE_VALUETYPE:
if (sig->ret->data.klass->enumtype) {
simple_type = sig->ret->data.klass->enum_basetype->type;
goto enum_retvalue;
}
break;
case MONO_TYPE_VOID:
break;
default:
break;
}
}
p = arm_emit_std_epilogue(p, stack_size,
/* restore R4-R7 */
(1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));
flush_icache();
#ifdef ARM_DUMP_DISASM
_armdis_decode((arminstr_t*)code_buff, ((guint8*)p) - ((guint8*)code_buff));
#endif
return code_buff;
}
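/*
 * Illustrative sketch (not part of the original source): invoking a trampoline
 * produced by mono_arch_create_trampoline. "target", "obj" and the argument
 * count are placeholders; the real call sites live in the interpreter.
 */
#if 0
{
	MonoPIFunc tramp = mono_arch_create_trampoline (sig, FALSE);
	stackval args [4];
	guint32 retval = 0;

	/* (callme, retval ptr, this ptr, argument array) - see the comment near the top of this file */
	tramp ((void (*)()) target, &retval, obj, args);
}
#endif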
#define MINV_OFFS(member) G_STRUCT_OFFSET(MonoInvocation, member)
/*
* Returns a pointer to a native function that can be used to
* call the specified method.
* The function created will receive the arguments according
* to the call convention specified in the method.
* This function works by creating a MonoInvocation structure,
* filling the fields in and calling ves_exec_method on it.
* Still need to figure out how to handle the exception stuff
* across the managed/unmanaged boundary.
*/
void* mono_arch_create_method_pointer (MonoMethod* method)
{
MonoMethodSignature* sig;
guchar* p, * p_method, * p_stackval_from_data, * p_exec;
void* code_buff;
int i, stack_size, arg_pos, arg_add, stackval_pos, offs;
int areg, reg_args, shift, pos;
MonoJitInfo *ji;
code_buff = alloc_code_buff(128);
p = (guchar*)code_buff;
sig = method->signature;
ARM_B(p, 3);
/* embed magic number followed by method pointer */
*p++ = 'M';
*p++ = 'o';
*p++ = 'n';
*p++ = 'o';
/* method ptr */
*(void**)p = method;
p_method = p;
p += 4;
/* call table */
*(void**)p = stackval_from_data;
p_stackval_from_data = p;
p += 4;
*(void**)p = ves_exec_method;
p_exec = p;
p += 4;
stack_size = sizeof(MonoInvocation) + ARG_SIZE*(sig->param_count + 1) + ARM_NUM_ARG_REGS*2*sizeof(armword_t);
/* prologue */
p = (guchar*)arm_emit_lean_prologue((arminstr_t*)p, stack_size,
(1 << ARMREG_R4) |
(1 << ARMREG_R5) |
(1 << ARMREG_R6) |
(1 << ARMREG_R7));
/* R7 - ptr to stack args */
ARM_MOV_REG_REG(p, ARMREG_R7, ARMREG_IP);
/*
* Initialize MonoInvocation fields, first the ones known now.
*/
ARM_MOV_REG_IMM8(p, ARMREG_R4, 0);
ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(ex));
ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(ex_handler));
ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(parent));
/* Set the method pointer. */
ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, -(int)(p - p_method + sizeof(arminstr_t)*2));
ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(method));
if (sig->hasthis) {
/* [this] in A1 */
ARM_STR_IMM(p, ARMREG_A1, ARMREG_SP, MINV_OFFS(obj));
} else {
/* else set minv.obj to NULL */
ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(obj));
}
/* copy args from registers to stack */
areg = ARMREG_A1 + sig->hasthis;
arg_pos = -(int)(ARM_NUM_ARG_REGS - sig->hasthis) * 2 * sizeof(armword_t);
arg_add = 0;
for (i = 0; i < sig->param_count; ++i) {
if (areg >= ARM_NUM_ARG_REGS) break;
ARM_STR_IMM(p, areg, ARMREG_R7, arg_pos);
++areg;
if (!sig->params[i]->byref) {
switch (sig->params[i]->type) {
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
if (areg >= ARM_NUM_ARG_REGS) {
/* load second half of 64-bit arg */
ARM_LDR_IMM(p, ARMREG_R4, ARMREG_R7, 0);
ARM_STR_IMM(p, ARMREG_R4, ARMREG_R7, arg_pos + sizeof(armword_t));
arg_add = sizeof(armword_t);
} else {
/* second half is already the register */
ARM_STR_IMM(p, areg, ARMREG_R7, arg_pos + sizeof(armword_t));
++areg;
}
break;
case MONO_TYPE_VALUETYPE:
/* assert */
default:
break;
}
}
arg_pos += 2 * sizeof(armword_t);
}
/* number of args passed in registers */
reg_args = i;
/*
* Calc and save stack args ptr,
* args follow MonoInvocation struct on the stack.
*/
ARM_ADD_REG_IMM8(p, ARMREG_R1, ARMREG_SP, sizeof(MonoInvocation));
ARM_STR_IMM(p, ARMREG_R1, ARMREG_SP, MINV_OFFS(stack_args));
/* convert method args to stackvals */
arg_pos = -(int)(ARM_NUM_ARG_REGS - sig->hasthis) * 2 * sizeof(armword_t);
stackval_pos = sizeof(MonoInvocation);
for (i = 0; i < sig->param_count; ++i) {
if (i < reg_args) {
ARM_SUB_REG_IMM8(p, ARMREG_A3, ARMREG_R7, -arg_pos);
arg_pos += 2 * sizeof(armword_t);
} else {
if (arg_pos < 0) arg_pos = 0;
pos = arg_pos + arg_add;
if (pos <= 0xFF) {
ARM_ADD_REG_IMM8(p, ARMREG_A3, ARMREG_R7, pos);
} else {
if (is_arm_const((armword_t)pos)) {
shift = calc_arm_mov_const_shift((armword_t)pos);
ARM_ADD_REG_IMM(p, ARMREG_A3, ARMREG_R7, pos >> ((32 - shift) & 31), shift >> 1);
} else {
p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R6, (armword_t)pos);
ARM_ADD_REG_REG(p, ARMREG_A3, ARMREG_R7, ARMREG_R6);
}
}
arg_pos += sizeof(armword_t);
if (!sig->params[i]->byref) {
switch (sig->params[i]->type) {
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
arg_pos += sizeof(armword_t);
break;
case MONO_TYPE_VALUETYPE:
/* assert */
default:
break;
}
}
}
/* A2 = result */
if (stackval_pos <= 0xFF) {
ARM_ADD_REG_IMM8(p, ARMREG_A2, ARMREG_SP, stackval_pos);
} else {
if (is_arm_const((armword_t)stackval_pos)) {
shift = calc_arm_mov_const_shift((armword_t)stackval_pos);
ARM_ADD_REG_IMM(p, ARMREG_A2, ARMREG_SP, stackval_pos >> ((32 - shift) & 31), shift >> 1);
} else {
p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R6, (armword_t)stackval_pos);
ARM_ADD_REG_REG(p, ARMREG_A2, ARMREG_SP, ARMREG_R6);
}
}
/* A1 = type */
p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_A1, (armword_t)sig->params [i]);
stackval_pos += ARG_SIZE;
offs = -(p + 2*sizeof(arminstr_t) - p_stackval_from_data);
/* load function address */
ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, offs);
/* call stackval_from_data */
ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
ARM_MOV_REG_REG(p, ARMREG_PC, ARMREG_R4);
}
/* store retval ptr */
p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R5, (armword_t)stackval_pos);
ARM_ADD_REG_REG(p, ARMREG_R5, ARMREG_SP, ARMREG_R5);
ARM_STR_IMM(p, ARMREG_R5, ARMREG_SP, MINV_OFFS(retval));
/*
* Call the method.
*/
/* A1 = MonoInvocation ptr */
ARM_MOV_REG_REG(p, ARMREG_A1, ARMREG_SP);
offs = -(p + 2*sizeof(arminstr_t) - p_exec);
/* load function address */
ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, offs);
/* call ves_exec */
ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
ARM_MOV_REG_REG(p, ARMREG_PC, ARMREG_R4);
/*
* Move retval into reg.
*/
if (sig->ret->byref) {
ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
} else {
switch (sig->ret->type) {
case MONO_TYPE_BOOLEAN:
case MONO_TYPE_I1:
case MONO_TYPE_U1:
ARM_LDRB_IMM(p, ARMREG_R0, ARMREG_R5, 0);
break;
case MONO_TYPE_CHAR:
case MONO_TYPE_I2:
case MONO_TYPE_U2:
ARM_LDRH_IMM(p, ARMREG_R0, ARMREG_R5, 0);
break;
case MONO_TYPE_I:
case MONO_TYPE_U:
case MONO_TYPE_I4:
case MONO_TYPE_U4:
case MONO_TYPE_R4:
case MONO_TYPE_OBJECT:
case MONO_TYPE_CLASS:
case MONO_TYPE_ARRAY:
case MONO_TYPE_SZARRAY:
ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
break;
case MONO_TYPE_I8:
case MONO_TYPE_U8:
case MONO_TYPE_R8:
ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
ARM_LDR_IMM(p, ARMREG_R1, ARMREG_R5, 4);
break;
case MONO_TYPE_VOID:
default:
break;
}
}
p = (guchar*)arm_emit_std_epilogue((arminstr_t*)p, stack_size,
(1 << ARMREG_R4) |
(1 << ARMREG_R5) |
(1 << ARMREG_R6) |
(1 << ARMREG_R7));
flush_icache();
#ifdef ARM_DUMP_DISASM
_armdis_decode((arminstr_t*)code_buff, ((guint8*)p) - ((guint8*)code_buff));
#endif
ji = g_new0(MonoJitInfo, 1);
ji->method = method;
ji->code_size = ((guint8 *) p) - ((guint8 *) code_buff);
ji->code_start = (gpointer) code_buff;
mono_jit_info_table_add(mono_get_root_domain (), ji);
return code_buff;
}
/*
* mono_create_method_pointer () will insert a pointer to the MonoMethod
* so that the interp can easily get at the data: this function will retrieve
* the method from the code stream.
*/
MonoMethod* mono_method_pointer_get (void* code)
{
unsigned char* c = code;
/* check out magic number that follows unconditional branch */
if (c[4] == 'M' &&
c[5] == 'o' &&
c[6] == 'n' &&
c[7] == 'o') return ((MonoMethod**)code)[2];
return NULL;
}
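/*
 * Illustrative sketch (assumption, not in the original source): the round trip
 * between the two helpers above.
 */
#if 0
{
	void *code = mono_arch_create_method_pointer (method);
	g_assert (mono_method_pointer_get (code) == method);
}
#endif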
#endif

@ -0,0 +1,15 @@
/* -- <Op> -- */
/* Fd := Fn <Op> Fm */
#define ARM_VFP_<Op>D_COND(p, rd, rn, rm, cond) \
ARM_EMIT((p), ARM_DEF_VFP_DYADIC(cond,ARM_VFP_COPROC_DOUBLE,ARM_VFP_<Op>,rd,rn,rm))
#define ARM_VFP_<Op>D(p, rd, rn, rm) \
ARM_VFP_<Op>D_COND(p, rd, rn, rm, ARMCOND_AL)
#define ARM_VFP_<Op>S_COND(p, rd, rn, rm, cond) \
ARM_EMIT((p), ARM_DEF_VFP_DYADIC(cond,ARM_VFP_COPROC_SINGLE,ARM_VFP_<Op>,rd,rn,rm))
#define ARM_VFP_<Op>S(p, rd, rn, rm) \
ARM_VFP_<Op>S_COND(p, rd, rn, rm, ARMCOND_AL)
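/*
 * Illustrative note (assumption, not part of the original template): the VFP
 * generator script below substitutes <Op> with each dyadic opcode, so for ADD
 * this template expands to, for example:
 *
 *   #define ARM_VFP_ADDD(p, rd, rn, rm) ARM_VFP_ADDD_COND(p, rd, rn, rm, ARMCOND_AL)
 *   #define ARM_VFP_ADDS(p, rd, rn, rm) ARM_VFP_ADDS_COND(p, rd, rn, rm, ARMCOND_AL)
 */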

@ -0,0 +1,14 @@
/* -- <Op> -- */
/* Fd := <Op> Fm */
#define ARM_<Op>D_COND(p,dreg,sreg,cond) \
ARM_EMIT((p), ARM_DEF_VFP_MONADIC((cond),ARM_VFP_COPROC_DOUBLE,ARM_VFP_<Op>,(dreg),(sreg)))
#define ARM_<Op>D(p,dreg,sreg) ARM_<Op>D_COND(p,dreg,sreg,ARMCOND_AL)
#define ARM_<Op>S_COND(p,dreg,sreg,cond) \
ARM_EMIT((p), ARM_DEF_VFP_MONADIC((cond),ARM_VFP_COPROC_SINGLE,ARM_VFP_<Op>,(dreg),(sreg)))
#define ARM_<Op>S(p,dreg,sreg) ARM_<Op>S_COND(p,dreg,sreg,ARMCOND_AL)

@ -0,0 +1,24 @@
#!/bin/sh
DYADIC="ADD SUB MUL NMUL DIV"
MONADIC="CPY ABS NEG SQRT CMP CMPE CMPZ CMPEZ CVT UITO SITO TOUI TOSI TOUIZ TOSIZ"
# $1: opcode list
# $2: template
gen() {
for i in $1; do
sed "s/<Op>/$i/g" $2.th
done
}
echo -e "/* Macros for VFP ops, auto-generated from template */\n"
echo -e "\n/* dyadic */\n"
gen "$DYADIC" vfp_macros
echo -e "\n/* monadic */\n"
gen "$MONADIC" vfpm_macros
echo -e "\n\n"
echo -e "\n/* end generated */\n"

@ -0,0 +1,6 @@
/
/Makefile
/Makefile.in
/*.o
/*.lo
/.deps

@ -0,0 +1,3 @@
#include "../../../../mono-extensions/mono/arch/arm64/arm64-codegen.h"

@ -0,0 +1,2 @@
/Makefile
/Makefile.in

@ -0,0 +1,3 @@
EXTRA_DIST = ia64-codegen.h

@ -0,0 +1,861 @@
/*
* codegen.c: Tests for the IA64 code generation macros
*/
#include <glib.h>
#include <stdio.h>
#include <ctype.h>
#define IA64_SIMPLE_EMIT_BUNDLE
#include <mono/arch/ia64/ia64-codegen.h>
void
mono_disassemble_code (guint8 *code, int size, char *id)
{
int i;
FILE *ofd;
const char *tmp = g_get_tmp_dir ();
const char *objdump_args = g_getenv ("MONO_OBJDUMP_ARGS");
char *as_file;
char *o_file;
char *cmd;
as_file = g_strdup_printf ("%s/test.s", tmp);
if (!(ofd = fopen (as_file, "w")))
g_assert_not_reached ();
for (i = 0; id [i]; ++i) {
if (!isalnum (id [i]))
fprintf (ofd, "_");
else
fprintf (ofd, "%c", id [i]);
}
fprintf (ofd, ":\n");
for (i = 0; i < size; ++i)
fprintf (ofd, ".byte %d\n", (unsigned int) code [i]);
fclose (ofd);
#ifdef __ia64__
#define DIS_CMD "objdump -d"
#define AS_CMD "as"
#else
#define DIS_CMD "ia64-linux-gnu-objdump -d"
#define AS_CMD "ia64-linux-gnu-as"
#endif
o_file = g_strdup_printf ("%s/test.o", tmp);
cmd = g_strdup_printf (AS_CMD " %s -o %s", as_file, o_file);
system (cmd);
g_free (cmd);
if (!objdump_args)
objdump_args = "";
cmd = g_strdup_printf (DIS_CMD " %s %s", objdump_args, o_file);
system (cmd);
g_free (cmd);
g_free (o_file);
g_free (as_file);
}
int
main ()
{
Ia64CodegenState code;
guint8 *buf = g_malloc0 (40960);
ia64_codegen_init (code, buf);
ia64_add (code, 1, 2, 3);
ia64_add1 (code, 1, 2, 3);
ia64_sub (code, 1, 2, 3);
ia64_sub1 (code, 1, 2, 3);
ia64_addp4 (code, 1, 2, 3);
ia64_and (code, 1, 2, 3);
ia64_andcm (code, 1, 2, 3);
ia64_or (code, 1, 2, 3);
ia64_xor (code, 1, 2, 3);
ia64_shladd (code, 1, 2, 3, 4);
ia64_shladdp4 (code, 1, 2, 3, 4);
ia64_sub_imm (code, 1, 0x7f, 2);
ia64_sub_imm (code, 1, -1, 2);
ia64_and_imm (code, 1, -128, 2);
ia64_andcm_imm (code, 1, -128, 2);
ia64_or_imm (code, 1, -128, 2);
ia64_xor_imm (code, 1, -128, 2);
ia64_adds_imm (code, 1, 8191, 2);
ia64_adds_imm (code, 1, -8192, 2);
ia64_adds_imm (code, 1, 1234, 2);
ia64_adds_imm (code, 1, -1234, 2);
ia64_addp4_imm (code, 1, -1234, 2);
ia64_addl_imm (code, 1, 1234, 2);
ia64_addl_imm (code, 1, -1234, 2);
ia64_addl_imm (code, 1, 2097151, 2);
ia64_addl_imm (code, 1, -2097152, 2);
ia64_cmp_lt (code, 1, 2, 1, 2);
ia64_cmp_ltu (code, 1, 2, 1, 2);
ia64_cmp_eq (code, 1, 2, 1, 2);
ia64_cmp_lt_unc (code, 1, 2, 1, 2);
ia64_cmp_ltu_unc (code, 1, 2, 1, 2);
ia64_cmp_eq_unc (code, 1, 2, 1, 2);
ia64_cmp_eq_and (code, 1, 2, 1, 2);
ia64_cmp_eq_or (code, 1, 2, 1, 2);
ia64_cmp_eq_or_andcm (code, 1, 2, 1, 2);
ia64_cmp_ne_and (code, 1, 2, 1, 2);
ia64_cmp_ne_or (code, 1, 2, 1, 2);
ia64_cmp_ne_or_andcm (code, 1, 2, 1, 2);
ia64_cmp4_lt (code, 1, 2, 1, 2);
ia64_cmp4_ltu (code, 1, 2, 1, 2);
ia64_cmp4_eq (code, 1, 2, 1, 2);
ia64_cmp4_lt_unc (code, 1, 2, 1, 2);
ia64_cmp4_ltu_unc (code, 1, 2, 1, 2);
ia64_cmp4_eq_unc (code, 1, 2, 1, 2);
ia64_cmp4_eq_and (code, 1, 2, 1, 2);
ia64_cmp4_eq_or (code, 1, 2, 1, 2);
ia64_cmp4_eq_or_andcm (code, 1, 2, 1, 2);
ia64_cmp4_ne_and (code, 1, 2, 1, 2);
ia64_cmp4_ne_or (code, 1, 2, 1, 2);
ia64_cmp4_ne_or_andcm (code, 1, 2, 1, 2);
ia64_cmp_gt_and (code, 1, 2, 0, 2);
ia64_cmp_gt_or (code, 1, 2, 0, 2);
ia64_cmp_gt_or_andcm (code, 1, 2, 0, 2);
ia64_cmp_le_and (code, 1, 2, 0, 2);
ia64_cmp_le_or (code, 1, 2, 0, 2);
ia64_cmp_le_or_andcm (code, 1, 2, 0, 2);
ia64_cmp_ge_and (code, 1, 2, 0, 2);
ia64_cmp_ge_or (code, 1, 2, 0, 2);
ia64_cmp_ge_or_andcm (code, 1, 2, 0, 2);
ia64_cmp_lt_and (code, 1, 2, 0, 2);
ia64_cmp_lt_or (code, 1, 2, 0, 2);
ia64_cmp_lt_or_andcm (code, 1, 2, 0, 2);
ia64_cmp4_gt_and (code, 1, 2, 0, 2);
ia64_cmp4_gt_or (code, 1, 2, 0, 2);
ia64_cmp4_gt_or_andcm (code, 1, 2, 0, 2);
ia64_cmp4_le_and (code, 1, 2, 0, 2);
ia64_cmp4_le_or (code, 1, 2, 0, 2);
ia64_cmp4_le_or_andcm (code, 1, 2, 0, 2);
ia64_cmp4_ge_and (code, 1, 2, 0, 2);
ia64_cmp4_ge_or (code, 1, 2, 0, 2);
ia64_cmp4_ge_or_andcm (code, 1, 2, 0, 2);
ia64_cmp4_lt_and (code, 1, 2, 0, 2);
ia64_cmp4_lt_or (code, 1, 2, 0, 2);
ia64_cmp4_lt_or_andcm (code, 1, 2, 0, 2);
ia64_cmp_lt_imm (code, 1, 2, 127, 2);
ia64_cmp_lt_imm (code, 1, 2, -128, 2);
ia64_cmp_lt_imm (code, 1, 2, -128, 2);
ia64_cmp_ltu_imm (code, 1, 2, -128, 2);
ia64_cmp_eq_imm (code, 1, 2, -128, 2);
ia64_cmp_lt_unc_imm (code, 1, 2, -128, 2);
ia64_cmp_ltu_unc_imm (code, 1, 2, -128, 2);
ia64_cmp_eq_unc_imm (code, 1, 2, -128, 2);
ia64_cmp_eq_and_imm (code, 1, 2, -128, 2);
ia64_cmp_eq_or_imm (code, 1, 2, -128, 2);
ia64_cmp_eq_unc_imm (code, 1, 2, -128, 2);
ia64_cmp_ne_and_imm (code, 1, 2, -128, 2);
ia64_cmp_ne_or_imm (code, 1, 2, -128, 2);
ia64_cmp_ne_or_andcm_imm (code, 1, 2, -128, 2);
ia64_cmp4_lt_imm (code, 1, 2, -128, 2);
ia64_cmp4_ltu_imm (code, 1, 2, -128, 2);
ia64_cmp4_eq_imm (code, 1, 2, -128, 2);
ia64_cmp4_lt_unc_imm (code, 1, 2, -128, 2);
ia64_cmp4_ltu_unc_imm (code, 1, 2, -128, 2);
ia64_cmp4_eq_unc_imm (code, 1, 2, -128, 2);
ia64_cmp4_eq_and_imm (code, 1, 2, -128, 2);
ia64_cmp4_eq_or_imm (code, 1, 2, -128, 2);
ia64_cmp4_eq_unc_imm (code, 1, 2, -128, 2);
ia64_cmp4_ne_and_imm (code, 1, 2, -128, 2);
ia64_cmp4_ne_or_imm (code, 1, 2, -128, 2);
ia64_cmp4_ne_or_andcm_imm (code, 1, 2, -128, 2);
ia64_padd1 (code, 1, 2, 3);
ia64_padd2 (code, 1, 2, 3);
ia64_padd4 (code, 1, 2, 3);
ia64_padd1_sss (code, 1, 2, 3);
ia64_padd2_sss (code, 1, 2, 3);
ia64_padd1_uuu (code, 1, 2, 3);
ia64_padd2_uuu (code, 1, 2, 3);
ia64_padd1_uus (code, 1, 2, 3);
ia64_padd2_uus (code, 1, 2, 3);
ia64_psub1 (code, 1, 2, 3);
ia64_psub2 (code, 1, 2, 3);
ia64_psub4 (code, 1, 2, 3);
ia64_psub1_sss (code, 1, 2, 3);
ia64_psub2_sss (code, 1, 2, 3);
ia64_psub1_uuu (code, 1, 2, 3);
ia64_psub2_uuu (code, 1, 2, 3);
ia64_psub1_uus (code, 1, 2, 3);
ia64_psub2_uus (code, 1, 2, 3);
ia64_pavg1 (code, 1, 2, 3);
ia64_pavg2 (code, 1, 2, 3);
ia64_pavg1_raz (code, 1, 2, 3);
ia64_pavg2_raz (code, 1, 2, 3);
ia64_pavgsub1 (code, 1, 2, 3);
ia64_pavgsub2 (code, 1, 2, 3);
ia64_pcmp1_eq (code, 1, 2, 3);
ia64_pcmp2_eq (code, 1, 2, 3);
ia64_pcmp4_eq (code, 1, 2, 3);
ia64_pcmp1_gt (code, 1, 2, 3);
ia64_pcmp2_gt (code, 1, 2, 3);
ia64_pcmp4_gt (code, 1, 2, 3);
ia64_pshladd2 (code, 1, 2, 3, 4);
ia64_pshradd2 (code, 1, 2, 3, 4);
ia64_pmpyshr2 (code, 1, 2, 3, 0);
ia64_pmpyshr2_u (code, 1, 2, 3, 0);
ia64_pmpyshr2 (code, 1, 2, 3, 7);
ia64_pmpyshr2_u (code, 1, 2, 3, 7);
ia64_pmpyshr2 (code, 1, 2, 3, 15);
ia64_pmpyshr2_u (code, 1, 2, 3, 15);
ia64_pmpyshr2 (code, 1, 2, 3, 16);
ia64_pmpyshr2_u (code, 1, 2, 3, 16);
ia64_pmpy2_r (code, 1, 2, 3);
ia64_pmpy2_l (code, 1, 2, 3);
ia64_mix1_r (code, 1, 2, 3);
ia64_mix2_r (code, 1, 2, 3);
ia64_mix4_r (code, 1, 2, 3);
ia64_mix1_l (code, 1, 2, 3);
ia64_mix2_l (code, 1, 2, 3);
ia64_mix4_l (code, 1, 2, 3);
ia64_pack2_uss (code, 1, 2, 3);
ia64_pack2_sss (code, 1, 2, 3);
ia64_pack4_sss (code, 1, 2, 3);
ia64_unpack1_h (code, 1, 2, 3);
ia64_unpack2_h (code, 1, 2, 3);
ia64_unpack4_h (code, 1, 2, 3);
ia64_unpack1_l (code, 1, 2, 3);
ia64_unpack2_l (code, 1, 2, 3);
ia64_unpack4_l (code, 1, 2, 3);
ia64_pmin1_u (code, 1, 2, 3);
ia64_pmax1_u (code, 1, 2, 3);
ia64_pmin2 (code, 1, 2, 3);
ia64_pmax2 (code, 1, 2, 3);
ia64_psad1 (code, 1, 2, 3);
ia64_mux1 (code, 1, 2, IA64_MUX1_BRCST);
ia64_mux1 (code, 1, 2, IA64_MUX1_MIX);
ia64_mux1 (code, 1, 2, IA64_MUX1_SHUF);
ia64_mux1 (code, 1, 2, IA64_MUX1_ALT);
ia64_mux1 (code, 1, 2, IA64_MUX1_REV);
ia64_mux2 (code, 1, 2, 0x8d);
ia64_pshr2 (code, 1, 2, 3);
ia64_pshr4 (code, 1, 2, 3);
ia64_shr (code, 1, 2, 3);
ia64_pshr2_u (code, 1, 2, 3);
ia64_pshr4_u (code, 1, 2, 3);
ia64_shr_u (code, 1, 2, 3);
ia64_pshr2_imm (code, 1, 2, 20);
ia64_pshr4_imm (code, 1, 2, 20);
ia64_pshr2_u_imm (code, 1, 2, 20);
ia64_pshr4_u_imm (code, 1, 2, 20);
ia64_pshl2 (code, 1, 2, 3);
ia64_pshl4 (code, 1, 2, 3);
ia64_shl (code, 1, 2, 3);
ia64_pshl2_imm (code, 1, 2, 20);
ia64_pshl4_imm (code, 1, 2, 20);
ia64_popcnt (code, 1, 2);
ia64_shrp (code, 1, 2, 3, 62);
ia64_extr_u (code, 1, 2, 62, 61);
ia64_extr (code, 1, 2, 62, 61);
ia64_dep_z (code, 1, 2, 62, 61);
ia64_dep_z_imm (code, 1, 127, 62, 61);
ia64_dep_z_imm (code, 1, -128, 62, 61);
ia64_dep_imm (code, 1, 0, 2, 62, 61);
ia64_dep_imm (code, 1, -1, 2, 62, 61);
ia64_dep (code, 1, 2, 3, 10, 15);
ia64_tbit_z (code, 1, 2, 3, 0);
ia64_tbit_z (code, 1, 2, 3, 63);
ia64_tbit_z_unc (code, 1, 2, 3, 63);
ia64_tbit_z_and (code, 1, 2, 3, 63);
ia64_tbit_nz_and (code, 1, 2, 3, 63);
ia64_tbit_z_or (code, 1, 2, 3, 63);
ia64_tbit_nz_or (code, 1, 2, 3, 63);
ia64_tbit_z_or_andcm (code, 1, 2, 3, 63);
ia64_tbit_nz_or_andcm (code, 1, 2, 3, 63);
ia64_tnat_z (code, 1, 2, 3);
ia64_tnat_z_unc (code, 1, 2, 3);
ia64_tnat_z_and (code, 1, 2, 3);
ia64_tnat_nz_and (code, 1, 2, 3);
ia64_tnat_z_or (code, 1, 2, 3);
ia64_tnat_nz_or (code, 1, 2, 3);
ia64_tnat_z_or_andcm (code, 1, 2, 3);
ia64_tnat_nz_or_andcm (code, 1, 2, 3);
ia64_nop_i (code, 0x1234);
ia64_hint_i (code, 0x1234);
ia64_break_i (code, 0x1234);
ia64_chk_s_i (code, 1, 0);
ia64_chk_s_i (code, 1, -1);
ia64_chk_s_i (code, 1, 1);
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_NONE, 0);
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_SPTK, 0);
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_DPTK, 0);
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_DPTK, IA64_BR_IH_IMP);
ia64_mov_ret_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_NONE, 0);
ia64_mov_from_br (code, 1, 1);
ia64_mov_to_pred (code, 1, 0xfe);
ia64_mov_to_pred_rot_imm (code, 0xff0000);
ia64_mov_from_ip (code, 1);
ia64_mov_from_pred (code, 1);
ia64_mov_to_ar_i (code, 1, 1);
ia64_mov_to_ar_imm_i (code, 1, 127);
ia64_mov_from_ar_i (code, 1, 1);
ia64_zxt1 (code, 1, 2);
ia64_zxt2 (code, 1, 2);
ia64_zxt4 (code, 1, 2);
ia64_sxt1 (code, 1, 2);
ia64_sxt2 (code, 1, 2);
ia64_sxt4 (code, 1, 2);
ia64_czx1_l (code, 1, 2);
ia64_czx2_l (code, 1, 2);
ia64_czx1_r (code, 1, 2);
ia64_czx2_r (code, 1, 2);
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NONE);
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NT1);
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NTA);
ia64_ld1_hint (code, 1, 2, 0);
ia64_ld2_hint (code, 1, 2, 0);
ia64_ld4_hint (code, 1, 2, 0);
ia64_ld8_hint (code, 1, 2, 0);
ia64_ld1_s_hint (code, 1, 2, 0);
ia64_ld2_s_hint (code, 1, 2, 0);
ia64_ld4_s_hint (code, 1, 2, 0);
ia64_ld8_s_hint (code, 1, 2, 0);
ia64_ld1_a_hint (code, 1, 2, 0);
ia64_ld2_a_hint (code, 1, 2, 0);
ia64_ld4_a_hint (code, 1, 2, 0);
ia64_ld8_a_hint (code, 1, 2, 0);
ia64_ld1_sa_hint (code, 1, 2, 0);
ia64_ld2_sa_hint (code, 1, 2, 0);
ia64_ld4_sa_hint (code, 1, 2, 0);
ia64_ld8_sa_hint (code, 1, 2, 0);
ia64_ld1_bias_hint (code, 1, 2, 0);
ia64_ld2_bias_hint (code, 1, 2, 0);
ia64_ld4_bias_hint (code, 1, 2, 0);
ia64_ld8_bias_hint (code, 1, 2, 0);
ia64_ld1_inc_hint (code, 1, 2, 3, IA64_LD_HINT_NONE);
ia64_ld1_inc_imm_hint (code, 1, 2, 255, IA64_LD_HINT_NONE);
ia64_ld1_inc_imm_hint (code, 1, 2, -256, IA64_LD_HINT_NONE);
ia64_st1_hint (code, 1, 2, IA64_ST_HINT_NTA);
ia64_st1_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st2_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st4_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st8_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st1_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st2_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st4_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st8_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st8_spill_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st16_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st16_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
ia64_st1_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st2_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st4_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st8_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st1_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st2_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st4_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st8_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_st8_spill_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
ia64_ldfs_hint (code, 1, 2, 0);
ia64_ldfd_hint (code, 1, 2, 0);
ia64_ldf8_hint (code, 1, 2, 0);
ia64_ldfe_hint (code, 1, 2, 0);
ia64_ldfs_s_hint (code, 1, 2, 0);
ia64_ldfd_s_hint (code, 1, 2, 0);
ia64_ldf8_s_hint (code, 1, 2, 0);
ia64_ldfe_s_hint (code, 1, 2, 0);
ia64_ldfs_a_hint (code, 1, 2, 0);
ia64_ldfd_a_hint (code, 1, 2, 0);
ia64_ldf8_a_hint (code, 1, 2, 0);
ia64_ldfe_a_hint (code, 1, 2, 0);
ia64_ldfs_sa_hint (code, 1, 2, 0);
ia64_ldfd_sa_hint (code, 1, 2, 0);
ia64_ldf8_sa_hint (code, 1, 2, 0);
ia64_ldfe_sa_hint (code, 1, 2, 0);
ia64_ldfs_c_clr_hint (code, 1, 2, 0);
ia64_ldfd_c_clr_hint (code, 1, 2, 0);
ia64_ldf8_c_clr_hint (code, 1, 2, 0);
ia64_ldfe_c_clr_hint (code, 1, 2, 0);
ia64_ldfs_c_nc_hint (code, 1, 2, 0);
ia64_ldfd_c_nc_hint (code, 1, 2, 0);
ia64_ldf8_c_nc_hint (code, 1, 2, 0);
ia64_ldfe_c_nc_hint (code, 1, 2, 0);
ia64_ldf_fill_hint (code, 1, 2, 0);
ia64_ldfs_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldfd_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldf8_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldfe_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldf_fill_inc_hint (code, 1, 2, 3, 0);
ia64_ldfs_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfs_s_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_s_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_s_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_s_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfs_a_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_a_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_a_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_a_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfs_sa_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_sa_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_sa_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_sa_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfs_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfs_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfd_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf8_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfe_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldf_fill_inc_imm_hint (code, 1, 2, 255, 0);
ia64_stfs_hint (code, 1, 2, 0);
ia64_stfd_hint (code, 1, 2, 0);
ia64_stf8_hint (code, 1, 2, 0);
ia64_stfe_hint (code, 1, 2, 0);
ia64_stf_spill_hint (code, 1, 2, 0);
ia64_stfs_inc_imm_hint (code, 1, 2, 255, 0);
ia64_stfd_inc_imm_hint (code, 1, 2, 255, 0);
ia64_stf8_inc_imm_hint (code, 1, 2, 255, 0);
ia64_stfe_inc_imm_hint (code, 1, 2, 255, 0);
ia64_stf_spill_inc_imm_hint (code, 1, 2, 255, 0);
ia64_ldfps_hint (code, 1, 2, 3, 0);
ia64_ldfpd_hint (code, 1, 2, 3, 0);
ia64_ldfp8_hint (code, 1, 2, 3, 0);
ia64_ldfps_s_hint (code, 1, 2, 3, 0);
ia64_ldfpd_s_hint (code, 1, 2, 3, 0);
ia64_ldfp8_s_hint (code, 1, 2, 3, 0);
ia64_ldfps_a_hint (code, 1, 2, 3, 0);
ia64_ldfpd_a_hint (code, 1, 2, 3, 0);
ia64_ldfp8_a_hint (code, 1, 2, 3, 0);
ia64_ldfps_sa_hint (code, 1, 2, 3, 0);
ia64_ldfpd_sa_hint (code, 1, 2, 3, 0);
ia64_ldfp8_sa_hint (code, 1, 2, 3, 0);
ia64_ldfps_c_clr_hint (code, 1, 2, 3, 0);
ia64_ldfpd_c_clr_hint (code, 1, 2, 3, 0);
ia64_ldfp8_c_clr_hint (code, 1, 2, 3, 0);
ia64_ldfps_c_nc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_c_nc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_c_nc_hint (code, 1, 2, 3, 0);
ia64_ldfps_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_inc_hint (code, 1, 2, 3, 0);
ia64_ldfps_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_s_inc_hint (code, 1, 2, 3, 0);
ia64_ldfps_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_a_inc_hint (code, 1, 2, 3, 0);
ia64_ldfps_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_sa_inc_hint (code, 1, 2, 3, 0);
ia64_ldfps_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_c_clr_inc_hint (code, 1, 2, 3, 0);
ia64_ldfps_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldfpd_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_ldfp8_c_nc_inc_hint (code, 1, 2, 3, 0);
ia64_lfetch_hint (code, 1, 0);
ia64_lfetch_excl_hint (code, 1, 0);
ia64_lfetch_fault_hint (code, 1, 0);
ia64_lfetch_fault_excl_hint (code, 1, 0);
ia64_lfetch_hint (code, 1, IA64_LFHINT_NT1);
ia64_lfetch_hint (code, 1, IA64_LFHINT_NT2);
ia64_lfetch_hint (code, 1, IA64_LFHINT_NTA);
ia64_lfetch_inc_hint (code, 1, 2, 0);
ia64_lfetch_excl_inc_hint (code, 1, 2, 0);
ia64_lfetch_fault_inc_hint (code, 1, 2, 0);
ia64_lfetch_fault_excl_inc_hint (code, 1, 2, 0);
ia64_lfetch_inc_imm_hint (code, 1, 255, 0);
ia64_lfetch_excl_inc_imm_hint (code, 1, 255, 0);
ia64_lfetch_fault_inc_imm_hint (code, 1, 255, 0);
ia64_lfetch_fault_excl_inc_imm_hint (code, 1, 255, 0);
ia64_cmpxchg1_acq_hint (code, 1, 2, 3, 0);
ia64_cmpxchg2_acq_hint (code, 1, 2, 3, 0);
ia64_cmpxchg4_acq_hint (code, 1, 2, 3, 0);
ia64_cmpxchg8_acq_hint (code, 1, 2, 3, 0);
ia64_cmpxchg1_rel_hint (code, 1, 2, 3, 0);
ia64_cmpxchg2_rel_hint (code, 1, 2, 3, 0);
ia64_cmpxchg4_rel_hint (code, 1, 2, 3, 0);
ia64_cmpxchg8_rel_hint (code, 1, 2, 3, 0);
ia64_cmpxchg16_acq_hint (code, 1, 2, 3, 0);
ia64_cmpxchg16_rel_hint (code, 1, 2, 3, 0);
ia64_xchg1_hint (code, 1, 2, 3, 0);
ia64_xchg2_hint (code, 1, 2, 3, 0);
ia64_xchg4_hint (code, 1, 2, 3, 0);
ia64_xchg8_hint (code, 1, 2, 3, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, -16, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, -8, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, -4, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, -1, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, 1, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, 4, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, 8, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, 16, 0);
ia64_fetchadd4_acq_hint (code, 1, 2, 16, 0);
ia64_fetchadd8_acq_hint (code, 1, 2, 16, 0);
ia64_fetchadd4_rel_hint (code, 1, 2, 16, 0);
ia64_fetchadd8_rel_hint (code, 1, 2, 16, 0);
ia64_setf_sig (code, 1, 2);
ia64_setf_exp (code, 1, 2);
ia64_setf_s (code, 1, 2);
ia64_setf_d (code, 1, 2);
ia64_getf_sig (code, 1, 2);
ia64_getf_exp (code, 1, 2);
ia64_getf_s (code, 1, 2);
ia64_getf_d (code, 1, 2);
ia64_chk_s_m (code, 1, 0);
ia64_chk_s_m (code, 1, 1);
ia64_chk_s_m (code, 1, -1);
ia64_chk_s_float_m (code, 1, 0);
ia64_chk_a_nc (code, 1, 0);
ia64_chk_a_nc (code, 1, 1);
ia64_chk_a_nc (code, 1, -1);
ia64_chk_a_nc (code, 1, 0);
ia64_chk_a_clr (code, 1, 0);
ia64_chk_a_nc_float (code, 1, 0);
ia64_chk_a_clr_float (code, 1, 0);
ia64_invala (code);
ia64_fwb (code);
ia64_mf (code);
ia64_mf_a (code);
ia64_srlz_d (code);
ia64_stlz_i (code);
ia64_sync_i (code);
ia64_flushrs (code);
ia64_loadrs (code);
ia64_invala_e (code, 1);
ia64_invala_e_float (code, 1);
ia64_fc (code, 1);
ia64_fc_i (code, 1);
ia64_mov_to_ar_m (code, 1, 1);
ia64_mov_to_ar_imm_m (code, 1, 127);
ia64_mov_from_ar_m (code, 1, 1);
ia64_mov_to_cr (code, 1, 2);
ia64_mov_from_cr (code, 1, 2);
ia64_alloc (code, 1, 3, 4, 5, 0);
ia64_alloc (code, 1, 3, 4, 5, 8);
ia64_mov_to_psr_l (code, 1);
ia64_mov_to_psr_um (code, 1);
ia64_mov_from_psr (code, 1);
ia64_mov_from_psr_um (code, 1);
ia64_break_m (code, 0x1234);
ia64_nop_m (code, 0x1234);
ia64_hint_m (code, 0x1234);
ia64_br_cond_hint (code, 0, 0, 0, 0);
ia64_br_wexit_hint (code, 0, 0, 0, 0);
ia64_br_wtop_hint (code, 0, 0, 0, 0);
ia64_br_cloop_hint (code, 0, 0, 0, 0);
ia64_br_cexit_hint (code, 0, 0, 0, 0);
ia64_br_ctop_hint (code, 0, 0, 0, 0);
ia64_br_call_hint (code, 1, 0, 0, 0, 0);
ia64_br_cond_reg_hint (code, 1, 0, 0, 0);
ia64_br_ia_reg_hint (code, 1, 0, 0, 0);
ia64_br_ret_reg_hint (code, 1, 0, 0, 0);
ia64_br_call_reg_hint (code, 1, 2, 0, 0, 0);
ia64_cover (code);
ia64_clrrrb (code);
ia64_clrrrb_pr (code);
ia64_rfi (code);
ia64_bsw_0 (code);
ia64_bsw_1 (code);
ia64_epc (code);
ia64_break_b (code, 0x1234);
ia64_nop_b (code, 0x1234);
ia64_hint_b (code, 0x1234);
ia64_break_x (code, 0x2123456789ABCDEFULL);
ia64_movl (code, 1, 0x123456789ABCDEF0LL);
ia64_brl_cond_hint (code, 0, 0, 0, 0);
ia64_brl_cond_hint (code, -1, 0, 0, 0);
ia64_brl_call_hint (code, 1, 0, 0, 0, 0);
ia64_brl_call_hint (code, 1, -1, 0, 0, 0);
ia64_nop_x (code, 0x2123456789ABCDEFULL);
ia64_hint_x (code, 0x2123456789ABCDEFULL);
ia64_movl_pred (code, 1, 1, 0x123456789ABCDEF0LL);
/* FLOATING-POINT */
ia64_fma_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fma_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fma_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fpma_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fms_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fms_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fms_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fpms_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fnma_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fnma_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fnma_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_fpnma_sf_pred (code, 1, 1, 2, 3, 4, 2);
ia64_xma_l_pred (code, 1, 1, 2, 3, 4);
ia64_xma_h_pred (code, 1, 1, 2, 3, 4);
ia64_xma_hu_pred (code, 1, 1, 2, 3, 4);
ia64_fselect_pred (code, 1, 1, 2, 3, 4);
ia64_fcmp_eq_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_lt_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_le_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_unord_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_eq_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_lt_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_le_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fcmp_unord_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fclass_m_pred (code, 1, 1, 2, 3, 0x1ff);
ia64_fclass_m_unc_pred (code, 1, 1, 2, 3, 0x1ff);
ia64_frcpa_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_fprcpa_sf_pred (code, 1, 1, 2, 3, 4, 0);
ia64_frsqrta_sf_pred (code, 1, 1, 2, 4, 0);
ia64_fprsqrta_sf_pred (code, 1, 1, 2, 4, 0);
ia64_fmin_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fman_sf_pred (code, 1, 2, 3, 4, 0);
ia64_famin_sf_pred (code, 1, 2, 3, 4, 0);
ia64_famax_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpmin_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpman_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpamin_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpamax_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_eq_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_lt_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_le_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_unord_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_neq_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_nlt_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_nle_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fpcmp_ord_sf_pred (code, 1, 2, 3, 4, 0);
ia64_fmerge_s_pred (code, 1, 2, 3, 4);
ia64_fmerge_ns_pred (code, 1, 2, 3, 4);
ia64_fmerge_se_pred (code, 1, 2, 3, 4);
ia64_fmix_lr_pred (code, 1, 2, 3, 4);
ia64_fmix_r_pred (code, 1, 2, 3, 4);
ia64_fmix_l_pred (code, 1, 2, 3, 4);
ia64_fsxt_r_pred (code, 1, 2, 3, 4);
ia64_fsxt_l_pred (code, 1, 2, 3, 4);
ia64_fpack_pred (code, 1, 2, 3, 4);
ia64_fswap_pred (code, 1, 2, 3, 4);
ia64_fswap_nl_pred (code, 1, 2, 3, 4);
ia64_fswap_nr_pred (code, 1, 2, 3, 4);
ia64_fand_pred (code, 1, 2, 3, 4);
ia64_fandcm_pred (code, 1, 2, 3, 4);
ia64_for_pred (code, 1, 2, 3, 4);
ia64_fxor_pred (code, 1, 2, 3, 4);
ia64_fpmerge_s_pred (code, 1, 2, 3, 4);
ia64_fpmerge_ns_pred (code, 1, 2, 3, 4);
ia64_fpmerge_se_pred (code, 1, 2, 3, 4);
ia64_fcvt_fx_sf_pred ((code), 1, 2, 3, 0);
ia64_fcvt_fxu_sf_pred ((code), 1, 2, 3, 0);
ia64_fcvt_fx_trunc_sf_pred ((code), 1, 2, 3, 0);
ia64_fcvt_fxu_trunc_sf_pred ((code), 1, 2, 3, 0);
ia64_fpcvt_fx_sf_pred ((code), 1, 2, 3, 0);
ia64_fpcvt_fxu_sf_pred ((code), 1, 2, 3, 0);
ia64_fpcvt_fx_trunc_sf_pred ((code), 1, 2, 3, 0);
ia64_fpcvt_fxu_trunc_sf_pred ((code), 1, 2, 3, 0);
ia64_fcvt_xf_pred ((code), 1, 2, 3);
ia64_fsetc_sf_pred ((code), 1, 0x33, 0x33, 3);
ia64_fclrf_sf_pred ((code), 1, 3);
ia64_fchkf_sf_pred ((code), 1, -1, 3);
ia64_break_f_pred ((code), 1, 0x1234);
ia64_movl (code, 31, -123456);
ia64_codegen_close (code);
#if 0
/* disassembly */
{
guint8 *buf = code.buf;
int template;
guint64 dw1, dw2;
guint64 ins1, ins2, ins3;
ia64_break_i (code, 0x1234);
ia64_codegen_close (code);
dw1 = ((guint64*)buf) [0];
dw2 = ((guint64*)buf) [1];
template = ia64_bundle_template (buf);
ins1 = ia64_bundle_ins1 (buf);
ins2 = ia64_bundle_ins2 (buf);
ins3 = ia64_bundle_ins3 (buf);
code.buf = buf;
ia64_emit_bundle_template (&code, template, ins1, ins2, ins3);
g_assert (dw1 == ((guint64*)buf) [0]);
g_assert (dw2 == ((guint64*)buf) [1]);
}
#endif
mono_disassemble_code (buf, 40960, "code");
return 0;
}

File diff suppressed because it is too large

@ -0,0 +1,6 @@
/
/Makefile
/Makefile.in
/*.o
/*.lo
/.deps

@ -0,0 +1,8 @@
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
noinst_LTLIBRARIES = libmonoarch-mips.la
libmonoarch_mips_la_SOURCES = mips-codegen.h
noinst_PROGRAMS = test

@ -0,0 +1,435 @@
#ifndef __MIPS_CODEGEN_H__
#define __MIPS_CODEGEN_H__
/*
* Copyright (c) 2004 Novell, Inc
* Author: Paolo Molaro (lupus@ximian.com)
*
*/
/* registers */
enum {
mips_zero,
mips_at, /* assembler temp */
mips_v0, /* return values */
mips_v1,
mips_a0, /* 4 - func arguments */
mips_a1,
mips_a2,
mips_a3,
#if _MIPS_SIM == _ABIO32
mips_t0, /* 8 temporaries */
mips_t1,
mips_t2,
mips_t3,
mips_t4,
mips_t5,
mips_t6,
mips_t7,
#elif _MIPS_SIM == _ABIN32
mips_a4, /* 4 more argument registers */
mips_a5,
mips_a6,
mips_a7,
mips_t0, /* 4 temporaries */
mips_t1,
mips_t2,
mips_t3,
#endif
mips_s0, /* 16 callee saved */
mips_s1,
mips_s2,
mips_s3,
mips_s4,
mips_s5,
mips_s6,
mips_s7,
mips_t8, /* 24 temps */
mips_t9, /* 25 temp / pic call-through register */
mips_k0, /* 26 kernel-reserved */
mips_k1,
mips_gp, /* 28 */
mips_sp, /* stack pointer */
mips_fp, /* frame pointer */
mips_ra /* return address */
};
/* we treat the register file as containing just doubles... */
enum {
mips_f0, /* return regs */
mips_f1,
mips_f2,
mips_f3,
mips_f4, /* temps */
mips_f5,
mips_f6,
mips_f7,
mips_f8,
mips_f9,
mips_f10,
mips_f11,
mips_f12, /* first arg */
mips_f13,
mips_f14, /* second arg */
mips_f15,
mips_f16, /* temps */
mips_f17,
mips_f18,
mips_f19,
mips_f20, /* callee saved */
mips_f21,
mips_f22,
mips_f23,
mips_f24,
mips_f25,
mips_f26,
mips_f27,
mips_f28,
mips_f29,
mips_f30,
mips_f31
};
/* prefetch hints */
enum {
MIPS_FOR_LOAD,
MIPS_FOR_STORE,
MIPS_FOR_LOAD_STREAMED = 4,
MIPS_FOR_STORE_STREAMED,
MIPS_FOR_LOAD_RETAINED,
MIPS_FOR_STORE_RETAINED
};
/* coprocessors */
enum {
MIPS_COP0,
MIPS_COP1,
MIPS_COP2,
MIPS_COP3
};
enum {
MIPS_FMT_SINGLE = 16,
MIPS_FMT_DOUBLE = 17,
MIPS_FMT_WORD = 20,
MIPS_FMT_LONG = 21,
MIPS_FMT3_SINGLE = 0,
MIPS_FMT3_DOUBLE = 1
};
/* fpu rounding mode */
enum {
MIPS_ROUND_TO_NEAREST,
MIPS_ROUND_TO_ZERO,
MIPS_ROUND_TO_POSINF,
MIPS_ROUND_TO_NEGINF,
MIPS_ROUND_MASK = 3
};
/* fpu enable/cause flags, cc */
enum {
MIPS_FPU_C_MASK = 1 << 23,
MIPS_INEXACT = 1,
MIPS_UNDERFLOW = 2,
MIPS_OVERFLOW = 4,
MIPS_DIVZERO = 8,
MIPS_INVALID = 16,
MIPS_NOTIMPL = 32,
MIPS_FPU_FLAGS_OFFSET = 2,
MIPS_FPU_ENABLES_OFFSET = 7,
MIPS_FPU_CAUSES_OFFSET = 12
};
/* fpu condition values - see manual entry for C.cond.fmt instructions */
enum {
MIPS_FPU_F,
MIPS_FPU_UN,
MIPS_FPU_EQ,
MIPS_FPU_UEQ,
MIPS_FPU_OLT,
MIPS_FPU_ULT,
MIPS_FPU_OLE,
MIPS_FPU_ULE,
MIPS_FPU_SF,
MIPS_FPU_NGLE,
MIPS_FPU_SEQ,
MIPS_FPU_NGL,
MIPS_FPU_LT,
MIPS_FPU_NGE,
MIPS_FPU_LE,
MIPS_FPU_NGT
};
#if SIZEOF_REGISTER == 4
#define MIPS_SW mips_sw
#define MIPS_LW mips_lw
#define MIPS_ADDU mips_addu
#define MIPS_ADDIU mips_addiu
#define MIPS_SWC1 mips_swc1
#define MIPS_LWC1 mips_lwc1
#define MIPS_MOVE mips_move
#elif SIZEOF_REGISTER == 8
#define MIPS_SW mips_sd
#define MIPS_LW mips_ld
#define MIPS_ADDU mips_daddu
#define MIPS_ADDIU mips_daddiu
#define MIPS_SWC1 mips_sdc1
#define MIPS_LWC1 mips_ldc1
#define MIPS_MOVE mips_dmove
#else
#error Unknown SIZEOF_REGISTER
#endif
#define mips_emit32(c,x) do { \
*((guint32 *) (void *)(c)) = x; \
(c) = (typeof(c))(((guint32 *)(void *)(c)) + 1); \
} while (0)
#define mips_format_i(code,op,rs,rt,imm) mips_emit32 ((code), (((op)<<26)|((rs)<<21)|((rt)<<16)|((imm)&0xffff)))
#define mips_format_j(code,op,imm) mips_emit32 ((code), (((op)<<26)|((imm)&0x03ffffff)))
#define mips_format_r(code,op,rs,rt,rd,sa,func) mips_emit32 ((code), (((op)<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((sa)<<6)|(func)))
#define mips_format_divmul(code,op,src1,src2,fun) mips_emit32 ((code), (((op)<<26)|((src1)<<21)|((src2)<<16)|(fun)))
#define mips_is_imm16(val) ((gint)(gshort)(gint)(val) == (gint)(val))
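/* For instance, mips_addu (p, mips_a0, mips_a1, mips_a2) expands through
 * mips_format_r with op=0, rs=mips_a1 (5), rt=mips_a2 (6), rd=mips_a0 (4),
 * sa=0, func=33, so mips_emit32 stores the single word 0x00a62021
 * ("addu a0, a1, a2") and advances the code pointer past it. */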
/* Load always using lui/addiu pair (for later patching) */
#define mips_load(c,D,v) do { \
if (((guint32)(v)) & (1 << 15)) { \
mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16)+1); \
} \
else { \
mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16)); \
} \
mips_addiu ((c), (D), (D), ((guint32)(v)) & 0xffff); \
} while (0)
/* load constant - no patch-up */
#define mips_load_const(c,D,v) do { \
if (!mips_is_imm16 ((v))) { \
if (((guint32)(v)) & (1 << 15)) { \
mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16)+1); \
} \
else { \
mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16)); \
} \
if (((guint32)(v)) & 0xffff) \
mips_addiu ((c), (D), (D), ((guint32)(v)) & 0xffff); \
} \
else \
mips_addiu ((c), (D), mips_zero, ((guint32)(v)) & 0xffff); \
} while (0)
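/* For instance, with v = 0x1234ffff both helpers emit
 *     lui   D, 0x1235        (high half bumped because bit 15 of v is set)
 *     addiu D, D, 0xffff     (the sign-extended add restores the low half)
 * but mips_load always emits this fixed two-instruction pair so that a later
 * pass can patch the constant in place, while mips_load_const shrinks a value
 * that already fits a signed 16-bit immediate (e.g. 42) down to the single
 * instruction "addiu D, $zero, 42". */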
/* arithmetic ops */
#define mips_add(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,32)
#define mips_addi(c,dest,src1,imm) mips_format_i(c,8,src1,dest,imm)
#define mips_addu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,33)
#define mips_addiu(c,dest,src1,imm) mips_format_i(c,9,src1,dest,imm)
#define mips_dadd(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,44)
#define mips_daddi(c,dest,src1,imm) mips_format_i(c,24,src1,dest,imm)
#define mips_daddu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,45)
#define mips_daddiu(c,dest,src1,imm) mips_format_i(c,25,src1,dest,imm)
#define mips_dsub(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,46)
#define mips_dsubu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,47)
#define mips_mul(c,dest,src1,src2) mips_format_r(c,28,src1,src2,dest,0,2)
#define mips_sub(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,34)
#define mips_subu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,35)
/* div and mul ops */
#define mips_ddiv(c,src1,src2) mips_format_divmul(c,0,src1,src2,30)
#define mips_ddivu(c,src1,src2) mips_format_divmul(c,0,src1,src2,31)
#define mips_div(c,src1,src2) mips_format_divmul(c,0,src1,src2,26)
#define mips_divu(c,src1,src2) mips_format_divmul(c,0,src1,src2,27)
#define mips_dmult(c,src1,src2) mips_format_divmul(c,0,src1,src2,28)
#define mips_dmultu(c,src1,src2) mips_format_divmul(c,0,src1,src2,29)
#define mips_mult(c,src1,src2) mips_format_divmul(c,0,src1,src2,24)
#define mips_multu(c,src1,src2) mips_format_divmul(c,0,src1,src2,25)
/* shift ops */
#define mips_dsll(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,56)
#define mips_dsll32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,60)
#define mips_dsllv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,20)
#define mips_dsra(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,59)
#define mips_dsra32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,63)
#define mips_dsrav(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,23)
#define mips_dsrl(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,58)
#define mips_dsrl32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,62)
#define mips_dsrlv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,22)
#define mips_sll(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,0)
#define mips_sllv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,4)
#define mips_sra(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,3)
#define mips_srav(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,7)
#define mips_srl(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,2)
#define mips_srlv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,6)
/* logical ops */
#define mips_and(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,36)
#define mips_andi(c,dest,src1,imm) mips_format_i(c,12,src1,dest,imm)
#define mips_nor(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,39)
#define mips_or(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,37)
#define mips_ori(c,dest,src1,uimm) mips_format_i(c,13,src1,dest,uimm)
#define mips_xor(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,38)
#define mips_xori(c,dest,src1,uimm) mips_format_i(c,14,src1,dest,uimm)
/* compares */
#define mips_slt(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,42)
#define mips_slti(c,dest,src1,imm) mips_format_i(c,10,src1,dest,imm)
#define mips_sltiu(c,dest,src1,imm) mips_format_i(c,11,src1,dest,imm)
#define mips_sltu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,43)
/* missing traps: teq, teqi, tge, tgei, tgeiu, tgeu, tlt, tlti, tltiu, tltu, tne, tnei */
/* conditional branches */
#define mips_beq(c,src1,src2,offset) mips_format_i(c,4,src1,src2,offset)
#define mips_beql(c,src1,src2,offset) mips_format_i(c,20,src1,src2,offset)
#define mips_bgez(c,src1,offset) mips_format_i(c,1,src1,1,offset)
#define mips_bgezal(c,src1,offset) mips_format_i(c,1,src1,17,offset)
#define mips_bgezall(c,src1,offset) mips_format_i(c,1,src1,19,offset)
#define mips_bgezl(c,src1,offset) mips_format_i(c,1,src1,3,offset)
#define mips_bgtz(c,src1,offset) mips_format_i(c,7,src1,0,offset)
#define mips_bgtzl(c,src1,offset) mips_format_i(c,23,src1,0,offset)
#define mips_blez(c,src1,offset) mips_format_i(c,6,src1,0,offset)
#define mips_blezl(c,src1,offset) mips_format_i(c,22,src1,0,offset)
#define mips_bltz(c,src1,offset) mips_format_i(c,1,src1,0,offset)
#define mips_bltzal(c,src1,offset) mips_format_i(c,1,src1,16,offset)
#define mips_bltzall(c,src1,offset) mips_format_i(c,1,src1,18,offset)
#define mips_bltzl(c,src1,offset) mips_format_i(c,1,src1,2,offset)
#define mips_bne(c,src1,src2,offset) mips_format_i(c,5,src1,src2,offset)
#define mips_bnel(c,src1,src2,offset) mips_format_i(c,21,src1,src2,offset)
/* uncond branches and calls */
#define mips_jump(c,target) mips_format_j(c,2,target)
#define mips_jumpl(c,target) mips_format_j(c,3,target)
#define mips_jalr(c,src1,retreg) mips_format_r(c,0,src1,0,retreg,0,9)
#define mips_jr(c,src1) mips_emit32(c,((src1)<<21)|8)
/* loads and stores */
#define mips_lb(c,dest,base,offset) mips_format_i(c,32,base,dest,offset)
#define mips_lbu(c,dest,base,offset) mips_format_i(c,36,base,dest,offset)
#define mips_ld(c,dest,base,offset) mips_format_i(c,55,base,dest,offset)
#define mips_ldl(c,dest,base,offset) mips_format_i(c,26,base,dest,offset)
#define mips_ldr(c,dest,base,offset) mips_format_i(c,27,base,dest,offset)
#define mips_lh(c,dest,base,offset) mips_format_i(c,33,base,dest,offset)
#define mips_lhu(c,dest,base,offset) mips_format_i(c,37,base,dest,offset)
#define mips_ll(c,dest,base,offset) mips_format_i(c,48,base,dest,offset)
#define mips_lld(c,dest,base,offset) mips_format_i(c,52,base,dest,offset)
#define mips_lui(c,dest,base,uimm) mips_format_i(c,15,base,dest,uimm)
#define mips_lw(c,dest,base,offset) mips_format_i(c,35,base,dest,offset)
#define mips_lwl(c,dest,base,offset) mips_format_i(c,34,base,dest,offset)
#define mips_lwr(c,dest,base,offset) mips_format_i(c,38,base,dest,offset)
#define mips_lwu(c,dest,base,offset) mips_format_i(c,39,base,dest,offset)
#define mips_sb(c,src,base,offset) mips_format_i(c,40,base,src,offset)
#define mips_sc(c,src,base,offset) mips_format_i(c,56,base,src,offset)
#define mips_scd(c,src,base,offset) mips_format_i(c,60,base,src,offset)
#define mips_sd(c,src,base,offset) mips_format_i(c,63,base,src,offset)
#define mips_sdl(c,src,base,offset) mips_format_i(c,44,base,src,offset)
#define mips_sdr(c,src,base,offset) mips_format_i(c,45,base,src,offset)
#define mips_sh(c,src,base,offset) mips_format_i(c,41,base,src,offset)
#define mips_sw(c,src,base,offset) mips_format_i(c,43,base,src,offset)
#define mips_swl(c,src,base,offset) mips_format_i(c,50,base,src,offset)
#define mips_swr(c,src,base,offset) mips_format_i(c,54,base,src,offset)
/* misc and coprocessor ops */
#define mips_move(c,dest,src) mips_addu(c,dest,src,mips_zero)
#define mips_dmove(c,dest,src) mips_daddu(c,dest,src,mips_zero)
#define mips_nop(c) mips_or(c,mips_at,mips_at,0)
#define mips_break(c,code) mips_emit32(c, ((code)<<6)|13)
#define mips_mfhi(c,dest) mips_format_r(c,0,0,0,dest,0,16)
#define mips_mflo(c,dest) mips_format_r(c,0,0,0,dest,0,18)
#define mips_mthi(c,src) mips_format_r(c,0,src,0,0,0,17)
#define mips_mtlo(c,src) mips_format_r(c,0,src,0,0,0,19)
#define mips_movn(c,dest,src,test) mips_format_r(c,0,src,test,dest,0,11)
#define mips_movz(c,dest,src,test) mips_format_r(c,0,src,test,dest,0,10)
#define mips_pref(c,hint,base,offset) mips_format_i(c,51,base,hint,offset)
#define mips_prefidx(c,hint,base,idx) mips_format_r(c,19,base,idx,hint,0,15)
#define mips_sync(c,stype) mips_emit32(c, ((stype)<<6)|15)
#define mips_syscall(c,code) mips_emit32(c, ((code)<<6)|12)
#define mips_cop(c,cop,fun) mips_emit32(c, ((16|(cop))<<26)|(fun))
#define mips_ldc(c,cop,dest,base,offset) mips_format_i(c,(52|(cop)),base,dest,offset)
#define mips_lwc(c,cop,dest,base,offset) mips_format_i(c,(48|(cop)),base,dest,offset)
#define mips_sdc(c,cop,src,base,offset) mips_format_i(c,(60|(cop)),base,src,offset)
#define mips_swc(c,cop,src,base,offset) mips_format_i(c,(56|(cop)),base,src,offset)
#define mips_cfc1(c,dest,src) mips_format_r(c,17,2,dest,src,0,0)
#define mips_ctc1(c,dest,src) mips_format_r(c,17,6,dest,src,0,0)
/* fpu ops */
#define mips_fabss(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,5)
#define mips_fabsd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,5)
#define mips_fadds(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,0)
#define mips_faddd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,0)
#define mips_fdivs(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,3)
#define mips_fdivd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,3)
#define mips_fmuls(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,2)
#define mips_fmuld(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,2)
#define mips_fnegs(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,7)
#define mips_fnegd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,7)
#define mips_fsqrts(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,4)
#define mips_fsqrtd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,4)
#define mips_fsubs(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,1)
#define mips_fsubd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,1)
#define mips_madds(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,32|MIPS_FMT_SINGLE)
#define mips_maddd(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,32|MIPS_FMT_DOUBLE)
#define mips_nmadds(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,48|MIPS_FMT_SINGLE)
#define mips_nmaddd(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,48|MIPS_FMT_DOUBLE)
#define mips_msubs(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,40|MIPS_FMT_SINGLE)
#define mips_msubd(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,40|MIPS_FMT_DOUBLE)
#define mips_nmsubs(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,56|MIPS_FMT_SINGLE)
#define mips_nmsubd(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,56|MIPS_FMT_DOUBLE)
/* fp compare and branch */
#define mips_fcmps(c,cond,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,0,(3<<4)|(cond))
#define mips_fcmpd(c,cond,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,0,(3<<4)|(cond))
#define mips_fbfalse(c,offset) mips_format_i(c,17,8,0,offset)
#define mips_fbfalsel(c,offset) mips_format_i(c,17,8,2,offset)
#define mips_fbtrue(c,offset) mips_format_i(c,17,8,1,offset)
#define mips_fbtruel(c,offset) mips_format_i(c,17,8,3,offset)
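/* For instance, a double-precision "branch if equal" pairs the compare with
 * the coprocessor-1 branch:
 *     mips_fcmpd (p, MIPS_FPU_EQ, 18, 20);   ->  c.eq.d $f18, $f20  (sets the FP condition bit)
 *     mips_fbtrue (p, offset);               ->  bc1t offset        (taken when that bit is set)
 */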
/* fp convert */
#define mips_ceills(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,10)
#define mips_ceilld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,10)
#define mips_ceilws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,14)
#define mips_ceilwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,14)
#define mips_cvtds(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,33)
#define mips_cvtdw(c,dest,src) mips_format_r(c,17,MIPS_FMT_WORD,0,src,dest,33)
#define mips_cvtdl(c,dest,src) mips_format_r(c,17,MIPS_FMT_LONG,0,src,dest,33)
#define mips_cvtls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,37)
#define mips_cvtld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,37)
#define mips_cvtsd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,32)
#define mips_cvtsw(c,dest,src) mips_format_r(c,17,MIPS_FMT_WORD,0,src,dest,32)
#define mips_cvtsl(c,dest,src) mips_format_r(c,17,MIPS_FMT_LONG,0,src,dest,32)
#define mips_cvtws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,36)
#define mips_cvtwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,36)
#define mips_floorls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,11)
#define mips_floorld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,11)
#define mips_floorws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,15)
#define mips_floorwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,15)
#define mips_roundls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,8)
#define mips_roundld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,8)
#define mips_roundws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,12)
#define mips_roundwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,12)
#define mips_truncls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,9)
#define mips_truncld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,9)
#define mips_truncws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,13)
#define mips_truncwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,13)
/* fp moves, loads */
#define mips_fmovs(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,6)
#define mips_fmovd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,6)
#define mips_mfc1(c,dest,src) mips_format_r(c,17,0,dest,src,0,0)
#define mips_mtc1(c,dest,src) mips_format_r(c,17,4,src,dest,0,0)
#define mips_dmfc1(c,dest,src) mips_format_r(c,17,1,0,dest,src,0)
#define mips_dmtc1(c,dest,src) mips_format_r(c,17,1,0,src,dest,0)
#define mips_ldc1(c,dest,base,offset) mips_ldc(c,1,dest,base,offset)
#define mips_ldxc1(c,dest,base,idx) mips_format_r(c,19,base,idx,0,dest,1)
#define mips_lwc1(c,dest,base,offset) mips_lwc(c,1,dest,base,offset)
#define mips_lwxc1(c,dest,base,idx) mips_format_r(c,19,base,idx,0,dest,0)
#define mips_sdc1(c,src,base,offset) mips_sdc(c,1,src,base,offset)
#define mips_sdxc1(c,src,base,idx) mips_format_r(c,19,base,idx,src,0,9)
#define mips_swc1(c,src,base,offset) mips_swc(c,1,src,base,offset)
#define mips_swxc1(c,src,base,idx) mips_format_r(c,19,base,idx,src,0,8)
#endif /* __MIPS_CODEGEN_H__ */

@ -0,0 +1,159 @@
#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define NO_MIPS_JIT_DEBUG
#include "mips-codegen.h"
#include "mono/metadata/class.h"
/* don't run the resulting program, it will destroy your computer;
 * just objdump -d it to check that we generated the correct assembly.
 */
int main (int argc, char *argv[]) {
guint32 *code, * p;
code = p = (guint32 *) malloc (sizeof (guint32) * 1024);
mips_add (p, 3, 4, 5);
mips_addi (p, 3, 4, 5);
mips_addu (p, 3, 4, 5);
mips_addiu (p, 3, 4, 5);
mips_sub (p, 3, 4, 5);
mips_subu (p, 3, 4, 5);
mips_dadd (p, 3, 4, 5);
mips_daddi (p, 3, 4, 5);
mips_daddu (p, 3, 4, 5);
mips_daddiu (p, 3, 4, 5);
mips_dsub (p, 3, 4, 5);
mips_dsubu (p, 3, 4, 5);
mips_mult (p, 6, 7);
mips_multu (p, 6, 7);
mips_div (p, 6, 7);
mips_divu (p, 6, 7);
mips_dmult (p, 6, 7);
mips_dmultu (p, 6, 7);
mips_ddiv (p, 6, 7);
mips_ddivu (p, 6, 7);
mips_sll (p, 3, 4, 5);
mips_sllv (p, 3, 4, 5);
mips_sra (p, 3, 4, 5);
mips_srav (p, 3, 4, 5);
mips_srl (p, 3, 4, 5);
mips_srlv (p, 3, 4, 5);
mips_dsll (p, 3, 4, 5);
mips_dsll32 (p, 3, 4, 5);
mips_dsllv (p, 3, 4, 5);
mips_dsra (p, 3, 4, 5);
mips_dsra32 (p, 3, 4, 5);
mips_dsrav (p, 3, 4, 5);
mips_dsrl (p, 3, 4, 5);
mips_dsrl32 (p, 3, 4, 5);
mips_dsrlv (p, 3, 4, 5);
mips_and (p, 8, 9, 10);
mips_andi (p, 8, 9, 10);
mips_nor (p, 8, 9, 10);
mips_or (p, 8, 9, 10);
mips_ori (p, 8, 9, 10);
mips_xor (p, 8, 9, 10);
mips_xori (p, 8, 9, 10);
mips_slt (p, 8, 9, 10);
mips_slti (p, 8, 9, 10);
mips_sltu (p, 8, 9, 10);
mips_sltiu (p, 8, 9, 10);
mips_beq (p, 8, 9, 0xff1f);
mips_beql (p, 8, 9, 0xff1f);
mips_bne (p, 8, 9, 0xff1f);
mips_bnel (p, 8, 9, 0xff1f);
mips_bgez (p, 11, 0xff1f);
mips_bgezal (p, 11, 0xff1f);
mips_bgezall (p, 11, 0xff1f);
mips_bgezl (p, 11, 0xff1f);
mips_bgtz (p, 11, 0xff1f);
mips_bgtzl (p, 11, 0xff1f);
mips_blez (p, 11, 0xff1f);
mips_blezl (p, 11, 0xff1f);
mips_bltz (p, 11, 0xff1f);
mips_bltzal (p, 11, 0xff1f);
mips_bltzall (p, 11, 0xff1f);
mips_bltzl (p, 11, 0xff1f);
mips_jump (p, 0xff1f);
mips_jumpl (p, 0xff1f);
mips_jalr (p, 12, mips_ra);
mips_jr (p, 12);
mips_lb (p, 13, 14, 128);
mips_lbu (p, 13, 14, 128);
mips_ld (p, 13, 14, 128);
mips_ldl (p, 13, 14, 128);
mips_ldr (p, 13, 14, 128);
mips_lh (p, 13, 14, 128);
mips_lhu (p, 13, 14, 128);
mips_ll (p, 13, 14, 128);
mips_lld (p, 13, 14, 128);
mips_lui (p, 13, 14, 128);
mips_lw (p, 13, 14, 128);
mips_lwl (p, 13, 14, 128);
mips_lwr (p, 13, 14, 128);
mips_lwu (p, 13, 14, 128);
mips_sb (p, 13, 14, 128);
mips_sc (p, 13, 14, 128);
mips_scd (p, 13, 14, 128);
mips_sd (p, 13, 14, 128);
mips_sdl (p, 13, 14, 128);
mips_sdr (p, 13, 14, 128);
mips_sh (p, 13, 14, 128);
mips_sw (p, 13, 14, 128);
mips_swl (p, 13, 14, 128);
mips_swr (p, 13, 14, 128);
mips_move (p, 15, 16);
mips_nop (p);
mips_break (p, 0);
mips_sync (p, 0);
mips_mfhi (p, 17);
mips_mflo (p, 17);
mips_mthi (p, 17);
mips_mtlo (p, 17);
mips_fabsd (p, 16, 18);
mips_fnegd (p, 16, 18);
mips_fsqrtd (p, 16, 18);
mips_faddd (p, 16, 18, 20);
mips_fdivd (p, 16, 18, 20);
mips_fmuld (p, 16, 18, 20);
mips_fsubd (p, 16, 18, 20);
mips_fcmpd (p, MIPS_FPU_EQ, 18, 20);
mips_fbfalse (p, 0xff1f);
mips_fbfalsel (p, 0xff1f);
mips_fbtrue (p, 0xff1f);
mips_fbtruel (p, 0xff1f);
mips_ceilwd (p, 20, 22);
mips_ceilld (p, 20, 22);
mips_floorwd (p, 20, 22);
mips_floorld (p, 20, 22);
mips_roundwd (p, 20, 22);
mips_roundld (p, 20, 22);
mips_truncwd (p, 20, 22);
mips_truncld (p, 20, 22);
mips_cvtdw (p, 20, 22);
mips_cvtds (p, 20, 22);
mips_cvtdl (p, 20, 22);
mips_cvtld (p, 20, 22);
mips_cvtsd (p, 20, 22);
mips_cvtwd (p, 20, 22);
mips_fmovd (p, 20, 22);
printf ("size: %d\n", (int)(p - code));
return 0;
}

@ -0,0 +1,7 @@
/Makefile
/Makefile.in
/.libs
/.deps
/*.la
/*.lo
/test

@ -0,0 +1 @@
EXTRA_DIST = ppc-codegen.h

@ -0,0 +1,953 @@
/*
Authors:
Radek Doulik
Christopher Taylor <ct_AT_clemson_DOT_edu>
Andreas Faerber <andreas.faerber@web.de>
Copyright (C) 2001 Radek Doulik
Copyright (C) 2007-2008 Andreas Faerber
For testing, do the following: ./test | as -o test.o
*/
#ifndef __MONO_PPC_CODEGEN_H__
#define __MONO_PPC_CODEGEN_H__
#include <glib.h>
#include <assert.h>
typedef enum {
ppc_r0 = 0,
ppc_r1,
ppc_sp = ppc_r1,
ppc_r2,
ppc_r3,
ppc_r4,
ppc_r5,
ppc_r6,
ppc_r7,
ppc_r8,
ppc_r9,
ppc_r10,
ppc_r11,
ppc_r12,
ppc_r13,
ppc_r14,
ppc_r15,
ppc_r16,
ppc_r17,
ppc_r18,
ppc_r19,
ppc_r20,
ppc_r21,
ppc_r22,
ppc_r23,
ppc_r24,
ppc_r25,
ppc_r26,
ppc_r27,
ppc_r28,
ppc_r29,
ppc_r30,
ppc_r31
} PPCIntRegister;
typedef enum {
ppc_f0 = 0,
ppc_f1,
ppc_f2,
ppc_f3,
ppc_f4,
ppc_f5,
ppc_f6,
ppc_f7,
ppc_f8,
ppc_f9,
ppc_f10,
ppc_f11,
ppc_f12,
ppc_f13,
ppc_f14,
ppc_f15,
ppc_f16,
ppc_f17,
ppc_f18,
ppc_f19,
ppc_f20,
ppc_f21,
ppc_f22,
ppc_f23,
ppc_f24,
ppc_f25,
ppc_f26,
ppc_f27,
ppc_f28,
ppc_f29,
ppc_f30,
ppc_f31
} PPCFloatRegister;
typedef enum {
ppc_lr = 256,
ppc_ctr = 256 + 32,
ppc_xer = 32
} PPCSpecialRegister;
enum {
/* B0 operand for branches */
PPC_BR_DEC_CTR_NONZERO_FALSE = 0,
PPC_BR_LIKELY = 1, /* can be or'ed with the conditional variants */
PPC_BR_DEC_CTR_ZERO_FALSE = 2,
PPC_BR_FALSE = 4,
PPC_BR_DEC_CTR_NONZERO_TRUE = 8,
PPC_BR_DEC_CTR_ZERO_TRUE = 10,
PPC_BR_TRUE = 12,
PPC_BR_DEC_CTR_NONZERO = 16,
PPC_BR_DEC_CTR_ZERO = 18,
PPC_BR_ALWAYS = 20,
/* B1 operand for branches */
PPC_BR_LT = 0,
PPC_BR_GT = 1,
PPC_BR_EQ = 2,
PPC_BR_SO = 3
};
enum {
PPC_TRAP_LT = 1,
PPC_TRAP_GT = 2,
PPC_TRAP_EQ = 4,
PPC_TRAP_LT_UN = 8,
PPC_TRAP_GT_UN = 16,
PPC_TRAP_LE = 1 + PPC_TRAP_EQ,
PPC_TRAP_GE = 2 + PPC_TRAP_EQ,
PPC_TRAP_LE_UN = 8 + PPC_TRAP_EQ,
PPC_TRAP_GE_UN = 16 + PPC_TRAP_EQ
};
#define ppc_emit32(c,x) do { *((guint32 *) (c)) = GUINT32_TO_BE (x); (c) = (gpointer)((guint8 *)(c) + sizeof (guint32));} while (0)
#define ppc_is_imm16(val) ((((val)>> 15) == 0) || (((val)>> 15) == -1))
#define ppc_is_uimm16(val) ((glong)(val) >= 0L && (glong)(val) <= 65535L)
#define ppc_ha(val) (((val >> 16) + ((val & 0x8000) ? 1 : 0)) & 0xffff)
#define ppc_load32(c,D,v) G_STMT_START { \
ppc_lis ((c), (D), (guint32)(v) >> 16); \
ppc_ori ((c), (D), (D), (guint32)(v) & 0xffff); \
} G_STMT_END
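/* For instance, ppc_load32 (c, ppc_r3, 0x12345678) emits
 *     lis r3, 0x1234         (r3 = 0x12340000)
 *     ori r3, r3, 0x5678     (r3 = 0x12345678)
 * ori zero-extends its immediate, so no high-half adjustment is needed here;
 * ppc_ha above is for addis/addi pairings, where the low half is sign-extended
 * and the high half must be bumped whenever bit 15 of the value is set. */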
/* Macros to load/store pointer sized quantities */
#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
#define ppc_ldptr(c,D,d,A) ppc_ld ((c), (D), (d), (A))
#define ppc_ldptr_update(c,D,d,A) ppc_ldu ((c), (D), (d), (A))
#define ppc_ldptr_indexed(c,D,A,B) ppc_ldx ((c), (D), (A), (B))
#define ppc_ldptr_update_indexed(c,D,A,B) ppc_ldux ((c), (D), (A), (B))
#define ppc_stptr(c,S,d,A) ppc_std ((c), (S), (d), (A))
#define ppc_stptr_update(c,S,d,A) ppc_stdu ((c), (S), (d), (A))
#define ppc_stptr_indexed(c,S,A,B) ppc_stdx ((c), (S), (A), (B))
#define ppc_stptr_update_indexed(c,S,A,B) ppc_stdux ((c), (S), (A), (B))
#else
/* Same as ppc32 */
#define ppc_ldptr(c,D,d,A) ppc_lwz ((c), (D), (d), (A))
#define ppc_ldptr_update(c,D,d,A) ppc_lwzu ((c), (D), (d), (A))
#define ppc_ldptr_indexed(c,D,A,B) ppc_lwzx ((c), (D), (A), (B))
#define ppc_ldptr_update_indexed(c,D,A,B) ppc_lwzux ((c), (D), (A), (B))
#define ppc_stptr(c,S,d,A) ppc_stw ((c), (S), (d), (A))
#define ppc_stptr_update(c,S,d,A) ppc_stwu ((c), (S), (d), (A))
#define ppc_stptr_indexed(c,S,A,B) ppc_stwx ((c), (S), (A), (B))
#define ppc_stptr_update_indexed(c,S,A,B) ppc_stwux ((c), (S), (A), (B))
#endif
/* Macros to load pointer sized immediates */
#define ppc_load_ptr(c,D,v) ppc_load ((c),(D),(gsize)(v))
#define ppc_load_ptr_sequence(c,D,v) ppc_load_sequence ((c),(D),(gsize)(v))
/* Macros to load/store regsize quantities */
#ifdef __mono_ppc64__
#define ppc_ldr(c,D,d,A) ppc_ld ((c), (D), (d), (A))
#define ppc_ldr_indexed(c,D,A,B) ppc_ldx ((c), (D), (A), (B))
#define ppc_str(c,S,d,A) ppc_std ((c), (S), (d), (A))
#define ppc_str_update(c,S,d,A) ppc_stdu ((c), (S), (d), (A))
#define ppc_str_indexed(c,S,A,B) ppc_stdx ((c), (S), (A), (B))
#define ppc_str_update_indexed(c,S,A,B) ppc_stdux ((c), (S), (A), (B))
#else
#define ppc_ldr(c,D,d,A) ppc_lwz ((c), (D), (d), (A))
#define ppc_ldr_indexed(c,D,A,B) ppc_lwzx ((c), (D), (A), (B))
#define ppc_str(c,S,d,A) ppc_stw ((c), (S), (d), (A))
#define ppc_str_update(c,S,d,A) ppc_stwu ((c), (S), (d), (A))
#define ppc_str_indexed(c,S,A,B) ppc_stwx ((c), (S), (A), (B))
#define ppc_str_update_indexed(c,S,A,B) ppc_stwux ((c), (S), (A), (B))
#endif
#define ppc_str_multiple(c,S,d,A) ppc_store_multiple_regs((c),(S),(d),(A))
#define ppc_ldr_multiple(c,D,d,A) ppc_load_multiple_regs((c),(D),(d),(A))
/* PPC32 macros */
#ifndef __mono_ppc64__
#define ppc_load_sequence(c,D,v) ppc_load32 ((c), (D), (guint32)(v))
#define PPC_LOAD_SEQUENCE_LENGTH 8
#define ppc_load(c,D,v) G_STMT_START { \
if (ppc_is_imm16 ((guint32)(v))) { \
ppc_li ((c), (D), (guint16)(guint32)(v)); \
} else { \
ppc_load32 ((c), (D), (guint32)(v)); \
} \
} G_STMT_END
#define ppc_load_func(c,D,V) ppc_load_sequence ((c), (D), (V))
#define ppc_load_multiple_regs(c,D,d,A) ppc_lmw ((c), (D), (d), (A))
#define ppc_store_multiple_regs(c,S,d,A) ppc_stmw ((c), (S), (d), (A))
#define ppc_compare(c,cfrD,A,B) ppc_cmp((c), (cfrD), 0, (A), (B))
#define ppc_compare_reg_imm(c,cfrD,A,B) ppc_cmpi((c), (cfrD), 0, (A), (B))
#define ppc_compare_log(c,cfrD,A,B) ppc_cmpl((c), (cfrD), 0, (A), (B))
#define ppc_shift_left(c,A,S,B) ppc_slw((c), (S), (A), (B))
#define ppc_shift_left_imm(c,A,S,n) ppc_slwi((c), (A), (S), (n))
#define ppc_shift_right_imm(c,A,S,B) ppc_srwi((c), (A), (S), (B))
#define ppc_shift_right_arith_imm(c,A,S,B) ppc_srawi((c), (A), (S), (B))
#define ppc_multiply(c,D,A,B) ppc_mullw((c), (D), (A), (B))
#define ppc_clear_right_imm(c,A,S,n) ppc_clrrwi((c), (A), (S), (n))
#endif
#define ppc_opcode(c) ((c) >> 26)
#define ppc_split_5_1_1(x) (((x) >> 5) & 0x1)
#define ppc_split_5_1_5(x) ((x) & 0x1F)
#define ppc_split_5_1(x) ((ppc_split_5_1_5(x) << 1) | ppc_split_5_1_1(x))
#define ppc_break(c) ppc_tw((c),31,0,0)
#define ppc_addi(c,D,A,i) ppc_emit32 (c, (14 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
#define ppc_addis(c,D,A,i) ppc_emit32 (c, (15 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
#define ppc_li(c,D,v) ppc_addi (c, D, 0, (guint16)(v))
#define ppc_lis(c,D,v) ppc_addis (c, D, 0, (guint16)(v))
#define ppc_lwz(c,D,d,A) ppc_emit32 (c, (32 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lhz(c,D,d,A) ppc_emit32 (c, (40 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lbz(c,D,d,A) ppc_emit32 (c, (34 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stw(c,S,d,A) ppc_emit32 (c, (36 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_sth(c,S,d,A) ppc_emit32 (c, (44 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stb(c,S,d,A) ppc_emit32 (c, (38 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stwu(c,s,d,A) ppc_emit32 (c, (37 << 26) | ((s) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_or(c,a,s,b) ppc_emit32 (c, (31 << 26) | ((s) << 21) | ((a) << 16) | ((b) << 11) | 888)
#define ppc_mr(c,a,s) ppc_or (c, a, s, s)
#define ppc_ori(c,S,A,ui) ppc_emit32 (c, (24 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(ui))
#define ppc_nop(c) ppc_ori (c, 0, 0, 0)
#define ppc_mfspr(c,D,spr) ppc_emit32 (c, (31 << 26) | ((D) << 21) | ((spr) << 11) | (339 << 1))
#define ppc_mflr(c,D) ppc_mfspr (c, D, ppc_lr)
#define ppc_mtspr(c,spr,S) ppc_emit32 (c, (31 << 26) | ((S) << 21) | ((spr) << 11) | (467 << 1))
#define ppc_mtlr(c,S) ppc_mtspr (c, ppc_lr, S)
#define ppc_mtctr(c,S) ppc_mtspr (c, ppc_ctr, S)
#define ppc_mtxer(c,S) ppc_mtspr (c, ppc_xer, S)
#define ppc_b(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2))
#define ppc_bl(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 1)
#define ppc_ba(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 2)
#define ppc_bla(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 3)
#define ppc_blrl(c) ppc_emit32 (c, 0x4e800021)
#define ppc_blr(c) ppc_emit32 (c, 0x4e800020)
#define ppc_lfs(c,D,d,A) ppc_emit32 (c, (48 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lfd(c,D,d,A) ppc_emit32 (c, (50 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stfs(c,S,d,a) ppc_emit32 (c, (52 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
#define ppc_stfd(c,S,d,a) ppc_emit32 (c, (54 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
/***********************************************************************
The macros below were tapped out by Christopher Taylor <ct_AT_clemson_DOT_edu>
from 18 November 2002 to 19 December 2002.
Special thanks to rodo, lupus, dietmar, miguel, and duncan for patience,
and motivation.
The macros found in this file are based on the assembler instructions found
in Motorola and Digital DNA's:
"Programming Enviornments Manual For 32-bit Implementations of the PowerPC Architecture"
MPCFPE32B/AD
12/2001
REV2
see pages 326 - 524 for detailed information regarding each instruction
Also see the "Ximian Copyright Agreement, 2002" for more information regarding
my and Ximian's copyright to this code. ;)
*************************************************************************/
#define ppc_addx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (OE << 10) | (266 << 1) | Rc)
#define ppc_add(c,D,A,B) ppc_addx(c,D,A,B,0,0)
#define ppc_addd(c,D,A,B) ppc_addx(c,D,A,B,0,1)
#define ppc_addo(c,D,A,B) ppc_addx(c,D,A,B,1,0)
#define ppc_addod(c,D,A,B) ppc_addx(c,D,A,B,1,1)
#define ppc_addcx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (OE << 10) | (10 << 1) | Rc)
#define ppc_addc(c,D,A,B) ppc_addcx(c,D,A,B,0,0)
#define ppc_addcd(c,D,A,B) ppc_addcx(c,D,A,B,0,1)
#define ppc_addco(c,D,A,B) ppc_addcx(c,D,A,B,1,0)
#define ppc_addcod(c,D,A,B) ppc_addcx(c,D,A,B,1,1)
#define ppc_addex(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (OE << 10) | (138 << 1) | Rc)
#define ppc_adde(c,D,A,B) ppc_addex(c,D,A,B,0,0)
#define ppc_added(c,D,A,B) ppc_addex(c,D,A,B,0,1)
#define ppc_addeo(c,D,A,B) ppc_addex(c,D,A,B,1,0)
#define ppc_addeod(c,D,A,B) ppc_addex(c,D,A,B,1,1)
#define ppc_addic(c,D,A,i) ppc_emit32(c, (12 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
#define ppc_addicd(c,D,A,i) ppc_emit32(c, (13 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
#define ppc_addmex(c,D,A,OE,RC) ppc_emit32(c, (31 << 26) | ((D) << 21 ) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (234 << 1) | RC)
#define ppc_addme(c,D,A) ppc_addmex(c,D,A,0,0)
#define ppc_addmed(c,D,A) ppc_addmex(c,D,A,0,1)
#define ppc_addmeo(c,D,A) ppc_addmex(c,D,A,1,0)
#define ppc_addmeod(c,D,A) ppc_addmex(c,D,A,1,1)
#define ppc_addzex(c,D,A,OE,RC) ppc_emit32(c, (31 << 26) | ((D) << 21 ) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (202 << 1) | RC)
#define ppc_addze(c,D,A) ppc_addzex(c,D,A,0,0)
#define ppc_addzed(c,D,A) ppc_addzex(c,D,A,0,1)
#define ppc_addzeo(c,D,A) ppc_addzex(c,D,A,1,0)
#define ppc_addzeod(c,D,A) ppc_addzex(c,D,A,1,1)
#define ppc_andx(c,S,A,B,RC) ppc_emit32(c, (31 << 26) | ((S) << 21 ) | ((A) << 16) | ((B) << 11) | (28 << 1) | RC)
#define ppc_and(c,S,A,B) ppc_andx(c,S,A,B,0)
#define ppc_andd(c,S,A,B) ppc_andx(c,S,A,B,1)
#define ppc_andcx(c,S,A,B,RC) ppc_emit32(c, (31 << 26) | ((S) << 21 ) | ((A) << 16) | ((B) << 11) | (60 << 1) | RC)
#define ppc_andc(c,S,A,B) ppc_andcx(c,S,A,B,0)
#define ppc_andcd(c,S,A,B) ppc_andcx(c,S,A,B,1)
#define ppc_andid(c,S,A,ui) ppc_emit32(c, (28 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
#define ppc_andisd(c,S,A,ui) ppc_emit32(c, (29 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
#define ppc_bcx(c,BO,BI,BD,AA,LK) ppc_emit32(c, (16 << 26) | (BO << 21 )| (BI << 16) | (BD << 2) | ((AA) << 1) | LK)
#define ppc_bc(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,0,0)
#define ppc_bca(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,1,0)
#define ppc_bcl(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,0,1)
#define ppc_bcla(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,1,1)
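/* For instance, a "branch if cr0 equal" combines the B0/B1 operand enums
 * defined near the top of this file:
 *     ppc_bc (c, PPC_BR_TRUE, PPC_BR_EQ, offset);
 * i.e. BO = 12 (branch when the condition bit is true) and BI = 2 (the EQ
 * bit of cr0); for condition field crN the BI operand is 4 * N + PPC_BR_EQ. */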
#define ppc_bcctrx(c,BO,BI,LK) ppc_emit32(c, (19 << 26) | (BO << 21 )| (BI << 16) | (0 << 11) | (528 << 1) | LK)
#define ppc_bcctr(c,BO,BI) ppc_bcctrx(c,BO,BI,0)
#define ppc_bcctrl(c,BO,BI) ppc_bcctrx(c,BO,BI,1)
#define ppc_bnectrp(c,BO,BI) ppc_bcctr(c,BO,BI)
#define ppc_bnectrlp(c,BO,BI) ppc_bcctr(c,BO,BI)
#define ppc_bclrx(c,BO,BI,BH,LK) ppc_emit32(c, (19 << 26) | ((BO) << 21 )| ((BI) << 16) | (0 << 13) | ((BH) << 11) | (16 << 1) | (LK))
#define ppc_bclr(c,BO,BI,BH) ppc_bclrx(c,BO,BI,BH,0)
#define ppc_bclrl(c,BO,BI,BH) ppc_bclrx(c,BO,BI,BH,1)
#define ppc_bnelrp(c,BO,BI) ppc_bclr(c,BO,BI,0)
#define ppc_bnelrlp(c,BO,BI) ppc_bclr(c,BO,BI,0)
#define ppc_cmp(c,cfrD,L,A,B) ppc_emit32(c, (31 << 26) | ((cfrD) << 23) | (0 << 22) | ((L) << 21) | ((A) << 16) | ((B) << 11) | (0 << 1) | 0)
#define ppc_cmpi(c,cfrD,L,A,B) ppc_emit32(c, (11 << 26) | (cfrD << 23) | (0 << 22) | (L << 21) | (A << 16) | (guint16)(B))
#define ppc_cmpl(c,cfrD,L,A,B) ppc_emit32(c, (31 << 26) | ((cfrD) << 23) | (0 << 22) | ((L) << 21) | ((A) << 16) | ((B) << 11) | (32 << 1) | 0)
#define ppc_cmpli(c,cfrD,L,A,B) ppc_emit32(c, (10 << 26) | (cfrD << 23) | (0 << 22) | (L << 21) | (A << 16) | (guint16)(B))
#define ppc_cmpw(c,cfrD,A,B) ppc_cmp(c, (cfrD), 0, (A), (B))
#define ppc_cntlzwx(c,S,A,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (0 << 11) | (26 << 1) | Rc)
#define ppc_cntlzw(c,S,A) ppc_cntlzwx(c,S,A,0)
#define ppc_cntlzwd(c,S,A) ppc_cntlzwx(c,S,A,1)
#define ppc_crand(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (257 << 1) | 0)
#define ppc_crandc(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (129 << 1) | 0)
#define ppc_creqv(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (289 << 1) | 0)
#define ppc_crnand(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (225 << 1) | 0)
#define ppc_crnor(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (33 << 1) | 0)
#define ppc_cror(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (449 << 1) | 0)
#define ppc_crorc(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (417 << 1) | 0)
#define ppc_crxor(c,D,A,B) ppc_emit32(c, (19 << 26) | (D << 21) | (A << 16) | (B << 11) | (193 << 1) | 0)
#define ppc_dcba(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (758 << 1) | 0)
#define ppc_dcbf(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (86 << 1) | 0)
#define ppc_dcbi(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (470 << 1) | 0)
#define ppc_dcbst(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (54 << 1) | 0)
#define ppc_dcbt(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (278 << 1) | 0)
#define ppc_dcbtst(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (246 << 1) | 0)
#define ppc_dcbz(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (1014 << 1) | 0)
#define ppc_divwx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (491 << 1) | Rc)
#define ppc_divw(c,D,A,B) ppc_divwx(c,D,A,B,0,0)
#define ppc_divwd(c,D,A,B) ppc_divwx(c,D,A,B,0,1)
#define ppc_divwo(c,D,A,B) ppc_divwx(c,D,A,B,1,0)
#define ppc_divwod(c,D,A,B) ppc_divwx(c,D,A,B,1,1)
#define ppc_divwux(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (459 << 1) | Rc)
#define ppc_divwu(c,D,A,B) ppc_divwux(c,D,A,B,0,0)
#define ppc_divwud(c,D,A,B) ppc_divwux(c,D,A,B,0,1)
#define ppc_divwuo(c,D,A,B) ppc_divwux(c,D,A,B,1,0)
#define ppc_divwuod(c,D,A,B) ppc_divwux(c,D,A,B,1,1)
#define ppc_eciwx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (310 << 1) | 0)
#define ppc_ecowx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (438 << 1) | 0)
#define ppc_eieio(c) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | (0 << 11) | (854 << 1) | 0)
#define ppc_eqvx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (284 << 1) | Rc)
#define ppc_eqv(c,A,S,B) ppc_eqvx(c,A,S,B,0)
#define ppc_eqvd(c,A,S,B) ppc_eqvx(c,A,S,B,1)
#define ppc_extsbx(c,A,S,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (0 << 11) | (954 << 1) | Rc)
#define ppc_extsb(c,A,S) ppc_extsbx(c,A,S,0)
#define ppc_extsbd(c,A,S) ppc_extsbx(c,A,S,1)
#define ppc_extshx(c,A,S,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (0 << 11) | (922 << 1) | Rc)
#define ppc_extsh(c,A,S) ppc_extshx(c,A,S,0)
#define ppc_extshd(c,A,S) ppc_extshx(c,A,S,1)
#define ppc_fabsx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (264 << 1) | Rc)
#define ppc_fabs(c,D,B) ppc_fabsx(c,D,B,0)
#define ppc_fabsd(c,D,B) ppc_fabsx(c,D,B,1)
#define ppc_faddx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (21 << 1) | Rc)
#define ppc_fadd(c,D,A,B) ppc_faddx(c,D,A,B,0)
#define ppc_faddd(c,D,A,B) ppc_faddx(c,D,A,B,1)
#define ppc_faddsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (21 << 1) | Rc)
#define ppc_fadds(c,D,A,B) ppc_faddsx(c,D,A,B,0)
#define ppc_faddsd(c,D,A,B) ppc_faddsx(c,D,A,B,1)
#define ppc_fcmpo(c,crfD,A,B) ppc_emit32(c, (63 << 26) | (crfD << 23) | (0 << 21) | (A << 16) | (B << 11) | (32 << 1) | 0)
#define ppc_fcmpu(c,crfD,A,B) ppc_emit32(c, (63 << 26) | (crfD << 23) | (0 << 21) | (A << 16) | (B << 11) | (0 << 1) | 0)
#define ppc_fctiwx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (14 << 1) | Rc)
#define ppc_fctiw(c,D,B) ppc_fctiwx(c,D,B,0)
#define ppc_fctiwd(c,D,B) ppc_fctiwx(c,D,B,1)
#define ppc_fctiwzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (15 << 1) | Rc)
#define ppc_fctiwz(c,D,B) ppc_fctiwzx(c,D,B,0)
#define ppc_fctiwzd(c,D,B) ppc_fctiwzx(c,D,B,1)
#define ppc_fdivx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (18 << 1) | Rc)
#define ppc_fdiv(c,D,A,B) ppc_fdivx(c,D,A,B,0)
#define ppc_fdivd(c,D,A,B) ppc_fdivx(c,D,A,B,1)
#define ppc_fdivsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (18 << 1) | Rc)
#define ppc_fdivs(c,D,A,B) ppc_fdivsx(c,D,A,B,0)
#define ppc_fdivsd(c,D,A,B) ppc_fdivsx(c,D,A,B,1)
#define ppc_fmaddx(c,D,A,B,C,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (29 << 1) | Rc)
#define ppc_fmadd(c,D,A,B,C) ppc_fmaddx(c,D,A,B,C,0)
#define ppc_fmaddd(c,D,A,B,C) ppc_fmaddx(c,D,A,B,C,1)
#define ppc_fmaddsx(c,D,A,B,C,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (29 << 1) | Rc)
#define ppc_fmadds(c,D,A,B,C) ppc_fmaddsx(c,D,A,B,C,0)
#define ppc_fmaddsd(c,D,A,B,C) ppc_fmaddsx(c,D,A,B,C,1)
#define ppc_fmrx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (72 << 1) | Rc)
#define ppc_fmr(c,D,B) ppc_fmrx(c,D,B,0)
#define ppc_fmrd(c,D,B) ppc_fmrx(c,D,B,1)
#define ppc_fmsubx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (28 << 1) | Rc)
#define ppc_fmsub(c,D,A,C,B) ppc_fmsubx(c,D,A,C,B,0)
#define ppc_fmsubd(c,D,A,C,B) ppc_fmsubx(c,D,A,C,B,1)
#define ppc_fmsubsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (28 << 1) | Rc)
#define ppc_fmsubs(c,D,A,C,B) ppc_fmsubsx(c,D,A,C,B,0)
#define ppc_fmsubsd(c,D,A,C,B) ppc_fmsubsx(c,D,A,C,B,1)
#define ppc_fmulx(c,D,A,C,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (0 << 11) | (C << 6) | (25 << 1) | Rc)
#define ppc_fmul(c,D,A,C) ppc_fmulx(c,D,A,C,0)
#define ppc_fmuld(c,D,A,C) ppc_fmulx(c,D,A,C,1)
#define ppc_fmulsx(c,D,A,C,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (0 << 11) | (C << 6) | (25 << 1) | Rc)
#define ppc_fmuls(c,D,A,C) ppc_fmulsx(c,D,A,C,0)
#define ppc_fmulsd(c,D,A,C) ppc_fmulsx(c,D,A,C,1)
#define ppc_fnabsx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (136 << 1) | Rc)
#define ppc_fnabs(c,D,B) ppc_fnabsx(c,D,B,0)
#define ppc_fnabsd(c,D,B) ppc_fnabsx(c,D,B,1)
#define ppc_fnegx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (40 << 1) | Rc)
#define ppc_fneg(c,D,B) ppc_fnegx(c,D,B,0)
#define ppc_fnegd(c,D,B) ppc_fnegx(c,D,B,1)
#define ppc_fnmaddx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (31 << 1) | Rc)
#define ppc_fnmadd(c,D,A,C,B) ppc_fnmaddx(c,D,A,C,B,0)
#define ppc_fnmaddd(c,D,A,C,B) ppc_fnmaddx(c,D,A,C,B,1)
#define ppc_fnmaddsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (31 << 1) | Rc)
#define ppc_fnmadds(c,D,A,C,B) ppc_fnmaddsx(c,D,A,C,B,0)
#define ppc_fnmaddsd(c,D,A,C,B) ppc_fnmaddsx(c,D,A,C,B,1)
#define ppc_fnmsubx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (30 << 1) | Rc)
#define ppc_fnmsub(c,D,A,C,B) ppc_fnmsubx(c,D,A,C,B,0)
#define ppc_fnmsubd(c,D,A,C,B) ppc_fnmsubx(c,D,A,C,B,1)
#define ppc_fnmsubsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (30 << 1) | Rc)
#define ppc_fnmsubs(c,D,A,C,B) ppc_fnmsubsx(c,D,A,C,B,0)
#define ppc_fnmsubsd(c,D,A,C,B) ppc_fnmsubsx(c,D,A,C,B,1)
#define ppc_fresx(c,D,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (0 << 16) | (B << 11) | (0 << 6) | (24 << 1) | Rc)
#define ppc_fres(c,D,B) ppc_fresx(c,D,B,0)
#define ppc_fresd(c,D,B) ppc_fresx(c,D,B,1)
#define ppc_frspx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (12 << 1) | Rc)
#define ppc_frsp(c,D,B) ppc_frspx(c,D,B,0)
#define ppc_frspd(c,D,B) ppc_frspx(c,D,B,1)
#define ppc_frsqrtex(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (0 << 6) | (26 << 1) | Rc)
#define ppc_frsqrte(c,D,B) ppc_frsqrtex(c,D,B,0)
#define ppc_frsqrted(c,D,B) ppc_frsqrtex(c,D,B,1)
#define ppc_fselx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (C << 6) | (23 << 1) | Rc)
#define ppc_fsel(c,D,A,C,B) ppc_fselx(c,D,A,C,B,0)
#define ppc_fseld(c,D,A,C,B) ppc_fselx(c,D,A,C,B,1)
#define ppc_fsqrtx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (B << 11) | (0 << 6) | (22 << 1) | Rc)
#define ppc_fsqrt(c,D,B) ppc_fsqrtx(c,D,B,0)
#define ppc_fsqrtd(c,D,B) ppc_fsqrtx(c,D,B,1)
#define ppc_fsqrtsx(c,D,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (0 << 16) | (B << 11) | (0 << 6) | (22 << 1) | Rc)
#define ppc_fsqrts(c,D,B) ppc_fsqrtsx(c,D,B,0)
#define ppc_fsqrtsd(c,D,B) ppc_fsqrtsx(c,D,B,1)
#define ppc_fsubx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (20 << 1) | Rc)
#define ppc_fsub(c,D,A,B) ppc_fsubx(c,D,A,B,0)
#define ppc_fsubd(c,D,A,B) ppc_fsubx(c,D,A,B,1)
#define ppc_fsubsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 6) | (20 << 1) | Rc)
#define ppc_fsubs(c,D,A,B) ppc_fsubsx(c,D,A,B,0)
#define ppc_fsubsd(c,D,A,B) ppc_fsubsx(c,D,A,B,1)
#define ppc_icbi(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (A << 16) | (B << 11) | (982 << 1) | 0)
#define ppc_isync(c) ppc_emit32(c, (19 << 26) | (0 << 11) | (150 << 1) | 0)
#define ppc_lbzu(c,D,d,A) ppc_emit32(c, (35 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lbzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (119 << 1) | 0)
#define ppc_lbzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (87 << 1) | 0)
#define ppc_lfdu(c,D,d,A) ppc_emit32(c, (51 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lfdux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (631 << 1) | 0)
#define ppc_lfdx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (599 << 1) | 0)
#define ppc_lfsu(c,D,d,A) ppc_emit32(c, (49 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lfsux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (567 << 1) | 0)
#define ppc_lfsx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (535 << 1) | 0)
#define ppc_lha(c,D,d,A) ppc_emit32(c, (42 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lhau(c,D,d,A) ppc_emit32(c, (43 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lhaux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (375 << 1) | 0)
#define ppc_lhax(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (343 << 1) | 0)
#define ppc_lhbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (790 << 1) | 0)
#define ppc_lhzu(c,D,d,A) ppc_emit32(c, (41 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lhzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (311 << 1) | 0)
#define ppc_lhzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (279 << 1) | 0)
#define ppc_lmw(c,D,d,A) ppc_emit32(c, (46 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lswi(c,D,A,NB) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (NB << 11) | (597 << 1) | 0)
#define ppc_lswx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (533 << 1) | 0)
#define ppc_lwarx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (20 << 1) | 0)
#define ppc_lwbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (534 << 1) | 0)
#define ppc_lwzu(c,D,d,A) ppc_emit32(c, (33 << 26) | (D << 21) | (A << 16) | (guint16)d)
#define ppc_lwzux(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (55 << 1) | 0)
#define ppc_lwzx(c,D,A,B) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (23 << 1) | 0)
#define ppc_mcrf(c,crfD,crfS) ppc_emit32(c, (19 << 26) | (crfD << 23) | (0 << 21) | (crfS << 18) | 0)
#define ppc_mcrfs(c,crfD,crfS) ppc_emit32(c, (63 << 26) | (crfD << 23) | (0 << 21) | (crfS << 18) | (0 << 16) | (64 << 1) | 0)
#define ppc_mcrxr(c,crfD) ppc_emit32(c, (31 << 26) | (crfD << 23) | (0 << 16) | (512 << 1) | 0)
#define ppc_mfcr(c,D) ppc_emit32(c, (31 << 26) | (D << 21) | (0 << 16) | (19 << 1) | 0)
#define ppc_mffsx(c,D,Rc) ppc_emit32(c, (63 << 26) | (D << 21) | (0 << 16) | (583 << 1) | Rc)
#define ppc_mffs(c,D) ppc_mffsx(c,D,0)
#define ppc_mffsd(c,D) ppc_mffsx(c,D,1)
#define ppc_mfmsr(c,D) ppc_emit32(c, (31 << 26) | (D << 21) | (0 << 16) | (83 << 1) | 0)
#define ppc_mfsr(c,D,SR) ppc_emit32(c, (31 << 26) | (D << 21) | (0 << 20) | (SR << 16) | (0 << 11) | (595 << 1) | 0)
#define ppc_mfsrin(c,D,B) ppc_emit32(c, (31 << 26) | (D << 21) | (0 << 16) | (B << 11) | (659 << 1) | 0)
#define ppc_mftb(c,D,TBR) ppc_emit32(c, (31 << 26) | (D << 21) | (TBR << 11) | (371 << 1) | 0)
#define ppc_mtcrf(c,CRM,S) ppc_emit32(c, (31 << 26) | (S << 21) | (0 << 20) | (CRM << 12) | (0 << 11) | (144 << 1) | 0)
#define ppc_mtfsb0x(c,CRB,Rc) ppc_emit32(c, (63 << 26) | (CRB << 21) | (0 << 11) | (70 << 1) | Rc)
#define ppc_mtfsb0(c,CRB) ppc_mtfsb0x(c,CRB,0)
#define ppc_mtfsb0d(c,CRB) ppc_mtfsb0x(c,CRB,1)
#define ppc_mtfsb1x(c,CRB,Rc) ppc_emit32(c, (63 << 26) | (CRB << 21) | (0 << 11) | (38 << 1) | Rc)
#define ppc_mtfsb1(c,CRB) ppc_mtfsb1x(c,CRB,0)
#define ppc_mtfsb1d(c,CRB) ppc_mtfsb1x(c,CRB,1)
#define ppc_mtfsfx(c,FM,B,Rc) ppc_emit32(c, (63 << 26) | (0 << 25) | (FM << 22) | (0 << 21) | (B << 11) | (711 << 1) | Rc)
#define ppc_mtfsf(c,FM,B) ppc_mtfsfx(c,FM,B,0)
#define ppc_mtfsfd(c,FM,B) ppc_mtfsfx(c,FM,B,1)
#define ppc_mtfsfix(c,crfD,IMM,Rc) ppc_emit32(c, (63 << 26) | (crfD << 23) | (0 << 16) | (IMM << 12) | (0 << 11) | (134 << 1) | Rc)
#define ppc_mtfsfi(c,crfD,IMM) ppc_mtfsfix(c,crfD,IMM,0)
#define ppc_mtfsfid(c,crfD,IMM) ppc_mtfsfix(c,crfD,IMM,1)
#define ppc_mtmsr(c, S) ppc_emit32(c, (31 << 26) | (S << 21) | (0 << 11) | (146 << 1) | 0)
#define ppc_mtsr(c,SR,S) ppc_emit32(c, (31 << 26) | (S << 21) | (0 << 20) | (SR << 16) | (0 << 11) | (210 << 1) | 0)
#define ppc_mtsrin(c,S,B) ppc_emit32(c, (31 << 26) | (S << 21) | (0 << 16) | (B << 11) | (242 << 1) | 0)
#define ppc_mulhwx(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 10) | (75 << 1) | Rc)
#define ppc_mulhw(c,D,A,B) ppc_mulhwx(c,D,A,B,0)
#define ppc_mulhwd(c,D,A,B) ppc_mulhwx(c,D,A,B,1)
#define ppc_mulhwux(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (0 << 10) | (11 << 1) | Rc)
#define ppc_mulhwu(c,D,A,B) ppc_mulhwux(c,D,A,B,0)
#define ppc_mulhwud(c,D,A,B) ppc_mulhwux(c,D,A,B,1)
#define ppc_mulli(c,D,A,SIMM) ppc_emit32(c, ((07) << 26) | (D << 21) | (A << 16) | (guint16)(SIMM))
#define ppc_mullwx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (235 << 1) | Rc)
#define ppc_mullw(c,D,A,B) ppc_mullwx(c,D,A,B,0,0)
#define ppc_mullwd(c,D,A,B) ppc_mullwx(c,D,A,B,0,1)
#define ppc_mullwo(c,D,A,B) ppc_mullwx(c,D,A,B,1,0)
#define ppc_mullwod(c,D,A,B) ppc_mullwx(c,D,A,B,1,1)
#define ppc_nandx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (476 << 1) | Rc)
#define ppc_nand(c,A,S,B) ppc_nandx(c,A,S,B,0)
#define ppc_nandd(c,A,S,B) ppc_nandx(c,A,S,B,1)
#define ppc_negx(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (0 << 11) | (OE << 10) | (104 << 1) | Rc)
#define ppc_neg(c,D,A) ppc_negx(c,D,A,0,0)
#define ppc_negd(c,D,A) ppc_negx(c,D,A,0,1)
#define ppc_nego(c,D,A) ppc_negx(c,D,A,1,0)
#define ppc_negod(c,D,A) ppc_negx(c,D,A,1,1)
#define ppc_norx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (124 << 1) | Rc)
#define ppc_nor(c,A,S,B) ppc_norx(c,A,S,B,0)
#define ppc_nord(c,A,S,B) ppc_norx(c,A,S,B,1)
#define ppc_not(c,A,S) ppc_norx(c,A,S,S,0)
#define ppc_orx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (444 << 1) | Rc)
#define ppc_ord(c,A,S,B) ppc_orx(c,A,S,B,1)
#define ppc_orcx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (412 << 1) | Rc)
#define ppc_orc(c,A,S,B) ppc_orcx(c,A,S,B,0)
#define ppc_orcd(c,A,S,B) ppc_orcx(c,A,S,B,1)
#define ppc_oris(c,A,S,UIMM) ppc_emit32(c, (25 << 26) | (S << 21) | (A << 16) | (guint16)(UIMM))
#define ppc_rfi(c) ppc_emit32(c, (19 << 26) | (0 << 11) | (50 << 1) | 0)
#define ppc_rlwimix(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (20 << 26) | (S << 21) | (A << 16) | (SH << 11) | (MB << 6) | (ME << 1) | Rc)
#define ppc_rlwimi(c,A,S,SH,MB,ME) ppc_rlwimix(c,A,S,SH,MB,ME,0)
#define ppc_rlwimid(c,A,S,SH,MB,ME) ppc_rlwimix(c,A,S,SH,MB,ME,1)
#define ppc_rlwinmx(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (21 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | ((MB) << 6) | ((ME) << 1) | (Rc))
#define ppc_rlwinm(c,A,S,SH,MB,ME) ppc_rlwinmx(c,A,S,SH,MB,ME,0)
#define ppc_rlwinmd(c,A,S,SH,MB,ME) ppc_rlwinmx(c,A,S,SH,MB,ME,1)
#define ppc_extlwi(c,A,S,n,b) ppc_rlwinm(c,A,S, b, 0, (n) - 1)
#define ppc_extrwi(c,A,S,n,b) ppc_rlwinm(c,A,S, (b) + (n), 32 - (n), 31)
#define ppc_rotlwi(c,A,S,n) ppc_rlwinm(c,A,S, n, 0, 31)
#define ppc_rotrwi(c,A,S,n) ppc_rlwinm(c,A,S, 32 - (n), 0, 31)
#define ppc_slwi(c,A,S,n) ppc_rlwinm(c,A,S, n, 0, 31 - (n))
#define ppc_srwi(c,A,S,n) ppc_rlwinm(c,A,S, 32 - (n), n, 31)
#define ppc_clrlwi(c,A,S,n) ppc_rlwinm(c,A,S, 0, n, 31)
#define ppc_clrrwi(c,A,S,n) ppc_rlwinm(c,A,S, 0, 0, 31 - (n))
#define ppc_clrlslwi(c,A,S,b,n) ppc_rlwinm(c,A,S, n, (b) - (n), 31 - (n))
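/* For instance, the shift/clear helpers above are all rotate-and-mask forms:
 *     ppc_slwi (c, ppc_r4, ppc_r3, 3)     ==  rlwinm r4, r3, 3, 0, 28
 *     ppc_srwi (c, ppc_r4, ppc_r3, 3)     ==  rlwinm r4, r3, 29, 3, 31
 *     ppc_clrlwi (c, ppc_r4, ppc_r3, 16)  ==  rlwinm r4, r3, 0, 16, 31
 * (rotate left by SH, then keep only bits MB..ME of the rotated word, with
 * bit 0 being the most significant bit in the usual PowerPC numbering). */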
#define ppc_rlwnmx(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (23 << 26) | (S << 21) | (A << 16) | (SH << 11) | (MB << 6) | (ME << 1) | Rc)
#define ppc_rlwnm(c,A,S,SH,MB,ME) ppc_rlwnmx(c,A,S,SH,MB,ME,0)
#define ppc_rlwnmd(c,A,S,SH,MB,ME) ppc_rlwnmx(c,A,S,SH,MB,ME,1)
#define ppc_sc(c) ppc_emit32(c, (17 << 26) | (0 << 2) | (1 << 1) | 0)
#define ppc_slwx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (24 << 1) | Rc)
#define ppc_slw(c,S,A,B) ppc_slwx(c,S,A,B,0)
#define ppc_slwd(c,S,A,B) ppc_slwx(c,S,A,B,1)
#define ppc_srawx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (792 << 1) | Rc)
#define ppc_sraw(c,A,S,B) ppc_srawx(c,A,S,B,0)
#define ppc_srawd(c,A,S,B) ppc_srawx(c,A,S,B,1)
#define ppc_srawix(c,A,S,SH,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (SH << 11) | (824 << 1) | Rc)
#define ppc_srawi(c,A,S,B) ppc_srawix(c,A,S,B,0)
#define ppc_srawid(c,A,S,B) ppc_srawix(c,A,S,B,1)
#define ppc_srwx(c,A,S,SH,Rc) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (SH << 11) | (536 << 1) | Rc)
#define ppc_srw(c,A,S,B) ppc_srwx(c,A,S,B,0)
#define ppc_srwd(c,A,S,B) ppc_srwx(c,A,S,B,1)
#define ppc_stbu(c,S,d,A) ppc_emit32(c, (39 << 26) | (S << 21) | (A << 16) | (guint16)(d))
#define ppc_stbux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (247 << 1) | 0)
#define ppc_stbx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (215 << 1) | 0)
#define ppc_stfdu(c,S,d,A) ppc_emit32(c, (55 << 26) | (S << 21) | (A << 16) | (guint16)(d))
#define ppc_stfdx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (727 << 1) | 0)
#define ppc_stfiwx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (983 << 1) | 0)
#define ppc_stfsu(c,S,d,A) ppc_emit32(c, (53 << 26) | (S << 21) | (A << 16) | (guint16)(d))
#define ppc_stfsux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (695 << 1) | 0)
#define ppc_stfsx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (663 << 1) | 0)
#define ppc_sthbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (918 << 1) | 0)
#define ppc_sthu(c,S,d,A) ppc_emit32(c, (45 << 26) | (S << 21) | (A << 16) | (guint16)(d))
#define ppc_sthux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (439 << 1) | 0)
#define ppc_sthx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (407 << 1) | 0)
#define ppc_stmw(c,S,d,A) ppc_emit32(c, (47 << 26) | (S << 21) | (A << 16) | (guint16)d)
#define ppc_stswi(c,S,A,NB) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (NB << 11) | (725 << 1) | 0)
#define ppc_stswx(c,S,A,NB) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (NB << 11) | (661 << 1) | 0)
#define ppc_stwbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (662 << 1) | 0)
#define ppc_stwcxd(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (150 << 1) | 1)
#define ppc_stwux(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (183 << 1) | 0)
#define ppc_stwx(c,S,A,B) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (151 << 1) | 0)
#define ppc_subfx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (40 << 1) | Rc)
#define ppc_subf(c,D,A,B) ppc_subfx(c,D,A,B,0,0)
#define ppc_subfd(c,D,A,B) ppc_subfx(c,D,A,B,0,1)
#define ppc_subfo(c,D,A,B) ppc_subfx(c,D,A,B,1,0)
#define ppc_subfod(c,D,A,B) ppc_subfx(c,D,A,B,1,1)
#define ppc_sub(c,D,A,B) ppc_subf(c,D,B,A)
#define ppc_subfcx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (8 << 1) | Rc)
#define ppc_subfc(c,D,A,B) ppc_subfcx(c,D,A,B,0,0)
#define ppc_subfcd(c,D,A,B) ppc_subfcx(c,D,A,B,0,1)
#define ppc_subfco(c,D,A,B) ppc_subfcx(c,D,A,B,1,0)
#define ppc_subfcod(c,D,A,B) ppc_subfcx(c,D,A,B,1,1)
#define ppc_subfex(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (B << 11) | (OE << 10) | (136 << 1) | Rc)
#define ppc_subfe(c,D,A,B) ppc_subfex(c,D,A,B,0,0)
#define ppc_subfed(c,D,A,B) ppc_subfex(c,D,A,B,0,1)
#define ppc_subfeo(c,D,A,B) ppc_subfex(c,D,A,B,1,0)
#define ppc_subfeod(c,D,A,B) ppc_subfex(c,D,A,B,1,1)
#define ppc_subfic(c,D,A,SIMM) ppc_emit32(c, (8 << 26) | (D << 21) | (A << 16) | (guint16)(SIMM))
#define ppc_subfmex(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (0 << 11) | (OE << 10) | (232 << 1) | Rc)
#define ppc_subfme(c,D,A) ppc_subfmex(c,D,A,0,0)
#define ppc_subfmed(c,D,A) ppc_subfmex(c,D,A,0,1)
#define ppc_subfmeo(c,D,A) ppc_subfmex(c,D,A,1,0)
#define ppc_subfmeod(c,D,A) ppc_subfmex(c,D,A,1,1)
#define ppc_subfzex(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | (D << 21) | (A << 16) | (0 << 11) | (OE << 10) | (200 << 1) | Rc)
#define ppc_subfze(c,D,A) ppc_subfzex(c,D,A,0,0)
#define ppc_subfzed(c,D,A) ppc_subfzex(c,D,A,0,1)
#define ppc_subfzeo(c,D,A) ppc_subfzex(c,D,A,1,0)
#define ppc_subfzeod(c,D,A) ppc_subfzex(c,D,A,1,1)
#define ppc_sync(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (598 << 1) | 0)
#define ppc_tlbia(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (370 << 1) | 0)
#define ppc_tlbie(c,B) ppc_emit32(c, (31 << 26) | (0 << 16) | (B << 11) | (306 << 1) | 0)
#define ppc_tlbsync(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (566 << 1) | 0)
#define ppc_tw(c,TO,A,B) ppc_emit32(c, (31 << 26) | (TO << 21) | (A << 16) | (B << 11) | (4 << 1) | 0)
#define ppc_twi(c,TO,A,SIMM) ppc_emit32(c, (3 << 26) | (TO << 21) | (A << 16) | (guint16)(SIMM))
#define ppc_xorx(c,A,S,B,RC) ppc_emit32(c, (31 << 26) | (S << 21) | (A << 16) | (B << 11) | (316 << 1) | RC)
#define ppc_xor(c,A,S,B) ppc_xorx(c,A,S,B,0)
#define ppc_xord(c,A,S,B) ppc_xorx(c,A,S,B,1)
#define ppc_xori(c,S,A,UIMM) ppc_emit32(c, (26 << 26) | (S << 21) | (A << 16) | (guint16)(UIMM))
#define ppc_xoris(c,S,A,UIMM) ppc_emit32(c, (27 << 26) | (S << 21) | (A << 16) | (guint16)(UIMM))
/* this marks the end of my work, ct */
/* PPC64 */
/* The following FP instructions are not available to 32-bit
implementations prior to PowerISA-V2.01, but are available to
32-bit mode programs on 64-bit PowerPC implementations and to all
processors compliant with PowerISA-2.01 or later. */
#define ppc_fcfidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
#define ppc_fcfid(c,D,B) ppc_fcfidx(c,D,B,0)
#define ppc_fcfidd(c,D,B) ppc_fcfidx(c,D,B,1)
#define ppc_fctidx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
#define ppc_fctid(c,D,B) ppc_fctidx(c,D,B,0)
#define ppc_fctidd(c,D,B) ppc_fctidx(c,D,B,1)
#define ppc_fctidzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
#define ppc_fctidz(c,D,B) ppc_fctidzx(c,D,B,0)
#define ppc_fctidzd(c,D,B) ppc_fctidzx(c,D,B,1)
#ifdef __mono_ppc64__
#define ppc_load_sequence(c,D,v) G_STMT_START { \
ppc_lis ((c), (D), ((guint64)(v) >> 48) & 0xffff); \
ppc_ori ((c), (D), (D), ((guint64)(v) >> 32) & 0xffff); \
ppc_sldi ((c), (D), (D), 32); \
ppc_oris ((c), (D), (D), ((guint64)(v) >> 16) & 0xffff); \
ppc_ori ((c), (D), (D), (guint64)(v) & 0xffff); \
} G_STMT_END
#define PPC_LOAD_SEQUENCE_LENGTH 20
#define ppc_is_imm32(val) (((((gint64)val)>> 31) == 0) || ((((gint64)val)>> 31) == -1))
#define ppc_is_imm48(val) (((((gint64)val)>> 47) == 0) || ((((gint64)val)>> 47) == -1))
#define ppc_load48(c,D,v) G_STMT_START { \
ppc_li ((c), (D), ((gint64)(v) >> 32) & 0xffff); \
ppc_sldi ((c), (D), (D), 32); \
ppc_oris ((c), (D), (D), ((guint64)(v) >> 16) & 0xffff); \
ppc_ori ((c), (D), (D), (guint64)(v) & 0xffff); \
} G_STMT_END
#define ppc_load(c,D,v) G_STMT_START { \
if (ppc_is_imm16 ((guint64)(v))) { \
ppc_li ((c), (D), (guint16)(guint64)(v)); \
} else if (ppc_is_imm32 ((guint64)(v))) { \
ppc_load32 ((c), (D), (guint32)(guint64)(v)); \
} else if (ppc_is_imm48 ((guint64)(v))) { \
ppc_load48 ((c), (D), (guint64)(v)); \
} else { \
ppc_load_sequence ((c), (D), (guint64)(v)); \
} \
} G_STMT_END
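/* A minimal usage sketch, not part of the original header: it only
   illustrates which branch of ppc_load a given constant takes. The code
   buffer is hypothetical, and ppc_li, ppc_load32, ppc_is_imm16 and the
   ppc_r3..ppc_r5 names are assumed to be defined elsewhere in this file.
   Kept under #if 0 so it never affects compilation. */
#if 0
static void example_ppc_load_paths (guint32 *code)
{
	ppc_load (code, ppc_r3, 42);                     /* fits imm16: single li */
	ppc_load (code, ppc_r4, 0x12345678);             /* imm32: lis + ori via ppc_load32 */
	ppc_load (code, ppc_r5, 0x123456789abcdef0ULL);  /* full 64-bit: 5-instruction sequence */
}
#endif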
#define ppc_load_func(c,D,v) G_STMT_START { \
ppc_load_sequence ((c), ppc_r11, (guint64)(gsize)(v)); \
ppc_ldptr ((c), ppc_r2, sizeof (gpointer), ppc_r11); \
ppc_ldptr ((c), (D), 0, ppc_r11); \
} G_STMT_END
#define ppc_load_multiple_regs(c,D,d,A) G_STMT_START { \
int __i, __o = (d); \
for (__i = (D); __i <= 31; ++__i) { \
ppc_ldr ((c), __i, __o, (A)); \
__o += sizeof (guint64); \
} \
} G_STMT_END
#define ppc_store_multiple_regs(c,S,d,A) G_STMT_START { \
int __i, __o = (d); \
for (__i = (S); __i <= 31; ++__i) { \
ppc_str ((c), __i, __o, (A)); \
__o += sizeof (guint64); \
} \
} G_STMT_END
#define ppc_compare(c,cfrD,A,B) ppc_cmp((c), (cfrD), 1, (A), (B))
#define ppc_compare_reg_imm(c,cfrD,A,B) ppc_cmpi((c), (cfrD), 1, (A), (B))
#define ppc_compare_log(c,cfrD,A,B) ppc_cmpl((c), (cfrD), 1, (A), (B))
#define ppc_shift_left(c,A,S,B) ppc_sld((c), (A), (S), (B))
#define ppc_shift_left_imm(c,A,S,n) ppc_sldi((c), (A), (S), (n))
#define ppc_shift_right_imm(c,A,S,B) ppc_srdi((c), (A), (S), (B))
#define ppc_shift_right_arith_imm(c,A,S,B) ppc_sradi((c), (A), (S), (B))
#define ppc_multiply(c,D,A,B) ppc_mulld((c), (D), (A), (B))
#define ppc_clear_right_imm(c,A,S,n) ppc_clrrdi((c), (A), (S), (n))
#define ppc_divdx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (489 << 1) | (Rc))
#define ppc_divd(c,D,A,B) ppc_divdx(c,D,A,B,0,0)
#define ppc_divdd(c,D,A,B) ppc_divdx(c,D,A,B,0,1)
#define ppc_divdo(c,D,A,B) ppc_divdx(c,D,A,B,1,0)
#define ppc_divdod(c,D,A,B) ppc_divdx(c,D,A,B,1,1)
#define ppc_divdux(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (457 << 1) | (Rc))
#define ppc_divdu(c,D,A,B) ppc_divdux(c,D,A,B,0,0)
#define ppc_divdud(c,D,A,B) ppc_divdux(c,D,A,B,0,1)
#define ppc_divduo(c,D,A,B) ppc_divdux(c,D,A,B,1,0)
#define ppc_divduod(c,D,A,B) ppc_divdux(c,D,A,B,1,1)
#define ppc_extswx(c,S,A,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (0 << 11) | (986 << 1) | (Rc))
#define ppc_extsw(c,A,S) ppc_extswx(c,S,A,0)
#define ppc_extswd(c,A,S) ppc_extswx(c,S,A,1)
/* These move float to/from GPR instructions are only available on POWER6
in native mode. They are faster than the equivalent store/load
because they avoid the store queue and its associated delays.
They should only be used in 64-bit mode unless the kernel preserves
the 64-bit GPRs on signals and dispatch in 32-bit mode; the Linux
kernel does not. */
#define ppc_mftgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (735 << 1) | 0)
#define ppc_mffgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (607 << 1) | 0)
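/* A minimal sketch, not part of the original header, contrasting the direct
   move with the store/load fallback described above. Register choices and
   the -8 stack slot are hypothetical, and ppc_stfd/ppc_sp/ppc_f1 are assumed
   to be defined earlier in this file. Kept under #if 0. */
#if 0
static void example_fpr_to_gpr (guint32 *code, int have_power6_64bit)
{
	if (have_power6_64bit) {
		ppc_mftgpr (code, ppc_r3, ppc_f1);   /* direct FPR -> GPR move, no memory round trip */
	} else {
		ppc_stfd (code, ppc_f1, -8, ppc_sp); /* spill the double below the stack pointer... */
		ppc_ld (code, ppc_r3, -8, ppc_sp);   /* ...and reload it into a GPR */
	}
}
#endif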
#define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
#define ppc_lwa(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((ds) & 0xfffc) | 2)
#define ppc_ldarx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (84 << 1) | 0)
#define ppc_ldu(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 1)
#define ppc_ldux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (53 << 1) | 0)
#define ppc_lwaux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (373 << 1) | 0)
#define ppc_ldx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (21 << 1) | 0)
#define ppc_lwax(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (341 << 1) | 0)
#define ppc_mulhdx(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (73 << 1) | (Rc))
#define ppc_mulhd(c,D,A,B) ppc_mulhdx(c,D,A,B,0)
#define ppc_mulhdd(c,D,A,B) ppc_mulhdx(c,D,A,B,1)
#define ppc_mulhdux(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (9 << 1) | (Rc))
#define ppc_mulhdu(c,D,A,B) ppc_mulhdux(c,D,A,B,0)
#define ppc_mulhdud(c,D,A,B) ppc_mulhdux(c,D,A,B,1)
#define ppc_mulldx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (233 << 1) | (Rc))
#define ppc_mulld(c,D,A,B) ppc_mulldx(c,D,A,B,0,0)
#define ppc_mulldd(c,D,A,B) ppc_mulldx(c,D,A,B,0,1)
#define ppc_mulldo(c,D,A,B) ppc_mulldx(c,D,A,B,1,0)
#define ppc_mulldod(c,D,A,B) ppc_mulldx(c,D,A,B,1,1)
#define ppc_rldclx(c,A,S,B,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (ppc_split_5_1(MB) << 5) | (8 << 1) | (Rc))
#define ppc_rldcl(c,A,S,B,MB) ppc_rldclx(c,A,S,B,MB,0)
#define ppc_rldcld(c,A,S,B,MB) ppc_rldclx(c,A,S,B,MB,1)
#define ppc_rotld(c,A,S,B) ppc_rldcl(c, A, S, B, 0)
#define ppc_rldcrx(c,A,S,B,ME,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (ppc_split_5_1(ME) << 5) | (9 << 1) | (Rc))
#define ppc_rldcr(c,A,S,B,ME) ppc_rldcrx(c,A,S,B,ME,0)
#define ppc_rldcrd(c,A,S,B,ME) ppc_rldcrx(c,A,S,B,ME,1)
#define ppc_rldicx(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (2 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
#define ppc_rldic(c,A,S,SH,MB) ppc_rldicx(c,S,A,SH,MB,0)
#define ppc_rldicd(c,A,S,SH,MB) ppc_rldicx(c,S,A,SH,MB,1)
#define ppc_rldiclx(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (0 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
#define ppc_rldicl(c,A,S,SH,MB) ppc_rldiclx(c,S,A,SH,MB,0)
#define ppc_rldicld(c,A,S,SH,MB) ppc_rldiclx(c,S,A,SH,MB,1)
#define ppc_extrdi(c,A,S,n,b) ppc_rldicl(c,A,S, (b) + (n), 64 - (n))
#define ppc_rotldi(c,A,S,n) ppc_rldicl(c,A,S, n, 0)
#define ppc_rotrdi(c,A,S,n) ppc_rldicl(c,A,S, 64 - (n), 0)
#define ppc_srdi(c,A,S,n) ppc_rldicl(c,A,S, 64 - (n), n)
#define ppc_clrldi(c,A,S,n) ppc_rldicl(c,A,S, 0, n)
#define ppc_rldicrx(c,A,S,SH,ME,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(ME) << 5) | (1 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
#define ppc_rldicr(c,A,S,SH,ME) ppc_rldicrx(c,A,S,SH,ME,0)
#define ppc_rldicrd(c,A,S,SH,ME) ppc_rldicrx(c,A,S,SH,ME,1)
#define ppc_extldi(c,A,S,n,b) ppc_rldicr(c, A, S, b, (n) - 1)
#define ppc_sldi(c,A,S,n) ppc_rldicr(c, A, S, n, 63 - (n))
#define ppc_clrrdi(c,A,S,n) ppc_rldicr(c, A, S, 0, 63 - (n))
#define ppc_rldimix(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (3 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
#define ppc_rldimi(c,A,S,SH,MB) ppc_rldimix(c,S,A,SH,MB,0)
#define ppc_rldimid(c,A,S,SH,MB) ppc_rldimix(c,S,A,SH,MB,1)
#define ppc_slbia(c) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | (0 << 11) | (498 << 1) | 0)
#define ppc_slbie(c,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | ((B) << 11) | (434 << 1) | 0)
#define ppc_sldx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (27 << 1) | (Rc))
#define ppc_sld(c,A,S,B) ppc_sldx(c,S,A,B,0)
#define ppc_sldd(c,A,S,B) ppc_sldx(c,S,A,B,1)
#define ppc_sradx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (794 << 1) | (Rc))
#define ppc_srad(c,A,S,B) ppc_sradx(c,S,A,B,0)
#define ppc_sradd(c,A,S,B) ppc_sradx(c,S,A,B,1)
#define ppc_sradix(c,S,A,SH,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (((SH) & 31) << 11) | (413 << 2) | (((SH) >> 5) << 1) | (Rc))
#define ppc_sradi(c,A,S,SH) ppc_sradix(c,S,A,SH,0)
#define ppc_sradid(c,A,S,SH) ppc_sradix(c,S,A,SH,1)
#define ppc_srdx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (539 << 1) | (Rc))
#define ppc_srd(c,A,S,B) ppc_srdx(c,S,A,B,0)
#define ppc_srdd(c,A,S,B) ppc_srdx(c,S,A,B,1)
#define ppc_std(c,S,ds,A) ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
#define ppc_stdcxd(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (214 << 1) | 1)
#define ppc_stdu(c,S,ds,A) ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 1)
#define ppc_stdux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (181 << 1) | 0)
#define ppc_stdx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (149 << 1) | 0)
#else
/* Always true for 32-bit */
#define ppc_is_imm32(val) (1)
#endif
#endif

@ -0,0 +1,6 @@
/Makefile
/Makefile.in
/.libs
/.deps
/*.la
/*.lo

@ -0,0 +1,35 @@
2010-03-23 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Remove duplicate
2009-06-24 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Add some new instructions.
2007-04-12 Neale Ferguson <neale@sinenomine.net>
* tramp.c: Add MONO_TYPE_PTR case.
2007-01-23 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Add packed attribute to several instruction structures.
2006-03-13 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Fix immediate checks.
2006-01-06 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Add lpdbr instruction (OP_ABS).
2006-01-03 Neale Ferguson <neale@sinenomine.net>
* s390x-codegen.h: Add some new instructions.
2004-12-15 Neale Ferguson <Neale.Ferguson@SoftwareAG-usa.com>
* s390x-codegen.h: Add some new instructions (CS, CSG, CSY, CDS, CDSG, CDSY)
2004-08-03 Neale Ferguson <Neale.Ferguson@SoftwareAG-usa.com>
* s390x-codegen.h Makefile.am tramp.c: S/390 64-bit interpreter

@ -0,0 +1,7 @@
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
noinst_LTLIBRARIES = libmonoarch-s390x.la
libmonoarch_s390x_la_SOURCES = tramp.c s390x-codegen.h

@ -0,0 +1,997 @@
/*
Copyright (C) 2001 Radek Doulik
*/
#ifndef S390X_H
#define S390X_H
#include <glib.h>
#include <assert.h>
#include <limits.h>
#define FLOAT_REGS 2 /* No. float registers for parms */
#define GENERAL_REGS 5 /* No. general registers for parms */
#define ARG_BASE s390_r10 /* Register for addressing arguments*/
#define STKARG \
(i*(sizeof(stackval))) /* Displacement of ith argument */
#define MINV_POS 160 /* MonoInvocation stack offset */
#define STACK_POS (MINV_POS - sizeof (stackval) * sig->param_count)
#define OBJ_POS 8
#define TYPE_OFFSET (G_STRUCT_OFFSET (stackval, type))
#define MIN_CACHE_LINE 256
/*------------------------------------------------------------------*/
/* Sequence to add an int/long long to parameters to stack_from_data*/
/*------------------------------------------------------------------*/
#define ADD_ISTACK_PARM(r, i) \
if (reg_param < GENERAL_REGS-(r)) { \
s390_lay (p, s390_r4, 0, STK_BASE, \
local_start + (reg_param - this_flag) * sizeof(long)); \
reg_param += (i); \
} else { \
s390_lay (p, s390_r4, 0, STK_BASE, \
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
stack_param += (i); \
}
/*------------------------------------------------------------------*/
/* Sequence to add a float/double to parameters to stack_from_data */
/*------------------------------------------------------------------*/
#define ADD_RSTACK_PARM(i) \
if (fpr_param < FLOAT_REGS) { \
s390_lay (p, s390_r4, 0, STK_BASE, \
float_pos + (fpr_param * sizeof(float) * (i))); \
fpr_param++; \
} else { \
stack_param += (stack_param % (i)); \
s390_lay (p, s390_r4, 0, STK_BASE, \
sz.stack_size + MINV_POS + stack_param * sizeof(float) * (i)); \
stack_param += (i); \
}
/*------------------------------------------------------------------*/
/* Sequence to add a structure ptr to parameters to stack_from_data */
/*------------------------------------------------------------------*/
#define ADD_TSTACK_PARM \
if (reg_param < GENERAL_REGS) { \
s390_ly (p, s390_r4, 0, STK_BASE, \
local_start + (reg_param - this_flag) * sizeof(long)); \
reg_param++; \
} else { \
s390_ly (p, s390_r4, 0, STK_BASE, \
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
stack_param++; \
}
#define ADD_PSTACK_PARM(r, i) \
if (reg_param < GENERAL_REGS-(r)) { \
s390_lay (p, s390_r4, 0, STK_BASE, \
local_start + (reg_param - this_flag) * sizeof(long)); \
reg_param += (i); \
} else { \
s390_ly (p, s390_r4, 0, STK_BASE, \
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
stack_param++; \
}
typedef enum {
s390_r0 = 0,
s390_r1,
s390_r2,
s390_r3,
s390_r4,
s390_r5,
s390_r6,
s390_r7,
s390_r8,
s390_r9,
s390_r10,
s390_r11,
s390_r12,
s390_r13,
s390_r14,
s390_r15,
} S390IntRegister;
typedef enum {
s390_f0 = 0,
s390_f1,
s390_f2,
s390_f3,
s390_f4,
s390_f5,
s390_f6,
s390_f7,
s390_f8,
s390_f9,
s390_f10,
s390_f11,
s390_f12,
s390_f13,
s390_f14,
s390_f15,
} S390FloatRegister;
typedef enum {
s390_a0 = 0,
s390_a1,
s390_a2,
s390_a3,
s390_a4,
s390_a5,
s390_a6,
s390_a7,
s390_a8,
s390_a9,
s390_a10,
s390_a11,
s390_a12,
s390_a13,
s390_a14,
s390_a15,
} S390AccRegister;
typedef enum {
s390_fpc = 256,
} S390SpecialRegister;
#define s390_is_imm16(val) ((glong)val >= (glong) SHRT_MIN && \
(glong)val <= (glong) SHRT_MAX)
#define s390_is_imm32(val) ((glong)val >= (glong) INT_MIN && \
(glong)val <= (glong) INT_MAX)
#define s390_is_uimm16(val) ((glong)val >= 0 && (glong)val <= (glong) USHRT_MAX)
#define s390_is_uimm32(val) ((glong)val >= 0 && (glong)val <= (glong) UINT_MAX)
#define s390_is_uimm20(val) ((glong)val >= 0 && (glong)val <= 1048575)
#define s390_is_imm20(val) ((glong)val >= -524288 && (glong)val <= 524287)
#define s390_is_imm12(val) ((glong)val >= (glong)-4096 && \
(glong)val <= (glong)4095)
#define s390_is_uimm12(val) ((glong)val >= 0 && (glong)val <= 4095)
#define STK_BASE s390_r15
#define S390_SP s390_r15
#define S390_FP s390_r11
#define S390_MINIMAL_STACK_SIZE 160
#define S390_REG_SAVE_OFFSET 48
#define S390_PARM_SAVE_OFFSET 16
#define S390_RET_ADDR_OFFSET 112
#define S390_FLOAT_SAVE_OFFSET 128
#define S390_CC_ZR 8
#define S390_CC_NE 7
#define S390_CC_NZ 7
#define S390_CC_LT 4
#define S390_CC_GT 2
#define S390_CC_GE 11
#define S390_CC_NM 11
#define S390_CC_LE 13
#define S390_CC_OV 1
#define S390_CC_NO 14
#define S390_CC_CY 3
#define S390_CC_NC 12
#define S390_CC_UN 15
#define s390_word(addr, value) do \
{ \
* (guint32 *) addr = (guint32) value; \
addr += sizeof(guint32); \
} while (0)
#define s390_float(addr, value) do \
{ \
* (gfloat *) addr = (gfloat) value; \
addr += sizeof(gfloat); \
} while (0)
#define s390_llong(addr, value) do \
{ \
* (guint64 *) addr = (guint64) value; \
addr += sizeof(guint64); \
} while (0)
#define s390_double(addr, value) do \
{ \
* (gdouble *) addr = (gdouble) value; \
addr += sizeof(gdouble); \
} while (0)
typedef struct {
short op;
} E_Format;
typedef struct {
char op;
int im;
} I_Format;
typedef struct {
char op;
char r1 : 4;
char r2 : 4;
} RR_Format;
typedef struct {
short op;
char xx;
char r1 : 4;
char r2 : 4;
} RRE_Format;
typedef struct {
short op;
char r1 : 4;
char xx : 4;
char r3 : 4;
char r2 : 4;
} RRF_Format_1;
typedef struct {
short op;
char m3 : 4;
char xx : 4;
char r1 : 4;
char r2 : 4;
} RRF_Format_2;
typedef struct {
short op;
char r3 : 4;
char m4 : 4;
char r1 : 4;
char r2 : 4;
} RRF_Format_3;
typedef struct {
char op;
char r1 : 4;
char x2 : 4;
char b2 : 4;
short d2 : 12;
} RX_Format;
typedef struct {
char op1;
char r1 : 4;
char x2 : 4;
char b2 : 4;
int d2 : 12;
char xx;
char op2;
} RXE_Format;
typedef struct {
char op1;
char r3 : 4;
char x2 : 4;
char b2 : 4;
int d2 : 12;
char r1 : 4;
char xx : 4;
char op2;
} RXF_Format;
typedef struct {
char op1;
char r1 : 4;
char x2 : 4;
char b2 : 4;
int d2 : 20;
char op2;
} __attribute__ ((packed)) RXY_Format;
typedef struct {
char op;
char r1 : 4;
char r3 : 4;
char b2 : 4;
int d2 : 12;
} RS_Format_1;
typedef struct {
char op;
char r1 : 4;
char m3 : 4;
char b2 : 4;
int d2 : 12;
} RS_Format_2;
typedef struct {
char op;
char r1 : 4;
char xx : 4;
char b2 : 4;
int d2 : 12;
} RS_Format_3;
typedef struct {
char op1;
char r1 : 4;
char r3 : 4;
char b2 : 4;
int d2 : 20;
char op2;
} __attribute__ ((packed)) RSY_Format_1;
typedef struct {
char op1;
char r1 : 4;
char m3 : 4;
char b2 : 4;
int d2 : 20;
char op2;
} __attribute__ ((packed)) RSY_Format_2;
typedef struct {
char op1;
char l1 : 4;
char xx : 4;
char b1 : 4;
int d1 : 12;
char yy;
char op2;
} RSL_Format;
typedef struct {
char op;
char r1 : 4;
char r3 : 4;
short i2;
} RSI_Format;
typedef struct {
char op1;
char m1 : 4;
char op2 : 4;
short i2;
} RI_Format;
typedef struct {
char op1;
char r1 : 4;
char r3 : 4;
short i2;
char xx;
char op2;
} RIE_Format_1;
typedef struct {
char op1;
char r1 : 4;
char r3 : 4;
short i2;
char m2 : 4;
char xx : 4;
char op2;
} RIE_Format_2;
typedef struct {
char op1;
char r1 : 4;
char r3 : 4;
short d;
char i;
char op2;
} RIE_Format_3;
typedef struct {
char op1;
char r1 : 4;
char yy : 4;
short i2;
char m3 : 4;
char xx : 4;
char op2;
} RIE_Format_4;
typedef struct {
char op1;
char r1 : 4;
char op2 : 4;
int i2;
} __attribute__ ((packed)) RIL_Format_1;
typedef struct {
char op1;
char m1 : 4;
char op2 : 4;
int i2;
} __attribute__ ((packed)) RIL_Format_2;
typedef struct {
char op;
char i2;
char b1 : 4;
short d1 : 12;
} SI_Format;
typedef struct {
char op1;
char i2;
char b1 : 4;
int d1 : 20;
char op2;
} __attribute__ ((packed)) SIY_Format;
typedef struct {
short op;
char b2 : 4;
short d2 : 12;
} S_Format;
typedef struct {
char op;
char ll;
char b1 : 4;
short d1 : 12;
char b2 : 4;
short d2 : 12;
} SS_Format_1;
typedef struct {
char op;
char l1 : 4;
char l2 : 4;
char b1 : 4;
short d1 : 12;
char b2 : 4;
short d2 : 12;
} SS_Format_2;
typedef struct {
char op;
char r1 : 4;
char r3 : 4;
char b1 : 4;
short d1 : 12;
char b2 : 4;
short d2 : 12;
} SS_Format_3;
typedef struct {
char op;
char r1 : 4;
char r3 : 4;
char b2 : 4;
short d2 : 12;
char b4 : 4;
short d4 : 12;
} SS_Format_4;
typedef struct {
short op;
short tb1 : 4;
short d1 : 12;
short b2 : 4;
short d2 : 12;
} __attribute__ ((packed)) SSE_Format;
typedef struct {
short op;
char r3 : 4;
char o2 : 4;
short b1 : 4;
short d1 : 12;
short b2 : 4;
short d2 : 12;
} __attribute__ ((packed)) SSF_Format;
#define s390_emit16(c, x) do \
{ \
*((guint16 *) c) = (guint16) x; \
c += sizeof(guint16); \
} while(0)
#define s390_emit32(c, x) do \
{ \
*((guint32 *) c) = (guint32) x; \
c += sizeof(guint32); \
} while(0)
#define S390_E(c,opc) s390_emit16(c,opc)
#define S390_I(c,opc,imm) s390_emit16(c, (opc << 8 | imm))
#define S390_RR(c,opc,g1,g2) s390_emit16(c, (opc << 8 | (g1) << 4 | g2))
#define S390_RRE(c,opc,g1,g2) s390_emit32(c, (opc << 16 | (g1) << 4 | g2))
#define S390_RRF_1(c,opc,g1,g2,g3) s390_emit32(c, (opc << 16 | (g1) << 12 | (g3) << 4 | g2))
#define S390_RRF_2(c,opc,g1,k3,g2) s390_emit32(c, (opc << 16 | (k3) << 12 | (g1) << 4 | g2))
#define S390_RRF_3(c,opc,g1,g2,k4,g3) s390_emit32(c, (opc << 16 | (g3) << 12 | (k4) << 8 | (g1) << 4 | g2))
#define S390_RX(c,opc,g1,n2,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (n2) << 16 | (s2) << 12 | ((p2) & 0xfff)))
#define S390_RXE(c,opc,g1,n2,s2,p2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | n2)); \
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
(opc & 0xff))); \
} while (0)
#define S390_RXY(c,opc,g1,n2,s2,p2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | n2)); \
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
((((p2) & 0xff000) >> 12) << 8) | \
(opc & 0xff))); \
} while (0)
#define S390_RS_1(c,opc,g1,g3,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 | (s2) << 12 | ((p2) & 0xfff)))
#define S390_RS_2(c,opc,g1,k3,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (k3) << 16 | (s2) << 12 | ((p2) & 0xfff)))
#define S390_RS_3(c,opc,g1,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (s2) << 12 | ((p2) & 0xfff)))
#define S390_RSY_1(c,opc,g1,g3,s2,p2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g3)); \
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
((((p2) & 0xff000) >> 12) << 8) | \
(opc & 0xff))); \
} while (0)
#define S390_RSY_2(c,opc,g1,k3,s2,p2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | k3)); \
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
((((p2) & 0xff000) >> 12) << 8) | \
(opc & 0xff))); \
} while (0)
#define S390_RSL(c,opc,ln,s1,p1) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (ln) << 4)); \
s390_emit32(c, ((s1) << 28 | ((s1 & 0xfff) << 16) | \
(opc & 0xff))); \
} while (0)
#define S390_RSI(c,opc,g1,g3,m2) s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 | (m2 & 0xffff)))
#define S390_RI(c,opc,g1,m2) s390_emit32(c, ((opc >> 4) << 24 | (g1) << 20 | (opc & 0x0f) << 16 | (m2 & 0xffff)))
#define S390_RIE_1(c,opc,g1,g3,m2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g3)); \
s390_emit32(c, ((m2) << 16 | (opc & 0xff))); \
} while (0)
#define S390_RIE_2(c,opc,g1,g2,m3,v) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g2)); \
s390_emit16(c, (v)); \
s390_emit16(c, ((m3) << 12 | (opc & 0xff))); \
} while (0)
#define S390_RIE_3(c,opc,g1,i,m3,d) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | m3)); \
s390_emit16(c, (d)); \
s390_emit16(c, ((i) << 8 | (opc & 0xff))); \
} while (0)
#define S390_RIE_4(c,opc,g1,i2,m3) do \
{ \
s390_emit16(c, ((opc & 0xff00) | (g1) << 4)); \
s390_emit16(c, (i2)); \
s390_emit16(c, ((m3) << 12 | (opc & 0xff))); \
} while (0)
#define S390_RIL_1(c,opc,g1,m2) do \
{ \
s390_emit16(c, ((opc >> 4) << 8 | (g1) << 4 | (opc & 0xf))); \
s390_emit32(c, m2); \
} while (0)
#define S390_RIL_2(c,opc,k1,m2) do \
{ \
s390_emit16(c, ((opc >> 4) << 8 | (k1) << 4 | (opc & 0xf))); \
s390_emit32(c, m2); \
} while (0)
#define S390_RIS(c,opc,r,i,m3,b,d) do \
{ \
s390_emit16(c, ((opc) & 0xff00) | ((r) << 4) | (m3)); \
s390_emit16(c, ((b) << 12) | (d)); \
s390_emit16(c, ((i) << 8) | ((opc) & 0xff)); \
} while (0)
#define S390_RRS(c,opc,r1,r2,m3,b,d) do \
{ \
s390_emit16(c, ((opc) & 0xff00) | ((r1) << 4) | (r2)); \
s390_emit16(c, ((b) << 12) | (d)); \
s390_emit16(c, ((m3) << 12) | ((opc) & 0xff)); \
} while (0)
#define S390_SI(c,opc,s1,p1,m2) s390_emit32(c, (opc << 24 | (m2) << 16 | (s1) << 12 | ((p1) & 0xfff)));
#define S390_SIY(c,opc,s1,p1,m2) do \
{ \
s390_emit16(c, ((opc & 0xff00) | m2)); \
s390_emit32(c, ((s1) << 24 | (((p1) & 0xfffff) << 8) | \
(opc & 0xff))); \
} while (0)
#define S390_S(c,opc,s2,p2) s390_emit32(c, (opc << 16 | (s2) << 12 | ((p2) & 0xfff)))
#define S390_SS_1(c,opc,ln,s1,p1,s2,p2) do \
{ \
s390_emit32(c, (opc << 24 | ((ln-1) & 0xff) << 16 | \
(s1) << 12 | ((p1) & 0xfff))); \
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
} while (0)
#define S390_SS_2(c,opc,n1,n2,s1,p1,s2,p2) do \
{ \
s390_emit32(c, (opc << 24 | (n1) << 16 | (n2) << 12 | \
(s1) << 12 | ((p1) & 0xfff))); \
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
} while (0)
#define S390_SS_3(c,opc,g1,g3,s1,p1,s2,p2) do \
{ \
s390_emit32(c, (opc << 24 | (g1) << 16 | (g3) << 12 | \
(s1) << 12 | ((p1) & 0xfff))); \
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
} while (0)
#define S390_SS_4(c,opc,g1,g3,s2,p2,s4,p4) do \
{ \
s390_emit32(c, (opc << 24 | (g1) << 16 | (g3) << 12 | \
(s2) << 12 | ((p2) & 0xfff))); \
s390_emit16(c, ((s4) << 12 | ((p4) & 0xfff))); \
} while (0)
#define S390_SSE(c,opc,s1,p1,s2,p2) do \
{ \
s390_emit16(c, opc); \
s390_emit16(c, ((s1) << 12 | ((p1) & 0xfff))); \
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
} while (0)
#define S390_SSF(c,opc,r3,s1,p1,s2,p2) do \
{ \
s390_emit16(c, (((opc) & 0xff00) << 8) | ((r3) << 4) | \
((opc) & 0xf)); \
s390_emit16(c, ((s1) << 12 | ((p1) & 0xfff))); \
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
} while (0)
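/* A minimal sketch, not part of the original header: how the format
   emitters above assemble instruction words. The buffer is hypothetical;
   the mnemonic wrappers further down (s390_ar, s390_agr, ...) expand to
   exactly these calls. Kept under #if 0. */
#if 0
static void example_s390_formats (void)
{
	guint8 buffer[16];
	guint8 *code = buffer;

	S390_RR (code, 0x1a, s390_r3, s390_r4);    /* "ar %r3,%r4": halfword 0x1a34 */
	S390_RRE (code, 0xb908, s390_r5, s390_r6); /* "agr %r5,%r6": 0xb908 in the high halfword, r1/r2 nibbles in the low byte */
}
#endif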
#define s390_a(c, r, x, b, d) S390_RX(c, 0x5a, r, x, b, d)
#define s390_adb(c, r, x, b, d) S390_RXE(c, 0xed1a, r, x, b, d)
#define s390_adbr(c, r1, r2) S390_RRE(c, 0xb31a, r1, r2)
#define s390_aebr(c, r1, r2) S390_RRE(c, 0xb30a, r1, r2)
#define s390_afi(c, r, v) S390_RIL_1(c, 0xc29, r, v);
#define s390_ag(c, r, x, b, d) S390_RXY(c, 0xe308, r, x, b, d)
#define s390_agf(c, r, x, b, d) S390_RXY(c, 0xe318, r, x, b, d)
#define s390_agfi(c, r, v) S390_RIL_1(c, 0xc28, r, v)
#define s390_agfr(c, r1, r2) S390_RRE(c, 0xb918, r1, r2)
#define s390_aghi(c, r, v) S390_RI(c, 0xa7b, r, v)
#define s390_aghik(c, r, v) S390_RIE_1(c, 0xecd9, r, v)
#define s390_agr(c, r1, r2) S390_RRE(c, 0xb908, r1, r2)
#define s390_agrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e8, r1, r2, r3)
#define s390_agsi(c, r, v) S390_SIY(c, 0xeb7a, r, v)
#define s390_ahhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9c8, r1, r2, r3)
#define s390_ahhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9d8, r1, r2, r3)
#define s390_ahi(c, r, v) S390_RI(c, 0xa7a, r, v)
#define s390_ahik(c, r, v) S390_RIE_1(c, 0xecd8, r, v)
#define s390_ahy(c, r, x, b, d) S390_RXY(c, 0xe37a, r, x, b, d)
#define s390_aih(c, r, v) S390_RIL_1(c, 0xcc8, r, v)
#define s390_al(c, r, x, b, d) S390_RX(c, 0x5e, r, x, b, d)
#define s390_alc(c, r, x, b, d) S390_RXY(c, 0xe398, r, x, b, d)
#define s390_alcg(c, r, x, b, d) S390_RXY(c, 0xe388, r, x, b, d)
#define s390_alcgr(c, r1, r2) S390_RRE(c, 0xb988, r1, r2)
#define s390_alcr(c, r1, r2) S390_RRE(c, 0xb998, r1, r2)
#define s390_alfi(c, r, v) S390_RIL_1(c, 0xc2b, r, v)
#define s390_alg(c, r, x, b, d) S390_RXY(c, 0xe30a, r, x, b, d)
#define s390_algf(c, r, x, b, d) S390_RXY(c, 0xe31a, r, x, b, d)
#define s390_algfi(c, r, v) S390_RIL_1(c, 0xc2a, r, v)
#define s390_algfr(c, r1, r2) S390_RRE(c, 0xb91a, r1, r2)
#define s390_alghsik(c, r, v) S390_RIE_1(c, 0xecd8, r, v)
#define s390_algr(c, r1, r2) S390_RRE(c, 0xb90a, r1, r2)
#define s390_algsi(c, r, v) S390_SIY(c, 0xeb7e, r, v)
#define s390_alhhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9ca, r1, r2, r3)
#define s390_alhhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9da, r1, r2, r3)
#define s390_alhsik(c, r, v) S390_RIE_1(c, 0xecda, r, v)
#define s390_alr(c, r1, r2) S390_RR(c, 0x1e, r1, r2)
#define s390_alrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9fa, r1, r2, r3)
#define s390_alsi(c, r, v) S390_SIY(c, 0xeb6e, r, v)
#define s390_alsih(c, r, v) S390_RIL_1(c, 0xcca, r, v)
#define s390_alsihn(c, r, v) S390_RIL_1(c, 0xccb, r, v)
#define s390_aly(c, r, x, b, d) S390_RXY(c, 0xe35e, r, x, b, d)
#define s390_ar(c, r1, r2) S390_RR(c, 0x1a, r1, r2)
#define s390_ark(c, r1, r2, r3) S390_RRF_1(c, 0xb9f8, r1, r2, r3)
#define s390_asi(c, r, v) S390_SIY(c, 0xeb6a, r, v)
#define s390_ay(c, r, x, b, d) S390_RXY(c, 0xe35a, r, x, b, d)
#define s390_basr(c, r1, r2) S390_RR(c, 0x0d, r1, r2)
#define s390_bctr(c, r1, r2) S390_RR(c, 0x06, r1, r2)
#define s390_bctrg(c, r1, r2) S390_RRE(c, 0xb946, r1, r2)
#define s390_bnzr(c, r) S390_RR(c, 0x07, 0x07, r)
#define s390_bras(c, r, o) S390_RI(c, 0xa75, r, o)
#define s390_brasl(c, r, o) S390_RIL_1(c, 0xc05, r, o)
#define s390_brc(c, m, d) S390_RI(c, 0xa74, m, d)
#define s390_brcl(c, m, d) S390_RIL_2(c, 0xc04, m, d)
#define s390_br(c, r) S390_RR(c, 0x07, 0xf, r)
#define s390_break(c) S390_RR(c, 0, 0, 0)
#define s390_bzr(c, r) S390_RR(c, 0x07, 0x08, r)
#define s390_c(c, r, x, b, d) S390_RX(c, 0x59, r, x, b, d)
#define s390_cdb(c, r, x, b, d) S390_RXE(c, 0xed19, r, x, b, d)
#define s390_cdbr(c, r1, r2) S390_RRE(c, 0xb319, r1, r2)
#define s390_cdfbr(c, r1, r2) S390_RRE(c, 0xb395, r1, r2)
#define s390_cdgbr(c, r1, r2) S390_RRE(c, 0xb3a5, r1, r2)
#define s390_cds(c, r1, r2, b, d) S390_RX(c, 0xbb, r1, r2, b, d)
#define s390_cdsg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb3e, r1, r2, b, d)
#define s390_cdsy(c, r1, r2, b, d) S390_RSY_1(c, 0xeb31, r1, r2, b, d)
#define s390_cebr(c, r1, r2) S390_RRE(c, 0xb309, r1, r2)
#define s390_cegbr(c, r1, r2) S390_RRE(c, 0xb3a4, r1, r2)
#define s390_cfdbr(c, r1, m, r2) S390_RRF_2(c, 0xb399, r1, m, r2)
#define s390_cfi(c, r, v) S390_RIL_1(c, 0xc2d, r, v)
#define s390_cgdbr(c, r1, m, r2) S390_RRF_2(c, 0xb3a9, r1, m, r2)
#define s390_cg(c, r, x, b, d) S390_RXY(c, 0xe320, r, x, b, d)
#define s390_cgfi(c, r, v) S390_RIL_1(c, 0xc2c, r, v)
#define s390_cgfrl(c, r, v) S390_RIL_1(c, 0xc6c, r, v)
#define s390_cghi(c, r, i) S390_RI(c, 0xa7f, r, i)
#define s390_cgib(c, r, i, m, b, d) S390_RIS(c, 0xecfc, r, i, m, b, d)
#define s390_cgij(c, r, i, m, d) S390_RIE_3(c, 0xec7c, r, i, m, d)
#define s390_cgit(c, r, i, m) S390_RIE_4(c, 0xec70, r, i, m);
#define s390_cgr(c, r1, r2) S390_RRE(c, 0xb920, r1, r2)
#define s390_cgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece4, r1, r2, m3, b, d)
#define s390_cgrj(c, r1, r2, m3, v) S390_RIE_2(c, 0xec64, r1, r2, m3, v)
#define s390_cgrl(c, r, v) S390_RIL_1(c, 0xc68, r, v)
#define s390_chi(c, r, i) S390_RI(c, 0xa7e, r, i)
#define s390_cib(c, r, i, m, b, d) S390_RIS(c, 0xecfe, r, i, m, b, d)
#define s390_cij(c, r, i, m, d) S390_RIE_3(c, 0xec7e, r, i, m, d)
#define s390_cit(c, r, i, m) S390_RIE_4(c, 0xec72, r, i, m);
#define s390_cl(c, r, x, b, d) S390_RX(c, 0x55, r, x, b, d)
#define s390_clg(c, r, x, b, d) S390_RXY(c, 0xe321, r, x, b, d)
#define s390_clgib(c, r, i, m, b, d) S390_RIS(c, 0xecfd, r, i, m, b, d)
#define s390_clgij(c, r, i, m, d) S390_RIE_3(c, 0xec7d, r, i, m, d)
#define s390_clgr(c, r1, r2) S390_RRE(c, 0xb921, r1, r2)
#define s390_clgrj(c, r1, r2, m, v) S390_RIE_2(c, 0xec65, r1, r2, m, v)
#define s390_clgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece5, r1, r2, m3, b, d)
#define s390_clib(c, r, i, m, b, d) S390_RIS(c, 0xecff, r, i, m, b, d)
#define s390_clij(c, r, i, m, d) S390_RIE_3(c, 0xec7f, r, i, m, d)
#define s390_clr(c, r1, r2) S390_RR(c, 0x15, r1, r2)
#define s390_clrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xecf7, r1, r2, m3, b, d)
#define s390_clrj(c, r1, r2, m, v) S390_RIE_2(c, 0xec77, r1, r2, m, v)
#define s390_cr(c, r1, r2) S390_RR(c, 0x19, r1, r2)
#define s390_crb(c, r1, r2, m3, b, d) S390_RRS(c, 0xecf6, r1, r2, m3, b, d)
#define s390_crj(c, r1, r2, m3, v) S390_RIE_2(c, 0xec76, r1, r2, m3, v)
#define s390_crl(c, r, v) S390_RIL_1(c, 0xc6d, r, v)
#define s390_crt(c, r1, r2, m3) S390_RRF_2(c, 0xb972, r1, r2, m3);
#define s390_cgrt(c, r1, r2, m3) S390_RRF_2(c, 0xb960, r1, r2, m3);
#define s390_cs(c, r1, r2, b, d) S390_RX(c, 0xba, r1, r2, b, d)
#define s390_csg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb30, r1, r2, b, d)
#define s390_csst(c, d1, b1, d2, b2, r) S390_SSF(c, 0xc82, b1, d1, b2, d2, r)
#define s390_csy(c, r1, r2, b, d) S390_RSY_1(c, 0xeb14, r1, r2, b, d)
#define s390_ddbr(c, r1, r2) S390_RRE(c, 0xb31d, r1, r2)
#define s390_debr(c, r1, r2) S390_RRE(c, 0xb30d, r1, r2)
#define s390_didbr(c, r1, r2, m, r3) S390_RRF_3(c, 0xb35b, r1, r2, m, r3)
#define s390_dlgr(c, r1, r2) S390_RRE(c, 0xb987, r1, r2)
#define s390_dlr(c, r1, r2) S390_RRE(c, 0xb997, r1, r2)
#define s390_dr(c, r1, r2) S390_RR(c, 0x1d, r1, r2)
#define s390_dsgfr(c, r1, r2) S390_RRE(c, 0xb91d, r1, r2)
#define s390_dsgr(c, r1, r2) S390_RRE(c, 0xb90d, r1, r2)
#define s390_ear(c, r1, r2) S390_RRE(c, 0xb24f, r1, r2)
#define s390_ic(c, r, x, b, d) S390_RX(c, 0x43, r, x, b, d)
#define s390_icm(c, r, m, b, d) S390_RX(c, 0xbf, r, m, b, d)
#define s390_icmy(c, r, x, b, d) S390_RXY(c, 0xeb81, r, x, b, d)
#define s390_icy(c, r, x, b, d) S390_RXY(c, 0xe373, r, x, b, d)
#define s390_iihf(c, r, v) S390_RIL_1(c, 0xc08, r, v)
#define s390_iihh(c, r, v) S390_RI(c, 0xa50, r, v)
#define s390_iihl(c, r, v) S390_RI(c, 0xa51, r, v)
#define s390_iilf(c, r, v) S390_RIL_1(c, 0xc09, r, v)
#define s390_iilh(c, r, v) S390_RI(c, 0xa52, r, v)
#define s390_iill(c, r, v) S390_RI(c, 0xa53, r, v)
#define s390_j(c,d) s390_brc(c, S390_CC_UN, d)
#define s390_jc(c, m, d) s390_brc(c, m, d)
#define s390_jcl(c, m, d) s390_brcl(c, m, d)
#define s390_jcy(c, d) s390_brc(c, S390_CC_CY, d)
#define s390_je(c, d) s390_brc(c, S390_CC_EQ, d)
#define s390_jeo(c, d) s390_brc(c, S390_CC_ZR|S390_CC_OV, d)
#define s390_jh(c, d) s390_brc(c, S390_CC_GT, d)
#define s390_jho(c, d) s390_brc(c, S390_CC_GT|S390_CC_OV, d)
#define s390_jl(c, d) s390_brc(c, S390_CC_LT, d)
#define s390_jlo(c, d) s390_brc(c, S390_CC_LT|S390_CC_OV, d)
#define s390_jm(c, d) s390_brc(c, S390_CC_LT, d)
#define s390_jnc(c, d) s390_brc(c, S390_CC_NC, d)
#define s390_jne(c, d) s390_brc(c, S390_CC_NZ, d)
#define s390_jnh(c, d) s390_brc(c, S390_CC_LE, d)
#define s390_jnl(c, d) s390_brc(c, S390_CC_GE, d)
#define s390_jnz(c, d) s390_brc(c, S390_CC_NZ, d)
#define s390_jo(c, d) s390_brc(c, S390_CC_OV, d)
#define s390_jno(c, d) s390_brc(c, S390_CC_NO, d)
#define s390_jp(c, d) s390_brc(c, S390_CC_GT, d)
#define s390_jz(c, d) s390_brc(c, S390_CC_ZR, d)
#define s390_jg(c,d) s390_brcl(c, S390_CC_UN, d)
#define s390_jgcy(c, d) s390_brcl(c, S390_CC_CY, d)
#define s390_jge(c, d) s390_brcl(c, S390_CC_EQ, d)
#define s390_jgeo(c, d) s390_brcl(c, S390_CC_ZR|S390_CC_OV, d)
#define s390_jgh(c, d) s390_brcl(c, S390_CC_GT, d)
#define s390_jgho(c, d) s390_brcl(c, S390_CC_GT|S390_CC_OV, d)
#define s390_jgl(c, d) s390_brcl(c, S390_CC_LT, d)
#define s390_jglo(c, d) s390_brcl(c, S390_CC_LT|S390_CC_OV, d)
#define s390_jgm(c, d) s390_brcl(c, S390_CC_LT, d)
#define s390_jgnc(c, d) s390_brcl(c, S390_CC_NC, d)
#define s390_jgne(c, d) s390_brcl(c, S390_CC_NZ, d)
#define s390_jgnh(c, d) s390_brcl(c, S390_CC_LE, d)
#define s390_jgnl(c, d) s390_brcl(c, S390_CC_GE, d)
#define s390_jgnz(c, d) s390_brcl(c, S390_CC_NZ, d)
#define s390_jgo(c, d) s390_brcl(c, S390_CC_OV, d)
#define s390_jgno(c, d) s390_brcl(c, S390_CC_NO, d)
#define s390_jgp(c, d) s390_brcl(c, S390_CC_GT, d)
#define s390_jgz(c, d) s390_brcl(c, S390_CC_ZR, d)
#define s390_l(c, r, x, b, d) S390_RX(c, 0x58, r, x, b, d)
#define s390_ly(c, r, x, b, d) S390_RXY(c, 0xe358, r, x, b, d)
#define s390_la(c, r, x, b, d) S390_RX(c, 0x41, r, x, b, d)
#define s390_lay(c, r, x, b, d) S390_RXY(c, 0xe371, r, x, b, d)
#define s390_lam(c, r1, r2, b, d) S390_RS_1(c, 0x9a, r1, r2, b, d)
#define s390_larl(c, r, o) S390_RIL_1(c, 0xc00, r, o)
#define s390_lb(c, r, x, b, d) S390_RXY(c, 0xe376, r, x, b, d)
#define s390_lbr(c, r1, r2) S390_RRE(c, 0xb926, r1, r2)
#define s390_lcdbr(c, r1, r2) S390_RRE(c, 0xb313, r1, r2)
#define s390_lcgr(c, r1, r2) S390_RRE(c, 0xb903, r1, r2)
#define s390_lcr(c, r1, r2) S390_RR(c, 0x13, r1, r2)
#define s390_ld(c, f, x, b, d) S390_RX(c, 0x68, f, x, b, d)
#define s390_ldy(c, r, x, b, d) S390_RXY(c, 0xed65, r, x, b, d)
#define s390_ldeb(c, r, x, b, d) S390_RXE(c, 0xed04, r, x, b, d)
#define s390_ldebr(c, r1, r2) S390_RRE(c, 0xb304, r1, r2)
#define s390_ldgr(c, r1, r2) S390_RRE(c, 0xb3c1, r1, r2)
#define s390_ldr(c, r1, r2) S390_RR(c, 0x28, r1, r2)
#define s390_le(c, f, x, b, d) S390_RX(c, 0x78, f, x, b, d)
#define s390_ledbr(c, r1, r2) S390_RRE(c, 0xb344, r1, r2)
#define s390_ler(c, r1, r2) S390_RR(c, 0x38, r1, r2)
#define s390_ley(c, r, x, b, d) S390_RXY(c, 0xed64, r, x, b, d)
#define s390_lg(c, r, x, b, d) S390_RXY(c, 0xe304, r, x, b, d)
#define s390_lgb(c, r, x, b, d) S390_RXY(c, 0xe377, r, x, b, d)
#define s390_lgbr(c, r1, r2) S390_RRE(c, 0xb906, r1, r2)
#define s390_lgdr(c, r1, r2) S390_RRE(c, 0xb3cd, r1, r2)
#define s390_lgf(c, r, x, b, d) S390_RXY(c, 0xe314, r, x, b, d)
#define s390_lgfi(c, r, v) S390_RIL_1(c, 0xc01, r, v)
#define s390_lgfrl(c, r1, d) S390_RIL_1(c, 0xc4c, r1, d)
#define s390_lgfr(c, r1, r2) S390_RRE(c, 0xb914, r1, r2)
#define s390_lgh(c, r, x, b, d) S390_RXY(c, 0xe315, r, x, b, d)
#define s390_lghi(c, r, v) S390_RI(c, 0xa79, r, v)
#define s390_lghr(c, r1, r2) S390_RRE(c, 0xb907, r1, r2)
#define s390_lgr(c, r1, r2) S390_RRE(c, 0xb904, r1, r2)
#define s390_lgrl(c, r1, d) S390_RIL_1(c, 0xc48, r1, d)
#define s390_lh(c, r, x, b, d) S390_RX(c, 0x48, r, x, b, d)
#define s390_lhr(c, r1, r2) S390_RRE(c, 0xb927, r1, r2)
#define s390_lhg(c, r, x, b, d) S390_RXY(c, 0xe315, r, x, b, d)
#define s390_lhi(c, r, v) S390_RI(c, 0xa78, r, v)
#define s390_lhy(c, r, x, b, d) S390_RXY(c, 0xe378, r, x, b, d)
#define s390_llcr(c, r1, r2) S390_RRE(c, 0xb994, r1, r2)
#define s390_llgc(c, r, x, b, d) S390_RXY(c, 0xe390, r, x, b, d)
#define s390_llgcr(c, r1, r2) S390_RRE(c, 0xb984, r1, r2)
#define s390_llgf(c, r, x, b, d) S390_RXY(c, 0xe316, r, x, b, d)
#define s390_llgfr(c, r1, r2) S390_RRE(c, 0xb916, r1, r2)
#define s390_llgh(c, r, x, b, d) S390_RXY(c, 0xe391, r, x, b, d)
#define s390_llghr(c, r1, r2) S390_RRE(c, 0xb985, r1, r2)
#define s390_llhr(c, r1, r2) S390_RRE(c, 0xb995, r1, r2)
#define s390_llihf(c, r, v) S390_RIL_1(c, 0xc0e, r, v)
#define s390_llihh(c, r, v) S390_RI(c, 0xa5c, r, v)
#define s390_llihl(c, r, v) S390_RI(c, 0xa5d, r, v)
#define s390_llilf(c, r, v) S390_RIL_1(c, 0xc0f, r, v)
#define s390_llilh(c, r, v) S390_RI(c, 0xa5e, r, v)
#define s390_llill(c, r, v) S390_RI(c, 0xa5f, r, v)
#define s390_lm(c, r1, r2, b, d) S390_RS_1(c, 0x98, r1, r2, b, d)
#define s390_lmg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb04, r1, r2, b, d)
#define s390_lndbr(c, r1, r2) S390_RRE(c, 0xb311, r1, r2)
#define s390_lngr(c, r1, r2) S390_RRE(c, 0xb901, r1, r2)
#define s390_lnr(c, r1, r2) S390_RR(c, 0x11, r1, r2)
#define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2)
#define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2)
#define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2)
#define s390_lr(c, r1, r2) S390_RR(c, 0x18, r1, r2)
#define s390_lrl(c, r1, d) S390_RIL_1(c, 0xc4d, r1, d)
#define s390_ltgfr(c, r1, r2) S390_RRE(c, 0xb912, r1, r2)
#define s390_ltgr(c, r1, r2) S390_RRE(c, 0xb902, r1, r2)
#define s390_ltr(c, r1, r2) S390_RR(c, 0x12, r1, r2)
#define s390_lzdr(c, r) S390_RRE(c, 0xb375, r, 0)
#define s390_lzer(c, r) S390_RRE(c, 0xb374, r, 0)
#define s390_m(c, r, x, b, d) S390_RX(c, 0x5c, r, x, b, d)
#define s390_mdbr(c, r1, r2) S390_RRE(c, 0xb31c, r1, r2)
#define s390_meebr(c, r1, r2) S390_RRE(c, 0xb317, r1, r2)
#define s390_mfy(c, r, x, b, d) S390_RXY(c, 0xe35c, r, x, b, d)
#define s390_mlgr(c, r1, r2) S390_RRE(c, 0xb986, r1, r2)
#define s390_mlr(c, r1, r2) S390_RRE(c, 0xb996, r1, r2)
#define s390_mr(c, r1, r2) S390_RR(c, 0x1c, r1, r2)
#define s390_ms(c, r, x, b, d) S390_RX(c, 0x71, r, x, b, d)
#define s390_msi(c, r, v) S390_RIL_1(c, 0xc21, r, v)
#define s390_msgfr(c, r1, r2) S390_RRE(c, 0xb91c, r1, r2)
#define s390_msgi(c, r, v) S390_RIL_1(c, 0xc20, r, v)
#define s390_msgr(c, r1, r2) S390_RRE(c, 0xb90c, r1, r2)
#define s390_msr(c, r1, r2) S390_RRE(c, 0xb252, r1, r2)
#define s390_mvc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd2, l, b1, d1, b2, d2)
#define s390_mvcl(c, r1, r2) S390_RR(c, 0x0e, r1, r2)
#define s390_mvcle(c, r1, r3, d2, b2) S390_RS_1(c, 0xa8, r1, r3, d2, b2)
#define s390_n(c, r, x, b, d) S390_RX(c, 0x54, r, x, b, d)
#define s390_nc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd4, l, b1, d1, b2, d2)
#define s390_ng(c, r, x, b, d) S390_RXY(c, 0xe380, r, x, b, d)
#define s390_ngr(c, r1, r2) S390_RRE(c, 0xb980, r1, r2)
#define s390_ngrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e4, r1, r2, r3)
#define s390_ni(c, b, d, v) S390_SI(c, 0x94, b, d, v)
#define s390_nihf(c, r, v) S390_RIL_1(c, 0xc0a, r, v)
#define s390_nihh(c, r, v) S390_RI(c, 0xa54, r, v)
#define s390_nihl(c, r, v) S390_RI(c, 0xa55, r, v)
#define s390_nilf(c, r, v) S390_RIL_1(c, 0xc0b, r, v)
#define s390_nilh(c, r, v) S390_RI(c, 0xa56, r, v)
#define s390_nill(c, r, v) S390_RI(c, 0xa57, r, v)
#define s390_niy(c, b, d, v) S390_SIY(c, 0xeb54, b, d, v)
#define s390_nop(c) S390_RR(c, 0x07, 0x0, 0)
#define s390_nr(c, r1, r2) S390_RR(c, 0x14, r1, r2)
#define s390_nrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9f4, r1, r2, r3)
#define s390_ny(c, r, x, b, d) S390_RXY(c, 0xe354, r, x, b, d)
#define s390_o(c, r, x, b, d) S390_RX(c, 0x56, r, x, b, d)
#define s390_oihf(c, r, v) S390_RIL_1(c, 0xc0c, r, v)
#define s390_oihh(c, r, v) S390_RI(c, 0xa58, r, v)
#define s390_oihl(c, r, v) S390_RI(c, 0xa59, r, v)
#define s390_oilf(c, r, v) S390_RIL_1(c, 0xc0d, r, v)
#define s390_oilh(c, r, v) S390_RI(c, 0xa5a, r, v)
#define s390_oill(c, r, v) S390_RI(c, 0xa5b, r, v)
#define s390_oiy(c, b, d, v) S390_SIY(c, 0xeb56, b, d, v)
#define s390_og(c, r, x, b, d) S390_RXY(c, 0xe381, r, x, b, d)
#define s390_ogr(c, r1, r2) S390_RRE(c, 0xb981, r1, r2)
#define s390_or(c, r1, r2) S390_RR(c, 0x16, r1, r2)
#define s390_s(c, r, x, b, d) S390_RX(c, 0x5b, r, x, b, d)
#define s390_sdb(c, r, x, b, d) S390_RXE(c, 0xed1b, r, x, b, d)
#define s390_sdbr(c, r1, r2) S390_RRE(c, 0xb31b, r1, r2)
#define s390_sebr(c, r1, r2) S390_RRE(c, 0xb30b, r1, r2)
#define s390_sg(c, r, x, b, d) S390_RXY(c, 0xe309, r, x, b, d)
#define s390_sgf(c, r, x, b, d) S390_RXY(c, 0xe319, r, x, b, d)
#define s390_sgr(c, r1, r2) S390_RRE(c, 0xb909, r1, r2)
#define s390_sl(c, r, x, b, d) S390_RX(c, 0x5f, r, x, b, d)
#define s390_sla(c, r, b, d) S390_RS_3(c, 0x8b, r, b, d)
#define s390_slag(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0b, r1, r2, b, d)
#define s390_slbg(c, r, x, b, d) S390_RXY(c, 0xe389, r, x, b, d)
#define s390_slbgr(c, r1, r2) S390_RRE(c, 0xb989, r1, r2)
#define s390_slbr(c, r1, r2) S390_RRE(c, 0xb999, r1, r2)
#define s390_slda(c, r, b, d) S390_RS_3(c, 0x8f, r, b, d)
#define s390_sldl(c, r, b, d) S390_RS_3(c, 0x8d, r, b, d)
#define s390_slfi(c, r, v) S390_RIL_1(c, 0xc25, r, v)
#define s390_slg(c, r, x, b, d) S390_RXY(c, 0xe30b, r, x, b, d)
#define s390_slgf(c, r, x, b, d) S390_RXY(c, 0xe31b, r, x, b, d)
#define s390_slgfr(c, r1, r2) S390_RRE(c, 0xb91b, r1, r2)
#define s390_slgfi(c, r, v) S390_RIL_1(c, 0xc24, r, v)
#define s390_slgr(c, r1, r2) S390_RRE(c, 0xb90b, r1, r2)
#define s390_sll(c, r, b, d) S390_RS_3(c, 0x89, r, b, d)
#define s390_sllg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0d, r1, r2, b, d)
#define s390_slr(c, r1, r2) S390_RR(c, 0x1f, r1, r2)
#define s390_sqdbr(c, r1, r2) S390_RRE(c, 0xb315, r1, r2)
#define s390_sqebr(c, r1, r2) S390_RRE(c, 0xb314, r1, r2)
#define s390_sra(c, r, b, d) S390_RS_3(c, 0x8a, r, b, d)
#define s390_srag(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0a, r1, r2, b, d)
#define s390_sr(c, r1, r2) S390_RR(c, 0x1b, r1, r2)
#define s390_srda(c, r, b, d) S390_RS_3(c, 0x8e, r, b, d)
#define s390_srdl(c, r, b, d) S390_RS_3(c, 0x8c, r, b, d)
#define s390_srl(c, r, b, d) S390_RS_3(c, 0x88, r, b, d)
#define s390_srlg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0c, r1, r2, b, d)
#define s390_st(c, r, x, b, d) S390_RX(c, 0x50, r, x, b, d)
#define s390_stam(c, r1, r2, b, d) S390_RS_1(c, 0x9b, r1, r2, b, d)
#define s390_stc(c, r, x, b, d) S390_RX(c, 0x42, r, x, b, d)
#define s390_stcm(c, r, m, b, d) S390_RX(c, 0xbe, r, m, b, d)
#define s390_stcmy(c, r, x, b, d) S390_RXY(c, 0xeb2d, r, x, b, d)
#define s390_stcy(c, r, x, b, d) S390_RXY(c, 0xe372, r, x, b, d)
#define s390_std(c, f, x, b, d) S390_RX(c, 0x60, f, x, b, d)
#define s390_stdy(c, r, x, b, d) S390_RXY(c, 0xed67, r, x, b, d)
#define s390_ste(c, f, x, b, d) S390_RX(c, 0x70, f, x, b, d)
#define s390_stey(c, r, x, b, d) S390_RXY(c, 0xed66, r, x, b, d)
#define s390_stfpc(c, b, d) S390_S(c, 0xb29c, b, d)
#define s390_stg(c, r, x, b, d) S390_RXY(c, 0xe324, r, x, b, d)
#define s390_sth(c, r, x, b, d) S390_RX(c, 0x40, r, x, b, d)
#define s390_sthy(c, r, x, b, d) S390_RXY(c, 0xe370, r, x, b, d)
#define s390_stm(c, r1, r2, b, d) S390_RS_1(c, 0x90, r1, r2, b, d)
#define s390_stmg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb24, r1, r2, b, d)
#define s390_sty(c, r, x, b, d) S390_RXY(c, 0xe350, r, x, b, d)
#define s390_tcdb(c, r, x, b, d) S390_RXE(c, 0xed11, r, x, b, d)
#define s390_tceb(c, r, x, b, d) S390_RXE(c, 0xed10, r, x, b, d)
#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d)
#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v)
#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v)
#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d)
#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2)
#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2)
#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d)
#endif

File diff suppressed because it is too large

@ -0,0 +1,3 @@
/Makefile
/Makefile.in
/.deps

@ -0,0 +1,7 @@
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
noinst_LTLIBRARIES = libmonoarch-sparc.la
libmonoarch_sparc_la_SOURCES = tramp.c sparc-codegen.h

@ -0,0 +1,955 @@
#ifndef __SPARC_CODEGEN_H__
#define __SPARC_CODEGEN_H__
#if SIZEOF_VOID_P == 8
#define SPARCV9 1
#else
#endif
typedef enum {
sparc_r0 = 0,
sparc_r1 = 1,
sparc_r2 = 2,
sparc_r3 = 3,
sparc_r4 = 4,
sparc_r5 = 5,
sparc_r6 = 6,
sparc_r7 = 7,
sparc_r8 = 8,
sparc_r9 = 9,
sparc_r10 = 10,
sparc_r11 = 11,
sparc_r12 = 12,
sparc_r13 = 13,
sparc_r14 = 14,
sparc_r15 = 15,
sparc_r16 = 16,
sparc_r17 = 17,
sparc_r18 = 18,
sparc_r19 = 19,
sparc_r20 = 20,
sparc_r21 = 21,
sparc_r22 = 22,
sparc_r23 = 23,
sparc_r24 = 24,
sparc_r25 = 25,
sparc_r26 = 26,
sparc_r27 = 27,
sparc_r28 = 28,
sparc_r29 = 29,
sparc_r30 = 30,
sparc_r31 = 31,
/* aliases */
/* global registers */
sparc_g0 = 0, sparc_zero = 0,
sparc_g1 = 1,
sparc_g2 = 2,
sparc_g3 = 3,
sparc_g4 = 4,
sparc_g5 = 5,
sparc_g6 = 6,
sparc_g7 = 7,
/* out registers */
sparc_o0 = 8,
sparc_o1 = 9,
sparc_o2 = 10,
sparc_o3 = 11,
sparc_o4 = 12,
sparc_o5 = 13,
sparc_o6 = 14, sparc_sp = 14,
sparc_o7 = 15, sparc_callsite = 15,
/* local registers */
sparc_l0 = 16,
sparc_l1 = 17,
sparc_l2 = 18,
sparc_l3 = 19,
sparc_l4 = 20,
sparc_l5 = 21,
sparc_l6 = 22,
sparc_l7 = 23,
/* in registers */
sparc_i0 = 24,
sparc_i1 = 25,
sparc_i2 = 26,
sparc_i3 = 27,
sparc_i4 = 28,
sparc_i5 = 29,
sparc_i6 = 30, sparc_fp = 30,
sparc_i7 = 31,
sparc_nreg = 32,
/* floating point registers */
sparc_f0 = 0,
sparc_f1 = 1,
sparc_f2 = 2,
sparc_f3 = 3,
sparc_f4 = 4,
sparc_f5 = 5,
sparc_f6 = 6,
sparc_f7 = 7,
sparc_f8 = 8,
sparc_f9 = 9,
sparc_f10 = 10,
sparc_f11 = 11,
sparc_f12 = 12,
sparc_f13 = 13,
sparc_f14 = 14,
sparc_f15 = 15,
sparc_f16 = 16,
sparc_f17 = 17,
sparc_f18 = 18,
sparc_f19 = 19,
sparc_f20 = 20,
sparc_f21 = 21,
sparc_f22 = 22,
sparc_f23 = 23,
sparc_f24 = 24,
sparc_f25 = 25,
sparc_f26 = 26,
sparc_f27 = 27,
sparc_f28 = 28,
sparc_f29 = 29,
sparc_f30 = 30,
sparc_f31 = 31,
} SparcRegister;
typedef enum {
sparc_bn = 0, sparc_bnever = 0,
sparc_be = 1,
sparc_ble = 2,
sparc_bl = 3,
sparc_bleu = 4,
sparc_bcs = 5, sparc_blu = 5,
sparc_bneg = 6,
sparc_bvs = 7, sparc_boverflow = 7,
sparc_ba = 8, sparc_balways = 8,
sparc_bne = 9,
sparc_bg = 10,
sparc_bge = 11,
sparc_bgu = 12,
sparc_bcc = 13, sparc_beu = 13,
sparc_bpos = 14,
sparc_bvc = 15
} SparcCond;
typedef enum {
/* with fcmp */
sparc_feq = 0,
sparc_fl = 1,
sparc_fg = 2,
sparc_unordered = 3,
/* branch ops */
sparc_fba = 8,
sparc_fbn = 0,
sparc_fbu = 7,
sparc_fbg = 6,
sparc_fbug = 5,
sparc_fbl = 4,
sparc_fbul = 3,
sparc_fblg = 2,
sparc_fbne = 1,
sparc_fbe = 9,
sparc_fbue = 10,
sparc_fbge = 11,
sparc_fbuge = 12,
sparc_fble = 13,
sparc_fbule = 14,
sparc_fbo = 15
} SparcFCond;
typedef enum {
sparc_icc = 4,
sparc_xcc = 6,
sparc_fcc0 = 0,
sparc_fcc1 = 1,
sparc_fcc2 = 2,
sparc_fcc3 = 3
} SparcCC;
typedef enum {
sparc_icc_short = 0,
sparc_xcc_short = 2
} SparcCCShort;
typedef enum {
/* fop1 format */
sparc_fitos_val = 196,
sparc_fitod_val = 200,
sparc_fitoq_val = 204,
sparc_fxtos_val = 132,
sparc_fxtod_val = 136,
sparc_fxtoq_val = 140,
sparc_fstoi_val = 209,
sparc_fdtoi_val = 210,
sparc_fqtoi_val = 211,
sparc_fstod_val = 201,
sparc_fstoq_val = 205,
sparc_fdtos_val = 198,
sparc_fdtoq_val = 206,
sparc_fqtos_val = 199,
sparc_fqtod_val = 203,
sparc_fmovs_val = 1,
sparc_fmovd_val = 2,
sparc_fnegs_val = 5,
sparc_fnegd_val = 6,
sparc_fabss_val = 9,
sparc_fabsd_val = 10,
sparc_fsqrts_val = 41,
sparc_fsqrtd_val = 42,
sparc_fsqrtq_val = 43,
sparc_fadds_val = 65,
sparc_faddd_val = 66,
sparc_faddq_val = 67,
sparc_fsubs_val = 69,
sparc_fsubd_val = 70,
sparc_fsubq_val = 71,
sparc_fmuls_val = 73,
sparc_fmuld_val = 74,
sparc_fmulq_val = 75,
sparc_fsmuld_val = 105,
sparc_fdmulq_val = 111,
sparc_fdivs_val = 77,
sparc_fdivd_val = 78,
sparc_fdivq_val = 79,
/* fop2 format */
sparc_fcmps_val = 81,
sparc_fcmpd_val = 82,
sparc_fcmpq_val = 83,
sparc_fcmpes_val = 85,
sparc_fcmped_val = 86,
sparc_fcmpeq_val = 87
} SparcFOp;
typedef enum {
sparc_membar_load_load = 0x1,
sparc_membar_store_load = 0x2,
sparc_membar_load_store = 0x4,
sparc_membar_store_store = 0x8,
sparc_membar_lookaside = 0x10,
sparc_membar_memissue = 0x20,
sparc_membar_sync = 0x40,
sparc_membar_all = 0x4f
} SparcMembarFlags;
typedef struct {
unsigned int op : 2; /* always 1 */
unsigned int disp : 30;
} sparc_format1;
typedef struct {
unsigned int op : 2; /* always 0 */
unsigned int rd : 5;
unsigned int op2 : 3;
unsigned int disp : 22;
} sparc_format2a;
typedef struct {
unsigned int op : 2; /* always 0 */
unsigned int a : 1;
unsigned int cond : 4;
unsigned int op2 : 3;
unsigned int disp : 22;
} sparc_format2b;
typedef struct {
unsigned int op : 2; /* always 0 */
unsigned int a : 1;
unsigned int cond : 4;
unsigned int op2 : 3;
unsigned int cc01 : 2;
unsigned int p : 1;
unsigned int d19 : 19;
} sparc_format2c;
typedef struct {
unsigned int op : 2; /* always 0 */
unsigned int a : 1;
unsigned int res : 1;
unsigned int rcond: 3;
unsigned int op2 : 3;
unsigned int d16hi: 2;
unsigned int p : 1;
unsigned int rs1 : 5;
unsigned int d16lo: 14;
} sparc_format2d;
typedef struct {
unsigned int op : 2; /* 2 or 3 */
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int asi : 8;
unsigned int rs2 : 5;
} sparc_format3a;
typedef struct {
unsigned int op : 2; /* 2 or 3 */
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int x : 1;
unsigned int asi : 7;
unsigned int rs2 : 5;
} sparc_format3ax;
typedef struct {
unsigned int op : 2; /* 2 or 3 */
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int imm : 13;
} sparc_format3b;
typedef struct {
unsigned int op : 2; /* 2 or 3 */
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int x : 1;
unsigned int imm : 12;
} sparc_format3bx;
typedef struct {
unsigned int op : 2; /* 2 or 3 */
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int opf : 9;
unsigned int rs2 : 5;
} sparc_format3c;
typedef struct {
unsigned int op : 2;
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int cc01 : 2;
unsigned int res : 6;
unsigned int rs2 : 5;
} sparc_format4a;
typedef struct {
unsigned int op : 2;
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int rs1 : 5;
unsigned int i : 1;
unsigned int cc01 : 2;
unsigned int simm : 11;
} sparc_format4b;
typedef struct {
unsigned int op : 2;
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int cc2 : 1;
unsigned int cond : 4;
unsigned int i : 1;
unsigned int cc01 : 2;
unsigned int res : 6;
unsigned int rs2 : 5;
} sparc_format4c;
typedef struct {
unsigned int op : 2;
unsigned int rd : 5;
unsigned int op3 : 6;
unsigned int cc2 : 1;
unsigned int cond : 4;
unsigned int i : 1;
unsigned int cc01 : 2;
unsigned int simm : 11;
} sparc_format4d;
/* for use in logical ops, use 0 to not set flags */
#define sparc_cc 16
#define sparc_is_imm13(val) ((glong)val >= (glong)-(1<<12) && (glong)val <= (glong)((1<<12)-1))
#define sparc_is_imm22(val) ((glong)val >= (glong)-(1<<21) && (glong)val <= (glong)((1<<21)-1))
#define sparc_is_imm16(val) ((glong)val >= (glong)-(1<<15) && (glong)val <= (glong)((1<<15)-1))
#define sparc_is_imm19(val) ((glong)val >= (glong)-(1<<18) && (glong)val <= (glong)((1<<18)-1))
#define sparc_is_imm30(val) ((glong)val >= (glong)-(1<<29) && (glong)val <= (glong)((1<<29)-1))
/* disassembly */
#define sparc_inst_op(inst) ((inst) >> 30)
#define sparc_inst_op2(inst) (((inst) >> 22) & 0x7)
#define sparc_inst_rd(inst) (((inst) >> 25) & 0x1f)
#define sparc_inst_op3(inst) (((inst) >> 19) & 0x3f)
#define sparc_inst_i(inst) (((inst) >> 13) & 0x1)
#define sparc_inst_rs1(inst) (((inst) >> 14) & 0x1f)
#define sparc_inst_rs2(inst) (((inst) >> 0) & 0x1f)
#define sparc_inst_imm(inst) (((inst) >> 13) & 0x1)
#define sparc_inst_imm13(inst) (((inst) >> 0) & 0x1fff)
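/* A minimal sketch, not part of the original header: encoding a load with
   the format-3b helper (the same call sparc_ld_imm below expands to) and
   reading the fields back with the disassembly macros above. The bitfield
   structs assume big-endian (SPARC host) bit ordering, so the round trip is
   only meaningful there; the buffer and registers are hypothetical. Kept
   under #if 0. */
#if 0
static void example_sparc_roundtrip (void)
{
	unsigned int buf[1];
	unsigned int *code = buf;

	/* ld [%fp - 8], %o0 : op = 3, op3 = 0, i = 1 */
	sparc_encode_format3b (code, 3, sparc_fp, -8, 0, sparc_o0);

	/* sparc_inst_op (buf[0])  == 3         (load/store group)       */
	/* sparc_inst_rd (buf[0])  == sparc_o0  (destination register)   */
	/* sparc_inst_i (buf[0])   == 1         (immediate form)         */
}
#endif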
#define sparc_encode_call(ins,addr) \
do { \
sparc_format1 *__f = (sparc_format1*)(ins); \
__f->op = 1; \
__f->disp = ((unsigned int)(addr) >> 2); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format2a(ins,val,oper,dest) \
do { \
sparc_format2a *__f = (sparc_format2a*)(ins); \
__f->op = 0; \
__f->rd = (dest); \
__f->op2 = (oper); \
__f->disp = (val) & 0x3fffff; \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format2b(ins,aval,bcond,oper,disp22) \
do { \
sparc_format2b *__f = (sparc_format2b*)(ins); \
__f->op = 0; \
__f->a = (aval); \
__f->cond = (bcond); \
__f->op2 = (oper); \
__f->disp = (disp22); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format2c(ins,aval,bcond,oper,xcc,predict,disp19) \
do { \
sparc_format2c *__f = (sparc_format2c*)(ins); \
__f->op = 0; \
__f->a = (aval); \
__f->cond = (bcond); \
__f->op2 = (oper); \
__f->cc01 = (xcc); \
__f->p = (predict); \
__f->d19 = (disp19); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format2d(ins,aval,bcond,oper,predict,r1,disp16) \
do { \
sparc_format2d *__f = (sparc_format2d*)(ins); \
__f->op = 0; \
__f->a = (aval); \
__f->res = 0; \
__f->rcond = (bcond); \
__f->op2 = (oper); \
__f->d16hi = ((disp16) >> 14); \
__f->p = (predict); \
__f->rs1 = (r1); \
__f->d16lo = ((disp16) & 0x3fff); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format3a(ins,opval,asival,r1,r2,oper,dest) \
do { \
sparc_format3a *__f = (sparc_format3a*)(ins); \
__f->op = (opval); \
__f->asi = (asival); \
__f->i = 0; \
__f->rd = (dest); \
__f->rs1 = (r1); \
__f->rs2 = (r2); \
__f->op3 = (oper); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format3ax(ins,opval,asival,r1,r2,oper,dest) \
do { \
sparc_format3ax *__f = (sparc_format3ax*)(ins); \
__f->op = (opval); \
__f->asi = (asival); \
__f->i = 0; \
__f->x = 1; \
__f->rd = (dest); \
__f->rs1 = (r1); \
__f->rs2 = (r2); \
__f->op3 = (oper); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format3b(ins,opval,r1,val,oper,dest) \
do { \
sparc_format3b *__f = (sparc_format3b*)(ins); \
__f->op = (opval); \
__f->imm = (val); \
__f->i = 1; \
__f->rd = (dest); \
__f->rs1 = (r1); \
__f->op3 = (oper); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format3bx(ins,opval,r1,val,oper,dest) \
do { \
sparc_format3bx *__f = (sparc_format3bx*)(ins); \
__f->op = (opval); \
__f->imm = (val); \
__f->i = 1; \
__f->x = 1; \
__f->rd = (dest); \
__f->rs1 = (r1); \
__f->op3 = (oper); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format3c(ins,opval,opfval,r1,oper,r2,dest) \
do { \
sparc_format3c *__f = (sparc_format3c*)(ins); \
__f->op = (opval); \
__f->opf = (opfval); \
__f->rd = (dest); \
__f->rs1 = (r1); \
__f->rs2 = (r2); \
__f->op3 = (oper); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format4a(ins,opval,oper,cc,r1,r2,dest) \
do { \
sparc_format4a *__f = (sparc_format4a*)(ins); \
__f->op = (opval); \
__f->rd = (dest); \
__f->op3 = (oper); \
__f->rs1 = (r1); \
__f->i = 0; \
__f->cc01= (cc) & 0x3; \
__f->res = 0; \
__f->rs2 = (r2); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format4b(ins,opval,oper,cc,r1,imm,dest) \
do { \
sparc_format4b *__f = (sparc_format4b*)(ins); \
__f->op = (opval); \
__f->rd = (dest); \
__f->op3 = (oper); \
__f->rs1 = (r1); \
__f->i = 1; \
__f->cc01= (cc) & 0x3; \
__f->simm = (imm); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format4c(ins,opval,oper,cc,bcond,r2,dest) \
do { \
sparc_format4c *__f = (sparc_format4c*)(ins); \
__f->op = (opval); \
__f->rd = (dest); \
__f->op3 = (oper); \
__f->cc2 = ((cc) >> 2) & 0x1; \
__f->cond = bcond; \
__f->i = 0; \
__f->cc01= (cc) & 0x3; \
__f->res = 0; \
__f->rs2 = (r2); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
#define sparc_encode_format4d(ins,opval,oper,xcc,bcond,imm,dest) \
do { \
sparc_format4d *__f = (sparc_format4d*)(ins); \
__f->op = (opval); \
__f->rd = (dest); \
__f->op3 = (oper); \
__f->cc2 = ((xcc) >> 2) & 0x1; \
__f->cond = bcond; \
__f->i = 1; \
__f->cc01= (xcc) & 0x3; \
__f->simm = (imm); \
(ins) = (unsigned int*)__f + 1; \
} while (0)
/* is it useful to provide a non-default value? */
#define sparc_asi 0x0
/* load */
#define sparc_ldsb(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),9,(dest))
#define sparc_ldsb_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),9,(dest))
#define sparc_ldsh(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),10,(dest))
#define sparc_ldsh_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),10,(dest))
#define sparc_ldub(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),1,(dest))
#define sparc_ldub_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),1,(dest))
#define sparc_lduh(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),2,(dest))
#define sparc_lduh_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),2,(dest))
#define sparc_ld(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),0,(dest))
#define sparc_ld_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),0,(dest))
/* Sparc V9 */
#define sparc_ldx(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),11,(dest))
#define sparc_ldx_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),11,(dest))
#define sparc_ldsw(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),8,(dest))
#define sparc_ldsw_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),8,(dest))
#define sparc_ldd(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),3,(dest))
#define sparc_ldd_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),3,(dest))
#define sparc_ldf(ins,base,disp,dest) sparc_encode_format3a((ins),3,0,(base),(disp),32,(dest))
#define sparc_ldf_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),32,(dest))
#define sparc_lddf(ins,base,disp,dest) sparc_encode_format3a((ins),3,0,(base),(disp),35,(dest))
#define sparc_lddf_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),35,(dest))
/* store */
#define sparc_stb(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),5,(src))
#define sparc_stb_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),5,(src))
#define sparc_sth(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),6,(src))
#define sparc_sth_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),6,(src))
#define sparc_st(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),4,(src))
#define sparc_st_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),4,(src))
/* Sparc V9 */
#define sparc_stx(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),14,(src))
#define sparc_stx_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),14,(src))
#define sparc_std(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),7,(src))
#define sparc_std_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),7,(src))
#define sparc_stf(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),36,(src))
#define sparc_stf_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),36,(src))
#define sparc_stdf(ins,src,base,disp) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),39,(src))
#define sparc_stdf_imm(ins,src,base,disp) sparc_encode_format3b((ins),3,(base),(disp),39,(src))
/* swap */
#define sparc_ldstub(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),13,(dest))
#define sparc_ldstub_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),13,(dest))
#define sparc_swap(ins,base,disp,dest) sparc_encode_format3a((ins),3,sparc_asi,(base),(disp),15,(dest))
#define sparc_swap_imm(ins,base,disp,dest) sparc_encode_format3b((ins),3,(base),(disp),15,(dest))
/* misc */
/* note: with sethi val is the full 32 bit value (think of it as %hi(val)) */
#define sparc_sethi(ins,val,dest) sparc_encode_format2a((ins),((val)>>10),4,(dest))
#define sparc_nop(ins) sparc_sethi((ins),0,sparc_zero)
#define sparc_save(ins,src,disp,dest) sparc_encode_format3a((ins),2,0,(src),(disp),60,(dest))
#define sparc_save_imm(ins,src,disp,dest) sparc_encode_format3b((ins),2,(src),(disp),60,(dest))
#define sparc_restore(ins,src,disp,dest) sparc_encode_format3a((ins),2,0,(src),(disp),61,(dest))
#define sparc_restore_imm(ins,src,disp,dest) sparc_encode_format3b((ins),2,(src),(disp),61,(dest))
#define sparc_rett(ins,src,disp) sparc_encode_format3a((ins),2,0,(src),(disp),0x39,0)
#define sparc_rett_imm(ins,src,disp) sparc_encode_format3b((ins),2,(src),(disp),0x39,0)
#define sparc_jmpl(ins,base,disp,dest) sparc_encode_format3a((ins),2,0,(base),(disp),56,(dest))
#define sparc_jmpl_imm(ins,base,disp,dest) sparc_encode_format3b((ins),2,(base),(disp),56,(dest))
#define sparc_call_simple(ins,disp) sparc_encode_call((ins),((unsigned int)(disp)))
#define sparc_rdy(ins,dest) sparc_encode_format3a((ins),2,0,0,0,40,(dest))
#define sparc_wry(ins,base,disp) sparc_encode_format3a((ins),2,0,(base),(disp),48,0)
#define sparc_wry_imm(ins,base,disp) sparc_encode_format3b((ins),2,(base),(disp),48,0)
/* stbar, unimp, flush */
#define sparc_stbar(ins) sparc_encode_format3a((ins),2,0,15,0,40,0)
#define sparc_unimp(ins,val) sparc_encode_format2b((ins),0,0,0,(val))
#define sparc_flush(ins,base,disp) sparc_encode_format3a((ins),2,0,(base),(disp),59,0)
#define sparc_flush_imm(ins,base,disp) sparc_encode_format3b((ins),2,(base),(disp),59,0)
#define sparc_flushw(ins) sparc_encode_format3a((ins),2,0,0,0,43,0)
#define sparc_membar(ins,flags) sparc_encode_format3b ((ins), 2, 0xf, (flags), 0x28, 0)
/* trap */
#define sparc_ta(ins,tt) sparc_encode_format3b((ins),2,0,(tt),58,0x8)
/* alu fop */
/* provide wrappers for: fitos, fitod, fstoi, fdtoi, fstod, fdtos, fmov, fneg, fabs */
#define sparc_fop(ins,r1,op,r2,dest) sparc_encode_format3c((ins),2,(op),(r1),52,(r2),(dest))
#define sparc_fcmp(ins,r1,op,r2) sparc_encode_format3c((ins),2,(op),(r1),53,(r2),0)
/* format 1 fops */
#define sparc_fadds(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_fadds_val, r2, dest )
#define sparc_faddd(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_faddd_val, r2, dest )
#define sparc_faddq(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_faddq_val, r2, dest )
#define sparc_fsubs(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_fsubs_val, r2, dest )
#define sparc_fsubd(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_fsubd_val, r2, dest )
#define sparc_fsubq(ins, r1, r2, dest) sparc_fop( ins, r1, sparc_fsubq_val, r2, dest )
#define sparc_fmuls( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fmuls_val, r2, dest )
#define sparc_fmuld( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fmuld_val, r2, dest )
#define sparc_fmulq( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fmulq_val, r2, dest )
#define sparc_fsmuld( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fsmuld_val, r2, dest )
#define sparc_fdmulq( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fdmulq_val, r2, dest )
#define sparc_fdivs( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fdivs_val, r2, dest )
#define sparc_fdivd( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fdivd_val, r2, dest )
#define sparc_fdivq( ins, r1, r2, dest ) sparc_fop( ins, r1, sparc_fdivq_val, r2, dest )
#define sparc_fitos( ins, r2, dest ) sparc_fop( ins, 0, sparc_fitos_val, r2, dest )
#define sparc_fitod( ins, r2, dest ) sparc_fop( ins, 0, sparc_fitod_val, r2, dest )
#define sparc_fitoq( ins, r2, dest ) sparc_fop( ins, 0, sparc_fitoq_val, r2, dest )
#define sparc_fxtos( ins, r2, dest) sparc_fop( ins, 0, sparc_fxtos_val, r2, dest )
#define sparc_fxtod( ins, r2, dest) sparc_fop( ins, 0, sparc_fxtod_val, r2, dest )
#define sparc_fxtoq( ins, r2, dest) sparc_fop( ins, 0, sparc_fxtoq_val, r2, dest )
#define sparc_fstoi( ins, r2, dest ) sparc_fop( ins, 0, sparc_fstoi_val, r2, dest )
#define sparc_fdtoi( ins, r2, dest ) sparc_fop( ins, 0, sparc_fdtoi_val, r2, dest )
#define sparc_fqtoi( ins, r2, dest ) sparc_fop( ins, 0, sparc_fqtoi_val, r2, dest )
#define sparc_fstod( ins, r2, dest ) sparc_fop( ins, 0, sparc_fstod_val, r2, dest )
#define sparc_fstoq( ins, r2, dest ) sparc_fop( ins, 0, sparc_fstoq_val, r2, dest )
#define sparc_fdtos( ins, r2, dest ) sparc_fop( ins, 0, sparc_fdtos_val, r2, dest )
#define sparc_fdtoq( ins, r2, dest ) sparc_fop( ins, 0, sparc_fdtoq_val, r2, dest )
#define sparc_fqtos( ins, r2, dest ) sparc_fop( ins, 0, sparc_fqtos_val, r2, dest )
#define sparc_fqtod( ins, r2, dest ) sparc_fop( ins, 0, sparc_fqtod_val, r2, dest )
#define sparc_fmovs( ins, r2, dest ) sparc_fop( ins, 0, sparc_fmovs_val, r2, dest )
#define sparc_fnegs( ins, r2, dest ) sparc_fop( ins, 0, sparc_fnegs_val, r2, dest )
#define sparc_fabss( ins, r2, dest ) sparc_fop( ins, 0, sparc_fabss_val, r2, dest )
#define sparc_fmovd( ins, r2, dest ) sparc_fop( ins, 0, sparc_fmovd_val, r2, dest )
#define sparc_fnegd( ins, r2, dest ) sparc_fop( ins, 0, sparc_fnegd_val, r2, dest )
#define sparc_fabsd( ins, r2, dest ) sparc_fop( ins, 0, sparc_fabsd_val, r2, dest )
#define sparc_fsqrts( ins, r2, dest ) sparc_fop( ins, 0, sparc_fsqrts_val, r2, dest )
#define sparc_fsqrtd( ins, r2, dest ) sparc_fop( ins, 0, sparc_fsqrtd_val, r2, dest )
#define sparc_fsqrtq( ins, r2, dest ) sparc_fop( ins, 0, sparc_fsqrtq_val, r2, dest )
/* format 2 fops */
#define sparc_fcmps( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmps_val, r2 )
#define sparc_fcmpd( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmpd_val, r2 )
#define sparc_fcmpq( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmpq_val, r2 )
#define sparc_fcmpes( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmpes_val, r2 )
#define sparc_fcmped( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmped_val, r2 )
#define sparc_fcmpeq( ins, r1, r2 ) sparc_fcmp( ins, r1, sparc_fcmpeq_val, r2 )
/* logical */
/* FIXME: condense this using macros */
/* FIXME: the setcc stuff is wrong in lots of places */
#define sparc_logic(ins,op,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),((setcc) ? 0x10 : 0) | (op), (dest))
#define sparc_logic_imm(ins,op,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),((setcc) ? 0x10 : 0) | (op), (dest))
#define sparc_and(ins,setcc,r1,r2,dest) sparc_logic(ins,1,setcc,r1,r2,dest)
#define sparc_and_imm(ins,setcc,r1,imm,dest) sparc_logic_imm(ins,1,setcc,r1,imm,dest)
#define sparc_andn(ins,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),(setcc)|5,(dest))
#define sparc_andn_imm(ins,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),(setcc)|5,(dest))
#define sparc_or(ins,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),(setcc)|2,(dest))
#define sparc_or_imm(ins,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),(setcc)|2,(dest))
#define sparc_orn(ins,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),(setcc)|6,(dest))
#define sparc_orn_imm(ins,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),(setcc)|6,(dest))
#define sparc_xor(ins,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),(setcc)|3,(dest))
#define sparc_xor_imm(ins,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm), (setcc)|3,(dest))
#define sparc_xnor(ins,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),(setcc)|7,(dest))
#define sparc_xnor_imm(ins,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),(setcc)|7,(dest))
/* shift */
#define sparc_sll(ins,src,disp,dest) sparc_encode_format3a((ins),2,0,(src),(disp),37,(dest))
#define sparc_sll_imm(ins,src,disp,dest) sparc_encode_format3b((ins),2,(src),(disp),37,(dest))
/* Sparc V9 */
#define sparc_sllx(ins,src,disp,dest) sparc_encode_format3ax((ins),2,0,(src),(disp),37,(dest))
#define sparc_sllx_imm(ins,src,disp,dest) sparc_encode_format3bx((ins),2,(src),(disp),37,(dest))
#define sparc_srl(ins,src,disp,dest) sparc_encode_format3a((ins),2,0,(src),(disp),38,(dest))
#define sparc_srl_imm(ins,src,disp,dest) sparc_encode_format3b((ins),2,(src),(disp),38,(dest))
/* Sparc V9 */
#define sparc_srlx(ins,src,disp,dest) sparc_encode_format3ax((ins),2,0,(src),(disp),38,(dest))
#define sparc_srlx_imm(ins,src,disp,dest) sparc_encode_format3bx((ins),2,(src),(disp),38,(dest))
#define sparc_sra(ins,src,disp,dest) sparc_encode_format3a((ins),2,0,(src),(disp),39,(dest))
#define sparc_sra_imm(ins,src,disp,dest) sparc_encode_format3b((ins),2,(src),(disp),39,(dest))
/* Sparc V9 */
#define sparc_srax(ins,src,disp,dest) sparc_encode_format3ax((ins),2,0,(src),(disp),39,(dest))
#define sparc_srax_imm(ins,src,disp,dest) sparc_encode_format3bx((ins),2,(src),(disp),39,(dest))
/* alu */
#define sparc_alu_reg(ins,op,setcc,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),op|((setcc) ? 0x10 : 0),(dest))
#define sparc_alu_imm(ins,op,setcc,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),op|((setcc) ? 0x10 : 0),(dest))
#define sparc_add(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0,(setcc),(r1),(r2),(dest))
#define sparc_add_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0,(setcc),(r1),(imm),(dest))
#define sparc_addx(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0x8,(setcc),(r1),(r2),(dest))
#define sparc_addx_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0x8,(setcc),(r1),(imm),(dest))
#define sparc_sub(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0x4,(setcc),(r1),(r2),(dest))
#define sparc_sub_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0x4,(setcc),(r1),(imm),(dest))
#define sparc_subx(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0xc,(setcc),(r1),(r2),(dest))
#define sparc_subx_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0xc,(setcc),(r1),(imm),(dest))
#define sparc_muls(ins,r1,r2,dest) sparc_encode_format3a((ins),2,0,(r1),(r2),36,(dest))
#define sparc_muls_imm(ins,r1,imm,dest) sparc_encode_format3b((ins),2,(r1),(imm),36,(dest))
#define sparc_umul(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0xa,(setcc),(r1),(r2),(dest))
#define sparc_umul_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0xa,(setcc),(r1),(imm),(dest))
#define sparc_smul(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0xb,(setcc),(r1),(r2),(dest))
#define sparc_smul_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0xb,(setcc),(r1),(imm),(dest))
#define sparc_udiv(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0xe,(setcc),(r1),(r2),(dest))
#define sparc_udiv_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0xe,(setcc),(r1),(imm),(dest))
#define sparc_sdiv(ins,setcc,r1,r2,dest) sparc_alu_reg((ins),0xf,(setcc),(r1),(r2),(dest))
#define sparc_sdiv_imm(ins,setcc,r1,imm,dest) sparc_alu_imm((ins),0xf,(setcc),(r1),(imm),(dest))
/* branch */
#define sparc_branch(ins,aval,condval,displ) sparc_encode_format2b((ins),(aval),(condval),2,(displ))
/* FIXME: float condition codes are different: unify. */
#define sparc_fbranch(ins,aval,condval,displ) sparc_encode_format2b((ins),(aval),(condval),6,(displ))
#define sparc_branchp(ins,aval,condval,xcc,predict,displ) sparc_encode_format2c((ins),(aval),(condval),0x1,(xcc),(predict),(displ))
#define sparc_brz(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x1,0x3,(predict),(rs1),(disp))
#define sparc_brlez(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x2,0x3,(predict),(rs1),(disp))
#define sparc_brlz(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x3,0x3,(predict),(rs1),(disp))
#define sparc_brnz(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x5,0x3,(predict),(rs1),(disp))
#define sparc_brgz(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x6,0x3,(predict),(rs1),(disp))
#define sparc_brgez(ins,aval,predict,rs1,disp) sparc_encode_format2d((ins), (aval),0x7,0x3,(predict),(rs1),(disp))
/* conditional moves */
#define sparc_movcc(ins,cc,condval,r1,dest) sparc_encode_format4c((ins), 0x2, 0x2c, cc, condval, r1, dest)
#define sparc_movcc_imm(ins,cc,condval,imm,dest) sparc_encode_format4d((ins), 0x2, 0x2c, cc, condval, imm, dest)
/* synthetic instructions */
#define sparc_cmp(ins,r1,r2) sparc_sub((ins),sparc_cc,(r1),(r2),sparc_g0)
#define sparc_cmp_imm(ins,r1,imm) sparc_sub_imm((ins),sparc_cc,(r1),(imm),sparc_g0)
#define sparc_jmp(ins,base,disp) sparc_jmpl((ins),(base),(disp),sparc_g0)
#define sparc_jmp_imm(ins,base,disp) sparc_jmpl_imm((ins),(base),(disp),sparc_g0)
#define sparc_call(ins,base,disp) sparc_jmpl((ins),(base),(disp),sparc_o7)
#define sparc_call_imm(ins,base,disp) sparc_jmpl_imm((ins),(base),(disp),sparc_o7)
#define sparc_test(ins,reg) sparc_or ((ins),sparc_cc,sparc_g0,(reg),sparc_g0)
#define sparc_ret(ins) sparc_jmpl_imm((ins),sparc_i7,8,sparc_g0)
#define sparc_retl(ins) sparc_jmpl_imm((ins),sparc_o7,8,sparc_g0)
#define sparc_restore_simple(ins) sparc_restore((ins),sparc_g0,sparc_g0,sparc_g0)
#define sparc_rett_simple(ins) sparc_rett_imm((ins),sparc_i7,8)
#define sparc_set32(ins,val,reg) \
do { \
if ((val) == 0) \
sparc_clr_reg((ins),(reg)); \
else if (((guint32)(val) & 0x3ff) == 0) \
sparc_sethi((ins),(guint32)(val),(reg)); \
else if (((gint32)(val) >= -4096) && ((gint32)(val) <= 4095)) \
sparc_or_imm((ins),FALSE,sparc_g0,(gint32)(val),(reg)); \
else { \
sparc_sethi((ins),(guint32)(val),(reg)); \
sparc_or_imm((ins),FALSE,(reg),(guint32)(val)&0x3ff,(reg)); \
} \
} while (0)
#ifdef SPARCV9
#define SPARC_SET_MAX_SIZE (6 * 4)
#else
#define SPARC_SET_MAX_SIZE (2 * 4)
#endif
#if SPARCV9
#define sparc_set(ins,ptr,reg) \
do { \
g_assert ((reg) != sparc_g1); \
gint64 val = (gint64)ptr; \
guint32 top_word = (val) >> 32; \
guint32 bottom_word = (val) & 0xffffffff; \
if (val == 0) \
sparc_clr_reg ((ins), reg); \
else if ((val >= -4096) && ((val) <= 4095)) \
sparc_or_imm((ins),FALSE,sparc_g0,bottom_word,(reg)); \
else if ((val >= 0) && (val <= 4294967295L)) { \
sparc_sethi((ins),bottom_word,(reg)); \
if (bottom_word & 0x3ff) \
sparc_or_imm((ins),FALSE,(reg),bottom_word&0x3ff,(reg)); \
} \
else if ((val >= 0) && (val <= (1L << 44) - 1)) { \
sparc_sethi ((ins), (val >> 12), (reg)); \
sparc_or_imm ((ins), FALSE, (reg), (val >> 12) & 0x3ff, (reg)); \
sparc_sllx_imm ((ins),(reg), 12, (reg)); \
sparc_or_imm ((ins), FALSE, (reg), (val) & 0xfff, (reg)); \
} \
else if (top_word == 0xffffffff) { \
sparc_xnor ((ins), FALSE, sparc_g0, sparc_g0, sparc_g1); \
sparc_sethi((ins),bottom_word,(reg)); \
sparc_sllx_imm((ins),sparc_g1,32,sparc_g1); \
sparc_or_imm((ins),FALSE,(reg),bottom_word&0x3ff,(reg)); \
sparc_or((ins),FALSE,(reg),sparc_g1,(reg)); \
} \
else { \
sparc_sethi((ins),top_word,sparc_g1); \
sparc_sethi((ins),bottom_word,(reg)); \
sparc_or_imm((ins),FALSE,sparc_g1,top_word&0x3ff,sparc_g1); \
sparc_or_imm((ins),FALSE,(reg),bottom_word&0x3ff,(reg)); \
sparc_sllx_imm((ins),sparc_g1,32,sparc_g1); \
sparc_or((ins),FALSE,(reg),sparc_g1,(reg)); \
} \
} while (0)
#else
#define sparc_set(ins,val,reg) \
do { \
if ((val) == 0) \
sparc_clr_reg((ins),(reg)); \
else if (((guint32)(val) & 0x3ff) == 0) \
sparc_sethi((ins),(guint32)(val),(reg)); \
else if (((gint32)(val) >= -4096) && ((gint32)(val) <= 4095)) \
sparc_or_imm((ins),FALSE,sparc_g0,(gint32)(val),(reg)); \
else { \
sparc_sethi((ins),(guint32)(val),(reg)); \
sparc_or_imm((ins),FALSE,(reg),(guint32)(val)&0x3ff,(reg)); \
} \
} while (0)
#endif
#define sparc_set_ptr(ins,val,reg) sparc_set(ins,val,reg)
#ifdef SPARCV9
#define sparc_set_template(ins,reg) sparc_set (ins,0x7fffffff7fffffff, reg)
#else
#define sparc_set_template(ins,reg) sparc_set (ins,0x7fffffff, reg)
#endif
#define sparc_not(ins,reg) sparc_xnor((ins),FALSE,(reg),sparc_g0,(reg))
#define sparc_neg(ins,reg) sparc_sub((ins),FALSE,sparc_g0,(reg),(reg))
#define sparc_clr_reg(ins,reg) sparc_or((ins),FALSE,sparc_g0,sparc_g0,(reg))
#define sparc_mov_reg_reg(ins,src,dest) sparc_or((ins),FALSE,sparc_g0,(src),(dest))
#ifdef SPARCV9
#define sparc_sti_imm sparc_stx_imm
#define sparc_ldi_imm sparc_ldx_imm
#define sparc_sti sparc_stx
#define sparc_ldi sparc_ldx
#else
#define sparc_sti_imm sparc_st_imm
#define sparc_ldi_imm sparc_ld_imm
#define sparc_sti sparc_st
#define sparc_ldi sparc_ld
#endif
#endif /* __SPARC_CODEGEN_H__ */

@ -0,0 +1,123 @@
#include <glib.h>
#include <stdio.h>
#include "sparc-codegen.h"
/* Don't run the resulting program, it will destroy your computer;
 * just objdump -d it to check that we generated the correct assembly.
 */
int
main ()
{
guint32 *p;
guint32 code_buffer [500];
guint32 local_size = 0, stack_size = 0, code_size = 6;
guint32 arg_pos, simpletype;
unsigned char *ins;
int i, stringp, cur_out_reg, size;
p = code_buffer;
printf (".text\n.align 4\n.globl main\n.type main,@function\nmain:\n");
/*
* Standard function prolog.
*/
sparc_save_imm (p, sparc_sp, -112-stack_size, sparc_sp);
cur_out_reg = sparc_o0;
arg_pos = 0;
if (1) {
sparc_mov_reg_reg (p, sparc_i2, cur_out_reg);
++cur_out_reg;
}
sparc_ld_imm (p, sparc_i3, arg_pos, cur_out_reg);
++cur_out_reg;
sparc_ld_imm (p, sparc_i3, arg_pos+4, cur_out_reg);
++cur_out_reg;
/*
* Insert call to function
*/
sparc_jmpl (p, sparc_i0, 0, sparc_callsite);
sparc_nop (p);
sparc_jmpl_imm (p, sparc_i7, 8, sparc_zero);
sparc_restore (p, sparc_zero, sparc_zero, sparc_zero);
sparc_ldsb (p, sparc_i3, sparc_l0, sparc_o5);
sparc_ldsb_imm (p, sparc_i3, 2, sparc_o5);
sparc_ldsh (p, sparc_i3, sparc_l0, sparc_o5);
sparc_ldsh_imm (p, sparc_i3, 2, sparc_o5);
sparc_ldub (p, sparc_i3, sparc_l0, sparc_o5);
sparc_ldub_imm (p, sparc_i3, 2, sparc_o5);
sparc_lduh (p, sparc_i3, sparc_l0, sparc_o5);
sparc_lduh_imm (p, sparc_i3, 2, sparc_o5);
sparc_ldf (p, sparc_i3, sparc_l0, sparc_o5);
sparc_ldf_imm (p, sparc_i3, 2, sparc_o5);
sparc_stb (p, sparc_i3, sparc_l0, sparc_l2);
sparc_stb_imm (p, sparc_i3, sparc_o5, 2);
sparc_sethi (p, 0xff000000, sparc_o2);
sparc_rdy (p, sparc_l0);
sparc_wry (p, sparc_l0, sparc_l1);
sparc_wry_imm (p, sparc_l0, 16);
sparc_stbar (p);
sparc_unimp (p, 24);
sparc_flush (p, sparc_l4, 0);
sparc_and (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_and_imm (p, FALSE, sparc_l0, 0xff, sparc_o1);
sparc_andn (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_or (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_orn (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_xor (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_xnor (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_sll (p, sparc_l0, sparc_l1, sparc_o1);
sparc_sll_imm (p, sparc_l0, 2, sparc_o1);
sparc_srl (p, sparc_l0, sparc_l1, sparc_o1);
sparc_srl_imm (p, sparc_l0, 2, sparc_o1);
sparc_sra (p, sparc_l0, sparc_l1, sparc_o1);
sparc_sra_imm (p, sparc_l0, 2, sparc_o1);
sparc_add (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_add_imm (p, FALSE, sparc_l0, 0xff, sparc_o1);
sparc_addx (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_sub (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_subx (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_muls (p, sparc_l0, sparc_l1, sparc_o1);
sparc_umul (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_smul (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_udiv (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_sdiv (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
sparc_branch (p, FALSE, sparc_bne, -12);
sparc_ret (p);
sparc_retl (p);
sparc_test (p, sparc_l4);
sparc_cmp (p, sparc_l4, sparc_l6);
sparc_cmp_imm (p, sparc_l4, 4);
sparc_restore_simple (p);
sparc_set (p, 0xff000000, sparc_l7);
sparc_set (p, 1, sparc_l7);
sparc_set (p, 0xff0000ff, sparc_l7);
sparc_not (p, sparc_g2);
sparc_neg (p, sparc_g3);
sparc_clr_reg (p, sparc_g4);
size = (p-code_buffer)*4;
ins = (unsigned char *) code_buffer;
for (i = 0; i < size; ++i)
printf (".byte %u\n", (unsigned int) ins [i]);
return 0;
}

File diff suppressed because it is too large

@ -0,0 +1,4 @@
/Makefile.in
/Makefile
/.deps
/.libs

@ -0,0 +1,2 @@
EXTRA_DIST = x64-codegen.h

File diff suppressed because it is too large

@ -0,0 +1,6 @@
/Makefile
/Makefile.in
/.libs
/.deps
/*.la
/*.lo

@ -0,0 +1 @@
EXTRA_DIST = x86-codegen.h

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,10 +1,10 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,19 +31,16 @@
*/
#ifndef __CODEGEN_H__
#define __CODEGEN_H__
#ifndef FFTS_CODEGEN_H
#define FFTS_CODEGEN_H
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/mman.h>
#include <string.h>
#include <limits.h> /* for PAGESIZE */
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts.h"
#include "ffts_internal.h"
void ffts_generate_func_code(ffts_plan_t *, size_t N, size_t leafN, int sign);
transform_func_t ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leaf_N, int sign);
#endif
#endif /* FFTS_CODEGEN_H */

@ -31,10 +31,14 @@
*/
#ifndef __CODEGEN_ARM_H__
#define __CODEGEN_ARM_H__
#ifndef FFTS_CODEGEN_ARM_H
#define FFTS_CODEGEN_ARM_H
#include "neon.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
uint32_t BL(void *pos, void *target) {
return 0xeb000000 | (((target - pos) / 4) & 0xffffff);
@ -95,7 +99,130 @@ void MOVI(uint32_t **p, uint8_t dst, uint32_t imm) {
uint32_t PUSH_LR() { return 0xe92d4ff0; } //0xe92d4000; }
uint32_t POP_LR() { return 0xe8bd8ff0; } //0xe8bd8000; }
static FFTS_INLINE insns_t* generate_size4_base_case(insns_t **fp, int sign)
{
insns_t *x_4_addr;
size_t len;
x_4_addr = *fp;
#ifdef HAVE_NEON
len = (char*) neon_x8 - (char*) neon_x4;
memcpy(x_4_addr, neon_x4, len);
if (sign < 0) {
x_4_addr[26] ^= 0x00200000;
x_4_addr[28] ^= 0x00200000;
x_4_addr[31] ^= 0x00200000;
x_4_addr[32] ^= 0x00200000;
}
#else
len = (char*) vfp_x8 - (char*) vfp_x4;
memcpy(x_4_addr, vfp_x4, len);
if (sign > 0) {
x_4_addr[36] ^= 0x00000040;
x_4_addr[38] ^= 0x00000040;
x_4_addr[43] ^= 0x00000040;
x_4_addr[44] ^= 0x00000040;
}
#endif
*fp += len / 4;
return x_4_addr;
}
static FFTS_INLINE insns_t* generate_size8_base_case(insns_t **fp, int sign)
{
insns_t *x_8_addr;
ptrdiff_t len;
x_8_addr = *fp;
#ifdef HAVE_NEON
len = (char*) neon_x8_t - (char*) neon_x8;
memcpy(x_8_addr, neon_x8, len);
/*
* Changes adds to subtracts and vice versa to allow the computation
* of both the IFFT and FFT
*/
if (sign < 0) {
x_8_addr[31] ^= 0x00200000;
x_8_addr[32] ^= 0x00200000;
x_8_addr[33] ^= 0x00200000;
x_8_addr[34] ^= 0x00200000;
x_8_addr[65] ^= 0x00200000;
x_8_addr[66] ^= 0x00200000;
x_8_addr[70] ^= 0x00200000;
x_8_addr[74] ^= 0x00200000;
x_8_addr[97] ^= 0x00200000;
x_8_addr[98] ^= 0x00200000;
x_8_addr[102] ^= 0x00200000;
x_8_addr[104] ^= 0x00200000;
}
*fp += len / 4;
//uint32_t *x_8_t_addr = fp;
//memcpy(fp, neon_x8_t, neon_end - neon_x8_t);
//fp += (neon_end - neon_x8_t) / 4;
#else
len = (char*) vfp_end - (char*) vfp_x8;
memcpy(x_8_addr, vfp_x8, len);
if (sign > 0) {
x_8_addr[65] ^= 0x00000040;
x_8_addr[66] ^= 0x00000040;
x_8_addr[68] ^= 0x00000040;
x_8_addr[70] ^= 0x00000040;
x_8_addr[103] ^= 0x00000040;
x_8_addr[104] ^= 0x00000040;
x_8_addr[105] ^= 0x00000040;
x_8_addr[108] ^= 0x00000040;
x_8_addr[113] ^= 0x00000040;
x_8_addr[114] ^= 0x00000040;
x_8_addr[117] ^= 0x00000040;
x_8_addr[118] ^= 0x00000040;
}
*fp += len / 4;
#endif
return x_8_addr;
}
static FFTS_INLINE insns_t* generate_prologue(insns_t **fp, ffts_plan_t *p)
{
insns_t *start = *fp;
*(*fp)++ = PUSH_LR();
*(*fp)++ = 0xed2d8b10;
ADDI(fp, 3, 1, 0);
ADDI(fp, 7, 1, p->N);
ADDI(fp, 5, 1, 2 * p->N);
ADDI(fp, 10, 7, 2 * p->N);
ADDI(fp, 4, 5, 2 * p->N);
ADDI(fp, 8, 10, 2 * p->N);
ADDI(fp, 6, 4, 2 * p->N);
ADDI(fp, 9, 8, 2 * p->N);
// load offsets into r12
*(*fp)++ = LDRI(12, 0, ((uint32_t) &p->offsets) - ((uint32_t) p));
// *(*fp)++ = LDRI(1, 0, 4); // load ws into r1
ADDI(fp, 1, 0, 0);
ADDI(fp, 0, 2, 0); // mov out into r0
*(*fp)++ = LDRI(2, 1, ((uint32_t) &p->ee_ws) - ((uint32_t) p));
#ifdef HAVE_NEON
MOVI(fp, 11, p->i0);
#else
MOVI(fp, 11, p->i0);
#endif
return start;
}
#endif /* FFTS_CODEGEN_ARM_H */

File diff suppressed because it is too large

@ -1,398 +1,539 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts.h"
#include "ffts_internal.h"
#include "ffts_static.h"
#include "ffts_trig.h"
#include "macros.h"
//#include "mini_macros.h"
#include "patterns.h"
#include "ffts_small.h"
#ifdef DYNAMIC_DISABLED
#include "ffts_static.h"
#ifndef DYNAMIC_DISABLED
#include "codegen.h"
#endif
#if _WIN32
#include <windows.h>
#else
#if __APPLE__
#include <libkern/OSCacheControl.h>
#endif
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#endif
#if defined(HAVE_NEON)
static const FFTS_ALIGN(64) float w_data[16] = {
0.70710678118654757273731092936941f,
0.70710678118654746171500846685376f,
-0.70710678118654757273731092936941f,
-0.70710678118654746171500846685376f,
1.0f,
0.70710678118654757273731092936941f,
-0.0f,
-0.70710678118654746171500846685376f,
0.70710678118654757273731092936941f,
0.70710678118654746171500846685376f,
0.70710678118654757273731092936941f,
0.70710678118654746171500846685376f,
1.0f,
0.70710678118654757273731092936941f,
0.0f,
0.70710678118654746171500846685376f
};
#endif
static FFTS_INLINE int ffts_allow_execute(void *start, size_t len)
{
int result;
#ifdef _WIN32
DWORD old_protect;
result = !VirtualProtect(start, len, PAGE_EXECUTE_READ, &old_protect);
#else
#include "codegen.h"
result = mprotect(start, len, PROT_READ | PROT_EXEC);
#endif
#include <errno.h>
#include <sys/mman.h>
#include <string.h>
#include <limits.h> /* for PAGESIZE */
return result;
}
static FFTS_INLINE int ffts_deny_execute(void *start, size_t len)
{
int result;
#ifdef _WIN32
DWORD old_protect;
result = (int) VirtualProtect(start, len, PAGE_READWRITE, &old_protect);
#else
result = mprotect(start, len, PROT_READ | PROT_WRITE);
#endif
return result;
}
static FFTS_INLINE int ffts_flush_instruction_cache(void *start, size_t length)
{
#ifdef _WIN32
return !FlushInstructionCache(GetCurrentProcess(), start, length);
#else
#ifdef __APPLE__
sys_icache_invalidate(start, length);
#elif __ANDROID__
cacheflush((long) start, (long) start + length, 0);
#elif __linux__
#if GCC_VERSION_AT_LEAST(4,3)
__builtin___clear_cache(start, (char*) start + length);
#elif __GNUC__
__clear_cache((long) start, (long) start + length);
#endif
#endif
return 0;
#endif
}
static FFTS_INLINE void *ffts_vmem_alloc(size_t length)
{
#if __APPLE__
#include <libkern/OSCacheControl.h>
return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
#elif _WIN32
return VirtualAlloc(NULL, length, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
#else
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS 0x20
#endif
void ffts_execute(ffts_plan_t *p, const void * in, void * out) {
p->transform(p, (const float *)in, (float *)out);
return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#endif
}
void ffts_free(ffts_plan_t *p) {
p->destroy(p);
static FFTS_INLINE void ffts_vmem_free(void *addr, size_t length)
{
#ifdef _WIN32
(void) length;
VirtualFree(addr, 0, MEM_RELEASE);
#else
munmap(addr, length);
#endif
}
void ffts_free_1d(ffts_plan_t *p) {
size_t i;
if(p->ws) {
FFTS_FREE(p->ws);
}
if(p->is) free(p->is);
if(p->ws_is) free(p->ws_is);
if(p->offsets) free(p->offsets);
//free(p->transforms);
if(p->transforms) free(p->transforms);
if(p->transform_base) {
if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) {
perror("Couldn't mprotect");
exit(errno);
}
munmap(p->transform_base, p->transform_size);
//free(p->transform_base);
}
free(p);
FFTS_API void
ffts_execute(ffts_plan_t *p, const void *in, void *out)
{
/* TODO: Define NEEDS_ALIGNED properly instead */
#if defined(HAVE_SSE) || defined(HAVE_NEON)
if (((uintptr_t) in % 16) != 0) {
LOG("ffts_execute: input buffer needs to be aligned to a 128bit boundary\n");
}
if (((uintptr_t) out % 16) != 0) {
LOG("ffts_execute: output buffer needs to be aligned to a 128bit boundary\n");
}
#endif
p->transform(p, (const float*) in, (float*) out);
}
ffts_plan_t *ffts_init_1d(size_t N, int sign) {
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
size_t leafN = 8;
size_t i;
#ifdef __arm__
//#ifdef HAVE_NEON
V MULI_SIGN;
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
//#endif
FFTS_API void
ffts_free(ffts_plan_t *p)
{
if (p) {
p->destroy(p);
}
}
void ffts_free_1d(ffts_plan_t *p)
{
#if !defined(DYNAMIC_DISABLED)
if (p->transform_base) {
ffts_deny_execute(p->transform_base, p->transform_size);
ffts_vmem_free(p->transform_base, p->transform_size);
}
#endif
if (p->ws_is) {
free(p->ws_is);
}
if (p->ws) {
FFTS_FREE(p->ws);
}
if (p->is) {
free(p->is);
}
if (p->offsets) {
free(p->offsets);
}
free(p);
}
static int
ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
{
V4SF MULI_SIGN;
size_t n_luts;
ffts_cpx_32f *w;
ffts_cpx_32f *tmp;
size_t i, j, m, n;
int stride;
if (sign < 0) {
MULI_SIGN = V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f);
} else {
MULI_SIGN = V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f);
}
/* LUTS */
n_luts = ffts_ctzl(N / leaf_N);
if (n_luts >= 32) {
n_luts = 0;
}
if (n_luts) {
size_t lut_size;
#if defined(__arm__) && !defined(HAVE_NEON)
lut_size = leaf_N * (((1 << n_luts) - 2) * 3 + 1) * sizeof(ffts_cpx_32f) / 2;
#else
V MULI_SIGN;
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
lut_size = leaf_N * (((1 << n_luts) - 2) * 3 + 1) * sizeof(ffts_cpx_32f);
#endif
p->transform = NULL;
p->transform_base = NULL;
p->transforms = NULL;
p->is = NULL;
p->ws_is = NULL;
p->ws = NULL;
p->offsets = NULL;
p->destroy = ffts_free_1d;
if(N >= 32) {
ffts_init_offsets(p, N, leafN);
#ifdef __arm__
p->ws = FFTS_MALLOC(lut_size, 32);
if (!p->ws) {
goto cleanup;
}
p->ws_is = (size_t*) malloc(n_luts * sizeof(*p->ws_is));
if (!p->ws_is) {
goto cleanup;
}
}
w = p->ws;
n = leaf_N * 2;
#ifdef HAVE_NEON
ffts_init_is(p, N, leafN, 1);
V4SF neg = (sign < 0) ? V4SF_LIT4(0.0f, 0.0f, 0.0f, 0.0f) : V4SF_LIT4(-0.0f, -0.0f, -0.0f, -0.0f);
#endif
/* calculate factors */
m = leaf_N << (n_luts - 2);
tmp = FFTS_MALLOC(m * sizeof(ffts_cpx_32f), 32);
ffts_generate_cosine_sine_pow2_32f(tmp, m);
/* generate lookup tables */
stride = 1 << (n_luts - 1);
for (i = 0; i < n_luts; i++) {
p->ws_is[i] = w - (ffts_cpx_32f*) p->ws;
if (!i) {
ffts_cpx_32f *w0 = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32);
float *fw0 = (float*) w0;
float *fw = (float*) w;
for (j = 0; j < n/4; j++) {
w0[j][0] = tmp[j * stride][0];
w0[j][1] = tmp[j * stride][1];
}
#if defined(__arm__)
#ifdef HAVE_NEON
for (j = 0; j < n/4; j += 4) {
V4SF2 temp0 = V4SF2_LD(fw0 + j*2);
temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
V4SF2_STORE_SPR(fw + j*2, temp0);
}
#else
ffts_init_is(p, N, leafN, 1);
for (j = 0; j < n/4; j++) {
fw[j*2+0] = fw0[j*2+0];
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
}
#endif
w += n/4;
#else
ffts_init_is(p, N, leafN, 1);
for (j = 0; j < n/4; j += 2) {
V4SF re, im, temp0;
temp0 = V4SF_LD(fw0 + j*2);
re = V4SF_DUPLICATE_RE(temp0);
im = V4SF_DUPLICATE_IM(temp0);
im = V4SF_XOR(im, MULI_SIGN);
V4SF_ST(fw + j*4 + 0, re);
V4SF_ST(fw + j*4 + 4, im);
}
w += n/4 * 2;
#endif
p->i0 = N/leafN/3+1;
p->i1 = N/leafN/3;
if((N/leafN) % 3 > 1) p->i1++;
p->i2 = N/leafN/3;
#ifdef __arm__
#ifdef HAVE_NEON
p->i0/=2;
p->i1/=2;
#endif
#else
p->i0/=2;
p->i1/=2;
#endif
}else{
p->transforms = malloc(2 * sizeof(transform_index_t));
p->transforms[0] = 0;
p->transforms[1] = 1;
if(N == 2) p->transform = &firstpass_2;
else if(N == 4 && sign == -1) p->transform = &firstpass_4_f;
else if(N == 4 && sign == 1) p->transform = &firstpass_4_b;
else if(N == 8 && sign == -1) p->transform = &firstpass_8_f;
else if(N == 8 && sign == 1) p->transform = &firstpass_8_b;
else if(N == 16 && sign == -1) p->transform = &firstpass_16_f;
else if(N == 16 && sign == 1) p->transform = &firstpass_16_b;
p->is = NULL;
p->offsets = NULL;
}
int hardcoded = 0;
/* LUTS */
size_t n_luts = __builtin_ctzl(N/leafN);
if(N < 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
if(n_luts >= 32) n_luts = 0;
// fprintf(stderr, "n_luts = %zu\n", n_luts);
cdata_t *w;
int n = leafN*2;
if(hardcoded) n = 8;
size_t lut_size = 0;
for(i=0;i<n_luts;i++) {
if(!i || hardcoded) {
#ifdef __arm__
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
else lut_size += n/4 * sizeof(cdata_t);
#else
lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
n *= 2;
} else {
#ifdef __arm__
lut_size += n/8 * 3 * sizeof(cdata_t);
#else
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
#endif
}
n *= 2;
}
// lut_size *= 16;
// fprintf(stderr, "lut size = %zu\n", lut_size);
if(n_luts) {
p->ws = FFTS_MALLOC(lut_size,32);
p->ws_is = malloc(n_luts * sizeof(size_t));
}else{
p->ws = NULL;
p->ws_is = NULL;
}
w = p->ws;
n = leafN*2;
if(hardcoded) n = 8;
#ifdef HAVE_NEON
V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f);
#endif
for(i=0;i<n_luts;i++) {
p->ws_is[i] = w - (cdata_t *)p->ws;
//fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
if(!i || hardcoded) {
cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
size_t j;
for(j=0;j<n/4;j++) {
w0[j][0] = W_re(n,j);
w0[j][1] = W_im(n,j);
}
float *fw0 = (float *)w0;
#ifdef __arm__
if(N < 32) {
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2;
for(j=0;j<n/4;j+=2) {
// #ifdef HAVE_NEON
temp0 = VLD(fw0 + j*2);
V re, im;
re = VDUPRE(temp0);
im = VDUPIM(temp0);
#ifdef HAVE_NEON
im = VXOR(im, MULI_SIGN);
//im = IMULI(sign>0, im);
#else
im = MULI(sign>0, im);
#endif
VST(fw + j*4 , re);
VST(fw + j*4+4, im);
// #endif
}
w += n/4 * 2;
}else{
//w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
float *fw = (float *)w;
#ifdef HAVE_NEON
VS temp0, temp1, temp2;
for(j=0;j<n/4;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
STORESPR(fw + j*2, temp0);
}
#else
for(j=0;j<n/4;j+=1) {
fw[j*2] = fw0[j*2];
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
}
#endif
w += n/4;
}
#else
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2;
for(j=0;j<n/4;j+=2) {
temp0 = VLD(fw0 + j*2);
V re, im;
re = VDUPRE(temp0);
im = VDUPIM(temp0);
im = VXOR(im, MULI_SIGN);
VST(fw + j*4 , re);
VST(fw + j*4+4, im);
}
w += n/4 * 2;
#endif
FFTS_FREE(w0);
}else{
cdata_t *w0 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
cdata_t *w1 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
cdata_t *w2 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
size_t j;
for(j=0;j<n/8;j++) {
w0[j][0] = W_re(n,j*2);
w0[j][1] = W_im(n,j*2);
w1[j][0] = W_re(n,j);
w1[j][1] = W_im(n,j);
w2[j][0] = W_re(n,j + (n/8));
w2[j][1] = W_im(n,j + (n/8));
}
float *fw0 = (float *)w0;
float *fw1 = (float *)w1;
float *fw2 = (float *)w2;
#ifdef __arm__
//w = FFTS_MALLOC(n/8 * 3 * sizeof(cdata_t), 32);
float *fw = (float *)w;
#ifdef HAVE_NEON
VS temp0, temp1, temp2;
for(j=0;j<n/8;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
STORESPR(fw + j*2*3, temp0);
temp1 = VLD2(fw1 + j*2);
temp1.val[1] = VXOR(temp1.val[1], neg);
STORESPR(fw + j*2*3 + 8, temp1);
temp2 = VLD2(fw2 + j*2);
temp2.val[1] = VXOR(temp2.val[1], neg);
STORESPR(fw + j*2*3 + 16, temp2);
}
#else
for(j=0;j<n/8;j+=1) {
fw[j*6] = fw0[j*2];
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
fw[j*6+2] = fw1[j*2+0];
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
fw[j*6+4] = fw2[j*2+0];
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
}
#endif
w += n/8 * 3;
#else
//w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2, re, im;
for(j=0;j<n/8;j+=2) {
temp0 = VLD(fw0 + j*2);
re = VDUPRE(temp0);
im = VDUPIM(temp0);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6 , re);
VST(fw + j*2*6+4, im);
temp1 = VLD(fw1 + j*2);
re = VDUPRE(temp1);
im = VDUPIM(temp1);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6+8 , re);
VST(fw + j*2*6+12, im);
temp2 = VLD(fw2 + j*2);
re = VDUPRE(temp2);
im = VDUPIM(temp2);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6+16, re);
VST(fw + j*2*6+20, im);
}
w += n/8 * 3 * 2;
#endif
FFTS_FREE(w0);
FFTS_FREE(w1);
FFTS_FREE(w2);
}
///p->ws[i] = w;
n *= 2;
}
float *tmp = (float *)p->ws;
if(sign < 0) {
p->oe_ws = (void *)(&w_data[4]);
p->ee_ws = (void *)(w_data);
p->eo_ws = (void *)(&w_data[4]);
}else{
p->oe_ws = (void *)(w_data + 12);
p->ee_ws = (void *)(w_data + 8);
p->eo_ws = (void *)(w_data + 12);
}
p->N = N;
p->lastlut = w;
p->n_luts = n_luts;
#ifdef DYNAMIC_DISABLED
if(sign < 0) {
if(N >= 32) p->transform = ffts_static_transform_f;
}else{
if(N >= 32) p->transform = ffts_static_transform_i;
}
FFTS_FREE(w0);
} else {
ffts_cpx_32f *w0 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
ffts_cpx_32f *w1 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
ffts_cpx_32f *w2 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
float *fw0 = (float*) w0;
float *fw1 = (float*) w1;
float *fw2 = (float*) w2;
float *fw = (float *)w;
for (j = 0; j < n/8; j++) {
w0[j][0] = tmp[2 * j * stride][0];
w0[j][1] = tmp[2 * j * stride][1];
w1[j][0] = tmp[j * stride][0];
w1[j][1] = tmp[j * stride][1];
w2[j][0] = tmp[(j + (n/8)) * stride][0];
w2[j][1] = tmp[(j + (n/8)) * stride][1];
}
#if defined(__arm__)
#ifdef HAVE_NEON
for (j = 0; j < n/8; j += 4) {
V4SF2 temp0, temp1, temp2;
temp0 = V4SF2_LD(fw0 + j*2);
temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
V4SF2_STORE_SPR(fw + j*2*3, temp0);
temp1 = V4SF2_LD(fw1 + j*2);
temp1.val[1] = V4SF_XOR(temp1.val[1], neg);
V4SF2_STORE_SPR(fw + j*2*3 + 8, temp1);
temp2 = V4SF2_LD(fw2 + j*2);
temp2.val[1] = V4SF_XOR(temp2.val[1], neg);
V4SF2_STORE_SPR(fw + j*2*3 + 16, temp2);
}
#else
if(N>=32) ffts_generate_func_code(p, N, leafN, sign);
for (j = 0; j < n/8; j++) {
fw[j*6+0] = fw0[j*2+0];
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
fw[j*6+2] = fw1[j*2+0];
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
fw[j*6+4] = fw2[j*2+0];
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
}
#endif
w += n/8 * 3;
#else
for (j = 0; j < n/8; j += 2) {
V4SF temp0, temp1, temp2, re, im;
temp0 = V4SF_LD(fw0 + j*2);
re = V4SF_DUPLICATE_RE(temp0);
im = V4SF_DUPLICATE_IM(temp0);
im = V4SF_XOR(im, MULI_SIGN);
V4SF_ST(fw + j*2*6+0, re);
V4SF_ST(fw + j*2*6+4, im);
temp1 = V4SF_LD(fw1 + j*2);
re = V4SF_DUPLICATE_RE(temp1);
im = V4SF_DUPLICATE_IM(temp1);
im = V4SF_XOR(im, MULI_SIGN);
V4SF_ST(fw + j*2*6+8 , re);
V4SF_ST(fw + j*2*6+12, im);
temp2 = V4SF_LD(fw2 + j*2);
re = V4SF_DUPLICATE_RE(temp2);
im = V4SF_DUPLICATE_IM(temp2);
im = V4SF_XOR(im, MULI_SIGN);
V4SF_ST(fw + j*2*6+16, re);
V4SF_ST(fw + j*2*6+20, im);
}
w += n/8 * 3 * 2;
#endif
FFTS_FREE(w0);
FFTS_FREE(w1);
FFTS_FREE(w2);
}
n *= 2;
stride >>= 1;
}
#if defined(HAVE_NEON)
if (sign < 0) {
p->oe_ws = (void*)(w_data + 4);
p->ee_ws = (void*)(w_data);
p->eo_ws = (void*)(w_data + 4);
} else {
p->oe_ws = (void*)(w_data + 12);
p->ee_ws = (void*)(w_data + 8);
p->eo_ws = (void*)(w_data + 12);
}
#endif
FFTS_FREE(tmp);
return p;
p->lastlut = w;
p->n_luts = n_luts;
return 0;
cleanup:
return -1;
}
FFTS_API ffts_plan_t*
ffts_init_1d(size_t N, int sign)
{
const size_t leaf_N = 8;
ffts_plan_t *p;
if (N < 2 || (N & (N - 1)) != 0) {
LOG("FFT size must be a power of two\n");
return NULL;
}
p = calloc(1, sizeof(*p));
if (!p) {
return NULL;
}
p->destroy = ffts_free_1d;
p->N = N;
if (N >= 32) {
/* generate lookup tables */
if (ffts_generate_luts(p, N, leaf_N, sign)) {
goto cleanup;
}
p->offsets = ffts_init_offsets(N, leaf_N);
if (!p->offsets) {
goto cleanup;
}
p->is = ffts_init_is(N, leaf_N, 1);
if (!p->is) {
goto cleanup;
}
p->i0 = N/leaf_N/3 + 1;
p->i1 = p->i2 = N/leaf_N/3;
if ((N/leaf_N) % 3 > 1) {
p->i1++;
}
#if !defined(HAVE_VFP) || defined(DYNAMIC_DISABLED)
p->i0 /= 2;
p->i1 /= 2;
#endif
#ifdef DYNAMIC_DISABLED
if (sign < 0) {
p->transform = ffts_static_transform_f_32f;
} else {
p->transform = ffts_static_transform_i_32f;
}
#else
/* determine transform size */
#if defined(__arm__)
if (N < 8192) {
p->transform_size = 8192;
} else {
p->transform_size = N;
}
#else
if (N < 2048) {
p->transform_size = 16384;
} else {
p->transform_size = 16384 + 2*N/8 * ffts_ctzl(N);
}
#endif
/* allocate code/function buffer */
p->transform_base = ffts_vmem_alloc(p->transform_size);
if (!p->transform_base) {
goto cleanup;
}
/* generate code */
p->transform = ffts_generate_func_code(p, N, leaf_N, sign);
if (!p->transform) {
goto cleanup;
}
/* enable execution with read access for the block */
if (ffts_allow_execute(p->transform_base, p->transform_size)) {
goto cleanup;
}
/* flush from the instruction cache */
if (ffts_flush_instruction_cache(p->transform_base, p->transform_size)) {
goto cleanup;
}
#endif
} else {
switch (N) {
case 2:
p->transform = &ffts_small_2_32f;
break;
case 4:
if (sign == -1) {
p->transform = &ffts_small_forward4_32f;
} else if (sign == 1) {
p->transform = &ffts_small_backward4_32f;
}
break;
case 8:
if (sign == -1) {
p->transform = &ffts_small_forward8_32f;
} else if (sign == 1) {
p->transform = &ffts_small_backward8_32f;
}
break;
case 16:
default:
if (sign == -1) {
p->transform = &ffts_small_forward16_32f;
} else {
p->transform = &ffts_small_backward16_32f;
}
break;
}
}
return p;
cleanup:
ffts_free_1d(p);
return NULL;
}
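For orientation, here is a hedged sketch of how the public entry points touched by this patch (ffts_init_1d, ffts_execute, ffts_free) are typically driven. The interleaved re/im float layout and the 32-byte aligned allocation are assumptions made for the example, chosen to satisfy the alignment check in ffts_execute.
#include <stdio.h>
#include <stdlib.h>
#include "ffts.h"
int main(void)
{
    const size_t N = 64;                 /* must be a power of two */
    ffts_plan_t *p;
    float *in, *out;
    size_t i;
    /* N complex values per buffer, stored as interleaved re/im pairs. */
    in  = aligned_alloc(32, 2 * N * sizeof(float));  /* C11; posix_memalign also works */
    out = aligned_alloc(32, 2 * N * sizeof(float));
    if (!in || !out)
        return 1;
    for (i = 0; i < N; i++) {
        in[2*i + 0] = (i == 1) ? 1.0f : 0.0f;        /* impulse in bin 1 */
        in[2*i + 1] = 0.0f;
    }
    p = ffts_init_1d(N, -1);             /* -1 selects the forward transform */
    if (!p)
        return 1;
    ffts_execute(p, in, out);
    printf("out[0] = %g %+gi\n", out[0], out[1]);
    ffts_free(p);
    free(in);
    free(out);
    return 0;
}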

@ -1,177 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CP_SSE_H__
#define __CP_SSE_H__
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
//#include <stdalign.h>
//#include "codegen.h"
#include "types.h"
#define PI 3.1415926535897932384626433832795028841971693993751058209
static const __attribute__ ((aligned(64))) float w_data[16] = {
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
-0.70710678118654757273731092936941, -0.70710678118654746171500846685376,
1.0f, 0.70710678118654757273731092936941f,
-0.0f, -0.70710678118654746171500846685376,
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
1.0f, 0.70710678118654757273731092936941f,
0.0f, 0.70710678118654746171500846685376
};
__INLINE float W_re(float N, float k) { return cos(-2.0f * PI * k / N); }
__INLINE float W_im(float N, float k) { return sin(-2.0f * PI * k / N); }
typedef size_t transform_index_t;
//typedef void (*transform_func_t)(float *data, size_t N, float *LUT);
typedef void (*transform_func_t)(float *data, size_t N, float *LUT);
typedef struct _ffts_plan_t ffts_plan_t;
/**
* Contains all the information needed to perform the FFT
*
*
* DO NOT CHANGE THE ORDER OF MEMBERS
* ASSEMBLY CODE USES HARD CODED OFFSETS TO REFERENCE
* SOME OF THESE VARIABLES!!
*/
struct _ffts_plan_t {
/**
*
*/
ptrdiff_t *offsets;
#ifdef DYNAMIC_DISABLED
/**
* Twiddle factors
*/
void *ws;
/**
* ee - 2 size x size8
* oo - 2 x size4 in parallel
* oe -
*/
void *oe_ws, *eo_ws, *ee_ws;
#else
void __attribute__((aligned(32))) *ws;
void __attribute__((aligned(32))) *oe_ws, *eo_ws, *ee_ws;
#endif
/**
* Pointer into an array of precomputed indexes for the input data array
*/
ptrdiff_t *is;
/**
* Twiddle Factor Indexes
*/
size_t *ws_is;
/**
* Size of the loops for the base cases
*/
size_t i0, i1, n_luts;
/**
* Size of the transform
*/
size_t N;
void *lastlut;
/**
* Used in multidimensional Code ??
*/
transform_index_t *transforms;
//transform_func_t transform;
/**
* Pointer to the dynamically generated function
* that will execute the FFT
*/
void (*transform)(ffts_plan_t * , const void * , void * );
/**
* Pointer to the base memory address of
* of the transform function
*/
void *transform_base;
/**
* Size of the memory block containing the
* generated code
*/
size_t transform_size;
/**
* Points to the constant variables used by
* the Assembly Code
*/
void *constants;
// multi-dimensional stuff:
struct _ffts_plan_t **plans;
int rank;
size_t *Ns, *Ms;
void *buf;
void *transpose_buf;
/**
* Pointer to the destroy function
* to clean up the plan after use
* (differs for real and multi-dimensional transforms)
*/
void (*destroy)(ffts_plan_t *);
/**
* Coefficients for the real-valued transforms
*/
float *A, *B;
size_t i2;
};
void ffts_free(ffts_plan_t *);
ffts_plan_t *ffts_init_1d(size_t N, int sign);
void ffts_execute(ffts_plan_t *, const void *, void *);
#endif

@ -0,0 +1,111 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_ATTRIBUTES_H
#define FFTS_ATTRIBUTES_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
/* Macro definitions for various function/variable attributes */
#ifdef __GNUC__
#define GCC_VERSION_AT_LEAST(x,y) \
(__GNUC__ > (x) || (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
#else
#define GCC_VERSION_AT_LEAST(x,y) 0
#endif
#ifdef __GNUC__
#define FFTS_ALIGN(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
#define FFTS_ALIGN(x) __declspec(align(x))
#else
#define FFTS_ALIGN(x)
#endif
#if GCC_VERSION_AT_LEAST(3,1)
#define FFTS_ALWAYS_INLINE __attribute__((always_inline)) inline
#elif defined(_MSC_VER)
#define FFTS_ALWAYS_INLINE __forceinline
#else
#define FFTS_ALWAYS_INLINE inline
#endif
#if defined(_MSC_VER)
#define FFTS_INLINE __inline
#else
#define FFTS_INLINE inline
#endif
#if defined(__GNUC__)
#define FFTS_RESTRICT __restrict
#elif defined(_MSC_VER)
#define FFTS_RESTRICT __restrict
#else
#define FFTS_RESTRICT
#endif
#if GCC_VERSION_AT_LEAST(4,5)
#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
#elif defined(_MSC_VER)
#define FFTS_ASSUME(cond) __assume(cond)
#else
#define FFTS_ASSUME(cond)
#endif
#if GCC_VERSION_AT_LEAST(4,7)
#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16)
#else
#define FFTS_ASSUME_ALIGNED_16(x) x
#endif
#if GCC_VERSION_AT_LEAST(4,7)
#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32)
#else
#define FFTS_ASSUME_ALIGNED_32(x) x
#endif
#if defined(__GNUC__)
#define FFTS_LIKELY(cond) __builtin_expect(!!(cond), 1)
#else
#define FFTS_LIKELY(cond) cond
#endif
#if defined(__GNUC__)
#define FFTS_UNLIKELY(cond) __builtin_expect(!!(cond), 0)
#else
#define FFTS_UNLIKELY(cond) cond
#endif
#endif /* FFTS_ATTRIBUTES_H */
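As a quick illustration of how these attribute macros compose (identifiers below are hypothetical, not taken from FFTS):
#include "ffts_attributes.h"   /* assumed header name */
/* 32-byte aligned constant table, mirroring the FFTS_ALIGN(64) w_data usage. */
static const FFTS_ALIGN(32) float example_table[4] = { 1.0f, 0.0f, -1.0f, 0.0f };
static FFTS_ALWAYS_INLINE float
example_first(const float *FFTS_RESTRICT t)
{
    /* Tell the compiler the pointer is 32-byte aligned where supported. */
    const float *a = (const float*) FFTS_ASSUME_ALIGNED_32(t);
    return a[0];
}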

@ -0,0 +1,230 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_DD_H
#define FFTS_DD_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts_attributes.h"
#if HAVE_SSE2
#include <emmintrin.h>
#endif
/* double-double number */
struct ffts_dd_t
{
double hi;
double lo;
};
#if HAVE_SSE2
/* double-double vector */
struct ffts_dd2_t {
__m128d hi;
__m128d lo;
};
#endif
static FFTS_INLINE struct ffts_dd_t
ffts_dd_add_dd_unnormalized(const struct ffts_dd_t a,
const struct ffts_dd_t b);
static FFTS_INLINE struct ffts_dd_t
ffts_dd_mul_dd_unnormalized(const struct ffts_dd_t a,
const struct ffts_dd_t b);
static FFTS_INLINE struct ffts_dd_t
ffts_dd_split(double a);
/* aka quick-two-sum */
static FFTS_INLINE struct ffts_dd_t
ffts_dd_add(double a, double b)
{
struct ffts_dd_t dd;
dd.hi = a + b;
dd.lo = b - (dd.hi - a);
return dd;
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_add_dd(const struct ffts_dd_t a,
const struct ffts_dd_t b)
{
struct ffts_dd_t t1 = ffts_dd_add_dd_unnormalized(a, b);
return ffts_dd_add(t1.hi, t1.lo);
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_add_dd_unnormalized(const struct ffts_dd_t a,
const struct ffts_dd_t b)
{
struct ffts_dd_t dd;
double e1;
dd.hi = a.hi + b.hi;
e1 = dd.hi - a.hi;
dd.lo = ((a.hi - (dd.hi - e1)) + (b.hi - e1)) + (a.lo + b.lo);
return dd;
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_mul(const double a, const double b)
{
struct ffts_dd_t dd;
struct ffts_dd_t t1 = ffts_dd_split(a);
struct ffts_dd_t t2 = ffts_dd_split(b);
dd.hi = a * b;
dd.lo = (t1.hi * t2.hi - dd.hi);
dd.lo += (t1.hi * t2.lo + t1.lo * t2.hi);
dd.lo += t1.lo * t2.lo;
return dd;
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_mul_dd(const struct ffts_dd_t a,
const struct ffts_dd_t b)
{
struct ffts_dd_t dd = ffts_dd_mul_dd_unnormalized(a, b);
return ffts_dd_add(dd.hi, dd.lo);
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_mul_dd_unnormalized(const struct ffts_dd_t a,
const struct ffts_dd_t b)
{
struct ffts_dd_t dd = ffts_dd_mul(a.hi, b.hi);
dd.lo += (a.hi * b.lo + a.lo * b.hi);
return dd;
}
static FFTS_INLINE struct ffts_dd_t
ffts_dd_split(double a)
{
/* 2^27+1 = 134217729 */
struct ffts_dd_t dd;
double t = 134217729.0 * a;
dd.hi = t - (t - a);
dd.lo = a - dd.hi;
return dd;
}
#if HAVE_SSE2
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_add_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b);
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_mul_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b);
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_split(__m128d a);
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_add(__m128d a, __m128d b)
{
struct ffts_dd2_t dd2;
dd2.hi = _mm_add_pd(a, b);
dd2.lo = _mm_sub_pd(b, _mm_sub_pd(dd2.hi, a));
return dd2;
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_add_dd2(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b)
{
struct ffts_dd2_t t1 = ffts_dd2_add_dd2_unnormalized(a, b);
return ffts_dd2_add(t1.hi, t1.lo);
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_add_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b)
{
struct ffts_dd2_t dd2;
__m128d e1;
dd2.hi = _mm_add_pd(a->hi, b->hi);
e1 = _mm_sub_pd(dd2.hi, a->hi);
dd2.lo = _mm_add_pd(_mm_add_pd(_mm_sub_pd(a->hi, _mm_sub_pd(dd2.hi, e1)),
_mm_sub_pd(b->hi, e1)), _mm_add_pd(a->lo, b->lo));
return dd2;
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_mul(const __m128d a, const __m128d b)
{
struct ffts_dd2_t dd2;
struct ffts_dd2_t t1 = ffts_dd2_split(a);
struct ffts_dd2_t t2 = ffts_dd2_split(b);
dd2.hi = _mm_mul_pd(a, b);
dd2.lo = _mm_add_pd(_mm_add_pd(_mm_sub_pd(
_mm_mul_pd(t1.hi, t2.hi), dd2.hi),
_mm_add_pd(_mm_mul_pd(t1.hi, t2.lo),
_mm_mul_pd(t1.lo, t2.hi))),
_mm_mul_pd(t1.lo, t2.lo));
return dd2;
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_mul_dd2(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b)
{
struct ffts_dd2_t dd2 = ffts_dd2_mul_dd2_unnormalized(a, b);
return ffts_dd2_add(dd2.hi, dd2.lo);
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_mul_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
const struct ffts_dd2_t *const FFTS_RESTRICT b)
{
struct ffts_dd2_t dd2 = ffts_dd2_mul(a->hi, b->hi);
dd2.lo = _mm_add_pd(dd2.lo, _mm_add_pd(
_mm_mul_pd(a->hi, b->lo), _mm_mul_pd(a->lo, b->hi)));
return dd2;
}
static FFTS_INLINE struct ffts_dd2_t
ffts_dd2_split(__m128d a)
{
/* 2^27+1 = 134217729 */
struct ffts_dd2_t dd2;
__m128d t = _mm_mul_pd(a, _mm_set1_pd(134217729.0));
dd2.hi = _mm_sub_pd(t, _mm_sub_pd(t, a));
dd2.lo = _mm_sub_pd(a, dd2.hi);
return dd2;
}
#endif /* HAVE_SSE2 */
#endif /* FFTS_DD_H */
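For context, a small usage sketch (not part of this commit) of the scalar double-double helpers declared above, assuming the FFTS headers are on the include path; the dot-product example itself is hypothetical:

#include <stdio.h>
#include "ffts_dd.h"

int main(void)
{
    /* accumulate the dot product x.y in double-double precision; a plain
       double accumulator would lose the two 1.0 contributions entirely */
    double x[4] = { 1e16, 1.0, -1e16, 1.0 };
    double y[4] = { 1.0, 1.0, 1.0, 1.0 };
    struct ffts_dd_t sum = { 0.0, 0.0 };
    int i;

    for (i = 0; i < 4; i++) {
        struct ffts_dd_t prod = ffts_dd_mul(x[i], y[i]);
        sum = ffts_dd_add_dd(sum, prod);
    }

    printf("%.17g\n", sum.hi + sum.lo);  /* prints 2, not 1 */
    return 0;
}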

@ -0,0 +1,215 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_INTERNAL_H
#define FFTS_INTERNAL_H
//#include "config.h"
#include "ffts_attributes.h"
#include "types.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <stddef.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#include <stdio.h>
#ifdef ENABLE_LOG
#ifdef __ANDROID__
#include <android/log.h>
#define LOG(s) __android_log_print(ANDROID_LOG_ERROR, "FFTS", s)
#else
#define LOG(s) fprintf(stderr, s)
#endif
#else
#define LOG(s)
#endif
struct _ffts_plan_t;
typedef void (*transform_func_t)(struct _ffts_plan_t *p, const void *in, void *out);
/**
* Contains all the information needed to perform an FFT
*
* DO NOT CHANGE THE ORDER OF MEMBERS
* ASSEMBLY CODE USES HARD-CODED OFFSETS TO REFERENCE
* SOME OF THESE VARIABLES!!
*/
struct _ffts_plan_t {
/**
*
*/
ptrdiff_t *offsets;
#ifdef DYNAMIC_DISABLED
/**
* Twiddle factors
*/
void *ws;
/**
* ee - 2 size x size8
* oo - 2 x size4 in parallel
* oe -
*/
void *oe_ws, *eo_ws, *ee_ws;
#else
void FFTS_ALIGN(32) *ws;
void FFTS_ALIGN(32) *oe_ws, *eo_ws, *ee_ws;
#endif
/**
* Pointer into an array of precomputed indexes for the input data array
*/
ptrdiff_t *is;
/**
* Twiddle Factor Indexes
*/
size_t *ws_is;
/**
* Size of the loops for the base cases
*/
size_t i0, i1, n_luts;
/**
* Size of the transform
*/
size_t N;
void *lastlut;
#ifdef __arm__
size_t *temporary_fix_as_dynamic_code_assumes_fixed_offset;
#endif
/**
* Pointer to the dynamically generated function
* that will execute the FFT
*/
transform_func_t transform;
/**
* Pointer to the base memory address
* of the transform function
*/
void *transform_base;
/**
* Size of the memory block containing the
* generated code
*/
size_t transform_size;
/**
* Points to the constant variables used by
* the assembly code
*/
void *constants;
// multi-dimensional stuff:
struct _ffts_plan_t **plans;
int rank;
size_t *Ns, *Ms;
void *buf;
void *transpose_buf;
/**
* Pointer to the destroy function
* to clean up the plan after use
* (differs for real and multi-dimensional transforms)
*/
void (*destroy)(struct _ffts_plan_t *);
/**
* Coefficients for the real-valued transforms
*/
float *A, *B;
size_t i2;
};
static FFTS_INLINE void *ffts_aligned_malloc(size_t size)
{
#if defined(_WIN32)
return _aligned_malloc(size, 32);
#else
return valloc(size);
#endif
}
static FFTS_INLINE void ffts_aligned_free(void *p)
{
#if defined(_WIN32)
_aligned_free(p);
#else
free(p);
#endif
}
#if GCC_VERSION_AT_LEAST(3,3)
#define ffts_ctzl __builtin_ctzl
#elif defined(_MSC_VER)
#include <intrin.h>
#ifdef _M_X64
#pragma intrinsic(_BitScanForward64)
static __inline unsigned long ffts_ctzl(size_t N)
{
unsigned long count;
_BitScanForward64((unsigned long*) &count, N);
return count;
}
#else
#pragma intrinsic(_BitScanForward)
static __inline unsigned long ffts_ctzl(size_t N)
{
unsigned long count;
_BitScanForward((unsigned long*) &count, N);
return count;
}
#endif /* _M_X64 */
#endif /* _MSC_VER */
#endif /* FFTS_INTERNAL_H */
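For orientation, a minimal caller-side sketch (not part of this commit) of the internal helpers above; it assumes the FFTS source tree is on the include path and is purely illustrative:

#include <stdio.h>
#include "ffts_internal.h"

int main(void)
{
    size_t N = 4096;
    float *buf;

    /* ffts_ctzl() gives log2(N) for power-of-two sizes: 4096 -> 12 */
    printf("log2(N) = %d\n", (int) ffts_ctzl(N));

    /* aligned allocations must be released with ffts_aligned_free() */
    buf = (float*) ffts_aligned_malloc(2 * N * sizeof(float));
    if (!buf) {
        return 1;
    }
    ffts_aligned_free(buf);
    return 0;
}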

@ -1,282 +1,193 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_nd.h"
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef HAVE_NEON
#include "neon.h"
#endif
void ffts_free_nd(ffts_plan_t *p) {
int i;
for(i=0;i<p->rank;i++) {
ffts_plan_t *x = p->plans[i];
int k;
for(k=0;k<i;k++) {
if(p->Ms[i] == p->Ms[k]) x = NULL;
}
if(x) ffts_free(x);
}
free(p->Ns);
free(p->Ms);
free(p->plans);
free(p->buf);
free(p->transpose_buf);
free(p);
}
#define TSIZE 8
#include <string.h>
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
#ifdef HAVE_NEON
size_t i,j,k;
int linebytes = w*8;
for(j=0;j<h;j+=8) {
for(i=0;i<w;i+=8) {
neon_transpose_to_buf(in + j*w + i, buf, w);
uint64_t *p = out + i*h + j;
uint64_t *pbuf = buf;
uint64_t *ptemp;
__asm__ __volatile__(
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"add %[p], %[p], %[w], lsl #3\n\t"
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
"mov %[ptemp], %[p]\n\t"
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
: [p] "+r" (p), [pbuf] "+r" (pbuf), [ptemp] "+r" (ptemp)
: [w] "r" (w)
: "memory", "q8", "q9", "q10", "q11"
);
// out[i*h + j] = in[j*w + i];
}
}
#else
#ifdef HAVE_SSE
uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64)));
int tx, ty;
int x, y;
int tw = w / TSIZE;
int th = h / TSIZE;
for (ty=0;ty<th;ty++) {
for (tx=0;tx<tw;tx++) {
uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
uint64_t *op0 = tmp;//out + h*TSIZE*tx + ty*TSIZE;
// Copy/transpose to tmp
for (y=0;y<TSIZE;y+=2) {
//for (x=0;x<TSIZE;x+=2) {
//op[x*TSIZE] = ip[x];
__m128d q0 = _mm_load_pd((double *)(ip0 + 0*w));
__m128d q1 = _mm_load_pd((double *)(ip0 + 1*w));
__m128d q2 = _mm_load_pd((double *)(ip0 + 2*w));
__m128d q3 = _mm_load_pd((double *)(ip0 + 3*w));
__m128d q4 = _mm_load_pd((double *)(ip0 + 4*w));
__m128d q5 = _mm_load_pd((double *)(ip0 + 5*w));
__m128d q6 = _mm_load_pd((double *)(ip0 + 6*w));
__m128d q7 = _mm_load_pd((double *)(ip0 + 7*w));
ip0 += 2;
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
__m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
__m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
__m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
__m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
__m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
__m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));
//_mm_store_pd((double *)(op0 + y*h + x), t0);
//_mm_store_pd((double *)(op0 + y*h + x + h), t1);
_mm_store_pd((double *)(op0 + 0), t0);
_mm_store_pd((double *)(op0 + 0 + TSIZE), t1);
_mm_store_pd((double *)(op0 + 2 ), t2);
_mm_store_pd((double *)(op0 + 2 + TSIZE), t3);
_mm_store_pd((double *)(op0 + 4 ), t4);
_mm_store_pd((double *)(op0 + 4 + TSIZE), t5);
_mm_store_pd((double *)(op0 + 6 ), t6);
_mm_store_pd((double *)(op0 + 6 + TSIZE), t7);
//}
op0 += 2*TSIZE;
}
op0 = out + h*tx*TSIZE + ty*TSIZE;
ip0 = tmp;
for (y=0;y<TSIZE;y+=1) {
// memcpy(op0, ip0, TSIZE * sizeof(*ip0));
__m128d q0 = _mm_load_pd((double *)(ip0 + 0));
__m128d q1 = _mm_load_pd((double *)(ip0 + 2));
__m128d q2 = _mm_load_pd((double *)(ip0 + 4));
__m128d q3 = _mm_load_pd((double *)(ip0 + 6));
_mm_store_pd((double *)(op0 + 0), q0);
_mm_store_pd((double *)(op0 + 2), q1);
_mm_store_pd((double *)(op0 + 4), q2);
_mm_store_pd((double *)(op0 + 6), q3);
op0 += h;
ip0 += TSIZE;
}
}
}
/*
size_t i,j;
for(i=0;i<w;i+=2) {
for(j=0;j<h;j+=2) {
// out[i*h + j] = in[j*w + i];
__m128d q0 = _mm_load_pd((double *)(in + j*w + i));
__m128d q1 = _mm_load_pd((double *)(in + j*w + i + w));
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
_mm_store_pd((double *)(out + i*h + j), t0);
_mm_store_pd((double *)(out + i*h + j + h), t1);
}
}
*/
#endif
#endif
}
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) {
uint64_t *din = (uint64_t *)in;
uint64_t *buf = p->buf;
uint64_t *dout = (uint64_t *)out;
size_t i,j;
for(i=0;i<p->Ns[0];i++) {
p->plans[0]->transform(p->plans[0], din + (i * p->Ms[0]), buf + (i * p->Ms[0]));
}
ffts_transpose(buf, dout, p->Ms[0], p->Ns[0], p->transpose_buf);
for(i=1;i<p->rank;i++) {
for(j=0;j<p->Ns[i];j++) {
p->plans[i]->transform(p->plans[i], dout + (j * p->Ms[i]), buf + (j * p->Ms[i]));
}
ffts_transpose(buf, dout, p->Ms[i], p->Ns[i], p->transpose_buf);
}
#include "ffts_nd.h"
#include "ffts_internal.h"
#include "ffts_transpose.h"
static void
ffts_free_nd(ffts_plan_t *p)
{
if (p->plans) {
int i, j;
for (i = 0; i < p->rank; i++) {
ffts_plan_t *plan = p->plans[i];
if (plan) {
for (j = 0; j < i; j++) {
if (p->Ns[i] == p->Ns[j]) {
plan = NULL;
break;
}
}
if (plan) {
ffts_free(plan);
}
}
}
free(p->plans);
}
if (p->Ns) {
free(p->Ns);
}
if (p->Ms) {
free(p->Ms);
}
if (p->buf) {
ffts_aligned_free(p->buf);
}
free(p);
}
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign) {
size_t vol = 1;
static void
ffts_execute_nd(ffts_plan_t *p, const void *in, void *out)
{
uint64_t *din = (uint64_t*) in;
uint64_t *buf = p->buf;
uint64_t *dout = (uint64_t*) out;
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
ffts_plan_t *plan;
int i;
size_t j;
p->transform = &ffts_execute_nd;
p->destroy = &ffts_free_nd;
plan = p->plans[0];
for (j = 0; j < p->Ms[0]; j++) {
plan->transform(plan, din + (j * p->Ns[0]), buf + (j * p->Ns[0]));
}
p->rank = rank;
p->Ns = malloc(sizeof(size_t) * rank);
p->Ms = malloc(sizeof(size_t) * rank);
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
int i;
for(i=0;i<rank;i++) {
p->Ns[i] = Ns[i];
vol *= Ns[i];
}
p->buf = valloc(sizeof(float) * 2 * vol);
ffts_transpose(buf, dout, p->Ns[0], p->Ms[0]);
for(i=0;i<rank;i++) {
p->Ms[i] = vol / p->Ns[i];
for (i = 1; i < p->rank; i++) {
plan = p->plans[i];
p->plans[i] = NULL;
int k;
for(k=0;k<i;k++) {
if(p->Ms[k] == p->Ms[i])
p->plans[i] = p->plans[k];
}
for (j = 0; j < p->Ms[i]; j++) {
plan->transform(plan, dout + (j * p->Ns[i]), buf + (j * p->Ns[i]));
}
if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
}
ffts_transpose(buf, dout, p->Ns[i], p->Ms[i]);
}
}
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
return p;
FFTS_API ffts_plan_t*
ffts_init_nd(int rank, size_t *Ns, int sign)
{
ffts_plan_t *p;
size_t vol = 1;
int i, j;
if (!Ns) {
return NULL;
}
if (rank == 1) {
return ffts_init_1d(Ns[0], sign);
}
p = calloc(1, sizeof(*p));
if (!p) {
return NULL;
}
p->transform = &ffts_execute_nd;
p->destroy = &ffts_free_nd;
p->rank = rank;
p->Ms = malloc(rank * sizeof(*p->Ms));
if (!p->Ms) {
goto cleanup;
}
p->Ns = malloc(rank * sizeof(*p->Ns));
if (!p->Ns) {
goto cleanup;
}
/* reverse the order */
for (i = 0; i < rank; i++) {
size_t N = Ns[rank - i - 1];
p->Ns[i] = N;
vol *= N;
}
p->buf = ffts_aligned_malloc(2 * vol * sizeof(float));
if (!p->buf) {
goto cleanup;
}
p->plans = calloc(rank, sizeof(*p->plans));
if (!p->plans) {
goto cleanup;
}
for (i = 0; i < rank; i++) {
p->Ms[i] = vol / p->Ns[i];
for (j = 0; j < i; j++) {
if (p->Ns[i] == p->Ns[j]) {
p->plans[i] = p->plans[j];
break;
}
}
if (!p->plans[i]) {
p->plans[i] = ffts_init_1d(p->Ns[i], sign);
if (!p->plans[i]) {
goto cleanup;
}
}
}
return p;
cleanup:
ffts_free_nd(p);
return NULL;
}
FFTS_API ffts_plan_t*
ffts_init_2d(size_t N1, size_t N2, int sign)
{
size_t Ns[2];
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign) {
size_t Ns[2];
Ns[0] = N1;
Ns[1] = N2;
return ffts_init_nd(2, Ns, sign);
Ns[0] = N1; /* x */
Ns[1] = N2; /* y */
return ffts_init_nd(2, Ns, sign);
}
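For reference, a caller-side sketch (not part of this commit) of the 2-D API implemented above; the mapping of N1/N2 to rows and columns is an assumption here, and transform_image() is a hypothetical wrapper:

#include "ffts.h"

int transform_image(float *in, float *out, size_t rows, size_t cols)
{
    /* a negative sign selects the forward transform, as elsewhere in FFTS */
    ffts_plan_t *p = ffts_init_2d(rows, cols, -1);
    if (!p) {
        return -1;
    }

    /* in/out each hold 2 * rows * cols interleaved complex floats */
    ffts_execute(p, in, out);
    ffts_free(p);
    return 0;
}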

@ -1,58 +1,50 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_ND_H__
#define __FFTS_ND_H__
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
*/
#include "ffts.h"
#ifndef FFTS_ND_H
#define FFTS_ND_H
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
void ffts_free_nd(ffts_plan_t *p);
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf);
#include "ffts.h"
#include <stddef.h>
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out);
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
ffts_plan_t*
ffts_init_nd(int rank, size_t *Ns, int sign);
#endif
ffts_plan_t*
ffts_init_2d(size_t N1, size_t N2, int sign);
#endif /* FFTS_ND_H */

@ -1,226 +1,654 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_real.h"
#include "ffts_internal.h"
#include "ffts_trig.h"
#ifdef HAVE_NEON
#include <arm_neon.h>
#elif HAVE_SSE
#include <xmmintrin.h>
/* check if have SSE3 intrinsics */
#ifdef HAVE_PMMINTRIN_H
#include <pmmintrin.h>
#elif HAVE_INTRIN_H
#include <intrin.h>
#else
/* avoid using negative zero as some configurations have problems with those */
static const FFTS_ALIGN(16) unsigned int sign_mask_even[4] = {
0x80000000, 0, 0x80000000, 0
};
static const FFTS_ALIGN(16) unsigned int sign_mask_odd[4] = {
0, 0x80000000, 0, 0x80000000
};
#endif
#endif
static void
ffts_free_1d_real(ffts_plan_t *p)
{
if (p->B) {
ffts_aligned_free(p->B);
}
if (p->A) {
ffts_aligned_free(p->A);
}
void ffts_free_1d_real(ffts_plan_t *p) {
ffts_free(p->plans[0]);
free(p->A);
free(p->B);
free(p->plans);
free(p->buf);
free(p);
if (p->buf) {
ffts_aligned_free(p->buf);
}
if (p->plans[0]) {
ffts_free(p->plans[0]);
}
free(p);
}
void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) {
float *out = (float *)vout;
float *buf = (float *)p->buf;
float *A = p->A;
float *B = p->B;
static void
ffts_execute_1d_real(ffts_plan_t *p, const void *input, void *output)
{
float *const FFTS_RESTRICT out =
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(output);
float *const FFTS_RESTRICT buf =
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
const float *const FFTS_RESTRICT A =
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
const float *const FFTS_RESTRICT B =
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
const int N = (const int) p->N;
int i;
#ifdef __ARM_NEON__
float *p_buf0 = buf;
float *p_buf1 = buf + N - 2;
float *p_out = out;
#endif
p->plans[0]->transform(p->plans[0], vin, buf);
/* we know this */
FFTS_ASSUME(N/2 > 0);
size_t N = p->N;
buf[N] = buf[0];
buf[N+1] = buf[1];
p->plans[0]->transform(p->plans[0], input, buf);
float *p_buf0 = buf;
float *p_buf1 = buf + N - 2;
float *p_out = out;
#ifndef HAVE_SSE
buf[N + 0] = buf[0];
buf[N + 1] = buf[1];
#endif
size_t i;
#ifdef __ARM_NEON__
for(i=0;i<N/2;i+=2) {
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
"vld1.32 {q9}, [%[pb], :128]!\n\t"
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
"vld1.32 {q11}, [%[buf1], :64]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vneg.f32 d26, d26\n\t"
"vneg.f32 d31, d31\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vadd.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout], :128]!\n\t"
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
[pout] "+r" (p_out)
:
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
for (i = 0; i < N; i += 4) {
__asm__ __volatile__ (
"vld1.32 {q8}, [%[pa]]!\n\t"
"vld1.32 {q9}, [%[pb]]!\n\t"
"vld1.32 {q10}, [%[buf0]]!\n\t"
"vld1.32 {q11}, [%[buf1]]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vneg.f32 d26, d26\n\t"
"vneg.f32 d31, d31\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vadd.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout]]!\n\t"
: [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1), [pout] "+r" (p_out)
: [pa] "r" (A), [pb] "r" (B)
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#elif HAVE_SSE3
if (FFTS_UNLIKELY(N <= 8)) {
__m128 t0 = _mm_load_ps(buf);
__m128 t1 = _mm_load_ps(buf + N - 4);
__m128 t2 = _mm_load_ps(A);
__m128 t3 = _mm_load_ps(B);
_mm_store_ps(out, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t0, _mm_moveldup_ps(t2)),
_mm_mul_ps(_mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t2))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,2,0,0)), t3))));
if (N == 8) {
t2 = _mm_load_ps(A + 4);
t3 = _mm_load_ps(B + 4);
_mm_store_ps(out + 4, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t2)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t2))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(2,2,0,0)), t3))));
}
} else {
__m128 t0 = _mm_load_ps(buf);
for (i = 0; i < N; i += 16) {
__m128 t1 = _mm_load_ps(buf + i);
__m128 t2 = _mm_load_ps(buf + N - i - 4);
__m128 t3 = _mm_load_ps(A + i);
__m128 t4 = _mm_load_ps(B + i);
_mm_store_ps(out + i, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
t0 = _mm_load_ps(buf + N - i - 8);
t1 = _mm_load_ps(buf + i + 4);
t3 = _mm_load_ps(A + i + 4);
t4 = _mm_load_ps(B + i + 4);
_mm_store_ps(out + i + 4, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
t1 = _mm_load_ps(buf + i + 8);
t2 = _mm_load_ps(buf + N - i - 12);
t3 = _mm_load_ps(A + i + 8);
t4 = _mm_load_ps(B + i + 8);
_mm_store_ps(out + i + 8, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
t0 = _mm_load_ps(buf + N - i - 16);
t1 = _mm_load_ps(buf + i + 12);
t3 = _mm_load_ps(A + i + 12);
t4 = _mm_load_ps(B + i + 12);
_mm_store_ps(out + i + 12, _mm_add_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
}
}
#elif HAVE_SSE
if (FFTS_UNLIKELY(N <= 8)) {
__m128 c0 = _mm_load_ps((const float*) sign_mask_even);
__m128 t0 = _mm_load_ps(buf);
__m128 t1 = _mm_load_ps(buf + N - 4);
__m128 t2 = _mm_load_ps(A);
__m128 t3 = _mm_load_ps(B);
_mm_store_ps(out, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t0, _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,2,0,0)), t3)),
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t3, c0), _mm_xor_ps(t3, c0),
_MM_SHUFFLE(2,3,0,1)))));
if (N == 8) {
t2 = _mm_load_ps(A + 4);
t3 = _mm_load_ps(B + 4);
_mm_store_ps(out + 4, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(2,2,0,0)), t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t3, c0), _mm_xor_ps(t3, c0),
_MM_SHUFFLE(2,3,0,1)))));
}
} else {
__m128 c0 = _mm_load_ps((const float*) sign_mask_even);
__m128 t0 = _mm_load_ps(buf);
for (i = 0; i < N; i += 16) {
__m128 t1 = _mm_load_ps(buf + i);
__m128 t2 = _mm_load_ps(buf + N - i - 4);
__m128 t3 = _mm_load_ps(A + i);
__m128 t4 = _mm_load_ps(B + i);
_mm_store_ps(out + i, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4)),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
_MM_SHUFFLE(2,3,0,1)))));
t0 = _mm_load_ps(buf + N - i - 8);
t1 = _mm_load_ps(buf + i + 4);
t3 = _mm_load_ps(A + i + 4);
t4 = _mm_load_ps(B + i + 4);
_mm_store_ps(out + i + 4, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4)),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
_MM_SHUFFLE(2,3,0,1)))));
t1 = _mm_load_ps(buf + i + 8);
t2 = _mm_load_ps(buf + N - i - 12);
t3 = _mm_load_ps(A + i + 8);
t4 = _mm_load_ps(B + i + 8);
_mm_store_ps(out + i + 8, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4)),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
_MM_SHUFFLE(2,3,0,1)))));
t0 = _mm_load_ps(buf + N - i - 16);
t1 = _mm_load_ps(buf + i + 12);
t3 = _mm_load_ps(A + i + 12);
t4 = _mm_load_ps(B + i + 12);
_mm_store_ps(out + i + 12, _mm_add_ps(_mm_add_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4)),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
_MM_SHUFFLE(2,3,0,1)))));
}
}
#else
for(i=0;i<N/2;i++) {
out[2*i] = buf[2*i]*A[2*i] - buf[2*i+1]*A[2*i+1] + buf[N-2*i]*B[2*i] + buf[N-2*i+1]*B[2*i+1];
out[2*i+1] = buf[2*i+1]*A[2*i] + buf[2*i]*A[2*i+1] + buf[N-2*i]*B[2*i+1] - buf[N-2*i+1]*B[2*i];
// out[2*N-2*i] = out[2*i];
// out[2*N-2*i+1] = -out[2*i+1];
#endif
}
out[N] = buf[0] - buf[1];
out[N+1] = 0.0f;
for (i = 0; i < N/2; i++) {
out[2*i + 0] =
buf[ 2*i + 0] * A[2*i + 0] - buf[ 2*i + 1] * A[2*i + 1] +
buf[N - 2*i + 0] * B[2*i + 0] + buf[N - 2*i + 1] * B[2*i + 1];
out[2*i + 1] =
buf[ 2*i + 1] * A[2*i + 0] + buf[ 2*i + 0] * A[2*i + 1] +
buf[N - 2*i + 0] * B[2*i + 1] - buf[N - 2*i + 1] * B[2*i + 0];
}
#endif
out[N + 0] = buf[0] - buf[1];
out[N + 1] = 0.0f;
}
void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout) {
float *out = (float *)vout;
float *in = (float *)vin;
float *buf = (float *)p->buf;
float *A = p->A;
float *B = p->B;
size_t N = p->N;
float *p_buf0 = in;
float *p_buf1 = in + N - 2;
float *p_out = buf;
size_t i;
static void
ffts_execute_1d_real_inv(ffts_plan_t *p, const void *input, void *output)
{
float *const FFTS_RESTRICT in =
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(input);
float *const FFTS_RESTRICT buf =
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
const float *const FFTS_RESTRICT A =
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
const float *const FFTS_RESTRICT B =
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
const int N = (const int) p->N;
int i;
#ifdef __ARM_NEON__
float *p_buf0 = in;
float *p_buf1 = in + N - 2;
float *p_out = buf;
#endif
/* we know this */
FFTS_ASSUME(N/2 > 0);
#ifdef __ARM_NEON__
for(i=0;i<N/2;i+=2) {
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
"vld1.32 {q9}, [%[pb], :128]!\n\t"
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
"vld1.32 {q11}, [%[buf1], :64]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vneg.f32 d27, d27\n\t"
"vneg.f32 d29, d29\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vsub.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout], :128]!\n\t"
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
[pout] "+r" (p_out)
:
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
for (i = 0; i < N/2; i += 2) {
__asm__ __volatile__ (
"vld1.32 {q8}, [%[pa]]!\n\t"
"vld1.32 {q9}, [%[pb]]!\n\t"
"vld1.32 {q10}, [%[buf0]]!\n\t"
"vld1.32 {q11}, [%[buf1]]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vneg.f32 d27, d27\n\t"
"vneg.f32 d29, d29\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vsub.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout]]!\n\t"
: [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1), [pout] "+r" (p_out)
: [pa] "r" (A), [pb] "r" (B)
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
#elif HAVE_SSE3
if (FFTS_UNLIKELY(N <= 8)) {
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
__m128 t1 = _mm_load_ps(in);
__m128 t2 = _mm_load_ps(in + N - 4);
__m128 t3 = _mm_load_ps(A);
__m128 t4 = _mm_load_ps(B);
_mm_store_ps(buf, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
if (N == 8) {
t3 = _mm_load_ps(A + 4);
t4 = _mm_load_ps(B + 4);
_mm_store_ps(buf + 4, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t2, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,2,0,0)), t4))));
}
} else {
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
for (i = 0; i < N; i += 16) {
__m128 t1 = _mm_load_ps(in + i);
__m128 t2 = _mm_load_ps(in + N - i - 4);
__m128 t3 = _mm_load_ps(A + i);
__m128 t4 = _mm_load_ps(B + i);
_mm_store_ps(buf + i, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
t0 = _mm_load_ps(in + N - i - 8);
t1 = _mm_load_ps(in + i + 4);
t3 = _mm_load_ps(A + i + 4);
t4 = _mm_load_ps(B + i + 4);
_mm_store_ps(buf + i + 4, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
t1 = _mm_load_ps(in + i + 8);
t2 = _mm_load_ps(in + N - i - 12);
t3 = _mm_load_ps(A + i + 8);
t4 = _mm_load_ps(B + i + 8);
_mm_store_ps(buf + i + 8, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
t0 = _mm_load_ps(in + N - i - 16);
t1 = _mm_load_ps(in + i + 12);
t3 = _mm_load_ps(A + i + 12);
t4 = _mm_load_ps(B + i + 12);
_mm_store_ps(buf + i + 12, _mm_sub_ps(_mm_addsub_ps(
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_movehdup_ps(t3))), _mm_addsub_ps(
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
}
}
#elif HAVE_SSE
if (FFTS_UNLIKELY(N <= 8)) {
__m128 c0 = _mm_load_ps((const float*) sign_mask_odd);
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
__m128 t1 = _mm_load_ps(in);
__m128 t2 = _mm_load_ps(in + N - 4);
__m128 t3 = _mm_load_ps(A);
__m128 t4 = _mm_load_ps(B);
_mm_store_ps(buf, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
if (N == 8) {
t3 = _mm_load_ps(A + 4);
t4 = _mm_load_ps(B + 4);
_mm_store_ps(buf + 4, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t2, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
}
} else {
__m128 c0 = _mm_load_ps((const float*) sign_mask_odd);
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
for (i = 0; i < N; i += 16) {
__m128 t1 = _mm_load_ps(in + i);
__m128 t2 = _mm_load_ps(in + N - i - 4);
__m128 t3 = _mm_load_ps(A + i);
__m128 t4 = _mm_load_ps(B + i);
_mm_store_ps(buf + i, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
t0 = _mm_load_ps(in + N - i - 8);
t1 = _mm_load_ps(in + i + 4);
t3 = _mm_load_ps(A + i + 4);
t4 = _mm_load_ps(B + i + 4);
_mm_store_ps(buf + i + 4, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
t1 = _mm_load_ps(in + i + 8);
t2 = _mm_load_ps(in + N - i - 12);
t3 = _mm_load_ps(A + i + 8);
t4 = _mm_load_ps(B + i + 8);
_mm_store_ps(buf + i + 8, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
t0 = _mm_load_ps(in + N - i - 16);
t1 = _mm_load_ps(in + i + 12);
t3 = _mm_load_ps(A + i + 12);
t4 = _mm_load_ps(B + i + 12);
_mm_store_ps(buf + i + 12, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)),
_mm_xor_ps(t4, c0))));
}
}
#else
for(i=0;i<N/2;i++) {
buf[2*i] = in[2*i]*A[2*i] + in[2*i+1]*A[2*i+1] + in[N-2*i]*B[2*i] - in[N-2*i+1]*B[2*i+1];
buf[2*i+1] = in[2*i+1]*A[2*i] - in[2*i]*A[2*i+1] - in[N-2*i]*B[2*i+1] - in[N-2*i+1]*B[2*i];
for (i = 0; i < N/2; i++) {
buf[2*i + 0] =
in[ 2*i + 0] * A[2*i + 0] + in[ 2*i + 1] * A[2*i + 1] +
in[N - 2*i + 0] * B[2*i + 0] - in[N - 2*i + 1] * B[2*i + 1];
buf[2*i + 1] =
in[ 2*i + 1] * A[2*i + 0] - in[ 2*i + 0] * A[2*i + 1] -
in[N - 2*i + 0] * B[2*i + 1] - in[N - 2*i + 1] * B[2*i + 0];
}
#endif
}
p->plans[0]->transform(p->plans[0], buf, out);
}
ffts_plan_t *ffts_init_1d_real(size_t N, int sign) {
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
if(sign < 0) p->transform = &ffts_execute_1d_real;
else p->transform = &ffts_execute_1d_real_inv;
p->destroy = &ffts_free_1d_real;
p->N = N;
p->rank = 1;
p->plans = malloc(sizeof(ffts_plan_t **) * 1);
p->plans[0] = ffts_init_1d(N/2, sign);
p->buf = valloc(sizeof(float) * 2 * ((N/2) + 1));
p->A = valloc(sizeof(float) * N);
p->B = valloc(sizeof(float) * N);
if(sign < 0) {
int i;
for (i = 0; i < N/2; i++) {
p->A[2 * i] = 0.5 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
p->A[2 * i + 1] = 0.5 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
p->B[2 * i] = 0.5 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
p->B[2 * i + 1] = 0.5 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
}
}else{
int i;
for (i = 0; i < N/2; i++) {
p->A[2 * i] = 1.0 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
p->A[2 * i + 1] = 1.0 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
p->B[2 * i] = 1.0 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
p->B[2 * i + 1] = 1.0 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
}
}
return p;
p->plans[0]->transform(p->plans[0], buf, output);
}
FFTS_API ffts_plan_t*
ffts_init_1d_real(size_t N, int sign)
{
ffts_plan_t *p;
p = (ffts_plan_t*) calloc(1, sizeof(*p) + sizeof(*p->plans));
if (!p) {
return NULL;
}
if (sign < 0) {
p->transform = &ffts_execute_1d_real;
} else {
p->transform = &ffts_execute_1d_real_inv;
}
p->destroy = &ffts_free_1d_real;
p->N = N;
p->rank = 1;
p->plans = (ffts_plan_t**) &p[1];
p->plans[0] = ffts_init_1d(N/2, sign);
if (!p->plans[0]) {
goto cleanup;
}
p->buf = ffts_aligned_malloc(2 * ((N/2) + 1) * sizeof(float));
if (!p->buf) {
goto cleanup;
}
p->A = (float*) ffts_aligned_malloc(N * sizeof(float));
if (!p->A) {
goto cleanup;
}
p->B = (float*) ffts_aligned_malloc(N * sizeof(float));
if (!p->B) {
goto cleanup;
}
#ifdef HAVE_SSE3
ffts_generate_table_1d_real_32f(p, sign, 1);
#else
ffts_generate_table_1d_real_32f(p, sign, 0);
#endif
return p;
cleanup:
ffts_free_1d_real(p);
return NULL;
}
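Similarly, a caller-side sketch (not part of this commit) of the real transform set up above; spectrum() is a hypothetical wrapper and assumes N is even, since the plan runs a half-size complex transform internally:

#include "ffts.h"

int spectrum(const float *samples, float *bins, size_t N)
{
    /* sign < 0 selects ffts_execute_1d_real (the forward direction) */
    ffts_plan_t *p = ffts_init_1d_real(N, -1);
    if (!p) {
        return -1;
    }

    /* output is N/2 + 1 complex bins, i.e. N + 2 floats */
    ffts_execute(p, samples, bins);
    ffts_free(p);
    return 0;
}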

@ -1,53 +1,47 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_REAL_H__
#define __FFTS_REAL_H__
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
*/
#include "ffts.h"
#ifndef FFTS_REAL_H
#define FFTS_REAL_H
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
#include "ffts.h"
#include <stddef.h>
#endif
ffts_plan_t*
ffts_init_1d_real(size_t N, int sign);
#endif /* FFTS_REAL_H */

@ -1,177 +1,269 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_real_nd.h"
#include "ffts_real.h"
#include "ffts_internal.h"
#include "ffts_transpose.h"
#ifdef __ARM_NEON__
#include "neon.h"
#endif
void ffts_free_nd_real(ffts_plan_t *p) {
int i;
for(i=0;i<p->rank;i++) {
ffts_plan_t *x = p->plans[i];
int k;
for(k=i+1;k<p->rank;k++) {
if(x == p->plans[k]) p->plans[k] = NULL;
}
if(x) ffts_free(x);
}
free(p->Ns);
free(p->Ms);
free(p->plans);
free(p->buf);
free(p->transpose_buf);
free(p);
}
static void
ffts_free_nd_real(ffts_plan_t *p)
{
if (p->plans) {
int i, j;
for (i = 0; i < p->rank; i++) {
ffts_plan_t *plan = p->plans[i];
if (plan) {
for (j = 0; j < i; j++) {
if (p->Ns[i] == p->Ns[j]) {
plan = NULL;
break;
}
}
if (plan) {
ffts_free(plan);
}
}
}
void ffts_scalar_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
free(p->plans);
}
size_t i,j;
for(i=0;i<w;i+=1) {
for(j=0;j<h;j+=1) {
out[i*h + j] = in[j*w + i];
}
}
if (p->buf) {
ffts_aligned_free(p->buf);
}
if (p->Ns) {
free(p->Ns);
}
if (p->Ms) {
free(p->Ms);
}
free(p);
}
void ffts_execute_nd_real(ffts_plan_t *p, const void * in, void * out) {
static void
ffts_execute_nd_real(ffts_plan_t *p, const void *in, void *out)
{
const size_t Ms0 = p->Ms[0];
const size_t Ns0 = p->Ns[0];
uint32_t *din = (uint32_t*) in;
uint64_t *buf = p->buf;
uint64_t *dout = (uint64_t*) out;
ffts_plan_t *plan;
int i;
size_t j;
plan = p->plans[0];
for (j = 0; j < Ns0; j++) {
plan->transform(plan, din + (j * Ms0), buf + (j * (Ms0 / 2 + 1)));
}
uint32_t *din = (uint32_t *)in;
uint64_t *buf = p->buf;
uint64_t *dout = (uint64_t *)out;
ffts_transpose(buf, dout, Ms0 / 2 + 1, Ns0);
size_t i,j;
for(i=0;i<p->Ns[0];i++) {
p->plans[0]->transform(p->plans[0], din + (i * p->Ms[0]), buf + (i * (p->Ms[0] / 2 + 1)));
}
ffts_scalar_transpose(buf, dout, p->Ms[0] / 2 + 1, p->Ns[0], p->transpose_buf);
for (i = 1; i < p->rank; i++) {
const size_t Ms = p->Ms[i];
const size_t Ns = p->Ns[i];
for(i=1;i<p->rank;i++) {
for(j=0;j<p->Ns[i];j++) {
p->plans[i]->transform(p->plans[i], dout + (j * p->Ms[i]), buf + (j * p->Ms[i]));
}
ffts_scalar_transpose(buf, dout, p->Ms[i], p->Ns[i], p->transpose_buf);
}
plan = p->plans[i];
for (j = 0; j < Ns; j++) {
plan->transform(plan, dout + (j * Ms), buf + (j * Ms));
}
ffts_transpose(buf, dout, Ms, Ns);
}
}
void ffts_execute_nd_real_inv(ffts_plan_t *p, const void * in, void * out) {
uint64_t *din = (uint64_t *)in;
uint64_t *buf = p->buf;
uint64_t *dout = (uint64_t *)out;
float *bufr = (float *)(p->buf);
float *doutr = (float *)out;
size_t i,j;
ffts_scalar_transpose(din, buf, p->Ms[0], p->Ns[0], p->transpose_buf);
for(i=0;i<p->Ms[0];i++) {
p->plans[0]->transform(p->plans[0], buf + (i * p->Ns[0]), dout + (i * p->Ns[0]));
}
ffts_scalar_transpose(dout, buf, p->Ns[0], p->Ms[0], p->transpose_buf);
for(j=0;j<p->Ms[1];j++) {
p->plans[1]->transform(p->plans[1], buf + (j * (p->Ms[0])), &doutr[j * p->Ns[1]]);
}
static void
ffts_execute_nd_real_inv(ffts_plan_t *p, const void *in, void *out)
{
const size_t Ms0 = p->Ms[0];
const size_t Ms1 = p->Ms[1];
const size_t Ns0 = p->Ns[0];
const size_t Ns1 = p->Ns[1];
uint64_t *din = (uint64_t*) in;
uint64_t *buf = p->buf;
uint64_t *buf2;
float *doutr = (float*) out;
ffts_plan_t *plan;
size_t vol;
int i;
size_t j;
vol = p->Ns[0];
for (i = 1; i < p->rank; i++) {
vol *= p->Ns[i];
}
buf2 = buf + vol;
ffts_transpose(din, buf, Ms0, Ns0);
plan = p->plans[0];
for (j = 0; j < Ms0; j++) {
plan->transform(plan, buf + (j * Ns0), buf2 + (j * Ns0));
}
ffts_transpose(buf2, buf, Ns0, Ms0);
plan = p->plans[1];
for (j = 0; j < Ms1; j++) {
plan->transform(plan, buf + (j * Ms0), &doutr[j * Ns1]);
}
}
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign) {
size_t vol = 1;
FFTS_API ffts_plan_t*
ffts_init_nd_real(int rank, size_t *Ns, int sign)
{
int i;
size_t vol = 1;
size_t bufsize;
ffts_plan_t *p;
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
p = (ffts_plan_t*) calloc(1, sizeof(*p));
if (!p) {
return NULL;
}
if(sign < 0) p->transform = &ffts_execute_nd_real;
else p->transform = &ffts_execute_nd_real_inv;
if (sign < 0) {
p->transform = &ffts_execute_nd_real;
} else {
p->transform = &ffts_execute_nd_real_inv;
}
p->destroy = &ffts_free_nd_real;
p->destroy = &ffts_free_nd_real;
p->rank = rank;
p->rank = rank;
p->Ns = malloc(sizeof(size_t) * rank);
p->Ms = malloc(sizeof(size_t) * rank);
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
int i;
for(i=0;i<rank;i++) {
p->Ns[i] = Ns[i];
vol *= Ns[i];
}
p->buf = valloc(sizeof(float) * 2 * vol);
p->Ms = (size_t*) malloc(rank * sizeof(*p->Ms));
if (!p->Ms) {
goto cleanup;
}
for(i=0;i<rank;i++) {
p->Ms[i] = vol / p->Ns[i];
p->plans[i] = NULL;
int k;
p->Ns = (size_t*) malloc(rank * sizeof(*p->Ns));
if (!p->Ns) {
goto cleanup;
}
if(sign < 0) {
for(k=1;k<i;k++) {
if(p->Ms[k] == p->Ms[i]) p->plans[i] = p->plans[k];
}
if(!i) p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
}else{
for(k=0;k<i;k++) {
if(p->Ns[k] == p->Ns[i]) p->plans[i] = p->plans[k];
}
if(i==rank-1) p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ns[i], sign);
}
}
if(sign < 0) {
for(i=1;i<rank;i++) {
p->Ns[i] = p->Ns[i] / 2 + 1;
}
}else{
for(i=0;i<rank-1;i++) {
p->Ms[i] = p->Ms[i] / 2 + 1;
}
}
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
return p;
for (i = 0; i < rank; i++) {
p->Ns[i] = Ns[i];
vol *= Ns[i];
}
/* there is probably a prettier way of doing this, but it works.. */
if (sign < 0) {
bufsize = 2 * vol;
} else {
bufsize = 2 * (Ns[0] * ((vol / Ns[0]) / 2 + 1) + vol);
}
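/* Note (added): as best I can tell from the execute paths above, the
   forward (sign < 0) case needs scratch for one complex value per input
   element (2 * vol floats), while the inverse case also needs the second
   region addressed as buf2 = buf + vol in ffts_execute_nd_real_inv,
   which is why its allocation is larger. */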
p->buf = ffts_aligned_malloc(bufsize * sizeof(float));
if (!p->buf) {
goto cleanup;
}
p->plans = (ffts_plan_t**) calloc(rank, sizeof(*p->plans));
if (!p->plans) {
goto cleanup;
}
for (i = 0; i < rank; i++) {
int k;
p->Ms[i] = vol / p->Ns[i];
if (sign < 0) {
if (!i) {
p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
} else {
for (k = 1; k < i; k++) {
if (p->Ms[k] == p->Ms[i]) {
p->plans[i] = p->plans[k];
break;
}
}
if (!p->plans[i]) {
p->plans[i] = ffts_init_1d(p->Ms[i], sign);
p->Ns[i] = p->Ns[i] / 2 + 1;
}
}
} else {
if (i == rank - 1) {
p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
} else {
for (k = 0; k < i; k++) {
if (p->Ns[k] == p->Ns[i]) {
p->plans[i] = p->plans[k];
break;
}
}
if (!p->plans[i]) {
p->plans[i] = ffts_init_1d(p->Ns[i], sign);
p->Ms[i] = p->Ms[i] / 2 + 1;
}
}
}
if (!p->plans[i]) {
goto cleanup;
}
}
return p;
cleanup:
ffts_free_nd_real(p);
return NULL;
}
FFTS_API ffts_plan_t*
ffts_init_2d_real(size_t N1, size_t N2, int sign)
{
size_t Ns[2];
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign) {
size_t Ns[2];
Ns[0] = N1;
Ns[1] = N2;
return ffts_init_nd_real(2, Ns, sign);
Ns[0] = N1;
Ns[1] = N2;
return ffts_init_nd_real(2, Ns, sign);
}
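/*
 * Usage sketch (added, illustrative only): driving the 2-D real transform
 * through the public API declared in ffts.h. Buffer sizes follow the usual
 * real-to-complex convention (roughly N1 * (N2/2 + 1) complex outputs for an
 * N1 x N2 real input); consult ffts.h and the plan code above for the
 * authoritative sizes and alignment requirements.
 *
 *   ffts_plan_t *p = ffts_init_2d_real(N1, N2, -1);  // negative sign = forward
 *   if (p) {
 *       ffts_execute(p, input_real, output_complex);
 *       ffts_free(p);
 *   }
 */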

@ -1,53 +1,50 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_REAL_ND_H__
#define __FFTS_REAL_ND_H__
#ifndef FFTS_REAL_ND_H
#define FFTS_REAL_ND_H
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts_nd.h"
#include "ffts_real.h"
#include "ffts.h"
#include <stddef.h>
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
ffts_plan_t*
ffts_init_nd_real(int rank, size_t *Ns, int sign);
#endif
ffts_plan_t*
ffts_init_2d_real(size_t N1, size_t N2, int sign);
#endif /* FFTS_REAL_ND_H */

@ -1,156 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts.h"
#include "macros.h"
#include <stdlib.h>
#define DEBUG(x)
#include "ffts_small.h"
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15;
float *LUT8 = p->ws;
L_4_4(0, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11);
L_2_4(0, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13);
K_N(0, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7);
K_N(0, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13);
S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24);
K_N(0, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15);
S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28);
}
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15;
float *LUT8 = p->ws;
L_4_4(1, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11);
L_2_4(1, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13);
K_N(1, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7);
K_N(1, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13);
S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24);
K_N(1, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15);
S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28);
}
void firstpass_8_f(ffts_plan_t *p, const void *in, void *out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
V r0_1, r2_3, r4_5, r6_7;
float *LUT8 = p->ws + p->ws_is[0];
L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12);
}
void firstpass_8_b(ffts_plan_t *p, const void *in, void *out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
V r0_1, r2_3, r4_5, r6_7;
float *LUT8 = p->ws + p->ws_is[0];
L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
S_4(r0_1,r2_3,r4_5,r6_7,dout+0,dout+4,dout+8,dout+12);
}
void firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
t0[0] = din[0]; t0[1] = din[1];
t1[0] = din[4]; t1[1] = din[5];
t2[0] = din[2]; t2[1] = din[3];
t3[0] = din[6]; t3[1] = din[7];
t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1];
t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1];
t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1];
t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1];
dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1];
dout[4] = t4[0] - t6[0]; dout[5] = t4[1] - t6[1];
dout[2] = t5[0] + t7[1]; dout[3] = t5[1] - t7[0];
dout[6] = t5[0] - t7[1]; dout[7] = t5[1] + t7[0];
}
void firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
t0[0] = din[0]; t0[1] = din[1];
t1[0] = din[4]; t1[1] = din[5];
t2[0] = din[2]; t2[1] = din[3];
t3[0] = din[6]; t3[1] = din[7];
t4[0] = t0[0] + t1[0]; t4[1] = t0[1] + t1[1];
t5[0] = t0[0] - t1[0]; t5[1] = t0[1] - t1[1];
t6[0] = t2[0] + t3[0]; t6[1] = t2[1] + t3[1];
t7[0] = t2[0] - t3[0]; t7[1] = t2[1] - t3[1];
dout[0] = t4[0] + t6[0]; dout[1] = t4[1] + t6[1];
dout[4] = t4[0] - t6[0]; dout[5] = t4[1] - t6[1];
dout[2] = t5[0] - t7[1]; dout[3] = t5[1] + t7[0];
dout[6] = t5[0] + t7[1]; dout[7] = t5[1] - t7[0];
}
void firstpass_2(ffts_plan_t *p, const void *in, void *out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
cdata_t t0, t1, r0,r1;
t0[0] = din[0]; t0[1] = din[1];
t1[0] = din[2]; t1[1] = din[3];
r0[0] = t0[0] + t1[0];
r0[1] = t0[1] + t1[1];
r1[0] = t0[0] - t1[0];
r1[1] = t0[1] - t1[1];
dout[0] = r0[0]; dout[1] = r0[1];
dout[2] = r1[0]; dout[3] = r1[1];
}

@ -1,13 +0,0 @@
#ifndef __FFTS_SMALL_H__
#define __FFTS_SMALL_H__
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out);
void firstpass_8_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_8_b(ffts_plan_t * p, const void * in, void * out);
void firstpass_4_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_4_b(ffts_plan_t * p, const void * in, void * out);
void firstpass_2(ffts_plan_t * p, const void * in, void * out);
#endif

File diff suppressed because it is too large

@ -1,46 +1,91 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_STATIC_H__
#define __FFTS_STATIC_H__
#ifndef FFTS_STATIC_H
#define FFTS_STATIC_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts.h"
#include "neon.h"
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) ;
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out);
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) ;
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out);
#endif
void
ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out);
void
ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out);
void
ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out);
#endif /* FFTS_STATIC_H */

@ -0,0 +1,194 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_transpose.h"
#include "ffts_internal.h"
#ifdef HAVE_NEON
#include "neon.h"
#include <arm_neon.h>
#elif HAVE_SSE2
#include <emmintrin.h>
#endif
#define TSIZE 8
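/* Note (added): on the SSE2 path below the matrix is walked in TSIZE x TSIZE
   tiles; each tile is transposed pair-wise with _mm_shuffle_pd into the
   aligned temporary and then streamed out row by row, so the scattered
   writes stay within one small block at a time. The generic fallback at the
   end performs the same out[i*h + j] = in[j*w + i] reordering with plain
   64-bit loads and stores. */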
void
ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
{
#ifdef HAVE_NEON
#if 0
neon_transpose4(in, out, w, h);
#else
neon_transpose8(in, out, w, h);
#endif
#elif HAVE_SSE2
uint64_t FFTS_ALIGN(64) tmp[TSIZE*TSIZE];
int tx, ty;
/* int x; */
int y;
int tw = w / TSIZE;
int th = h / TSIZE;
for (ty = 0; ty < th; ty++) {
for (tx = 0; tx < tw; tx++) {
uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
uint64_t *op0 = tmp; /* out + h*TSIZE*tx + ty*TSIZE; */
/* copy/transpose to tmp */
for (y = 0; y < TSIZE; y += 2) {
/* for (x=0;x<TSIZE;x+=2) {
op[x*TSIZE] = ip[x];
*/
__m128d q0 = _mm_load_pd((double*)(ip0 + 0*w));
__m128d q1 = _mm_load_pd((double*)(ip0 + 1*w));
__m128d q2 = _mm_load_pd((double*)(ip0 + 2*w));
__m128d q3 = _mm_load_pd((double*)(ip0 + 3*w));
__m128d q4 = _mm_load_pd((double*)(ip0 + 4*w));
__m128d q5 = _mm_load_pd((double*)(ip0 + 5*w));
__m128d q6 = _mm_load_pd((double*)(ip0 + 6*w));
__m128d q7 = _mm_load_pd((double*)(ip0 + 7*w));
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
__m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
__m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
__m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
__m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
__m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
__m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));
ip0 += 2;
/* _mm_store_pd((double *)(op0 + y*h + x), t0);
_mm_store_pd((double *)(op0 + y*h + x + h), t1);
*/
_mm_store_pd((double*)(op0 + 0 ), t0);
_mm_store_pd((double*)(op0 + 0 + TSIZE), t1);
_mm_store_pd((double*)(op0 + 2 ), t2);
_mm_store_pd((double*)(op0 + 2 + TSIZE), t3);
_mm_store_pd((double*)(op0 + 4 ), t4);
_mm_store_pd((double*)(op0 + 4 + TSIZE), t5);
_mm_store_pd((double*)(op0 + 6 ), t6);
_mm_store_pd((double*)(op0 + 6 + TSIZE), t7);
/* } */
op0 += 2*TSIZE;
}
op0 = out + h*tx*TSIZE + ty*TSIZE;
ip0 = tmp;
for (y = 0; y < TSIZE; y += 1) {
/* memcpy(op0, ip0, TSIZE * sizeof(*ip0)); */
__m128d q0 = _mm_load_pd((double*)(ip0 + 0));
__m128d q1 = _mm_load_pd((double*)(ip0 + 2));
__m128d q2 = _mm_load_pd((double*)(ip0 + 4));
__m128d q3 = _mm_load_pd((double*)(ip0 + 6));
_mm_store_pd((double*)(op0 + 0), q0);
_mm_store_pd((double*)(op0 + 2), q1);
_mm_store_pd((double*)(op0 + 4), q2);
_mm_store_pd((double*)(op0 + 6), q3);
op0 += h;
ip0 += TSIZE;
}
}
}
/*
size_t i,j;
for(i=0;i<w;i+=2) {
for(j=0;j<h;j+=2) {
// out[i*h + j] = in[j*w + i];
__m128d q0 = _mm_load_pd((double *)(in + j*w + i));
__m128d q1 = _mm_load_pd((double *)(in + j*w + i + w));
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
_mm_store_pd((double *)(out + i*h + j), t0);
_mm_store_pd((double *)(out + i*h + j + h), t1);
}
}
*/
#else
const int bw = 1;
const int bh = 8;
int i = 0, j = 0;
for (; i <= h - bh; i += bh) {
for (j = 0; j <= w - bw; j += bw) {
uint64_t const *ib = &in[w*i + j];
uint64_t *ob = &out[h*j + i];
uint64_t s_0_0 = ib[0*w + 0];
uint64_t s_1_0 = ib[1*w + 0];
uint64_t s_2_0 = ib[2*w + 0];
uint64_t s_3_0 = ib[3*w + 0];
uint64_t s_4_0 = ib[4*w + 0];
uint64_t s_5_0 = ib[5*w + 0];
uint64_t s_6_0 = ib[6*w + 0];
uint64_t s_7_0 = ib[7*w + 0];
ob[0*h + 0] = s_0_0;
ob[0*h + 1] = s_1_0;
ob[0*h + 2] = s_2_0;
ob[0*h + 3] = s_3_0;
ob[0*h + 4] = s_4_0;
ob[0*h + 5] = s_5_0;
ob[0*h + 6] = s_6_0;
ob[0*h + 7] = s_7_0;
}
}
if (i < h) {
int i1;
for (i1 = 0; i1 < w; i1++) {
for (j = i; j < h; j++) {
out[i1*h + j] = in[j*w + i1];
}
}
}
if (j < w) {
int j1;
for (i = j; i < w; i++) {
for (j1 = 0; j1 < h; j1++) {
out[i*h + j1] = in[j1*w + i];
}
}
}
#endif
}

@ -0,0 +1,46 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_TRANSPOSE_H
#define FFTS_TRANSPOSE_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts_internal.h"
void
ffts_transpose(uint64_t *in, uint64_t *out, int w, int h);
#endif /* FFTS_TRANSPOSE_H */

@ -0,0 +1,628 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_trig.h"
#include "ffts_dd.h"
/* 1/(2*cos(pow(2,-p)*pi)) */
static const FFTS_ALIGN(16) unsigned int half_secant[132] = {
0x00000000, 0x3fe00000, 0xc9be45de, 0x3be3bd3c,
0x00000000, 0x3fe00000, 0xc9be45de, 0x3c03bd3c,
0x00000000, 0x3fe00000, 0xc9be45de, 0x3c23bd3c,
0x00000000, 0x3fe00000, 0xc9be45de, 0x3c43bd3c,
0x00000000, 0x3fe00000, 0xc9be45de, 0x3c63bd3c,
0x00000000, 0x3fe00000, 0xc9be45df, 0x3c83bd3c,
0x00000001, 0x3fe00000, 0x4df22efd, 0x3c7de9e6,
0x00000005, 0x3fe00000, 0x906e8725, 0xbc60b0cd,
0x00000014, 0x3fe00000, 0x906e8357, 0xbc80b0cd,
0x0000004f, 0x3fe00000, 0x0dce83c9, 0xbc5619b2,
0x0000013c, 0x3fe00000, 0x0dc6e79a, 0xbc7619b2,
0x000004ef, 0x3fe00000, 0xe4af1240, 0x3c83cc9b,
0x000013bd, 0x3fe00000, 0x2d14c08a, 0x3c7e64df,
0x00004ef5, 0x3fe00000, 0x47a85465, 0xbc59b20b,
0x00013bd4, 0x3fe00000, 0xab79c897, 0xbc79b203,
0x0004ef4f, 0x3fe00000, 0x15019a96, 0x3c79386b,
0x0013bd3d, 0x3fe00000, 0x7d6dbf4b, 0xbc7b16b7,
0x004ef4f3, 0x3fe00000, 0xf30832e0, 0x3c741ee4,
0x013bd3cd, 0x3fe00000, 0xd3bcd4bb, 0xbc83f41e,
0x04ef4f34, 0x3fe00000, 0xdd75aebb, 0xbc82ef06,
0x13bd3cde, 0x3fe00000, 0xb2b41b3d, 0x3c52d979,
0x4ef4f46c, 0x3fe00000, 0x4f0fb458, 0xbc851db3,
0x3bd3e0e7, 0x3fe00001, 0x8a0ce3f0, 0x3c58dbab,
0xef507722, 0x3fe00004, 0x2a8ec295, 0x3c83e351,
0xbd5114f9, 0x3fe00013, 0xc4c0d92d, 0x3c8b3ca4,
0xf637de7d, 0x3fe0004e, 0xb74de729, 0x3c45974e,
0xe8190891, 0x3fe0013b, 0x26edf4da, 0xbc814c20,
0x9436640e, 0x3fe004f0, 0xe2b34b50, 0x3c8091ab,
0x9c61d971, 0x3fe013d1, 0x6ce01b8e, 0x3c7f7df7,
0xd17cba53, 0x3fe0503e, 0x74ad7633, 0xbc697609,
0x7bdb3895, 0x3fe1517a, 0x82f9091b, 0xbc8008d1,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000
};
/* cos(pow(2,-p)*pi), sin(pow(2,-p)*pi) */
static const FFTS_ALIGN(16) unsigned int cos_sin_pi_table[264] = {
0x00000000, 0x3ff00000, 0x54442d18, 0x3df921fb,
0xc9be45de, 0xbbf3bd3c, 0xbb77974f, 0x3a91a390,
0x00000000, 0x3ff00000, 0x54442d18, 0x3e0921fb,
0xc9be45de, 0xbc13bd3c, 0x54a14928, 0x3aa19bd0,
0x00000000, 0x3ff00000, 0x54442d18, 0x3e1921fb,
0xc9be45de, 0xbc33bd3c, 0xb948108a, 0x3ab17cce,
0x00000000, 0x3ff00000, 0x54442d18, 0x3e2921fb,
0xc9be45de, 0xbc53bd3c, 0x4be32e14, 0x3ac100c8,
0x00000000, 0x3ff00000, 0x54442d18, 0x3e3921fb,
0xc9be45de, 0xbc73bd3c, 0x2c9f4879, 0x3ace215d,
0xffffffff, 0x3fefffff, 0x54442d18, 0x3e4921fb,
0x6c837443, 0x3c888586, 0x0005f376, 0x3acd411f,
0xfffffffe, 0x3fefffff, 0x54442d18, 0x3e5921fb,
0x4df22ef1, 0xbc8de9e6, 0x9937209e, 0xbaf7b153,
0xfffffff6, 0x3fefffff, 0x54442d16, 0x3e6921fb,
0x906e88aa, 0x3c70b0cd, 0xfe19968a, 0xbb03b7c0,
0xffffffd9, 0x3fefffff, 0x54442d0e, 0x3e7921fb,
0xdf22ed26, 0xbc8e9e64, 0x8d1b6ffb, 0xbaee8bb4,
0xffffff62, 0x3fefffff, 0x54442cef, 0x3e8921fb,
0x0dd18f0f, 0x3c6619b2, 0x7f2b20fb, 0xbb00e133,
0xfffffd88, 0x3fefffff, 0x54442c73, 0x3e9921fb,
0x0dd314b2, 0x3c8619b2, 0x619fdf6e, 0xbb174e98,
0xfffff621, 0x3fefffff, 0x54442a83, 0x3ea921fb,
0x3764acf5, 0x3c8866c8, 0xf5b2407f, 0xbb388215,
0xffffd886, 0x3fefffff, 0x544422c2, 0x3eb921fb,
0x20e7a944, 0xbc8e64df, 0x7b9b9f23, 0x3b5a0961,
0xffff6216, 0x3fefffff, 0x544403c1, 0x3ec921fb,
0x52ee25ea, 0x3c69b20e, 0x4df6a86a, 0xbb5999d9,
0xfffd8858, 0x3fefffff, 0x544387ba, 0x3ed921fb,
0xd8910ead, 0x3c89b20f, 0x0809d04d, 0x3b77d9db,
0xfff62162, 0x3fefffff, 0x544197a1, 0x3ee921fb,
0x438d3925, 0xbc8937a8, 0xa5d27f7a, 0xbb858b02,
0xffd88586, 0x3fefffff, 0x5439d73a, 0x3ef921fb,
0x94b3ddd2, 0x3c8b22e4, 0xf8a3b73d, 0xbb863c7f,
0xff62161a, 0x3fefffff, 0x541ad59e, 0x3f0921fb,
0x7ea469b2, 0xbc835c13, 0xb8cee262, 0x3bae9860,
0xfd885867, 0x3fefffff, 0x539ecf31, 0x3f1921fb,
0x23a32e63, 0xbc77d556, 0xfcd23a30, 0x3b96b111,
0xf621619c, 0x3fefffff, 0x51aeb57c, 0x3f2921fb,
0xbbbd8fe6, 0xbc87507d, 0x4916c435, 0xbbca6e1d,
0xd8858675, 0x3fefffff, 0x49ee4ea6, 0x3f3921fb,
0x54748eab, 0xbc879f0e, 0x744a453e, 0x3bde894d,
0x62161a34, 0x3fefffff, 0x2aecb360, 0x3f4921fb,
0xb1f9b9c4, 0xbc6136dc, 0x7e566b4c, 0x3be87615,
0x88586ee6, 0x3feffffd, 0xaee6472e, 0x3f5921fa,
0xf173ae5b, 0x3c81af64, 0x284a9df8, 0xbbfee52e,
0x21621d02, 0x3feffff6, 0xbecca4ba, 0x3f6921f8,
0xebc82813, 0xbc76acfc, 0x7bcab5b2, 0x3c02ba40,
0x858e8a92, 0x3fefffd8, 0xfe670071, 0x3f7921f0,
0x1883bcf7, 0x3c8359c7, 0xfe6b7a9b, 0x3bfab967,
0x169b92db, 0x3fefff62, 0xfcdec784, 0x3f8921d1,
0xc81fbd0d, 0x3c85dda3, 0xbe836d9d, 0x3c29878e,
0x6084cd0d, 0x3feffd88, 0xf7a3667e, 0x3f992155,
0x4556e4cb, 0xbc81354d, 0x091a0130, 0xbbfb1d63,
0xe3796d7e, 0x3feff621, 0xf10dd814, 0x3fa91f65,
0x2e24aa15, 0xbc6c57bc, 0x0d569a90, 0xbc2912bd,
0xa3d12526, 0x3fefd88d, 0xbc29b42c, 0x3fb917a6,
0x378811c7, 0xbc887df6, 0xd26ed688, 0xbc3e2718,
0xcff75cb0, 0x3fef6297, 0x3c69a60b, 0x3fc8f8b8,
0x2a361fd3, 0x3c756217, 0xb9ff8d82, 0xbc626d19,
0xcf328d46, 0x3fed906b, 0xa6aea963, 0x3fd87de2,
0x10231ac2, 0x3c7457e6, 0xd3d5a610, 0xbc672ced,
0x667f3bcd, 0x3fe6a09e, 0x667f3bcd, 0x3fe6a09e,
0x13b26456, 0xbc8bdd34, 0x13b26456, 0xbc8bdd34,
0x00000000, 0x00000000, 0x00000000, 0x3ff00000,
0x00000000, 0x00000000, 0x00000000, 0x00000000
};
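/* Note (added): both tables above store IEEE-754 doubles as little-endian
   uint32 pairs (low word first). half_secant[] holds double-double pairs
   (high part, then low-order correction) of 1/(2*cos(2^-p * pi)), and each
   row of cos_sin_pi_table[] packs cos and sin of 2^-p * pi the same way:
   { cos_hi, sin_hi, cos_lo, sin_lo }. The pow2 generators below read either
   just the high parts (32f) or the full double-double values (64f). */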
int
ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size)
{
double alpha, beta;
double c[2], s[2];
double x, z;
int i;
if (!table || !table_size) {
return -1;
}
/* the first */
table[0][0] = 1.0f;
table[0][1] = -0.0f;
if (FFTS_UNLIKELY(table_size == 1)) {
goto exit;
}
if (FFTS_UNLIKELY(table_size == 2)) {
/* skip over */
i = 1;
goto mid_point;
}
/* polynomial approximations calculated using Sollya */
x = 1.0 / table_size;
z = x * x;
/* alpha = 2 * sin(M_PI_4 / m) * sin(M_PI_4 / m) */
alpha = x * (1.1107207345394952717884501203293686870741139540138 +
z * (-0.114191397993514079911985272577099412137126013186879 +
z * 3.52164670852685621720746817665316575239342815885835e-3));
alpha = alpha * alpha;
/* beta = sin(M_PI_2 / m) */
beta = x * (1.57079632679489455959753740899031981825828552246094 +
z * (-0.64596409735041482313988581154262647032737731933593 +
z * 7.9690915468332887416913479228242067620158195495605e-2));
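/* Note (added): with d = pi/(2*table_size), the loop below advances the
   angle one step at a time using the numerically stable form of the
   angle-addition identities,
       cos(t + d) = cos(t) - (alpha*cos(t) + beta*sin(t))
       sin(t + d) = sin(t) - (alpha*sin(t) - beta*cos(t))
   where alpha = 2*sin(d/2)^2 and beta = sin(d) are what the two polynomials
   above approximate. */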
/* cos(0) = 1.0, sin(0) = 0.0 */
c[0] = 1.0;
s[0] = 0.0;
/* generate sine and cosine tables with maximum error less than 1 ULP */
for (i = 1; i < (table_size + 1)/2; i++) {
c[1] = c[0] - ((alpha * c[0]) + (beta * s[0]));
s[1] = s[0] - ((alpha * s[0]) - (beta * c[0]));
table[i + 0][0] = (float) c[1];
table[i + 0][1] = (float) -s[1];
table[table_size - i][0] = (float) s[1];
table[table_size - i][1] = (float) -c[1];
c[0] = c[1];
s[0] = s[1];
}
if (FFTS_UNLIKELY(table_size & 1)) {
goto exit;
}
mid_point:
table[i][0] = 0.70710677f;
table[i][1] = -0.70710677f;
exit:
return 0;
}
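/*
 * Usage sketch (added, illustrative only): filling a small forward twiddle
 * table; ffts_cpx_32f is the two-float complex type used throughout.
 *
 *   ffts_cpx_32f table[16];
 *   if (ffts_generate_cosine_sine_32f(table, 16) == 0) {
 *       float re = table[1][0];   // ~cos(pi/32)
 *       float im = table[1][1];   // ~-sin(pi/32)
 *   }
 */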
/* Oscar Buneman's method for generating a sequence of sines and cosines.
* Expired US Patent 4,878,187 A
*
* D. Potts, G. Steidl, M. Tasche, Numerical stability of fast
* trigonometric transforms - a worst case study,
* J. Concrete Appl. Math. 1 (2003) 1-36
*
* O. Buneman, Stable online creation of sines and cosines of
* successive angles, Proc. IEEE 75, 1434-1435 (1987).
*/
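/* Note (added): in the generators below, w[k] supplies the value stored for
   every index i whose trailing-zero count is k; after it is consumed it is
   refreshed from its neighbours as
       w[k] = h[k] * (w[k+1] + w[next])
   where h[k] is the matching 1/(2*cos(...)) entry from half_secant[] and
   "next" is located with ffts_ctzl() on the remaining high bits of i. This
   is the Buneman update referenced above. */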
#if HAVE_SSE2
int
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
{
static const __m128d sign_swap = { 0.0, -0.0 };
const __m128d *FFTS_RESTRICT ct;
const double *FFTS_RESTRICT hs;
__m128d FFTS_ALIGN(16) w[32];
__m128d FFTS_ALIGN(16) h[32];
int i, log_2, offset;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
return -1;
}
/* the first */
table[0][0] = 1.0f;
table[0][1] = -0.0f;
if (FFTS_UNLIKELY(table_size == 1)) {
goto exit;
}
if (FFTS_UNLIKELY(table_size == 2)) {
/* skip over */
i = 1;
goto mid_point;
}
/* calculate table offset */
FFTS_ASSUME(table_size/2 > 1);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
ct = (const __m128d*)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i] = ct[2*i];
/* duplicate the high part */
h[i] = _mm_set1_pd(hs[2*i]);
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
for (i = 1; i < table_size/2; i++) {
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
/* note that storing is not 16 byte aligned */
_mm_storel_pi((__m64*) &table[i + 0],
_mm_cvtpd_ps(_mm_or_pd(w[log_2], sign_swap)));
_mm_storel_pi((__m64*) &table[table_size - i], _mm_cvtpd_ps(
_mm_or_pd(_mm_shuffle_pd(w[log_2], w[log_2], 1), sign_swap)));
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
w[log_2] = _mm_mul_pd(h[log_2], _mm_add_pd(w[log_2 + 1], w[offset]));
}
mid_point:
table[i][0] = 0.70710677f;
table[i][1] = -0.70710677f;
exit:
return 0;
}
int
ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
{
static const __m128d sign_swap = { 0.0, -0.0 };
const struct ffts_dd2_t *FFTS_RESTRICT ct;
const double *FFTS_RESTRICT hs;
struct ffts_dd2_t FFTS_ALIGN(16) w[32];
struct ffts_dd2_t FFTS_ALIGN(16) h[32];
struct ffts_dd2_t FFTS_ALIGN(16) sum;
int i, log_2, offset;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
return -1;
}
/* the first */
table[0][0] = 1.0;
table[0][1] = -0.0;
if (FFTS_UNLIKELY(table_size == 1)) {
goto exit;
}
if (FFTS_UNLIKELY(table_size == 2)) {
/* skip over */
i = 1;
goto mid_point;
}
/* calculate table offset */
FFTS_ASSUME(table_size/2 > 1);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
ct = (const struct ffts_dd2_t*)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i] = ct[i];
/* duplicate the high and low parts */
h[i].hi = _mm_set1_pd(hs[2*i + 0]);
h[i].lo = _mm_set1_pd(hs[2*i + 1]);
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
for (i = 1; i < table_size/2; i++) {
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
/* result of ffts_dd_mul_dd is normalized */
_mm_store_pd((double*) &table[i + 0],
_mm_or_pd(w[log_2].hi, sign_swap));
_mm_store_pd((double*) &table[table_size - i],
_mm_or_pd(_mm_shuffle_pd(w[log_2].hi, w[log_2].hi, 1), sign_swap));
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
sum = ffts_dd2_add_dd2_unnormalized(&w[log_2 + 1], &w[offset]);
w[log_2] = ffts_dd2_mul_dd2(&h[log_2], &sum);
}
mid_point:
table[i][0] = 0.707106781186547524;
table[i][1] = -0.707106781186547524;
exit:
return 0;
}
#else
int
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size)
{
const ffts_cpx_64f *FFTS_RESTRICT ct;
const double *FFTS_RESTRICT hs;
ffts_cpx_64f FFTS_ALIGN(16) w[32];
int i, log_2, offset;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
return -1;
}
/* the first */
table[0][0] = 1.0f;
table[0][1] = -0.0f;
if (FFTS_UNLIKELY(table_size == 1)) {
goto exit;
}
if (FFTS_UNLIKELY(table_size == 2)) {
/* skip over */
i = 1;
goto mid_point;
}
/* calculate table offset */
FFTS_ASSUME(table_size/2 > 1);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
ct = (const ffts_cpx_64f*)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i][0] = ct[2*i][0];
w[i][1] = ct[2*i][1];
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
for (i = 1; i < table_size/2; i++) {
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
table[i + 0][0] = (float) w[log_2][0];
table[i + 0][1] = (float) -w[log_2][1];
table[table_size - i][0] = (float) w[log_2][1];
table[table_size - i][1] = (float) -w[log_2][0];
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
mid_point:
table[i][0] = 0.70710677f;
table[i][1] = -0.70710677f;
exit:
return 0;
}
int
ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size)
{
const struct ffts_dd_t *FFTS_RESTRICT ct;
const struct ffts_dd_t *FFTS_RESTRICT hs;
struct ffts_dd_t FFTS_ALIGN(16) w[32][2];
int i, log_2, offset;
/* size must be a power of two */
if (!table || !table_size || (table_size & (table_size - 1))) {
return -1;
}
/* the first */
table[0][0] = 1.0;
table[0][1] = -0.0;
if (FFTS_UNLIKELY(table_size == 1)) {
goto exit;
}
if (FFTS_UNLIKELY(table_size == 2)) {
/* skip over */
i = 1;
goto mid_point;
}
/* calculate table offset */
FFTS_ASSUME(table_size/2 > 1);
log_2 = ffts_ctzl(table_size);
FFTS_ASSUME(log_2 > 1);
offset = 32 - log_2;
ct = (const struct ffts_dd_t*)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
hs = (const struct ffts_dd_t*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i][0].hi = ct[2*i + 0].hi;
w[i][0].lo = ct[2*i + 1].hi;
w[i][1].hi = ct[2*i + 0].lo;
w[i][1].lo = ct[2*i + 1].lo;
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
for (i = 1; i < table_size/2; i++) {
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
/* result of ffts_dd_mul_dd is normalized */
table[i + 0][0] = w[log_2][0].hi;
table[i + 0][1] = -w[log_2][1].hi;
table[table_size - i][0] = w[log_2][1].hi;
table[table_size - i][1] = -w[log_2][0].hi;
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
w[log_2][0] = ffts_dd_mul_dd(hs[log_2],
ffts_dd_add_dd_unnormalized(w[log_2 + 1][0], w[offset][0]));
w[log_2][1] = ffts_dd_mul_dd(hs[log_2],
ffts_dd_add_dd_unnormalized(w[log_2 + 1][1], w[offset][1]));
}
mid_point:
table[i][0] = 0.707106781186547524;
table[i][1] = -0.707106781186547524;
exit:
return 0;
}
#endif
int
ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
int sign,
int invert)
{
const ffts_cpx_64f *FFTS_RESTRICT ct;
const double *FFTS_RESTRICT hs;
ffts_cpx_64f FFTS_ALIGN(16) w[32];
int i, log_2, offset, N;
float *A, *B;
if (!p) {
return -1;
}
A = (float*) FFTS_ASSUME_ALIGNED_32(p->A);
B = (float*) FFTS_ASSUME_ALIGNED_32(p->B);
N = (int) p->N;
/* the first */
if (sign < 0) {
A[0] = 0.5f;
A[1] = -0.5f;
B[0] = invert ? -0.5f : 0.5f;
B[1] = 0.5f;
} else {
/* peel off the first */
A[0] = 1.0f;
A[1] = invert ? 1.0f : -1.0f;
B[0] = 1.0f;
B[1] = 1.0f;
}
if (FFTS_UNLIKELY(N == 4)) {
i = 1;
goto last;
}
/* calculate table offset */
FFTS_ASSUME(N / 4 > 1);
log_2 = ffts_ctzl(N);
FFTS_ASSUME(log_2 > 2);
offset = 34 - log_2;
ct = (const ffts_cpx_64f*)
FFTS_ASSUME_ALIGNED_32(&cos_sin_pi_table[8 * offset]);
hs = (const double*) &half_secant[4 * offset];
/* initialize from lookup table */
for (i = 0; i <= log_2; i++) {
w[i][0] = ct[2*i][0];
w[i][1] = ct[2*i][1];
}
/* generate sine and cosine tables with maximum error less than 0.5 ULP */
if (sign < 0) {
for (i = 1; i < N/4; i++) {
float t0, t1, t2;
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
t0 = (float) (0.5 * (1.0 - w[log_2][1]));
t1 = (float) (0.5 * w[log_2][0]);
t2 = (float) (0.5 * (1.0 + w[log_2][1]));
A[ 2 * i + 0] = t0;
A[N - 2 * i + 0] = t0;
A[ 2 * i + 1] = -t1;
A[N - 2 * i + 1] = t1;
B[ 2 * i + 0] = invert ? -t2 : t2;
B[N - 2 * i + 0] = invert ? -t2 : t2;
B[ 2 * i + 1] = t1;
B[N - 2 * i + 1] = -t1;
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
} else {
for (i = 1; i < N/4; i++) {
float t0, t1, t2;
/* calculate trailing zeros in index */
log_2 = ffts_ctzl(i);
t0 = (float) (1.0 - w[log_2][1]);
t1 = (float) w[log_2][0];
t2 = (float) (1.0 + w[log_2][1]);
A[ 2 * i + 0] = t0;
A[N - 2 * i + 0] = t0;
A[ 2 * i + 1] = invert ? t1 : -t1;
A[N - 2 * i + 1] = invert ? -t1 : t1;
B[ 2 * i + 0] = t2;
B[N - 2 * i + 0] = t2;
B[ 2 * i + 1] = t1;
B[N - 2 * i + 1] = -t1;
/* skip and find next trailing zero */
offset = (log_2 + 2 + ffts_ctzl(~i >> (log_2 + 2)));
w[log_2][0] = hs[2 * log_2] * (w[log_2 + 1][0] + w[offset][0]);
w[log_2][1] = hs[2 * log_2] * (w[log_2 + 1][1] + w[offset][1]);
}
}
last:
if (sign < 0) {
A[2 * i + 0] = 0.0f;
A[2 * i + 1] = 0.0f;
B[2 * i + 0] = invert ? -1.0f : 1.0f;
B[2 * i + 1] = 0.0f;
} else {
A[2 * i + 0] = 0.0f;
A[2 * i + 1] = 0.0f;
B[2 * i + 0] = 2.0f;
B[2 * i + 1] = 0.0f;
}
return 0;
}

@ -0,0 +1,56 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_TRIG_H
#define FFTS_TRIG_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts_internal.h"
int
ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size);
int
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size);
int
ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size);
int
ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
int sign,
int invert);
#endif /* FFTS_TRIG_H */

@ -1,206 +1,264 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef __MACROS_ALPHA_H__
#define __MACROS_ALPHA_H__
*/
#include <math.h>
#ifndef FFTS_MACROS_ALPHA_H
#define FFTS_MACROS_ALPHA_H
#ifdef __alpha__
#define restrict
typedef struct {float r1, i1, r2, i2;} V;
#define FFTS_MALLOC(d,a) malloc(d)
#define FFTS_FREE(d) free(d)
#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3})
static inline V VADD(V x, V y)
{
V z;
z.r1 = x.r1 + y.r1;
z.i1 = x.i1 + y.i1;
z.r2 = x.r2 + y.r2;
z.i2 = x.i2 + y.i2;
return z;
}
static inline V VSUB(V x, V y)
{
V z;
z.r1 = x.r1 - y.r1;
z.i1 = x.i1 - y.i1;
z.r2 = x.r2 - y.r2;
z.i2 = x.i2 - y.i2;
return z;
}
static inline V VMUL(V x, V y)
{
V z;
z.r1 = x.r1 * y.r1;
z.i1 = x.i1 * y.i1;
z.r2 = x.r2 * y.r2;
z.i2 = x.i2 * y.i2;
return z;
}
static inline V VXOR(V x, V y)
{
V r;
r.r1 = (uint32_t)x.r1 ^ (uint32_t)y.r1;
r.i1 = (uint32_t)x.i1 ^ (uint32_t)y.i1;
r.r2 = (uint32_t)x.r2 ^ (uint32_t)y.r2;
r.i2 = (uint32_t)x.i2 ^ (uint32_t)y.i2;
return r;
}
static inline V VSWAPPAIRS(V x)
{
V z;
z.r1 = x.i1;
z.i1 = x.r1;
z.r2 = x.i2;
z.i2 = x.r2;
return z;
}
static inline V VBLEND(V x, V y)
{
V z;
z.r1 = x.r1;
z.i1 = x.i1;
z.r2 = y.r2;
z.i2 = y.i2;
return z;
}
static inline V VUNPACKHI(V x, V y)
{
V z;
z.r1 = x.r2;
z.i1 = x.i2;
z.r2 = y.r2;
z.i2 = y.i2;
return z;
}
static inline V VUNPACKLO(V x, V y)
{
V z;
z.r1 = x.r1;
z.i1 = x.i1;
z.r2 = y.r1;
z.i2 = y.i1;
return z;
}
static inline V VDUPRE(V x)
{
V z;
z.r1 = x.r1;
z.i1 = x.r1;
z.r2 = x.r2;
z.i2 = x.r2;
return z;
}
static inline V VDUPIM(V x)
{
V z;
z.r1 = x.i1;
z.i1 = x.i1;
z.r2 = x.i2;
z.i2 = x.i2;
return z;
}
static inline V IMUL(V d, V re, V im)
{
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VSUB(re, im);
}
static inline V IMULJ(V d, V re, V im)
{
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VADD(re, im);
}
static inline V MULI(int inv, V x)
{
V z;
if (inv) {
z.r1 = -x.r1;
z.i1 = x.i1;
z.r2 = -x.r2;
z.i2 = x.i2;
} else {
z.r1 = x.r1;
z.i1 = -x.i1;
z.r2 = x.r2;
z.i2 = -x.i2;
}
return z;
}
static inline V IMULI(int inv, V x)
{
return VSWAPPAIRS(MULI(inv, x));
}
static inline V VLD(const void *s)
{
V *d = (V *)s;
return *d;
}
static inline void VST(void *d, V s)
{
V *r = (V *)d;
*r = s;
}
#endif
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include "ffts_attributes.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
typedef union {
struct {
float r1;
float i1;
float r2;
float i2;
} r;
uint32_t u[4];
} V4SF;
#define FFTS_MALLOC(d,a) (malloc(d))
#define FFTS_FREE(d) (free(d))
static FFTS_ALWAYS_INLINE V4SF
V4SF_LIT4(float f3, float f2, float f1, float f0)
{
V4SF z;
z.r.r1 = f0;
z.r.i1 = f1;
z.r.r2 = f2;
z.r.i2 = f3;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_ADD(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r1 + y.r.r1;
z.r.i1 = x.r.i1 + y.r.i1;
z.r.r2 = x.r.r2 + y.r.r2;
z.r.i2 = x.r.i2 + y.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_SUB(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r1 - y.r.r1;
z.r.i1 = x.r.i1 - y.r.i1;
z.r.r2 = x.r.r2 - y.r.r2;
z.r.i2 = x.r.i2 - y.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_MUL(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r1 * y.r.r1;
z.r.i1 = x.r.i1 * y.r.i1;
z.r.r2 = x.r.r2 * y.r.r2;
z.r.i2 = x.r.i2 * y.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_XOR(V4SF x, V4SF y)
{
V4SF z;
z.u[0] = x.u[0] ^ y.u[0];
z.u[1] = x.u[1] ^ y.u[1];
z.u[2] = x.u[2] ^ y.u[2];
z.u[3] = x.u[3] ^ y.u[3];
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_SWAP_PAIRS(V4SF x)
{
V4SF z;
z.r.r1 = x.r.i1;
z.r.i1 = x.r.r1;
z.r.r2 = x.r.i2;
z.r.i2 = x.r.r2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_BLEND(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r1;
z.r.i1 = x.r.i1;
z.r.r2 = y.r.r2;
z.r.i2 = y.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_UNPACK_HI(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r2;
z.r.i1 = x.r.i2;
z.r.r2 = y.r.r2;
z.r.i2 = y.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_UNPACK_LO(V4SF x, V4SF y)
{
V4SF z;
z.r.r1 = x.r.r1;
z.r.i1 = x.r.i1;
z.r.r2 = y.r.r1;
z.r.i2 = y.r.i1;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_DUPLICATE_RE(V4SF x)
{
V4SF z;
z.r.r1 = x.r.r1;
z.r.i1 = x.r.r1;
z.r.r2 = x.r.r2;
z.r.i2 = x.r.r2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_DUPLICATE_IM(V4SF x)
{
V4SF z;
z.r.r1 = x.r.i1;
z.r.i1 = x.r.i1;
z.r.r2 = x.r.i2;
z.r.i2 = x.r.i2;
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_SUB(re, im);
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_ADD(re, im);
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_MULI(int inv, V4SF x)
{
V4SF z;
if (inv) {
z.r.r1 = -x.r.r1;
z.r.i1 = x.r.i1;
z.r.r2 = -x.r.r2;
z.r.i2 = x.r.i2;
} else {
z.r.r1 = x.r.r1;
z.r.i1 = -x.r.i1;
z.r.r2 = x.r.r2;
z.r.i2 = -x.r.i2;
}
return z;
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULI(int inv, V4SF x)
{
return V4SF_SWAP_PAIRS(V4SF_MULI(inv, x));
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_LD(const void *s)
{
V4SF z;
memcpy(&z, s, sizeof(z));
return z;
}
static FFTS_ALWAYS_INLINE void
V4SF_ST(void *d, V4SF s)
{
V4SF *r = (V4SF*) d;
*r = s;
}
#endif /* FFTS_MACROS_ALPHA_H */
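Both layouts above pack two complex floats per vector as {r1, i1, r2, i2}, and V4SF_LIT4 deliberately takes its arguments most-significant-lane first, so V4SF_LIT4(f3, f2, f1, f0) stores {f0, f1, f2, f3} in memory; that keeps it in line with _mm_set_ps on the SSE path. A minimal, self-contained sketch of that convention (plain C, not part of the FFTS sources; the struct and names are illustrative only):

#include <stdio.h>

/* Illustrative stand-in for the V4SF union: two complex floats per vector. */
typedef struct { float r1, i1, r2, i2; } vec4;

/* Mirrors V4SF_LIT4: arguments given high lane first, stored low lane first. */
static vec4 lit4(float f3, float f2, float f1, float f0)
{
    vec4 z = { f0, f1, f2, f3 };
    return z;
}

int main(void)
{
    vec4 v = lit4(4.0f, 3.0f, 2.0f, 1.0f);
    /* Prints 1 2 3 4: the last argument lands in the first (real) lane. */
    printf("%g %g %g %g\n", v.r1, v.i1, v.r2, v.i2);
    return 0;
}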

@ -135,3 +135,4 @@ static inline void VST(void *d, V s)
*r = s;
}
#endif
// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:

@ -1,96 +1,119 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_NEON_H__
#define __MACROS_NEON_H__
#include "neon.h"
typedef float32x4_t V;
typedef float32x4x2_t VS;
#define ADD vaddq_f32
#define SUB vsubq_f32
#define MUL vmulq_f32
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
#define VXOR(x,y) (vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))
#define VST vst1q_f32
#define VLD vld1q_f32
#define VST2 vst2q_f32
#define VLD2 vld2q_f32
#define VSWAPPAIRS(x) (vrev64q_f32(x))
#define VUNPACKHI(a,b) (vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
#define VUNPACKLO(a,b) (vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
#define VBLEND(x,y) (vcombine_f32(vget_low_f32(x), vget_high_f32(y)))
__INLINE V VLIT4(data_t f3, data_t f2, data_t f1, data_t f0) {
data_t __attribute__ ((aligned(16))) d[4] = {f0, f1, f2, f3};
return VLD(d);
}
#define VDUPRE(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
#define VDUPIM(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
#define FFTS_MALLOC(d,a) (valloc(d))
#define FFTS_FREE(d) (free(d))
__INLINE void STORESPR(data_t * addr, VS p) {
vst1q_f32(addr, p.val[0]);
vst1q_f32(addr + 4, p.val[1]);
}
__INLINE V IMULI(int inv, V a) {
if(inv) return VSWAPPAIRS(VXOR(a, VLIT4(0.0f, -0.0f, 0.0f, -0.0f)));
else return VSWAPPAIRS(VXOR(a, VLIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
}
__INLINE V IMUL(V d, V re, V im) {
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VSUB(re, im);
}
__INLINE V IMULJ(V d, V re, V im) {
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VADD(re, im);
}
#endif
#ifndef FFTS_MACROS_NEON_H
#define FFTS_MACROS_NEON_H
#include <arm_neon.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#define FFTS_MALLOC(d,a) (valloc(d))
#define FFTS_FREE(d) (free(d))
typedef float32x4_t V4SF;
typedef float32x4x2_t V4SF2;
#define V4SF_ADD vaddq_f32
#define V4SF_SUB vsubq_f32
#define V4SF_MUL vmulq_f32
#define V4SF_XOR(x,y) \
(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))
#define V4SF_ST vst1q_f32
#define V4SF_LD vld1q_f32
#define V4SF_SWAP_PAIRS(x) \
(vrev64q_f32(x))
#define V4SF_UNPACK_HI(a,b) \
(vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
#define V4SF_UNPACK_LO(a,b) \
(vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
#define V4SF_BLEND(x,y) \
(vcombine_f32(vget_low_f32(x), vget_high_f32(y)))
static FFTS_ALWAYS_INLINE V4SF
V4SF_LIT4(float f3, float f2, float f1, float f0)
{
float FFTS_ALIGN(16) d[4] = {f0, f1, f2, f3};
return V4SF_LD(d);
}
#define V4SF_DUPLICATE_RE(r) \
vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
#define V4SF_DUPLICATE_IM(r) \
vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULI(int inv, V4SF a)
{
if (inv) {
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f)));
} else {
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
}
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_SUB(re, im);
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_ADD(re, im);
}
#define V4SF2_ST vst2q_f32
#define V4SF2_LD vld2q_f32
static FFTS_ALWAYS_INLINE void
V4SF2_STORE_SPR(float *addr, V4SF2 p)
{
vst1q_f32(addr, p.val[0]);
vst1q_f32(addr + 4, p.val[1]);
}
#endif /* FFTS_MACROS_NEON_H */
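On NEON the pair swap needs no shuffle constant: vrev64q_f32 reverses the two 32-bit lanes inside each 64-bit half, which is exactly the re/im swap within each complex element. A small sketch of the intent (illustrative only, not part of the library; it assumes an ARM toolchain with NEON enabled):

#include <arm_neon.h>
#include <stdio.h>

int main(void)
{
    float in[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; /* r1, i1, r2, i2 */
    float out[4];

    float32x4_t v = vld1q_f32(in);
    /* V4SF_SWAP_PAIRS(v): {1,2,3,4} becomes {2,1,4,3}. */
    vst1q_f32(out, vrev64q_f32(v));

    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
    return 0;
}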

@ -1,84 +1,100 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __SSE_FLOAT_H__
#define __SSE_FLOAT_H__
#include <xmmintrin.h>
//#define VL 4
typedef __m128 V;
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
//#define VLIT4 _mm_set_ps
#define VXOR _mm_xor_ps
#define VST _mm_store_ps
#define VLD _mm_load_ps
#define VSWAPPAIRS(x) (_mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1)))
#define VUNPACKHI(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,3,2)))
#define VUNPACKLO(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(1,0,1,0)))
#define VBLEND(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,1,0)))
#define VLIT4 _mm_set_ps
#define VDUPRE(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(2,2,0,0)))
#define VDUPIM(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(3,3,1,1)))
#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
#define FFTS_FREE(d) (_mm_free(d))
__INLINE V IMULI(int inv, V a) {
if(inv) return VSWAPPAIRS(VXOR(a, VLIT4(0.0f, -0.0f, 0.0f, -0.0f)));
else return VSWAPPAIRS(VXOR(a, VLIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
}
__INLINE V IMUL(V d, V re, V im) {
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VSUB(re, im);
}
__INLINE V IMULJ(V d, V re, V im) {
re = VMUL(re, d);
im = VMUL(im, VSWAPPAIRS(d));
return VADD(re, im);
}
#endif
#ifndef FFTS_MACROS_SSE_H
#define FFTS_MACROS_SSE_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include <xmmintrin.h>
#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
#define FFTS_FREE(d) (_mm_free(d))
typedef __m128 V4SF;
#define V4SF_ADD _mm_add_ps
#define V4SF_SUB _mm_sub_ps
#define V4SF_MUL _mm_mul_ps
#define V4SF_LIT4 _mm_set_ps
#define V4SF_XOR _mm_xor_ps
#define V4SF_ST _mm_store_ps
#define V4SF_LD _mm_load_ps
#define V4SF_SWAP_PAIRS(x) \
(_mm_shuffle_ps(x, x, _MM_SHUFFLE(2,3,0,1)))
#define V4SF_UNPACK_HI(x,y) \
(_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,3,2)))
#define V4SF_UNPACK_LO(x,y) \
(_mm_movelh_ps(x, y))
#define V4SF_BLEND(x, y) \
(_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,1,0)))
#define V4SF_DUPLICATE_RE(r) \
(_mm_shuffle_ps(r, r, _MM_SHUFFLE(2,2,0,0)))
#define V4SF_DUPLICATE_IM(r) \
(_mm_shuffle_ps(r, r, _MM_SHUFFLE(3,3,1,1)))
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULI(int inv, V4SF a)
{
if (inv) {
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f)));
} else {
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
}
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_SUB(re, im);
}
static FFTS_ALWAYS_INLINE V4SF
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
{
re = V4SF_MUL(re, d);
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
return V4SF_ADD(re, im);
}
#endif /* FFTS_MACROS_SSE_H */
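The twiddle broadcasts on the SSE path are plain shuffles: _MM_SHUFFLE(2,2,0,0) copies each complex element's real part into both lanes of that element, and _MM_SHUFFLE(3,3,1,1) does the same for the imaginary part. A short sketch of what V4SF_DUPLICATE_RE/IM produce (illustrative only, assumes an SSE-capable x86 compiler; not part of the library):

#include <xmmintrin.h>
#include <stdio.h>

int main(void)
{
    float in[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; /* r1, i1, r2, i2 */
    float re[4], im[4];

    __m128 v = _mm_loadu_ps(in);
    /* V4SF_DUPLICATE_RE: {1,2,3,4} -> {1,1,3,3} */
    _mm_storeu_ps(re, _mm_shuffle_ps(v, v, _MM_SHUFFLE(2,2,0,0)));
    /* V4SF_DUPLICATE_IM: {1,2,3,4} -> {2,2,4,4} */
    _mm_storeu_ps(im, _mm_shuffle_ps(v, v, _MM_SHUFFLE(3,3,1,1)));

    printf("re: %g %g %g %g\n", re[0], re[1], re[2], re[3]);
    printf("im: %g %g %g %g\n", im[0], im[1], im[2], im[3]);
    return 0;
}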

@ -1,161 +1,204 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_H__
#define __MACROS_H__
#ifdef HAVE_NEON
#include "macros-neon.h"
#else
#ifdef __alpha__
#include "macros-alpha.h"
#else
#ifdef __powerpc__
#include "macros-altivec.h"
#endif
#endif
#endif
#ifdef HAVE_VFP
#include "macros-alpha.h"
#endif
#ifdef HAVE_SSE
#include "macros-sse.h"
#endif
static inline void TX2(V *a, V *b)
{
V TX2_t0 = VUNPACKLO(*a, *b);
V TX2_t1 = VUNPACKHI(*a, *b);
*a = TX2_t0; *b = TX2_t1;
}
static inline void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
{
V uk, uk2, zk_p, zk_n, zk, zk_d;
uk = *r0; uk2 = *r1;
zk_p = IMUL(*r2, re, im);
zk_n = IMULJ(*r3, re, im);
zk = VADD(zk_p, zk_n);
zk_d = IMULI(inv, VSUB(zk_p, zk_n));
*r2 = VSUB(uk, zk);
*r0 = VADD(uk, zk);
*r3 = VADD(uk2, zk_d);
*r1 = VSUB(uk2, zk_d);
}
static inline void S_4(V r0, V r1, V r2, V r3,
data_t * restrict o0, data_t * restrict o1,
data_t * restrict o2, data_t * restrict o3)
{
VST(o0, r0); VST(o1, r1); VST(o2, r2); VST(o3, r3);
}
static inline void L_2_4(int inv,
const data_t * restrict i0, const data_t * restrict i1,
const data_t * restrict i2, const data_t * restrict i3,
V *r0, V *r1, V *r2, V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
t4 = VADD(t0, t1);
t5 = VSUB(t0, t1);
t6 = VADD(t2, t3);
t7 = VSUB(t2, t3);
*r0 = VUNPACKLO(t4, t5);
*r1 = VUNPACKLO(t6, t7);
t5 = IMULI(inv, t5);
t0 = VADD(t6, t4);
t2 = VSUB(t6, t4);
t1 = VSUB(t7, t5);
t3 = VADD(t7, t5);
*r3 = VUNPACKHI(t0, t1);
*r2 = VUNPACKHI(t2, t3);
}
static inline void L_4_4(int inv,
const data_t * restrict i0, const data_t * restrict i1,
const data_t * restrict i2, const data_t * restrict i3,
V *r0, V *r1, V *r2, V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
t4 = VADD(t0, t1);
t5 = VSUB(t0, t1);
t6 = VADD(t2, t3);
t7 = IMULI(inv, VSUB(t2, t3));
t0 = VADD(t4, t6);
t2 = VSUB(t4, t6);
t1 = VSUB(t5, t7);
t3 = VADD(t5, t7);
TX2(&t0, &t1);
TX2(&t2, &t3);
*r0 = t0; *r2 = t1; *r1 = t2; *r3 = t3;
}
static inline void L_4_2(int inv,
const data_t * restrict i0, const data_t * restrict i1,
const data_t * restrict i2, const data_t * restrict i3,
V *r0, V *r1, V *r2, V *r3)
{
V t0, t1, t2, t3, t4, t5, t6, t7;
t0 = VLD(i0); t1 = VLD(i1); t6 = VLD(i2); t7 = VLD(i3);
t2 = VBLEND(t6, t7);
t3 = VBLEND(t7, t6);
t4 = VADD(t0, t1);
t5 = VSUB(t0, t1);
t6 = VADD(t2, t3);
t7 = VSUB(t2, t3);
*r2 = VUNPACKHI(t4, t5);
*r3 = VUNPACKHI(t6, t7);
t7 = IMULI(inv, t7);
t0 = VADD(t4, t6);
t2 = VSUB(t4, t6);
t1 = VSUB(t5, t7);
t3 = VADD(t5, t7);
*r0 = VUNPACKLO(t0, t1);
*r1 = VUNPACKLO(t2, t3);
}
#endif
#ifndef FFTS_MACROS_H
#define FFTS_MACROS_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#ifdef HAVE_NEON
#include "macros-neon.h"
#elif HAVE_SSE
#include "macros-sse.h"
#elif __powerpc__
#include "macros-altivec.h"
#else
#include "macros-alpha.h"
#endif
static FFTS_INLINE void
V4SF_TX2(V4SF *a, V4SF *b)
{
V4SF t0 = V4SF_UNPACK_LO(*a, *b);
V4SF t1 = V4SF_UNPACK_HI(*a, *b);
*a = t0;
*b = t1;
}
static FFTS_INLINE void
V4SF_K_N(int inv,
V4SF re,
V4SF im,
V4SF *r0,
V4SF *r1,
V4SF *r2,
V4SF *r3)
{
V4SF uk, uk2, zk_p, zk_n, zk, zk_d;
uk = *r0;
uk2 = *r1;
zk_p = V4SF_IMUL(*r2, re, im);
zk_n = V4SF_IMULJ(*r3, re, im);
zk = V4SF_ADD(zk_p, zk_n);
zk_d = V4SF_IMULI(inv, V4SF_SUB(zk_p, zk_n));
*r2 = V4SF_SUB(uk, zk);
*r0 = V4SF_ADD(uk, zk);
*r3 = V4SF_ADD(uk2, zk_d);
*r1 = V4SF_SUB(uk2, zk_d);
}
static FFTS_INLINE void
V4SF_L_2_4(int inv,
const float *FFTS_RESTRICT i0,
const float *FFTS_RESTRICT i1,
const float *FFTS_RESTRICT i2,
const float *FFTS_RESTRICT i3,
V4SF *r0,
V4SF *r1,
V4SF *r2,
V4SF *r3)
{
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
t0 = V4SF_LD(i0);
t1 = V4SF_LD(i1);
t2 = V4SF_LD(i2);
t3 = V4SF_LD(i3);
t4 = V4SF_ADD(t0, t1);
t5 = V4SF_SUB(t0, t1);
t6 = V4SF_ADD(t2, t3);
t7 = V4SF_SUB(t2, t3);
*r0 = V4SF_UNPACK_LO(t4, t5);
*r1 = V4SF_UNPACK_LO(t6, t7);
t5 = V4SF_IMULI(inv, t5);
t0 = V4SF_ADD(t6, t4);
t2 = V4SF_SUB(t6, t4);
t1 = V4SF_SUB(t7, t5);
t3 = V4SF_ADD(t7, t5);
*r3 = V4SF_UNPACK_HI(t0, t1);
*r2 = V4SF_UNPACK_HI(t2, t3);
}
static FFTS_INLINE void
V4SF_L_4_4(int inv,
const float *FFTS_RESTRICT i0,
const float *FFTS_RESTRICT i1,
const float *FFTS_RESTRICT i2,
const float *FFTS_RESTRICT i3,
V4SF *r0,
V4SF *r1,
V4SF *r2,
V4SF *r3)
{
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
t0 = V4SF_LD(i0);
t1 = V4SF_LD(i1);
t2 = V4SF_LD(i2);
t3 = V4SF_LD(i3);
t4 = V4SF_ADD(t0, t1);
t5 = V4SF_SUB(t0, t1);
t6 = V4SF_ADD(t2, t3);
t7 = V4SF_IMULI(inv, V4SF_SUB(t2, t3));
t0 = V4SF_ADD(t4, t6);
t2 = V4SF_SUB(t4, t6);
t1 = V4SF_SUB(t5, t7);
t3 = V4SF_ADD(t5, t7);
V4SF_TX2(&t0, &t1);
V4SF_TX2(&t2, &t3);
*r0 = t0;
*r2 = t1;
*r1 = t2;
*r3 = t3;
}
static FFTS_INLINE void
V4SF_L_4_2(int inv,
const float *FFTS_RESTRICT i0,
const float *FFTS_RESTRICT i1,
const float *FFTS_RESTRICT i2,
const float *FFTS_RESTRICT i3,
V4SF *r0,
V4SF *r1,
V4SF *r2,
V4SF *r3)
{
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
t0 = V4SF_LD(i0);
t1 = V4SF_LD(i1);
t6 = V4SF_LD(i2);
t7 = V4SF_LD(i3);
t2 = V4SF_BLEND(t6, t7);
t3 = V4SF_BLEND(t7, t6);
t4 = V4SF_ADD(t0, t1);
t5 = V4SF_SUB(t0, t1);
t6 = V4SF_ADD(t2, t3);
t7 = V4SF_SUB(t2, t3);
*r2 = V4SF_UNPACK_HI(t4, t5);
*r3 = V4SF_UNPACK_HI(t6, t7);
t7 = V4SF_IMULI(inv, t7);
t0 = V4SF_ADD(t4, t6);
t2 = V4SF_SUB(t4, t6);
t1 = V4SF_SUB(t5, t7);
t3 = V4SF_ADD(t5, t7);
*r0 = V4SF_UNPACK_LO(t0, t1);
*r1 = V4SF_UNPACK_LO(t2, t3);
}
#define V4SF_S_4(r0, r1, r2, r3, o0, o1, o2, o3) \
V4SF_ST(o0, r0); V4SF_ST(o1, r1); V4SF_ST(o2, r2); V4SF_ST(o3, r3);
#endif /* FFTS_MACROS_H */
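V4SF_K_N and the V4SF_L_* leaves above are the building blocks the generated and static transforms call into; each operates on four floats (two interleaved complex values) per vector. A rough usage sketch of the leaf entry points follows; it is a hypothetical driver, not part of FFTS, and assumes it is compiled inside the FFTS source tree with -DHAVE_SSE so that macros-sse.h is selected (the include names are assumptions, not library API):

#include <stdio.h>
#include "ffts_attributes.h"
#include "macros.h"

int main(void)
{
    /* FFTS_MALLOC maps to _mm_malloc here, so the loads are 16-byte aligned. */
    float *in = (float*) FFTS_MALLOC(16 * sizeof(float), 16);
    float *out = (float*) FFTS_MALLOC(16 * sizeof(float), 16);
    V4SF r0, r1, r2, r3;
    int i;

    for (i = 0; i < 16; i++) {
        in[i] = (float) i;
    }

    /* Forward (inv = 0) size-4 leaf: four vectors in, four recombined vectors out. */
    V4SF_L_4_4(0, in, in + 4, in + 8, in + 12, &r0, &r1, &r2, &r3);
    V4SF_S_4(r0, r1, r2, r3, out, out + 4, out + 8, out + 12);

    for (i = 0; i < 16; i++) {
        printf("%g\n", out[i]);
    }

    FFTS_FREE(in);
    FFTS_FREE(out);
    return 0;
}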

@ -1,38 +1,38 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __NEON_H__
#define __NEON_H__
#ifndef FFTS_NEON_H
#define FFTS_NEON_H
#include "ffts.h"
@ -45,21 +45,19 @@ void neon_eo();
void neon_oe();
void neon_end();
void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
//typedef struct _ffts_plan_t ffts_plan_t;
void neon_transpose4(uint64_t *in, uint64_t *out, int w, int h);
void neon_transpose8(uint64_t *in, uint64_t *out, int w, int h);
void neon_static_e_f(ffts_plan_t * , const void * , void * );
void neon_static_o_f(ffts_plan_t * , const void * , void * );
void neon_static_x4_f(float *, size_t, float *);
void neon_static_x8_f(float *, size_t, float *);
void neon_static_x8_t_f(float *, size_t, float *);
void neon_static_e_f(ffts_plan_t*, const void*, void*);
void neon_static_o_f(ffts_plan_t*, const void*, void*);
void neon_static_x4_f(float*, const float*);
void neon_static_x8_f(float*, size_t, const float*);
void neon_static_x8_t_f(float*, size_t, const float*);
void neon_static_e_i(ffts_plan_t * , const void * , void * );
void neon_static_o_i(ffts_plan_t * , const void * , void * );
void neon_static_x4_i(float *, size_t, float *);
void neon_static_x8_i(float *, size_t, float *);
void neon_static_x8_t_i(float *, size_t, float *);
void neon_static_e_i(ffts_plan_t*, const void*, void*);
void neon_static_o_i(ffts_plan_t*, const void*, void*);
void neon_static_x4_i(float*, const float*);
void neon_static_x8_i(float*, size_t, const float*);
void neon_static_x8_t_i(float*, size_t, const float*);
#endif
#endif /* FFTS_NEON_H */

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,956 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.align 4
#ifdef __APPLE__
.globl _neon_static_e_f
_neon_static_e_f:
#else
.globl neon_static_e_f
neon_static_e_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop
ldr r11, [r1, #12]
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
vld2.32 {q0}, [r7, :128]! @tag4
vsub.f32 q11, q13, q12
vld2.32 {q8}, [r6, :128]! @tag3
vadd.f32 q12, q13, q12
vsub.f32 q10, q9, q8
vadd.f32 q8, q9, q8
vadd.f32 q9, q12, q8
vsub.f32 d9, d23, d20 @
vadd.f32 d11, d23, d20 @
vsub.f32 q8, q12, q8
vadd.f32 d8, d22, d21 @
vsub.f32 d10, d22, d21 @
ldr r2, [r12], #4
vld1.32 {d20, d21}, [r11, :128]
ldr lr, [r12], #4
vtrn.32 q9, q4
add r2, r0, r2, lsl #2
vtrn.32 q8, q5
add lr, r0, lr, lsl #2
vswp d9,d10
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
vld2.32 {q13}, [r10, :128]! @tag7
vld2.32 {q15}, [r9, :128]! @tag6
vld2.32 {q11}, [r8, :128]! @tag5
vsub.f32 q14, q15, q13
vsub.f32 q12, q0, q11
vadd.f32 q11, q0, q11
vadd.f32 q13, q15, q13
vsub.f32 d13, d29, d24 @
vadd.f32 q15, q13, q11
vadd.f32 d12, d28, d25 @
vadd.f32 d15, d29, d24 @
vsub.f32 d14, d28, d25 @
vtrn.32 q15, q6
vsub.f32 q15, q13, q11
vtrn.32 q15, q7
vswp d13, d14
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
vtrn.32 q13, q14
vtrn.32 q11, q12
vmul.f32 d24, d26, d21
vmul.f32 d28, d27, d20
vmul.f32 d25, d26, d20
vmul.f32 d26, d27, d21
vmul.f32 d27, d22, d21
vmul.f32 d30, d23, d20
vmul.f32 d29, d23, d21
vmul.f32 d22, d22, d20
vsub.f32 d21, d28, d24
vadd.f32 d20, d26, d25
vadd.f32 d25, d30, d27
vsub.f32 d24, d22, d29
vadd.f32 q11, q12, q10
vsub.f32 q10, q12, q10
vadd.f32 q0, q9, q11
vsub.f32 q2, q9, q11
vsub.f32 d3, d17, d20 @
vadd.f32 d7, d17, d20 @
vadd.f32 d2, d16, d21 @
vsub.f32 d6, d16, d21 @
vswp d1, d2
vswp d5, d6
vstmia r2!, {q0-q3}
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_loop_exit
_neon_oo_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vadd.f32 d7, d19, d16 @
vsub.f32 d3, d19, d16 @
vsub.f32 d6, d18, d17 @
vadd.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vadd.f32 d15, d19, d16 @
ldr lr, [r12], #4
vsub.f32 d11, d19, d16 @
vsub.f32 d14, d18, d17 @
vadd.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_loop
_neon_oo_loop_exit:
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop2
_neon_ee_loop2_exit:
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_o_f
_neon_static_o_f:
#else
.globl neon_static_o_f
neon_static_o_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_o_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_o_loop_exit
_neon_oo_o_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vadd.f32 d7, d19, d16 @
vsub.f32 d3, d19, d16 @
vsub.f32 d6, d18, d17 @
vadd.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vadd.f32 d15, d19, d16 @
ldr lr, [r12], #4
vsub.f32 d11, d19, d16 @
vsub.f32 d14, d18, d17 @
vadd.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_o_loop
_neon_oo_o_loop_exit:
ldr r11, [r1, #8]
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vld2.32 {q15}, [r10, :128]!
vorr d25, d17, d17
vorr d24, d20, d20
vorr d20, d16, d16
vsub.f32 q9, q13, q11
vadd.f32 q11, q13, q11
ldr r2, [r12], #4
vtrn.32 d24, d25
ldr lr, [r12], #4
vtrn.32 d20, d21
add r2, r0, r2, lsl #2
vsub.f32 q8, q10, q12
add lr, r0, lr, lsl #2
vadd.f32 q10, q10, q12
vadd.f32 q0, q11, q10
vsub.f32 d25, d19, d16 @
vadd.f32 d27, d19, d16 @
vsub.f32 q1, q11, q10
vadd.f32 d24, d18, d17 @
vsub.f32 d26, d18, d17 @
vtrn.32 q0, q12
vtrn.32 q1, q13
vld1.32 {d24, d25}, [r11, :128]
vswp d1, d2
vst1.32 {q0, q1}, [r2, :128]!
vld2.32 {q0}, [r9, :128]!
vadd.f32 q1, q0, q15
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vsub.f32 q15, q0, q15
vsub.f32 q0, q14, q13
vadd.f32 q3, q14, q13
vadd.f32 q2, q3, q1
vsub.f32 d29, d1, d30 @
vadd.f32 d27, d1, d30 @
vsub.f32 q3, q3, q1
vadd.f32 d28, d0, d31 @
vsub.f32 d26, d0, d31 @
vtrn.32 q2, q14
vtrn.32 q3, q13
vswp d5, d6
vst1.32 {q2, q3}, [r2, :128]!
vtrn.32 q11, q9
vtrn.32 q10, q8
vmul.f32 d20, d18, d25
vmul.f32 d22, d19, d24
vmul.f32 d21, d19, d25
vmul.f32 d18, d18, d24
vmul.f32 d19, d16, d25
vmul.f32 d30, d17, d24
vmul.f32 d23, d16, d24
vmul.f32 d24, d17, d25
vadd.f32 d17, d22, d20
vsub.f32 d16, d18, d21
vsub.f32 d21, d30, d19
vadd.f32 d20, d24, d23
vadd.f32 q9, q8, q10
vsub.f32 q8, q8, q10
vadd.f32 q4, q14, q9
vsub.f32 q6, q14, q9
vsub.f32 d11, d27, d16 @
vadd.f32 d15, d27, d16 @
vadd.f32 d10, d26, d17 @
vsub.f32 d14, d26, d17 @
vswp d9, d10
vswp d13, d14
vstmia lr!, {q4-q7}
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_o_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_o_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop2
_neon_ee_o_loop2_exit:
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x4_f
_neon_static_x4_f:
#else
.globl neon_static_x4_f
neon_static_x4_f:
#endif
@ add r3, r0, #0
push {r4, r5, r6, lr}
vstmdb sp!, {d8-d15}
vld1.32 {q8,q9}, [r0, :128]
add r4, r0, r1, lsl #1
vld1.32 {q10,q11}, [r4, :128]
add r5, r0, r1, lsl #2
vld1.32 {q12,q13}, [r5, :128]
add r6, r4, r1, lsl #2
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q2,q3}, [r2, :128]
vmul.f32 q0, q13, q3
vmul.f32 q5, q12, q2
vmul.f32 q1, q14, q2
vmul.f32 q4, q14, q3
vmul.f32 q14, q12, q3
vmul.f32 q13, q13, q2
vmul.f32 q12, q15, q3
vmul.f32 q2, q15, q2
vsub.f32 q0, q5, q0
vadd.f32 q13, q13, q14
vadd.f32 q12, q12, q1
vsub.f32 q1, q2, q4
vadd.f32 q15, q0, q12
vsub.f32 q12, q0, q12
vadd.f32 q14, q13, q1
vsub.f32 q13, q13, q1
vadd.f32 q0, q8, q15
vadd.f32 q1, q9, q14
vadd.f32 q2, q10, q13 @
vsub.f32 q4, q8, q15
vsub.f32 q3, q11, q12 @
vst1.32 {q0,q1}, [r0, :128]
vsub.f32 q5, q9, q14
vsub.f32 q6, q10, q13 @
vadd.f32 q7, q11, q12 @
vst1.32 {q2,q3}, [r4, :128]
vst1.32 {q4,q5}, [r5, :128]
vst1.32 {q6,q7}, [r6, :128]
vldmia sp!, {d8-d15}
pop {r4, r5, r6, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_f
_neon_static_x8_f:
#else
.globl neon_static_x8_f
neon_static_x8_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
sub r11, r11, r1, lsr #5
neon_x8_loop:
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vadd.f32 q4, q12, q15 @
vsub.f32 q6, q12, q15 @
vsub.f32 q5, q13, q14 @
vadd.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vadd.f32 q2, q8, q10 @
vsub.f32 q3, q9, q12 @
vst1.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vadd.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst1.32 {q2,q3}, [r5, :128]!
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst1.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst1.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vadd.f32 q2, q10, q15 @
vsub.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
vst1.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vsub.f32 q6, q10, q15 @
vst1.32 {q2,q3}, [r6, :128]!
vadd.f32 q7, q11, q14 @
vst1.32 {q4,q5}, [r8, :128]!
vst1.32 {q6,q7}, [r10, :128]!
bne neon_x8_loop
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_t_f
_neon_static_x8_t_f:
#else
.globl neon_static_x8_t_f
neon_static_x8_t_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
sub r11, r11, r1, lsr #5
neon_x8_t_loop:
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vadd.f32 q4, q12, q15 @
vsub.f32 q6, q12, q15 @
vsub.f32 q5, q13, q14 @
vadd.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vadd.f32 q2, q8, q10 @
vsub.f32 q3, q9, q12 @
vst2.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vadd.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst2.32 {q2,q3}, [r5, :128]!
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst2.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst2.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vadd.f32 q2, q10, q15 @
vsub.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
vst2.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vsub.f32 q6, q10, q15 @
vst2.32 {q2,q3}, [r6, :128]!
vadd.f32 q7, q11, q14 @
vst2.32 {q4,q5}, [r8, :128]!
vst2.32 {q6,q7}, [r10, :128]!
bne neon_x8_t_loop
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}

@ -1,955 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.align 4
#ifdef __APPLE__
.globl _neon_static_e_i
_neon_static_e_i:
#else
.globl neon_static_e_i
neon_static_e_i:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop
ldr r11, [r1, #12]
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
vld2.32 {q0}, [r7, :128]! @tag4
vsub.f32 q11, q13, q12
vld2.32 {q8}, [r6, :128]! @tag3
vadd.f32 q12, q13, q12
vsub.f32 q10, q9, q8
vadd.f32 q8, q9, q8
vadd.f32 q9, q12, q8
vadd.f32 d9, d23, d20 @
vsub.f32 d11, d23, d20 @
vsub.f32 q8, q12, q8
vsub.f32 d8, d22, d21 @
vadd.f32 d10, d22, d21 @
ldr r2, [r12], #4
vld1.32 {d20, d21}, [r11, :128]
ldr lr, [r12], #4
vtrn.32 q9, q4
add r2, r0, r2, lsl #2
vtrn.32 q8, q5
add lr, r0, lr, lsl #2
vswp d9,d10
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
vld2.32 {q13}, [r10, :128]! @tag7
vld2.32 {q15}, [r9, :128]! @tag6
vld2.32 {q11}, [r8, :128]! @tag5
vsub.f32 q14, q15, q13
vsub.f32 q12, q0, q11
vadd.f32 q11, q0, q11
vadd.f32 q13, q15, q13
vadd.f32 d13, d29, d24 @
vadd.f32 q15, q13, q11
vsub.f32 d12, d28, d25 @
vsub.f32 d15, d29, d24 @
vadd.f32 d14, d28, d25 @
vtrn.32 q15, q6
vsub.f32 q15, q13, q11
vtrn.32 q15, q7
vswp d13, d14
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
vtrn.32 q13, q14
vtrn.32 q11, q12
vmul.f32 d24, d26, d21
vmul.f32 d28, d27, d20
vmul.f32 d25, d26, d20
vmul.f32 d26, d27, d21
vmul.f32 d27, d22, d21
vmul.f32 d30, d23, d20
vmul.f32 d29, d23, d21
vmul.f32 d22, d22, d20
vsub.f32 d21, d28, d24
vadd.f32 d20, d26, d25
vadd.f32 d25, d30, d27
vsub.f32 d24, d22, d29
vadd.f32 q11, q12, q10
vsub.f32 q10, q12, q10
vadd.f32 q0, q9, q11
vsub.f32 q2, q9, q11
vadd.f32 d3, d17, d20 @
vsub.f32 d7, d17, d20 @
vsub.f32 d2, d16, d21 @
vadd.f32 d6, d16, d21 @
vswp d1, d2
vswp d5, d6
vstmia r2!, {q0-q3}
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_loop_exit
_neon_oo_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vsub.f32 d7, d19, d16 @
vadd.f32 d3, d19, d16 @
vadd.f32 d6, d18, d17 @
vsub.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vsub.f32 d15, d19, d16 @
ldr lr, [r12], #4
vadd.f32 d11, d19, d16 @
vadd.f32 d14, d18, d17 @
vsub.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_loop
_neon_oo_loop_exit:
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop2
_neon_ee_loop2_exit:
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_o_i
_neon_static_o_i:
#else
.globl neon_static_o_i
neon_static_o_i:
#endif
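@ Presumably the inverse ("_i") counterpart of neon_static_o_f: on entry
@ r0 = plan pointer (p), r1 = input, r2 = output.  Eight strided input
@ pointers are derived from p->N, output addresses come from the offset
@ table loaded through [r0], and p->i0 / p->i1 (see the "@ this is p->..."
@ comments below) set the trip counts of the unrolled loops.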
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_o_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_o_loop_exit
_neon_oo_o_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vsub.f32 d7, d19, d16 @
vadd.f32 d3, d19, d16 @
vadd.f32 d6, d18, d17 @
vsub.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vsub.f32 d15, d19, d16 @
ldr lr, [r12], #4
vadd.f32 d11, d19, d16 @
vadd.f32 d14, d18, d17 @
vsub.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_o_loop
_neon_oo_o_loop_exit:
ldr r11, [r1, #8]
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vld2.32 {q15}, [r10, :128]!
vorr d25, d17, d17
vorr d24, d20, d20
vorr d20, d16, d16
vsub.f32 q9, q13, q11
vadd.f32 q11, q13, q11
ldr r2, [r12], #4
vtrn.32 d24, d25
ldr lr, [r12], #4
vtrn.32 d20, d21
add r2, r0, r2, lsl #2
vsub.f32 q8, q10, q12
add lr, r0, lr, lsl #2
vadd.f32 q10, q10, q12
vadd.f32 q0, q11, q10
vadd.f32 d25, d19, d16 @
vsub.f32 d27, d19, d16 @
vsub.f32 q1, q11, q10
vsub.f32 d24, d18, d17 @
vadd.f32 d26, d18, d17 @
vtrn.32 q0, q12
vtrn.32 q1, q13
vld1.32 {d24, d25}, [r11, :128]
vswp d1, d2
vst1.32 {q0, q1}, [r2, :128]!
vld2.32 {q0}, [r9, :128]!
vadd.f32 q1, q0, q15
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vsub.f32 q15, q0, q15
vsub.f32 q0, q14, q13
vadd.f32 q3, q14, q13
vadd.f32 q2, q3, q1
vadd.f32 d29, d1, d30 @
vsub.f32 d27, d1, d30 @
vsub.f32 q3, q3, q1
vsub.f32 d28, d0, d31 @
vadd.f32 d26, d0, d31 @
vtrn.32 q2, q14
vtrn.32 q3, q13
vswp d5, d6
vst1.32 {q2, q3}, [r2, :128]!
vtrn.32 q11, q9
vtrn.32 q10, q8
vmul.f32 d20, d18, d25
vmul.f32 d22, d19, d24
vmul.f32 d21, d19, d25
vmul.f32 d18, d18, d24
vmul.f32 d19, d16, d25
vmul.f32 d30, d17, d24
vmul.f32 d23, d16, d24
vmul.f32 d24, d17, d25
vadd.f32 d17, d22, d20
vsub.f32 d16, d18, d21
vsub.f32 d21, d30, d19
vadd.f32 d20, d24, d23
vadd.f32 q9, q8, q10
vsub.f32 q8, q8, q10
vadd.f32 q4, q14, q9
vsub.f32 q6, q14, q9
vadd.f32 d11, d27, d16 @
vsub.f32 d15, d27, d16 @
vsub.f32 d10, d26, d17 @
vadd.f32 d14, d26, d17 @
vswp d9, d10
vswp d13, d14
vstmia lr!, {q4-q7}
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_o_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_o_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop2
_neon_ee_o_loop2_exit:
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x4_i
_neon_static_x4_i:
#else
.globl neon_static_x4_i
neon_static_x4_i:
#endif
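@ In-place radix-4 butterfly ("_i" variant): r0 points at the data, r1 sets
@ the spacing between the four rows (r0, r0 + 2*r1, r0 + 4*r1, r0 + 6*r1),
@ and r2 points at the twiddle factors loaded into q2/q3 below.  Results
@ are written back over the same four rows.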
@ add r3, r0, #0
push {r4, r5, r6, lr}
vstmdb sp!, {d8-d15}
vld1.32 {q8,q9}, [r0, :128]
add r4, r0, r1, lsl #1
vld1.32 {q10,q11}, [r4, :128]
add r5, r0, r1, lsl #2
vld1.32 {q12,q13}, [r5, :128]
add r6, r4, r1, lsl #2
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q2,q3}, [r2, :128]
vmul.f32 q0, q13, q3
vmul.f32 q5, q12, q2
vmul.f32 q1, q14, q2
vmul.f32 q4, q14, q3
vmul.f32 q14, q12, q3
vmul.f32 q13, q13, q2
vmul.f32 q12, q15, q3
vmul.f32 q2, q15, q2
vsub.f32 q0, q5, q0
vadd.f32 q13, q13, q14
vadd.f32 q12, q12, q1
vsub.f32 q1, q2, q4
vadd.f32 q15, q0, q12
vsub.f32 q12, q0, q12
vadd.f32 q14, q13, q1
vsub.f32 q13, q13, q1
vadd.f32 q0, q8, q15
vadd.f32 q1, q9, q14
vsub.f32 q2, q10, q13 @
vsub.f32 q4, q8, q15
vadd.f32 q3, q11, q12 @
vst1.32 {q0,q1}, [r0, :128]
vsub.f32 q5, q9, q14
vadd.f32 q6, q10, q13 @
vsub.f32 q7, q11, q12 @
vst1.32 {q2,q3}, [r4, :128]
vst1.32 {q4,q5}, [r5, :128]
vst1.32 {q6,q7}, [r6, :128]
vldmia sp!, {d8-d15}
pop {r4, r5, r6, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_i
_neon_static_x8_i:
#else
.globl neon_static_x8_i
neon_static_x8_i:
#endif
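@ In-place radix-8 pass ("_i" variant): r0 = data, r1 = spacing between the
@ eight rows data0..data7 set up below, r2 = twiddle LUT.  The loop count is
@ r1 >> 5, i.e. one iteration per 32 bytes of row length.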
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
sub r11, r11, r1, lsr #5
neon_x8_loop:
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
vst1.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst1.32 {q2,q3}, [r5, :128]!
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst1.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst1.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
vst1.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst1.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst1.32 {q4,q5}, [r8, :128]!
vst1.32 {q6,q7}, [r10, :128]!
bne neon_x8_loop
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_t_i
_neon_static_x8_t_i:
#else
.globl neon_static_x8_t_i
neon_static_x8_t_i:
#endif
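@ Same radix-8 pass as neon_static_x8_i above, except the final results are
@ written with interleaving vst2.32 stores instead of vst1.32, presumably to
@ leave the output in the split/transposed layout expected by the next stage.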
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
sub r11, r11, r1, lsr #5
neon_x8_t_loop:
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
vst2.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst2.32 {q2,q3}, [r5, :128]!
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst2.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst2.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
vst2.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst2.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst2.32 {q4,q5}, [r8, :128]!
vst2.32 {q6,q7}, [r10, :128]!
bne neon_x8_t_loop
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}

@ -1,208 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "patterns.h"
void permute_addr(int N, int offset, int stride, int *d) {
int i, a[4] = {0,2,1,3};
for(i=0;i<4;i++) {
d[i] = offset + (a[i] << stride);
if(d[i] < 0) d[i] += N;
}
}
void ffts_hardcodedleaf_is_rec(ptrdiff_t **is, int bigN, int N, int poffset, int offset, int stride, int even, int VL) {
if(N > 4) {
ffts_hardcodedleaf_is_rec(is, bigN, N/2, poffset, offset, stride + 1, even, VL);
if(N/4 >= 4) ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset+(1<<stride),offset+(N/2), stride + 2, 0, VL);
if(N/4 >= 4) ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset-(1<<stride),offset+(3*N/4), stride + 2, 0, VL);
else {
int temp = poffset+(1<<stride);
if(temp < 0) temp += bigN;
temp *= 2;
if(!(temp % (VL*2))) {
(*is)[0] = poffset+(1<<stride);
(*is)[1] = poffset+(1<<stride)+(1<<(stride+2));
(*is)[2] = poffset-(1<<stride);
(*is)[3] = poffset-(1<<stride)+(1<<(stride+2));
int i;
for(i=0;i<4;i++) if((*is)[i] < 0) (*is)[i] += bigN;
for(i=0;i<4;i++) (*is)[i] *= 2;
*is += 4;
}
}
}else if(N == 4) {
int perm[4];
permute_addr(bigN, poffset, stride, perm);
if(!((perm[0]*2) % (VL*2))) {
int i;
for(i=0;i<4;i++) {
(*is)[i] = perm[i] * 2;
}
*is += 4;
}
}
}
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL) {
int i, i0 = N/leafN/3+1, i1=N/leafN/3, i2 = N/leafN/3;
int stride = log(N/leafN)/log(2);
p->is = malloc(N/VL * sizeof(ptrdiff_t));
ptrdiff_t *is = p->is;
if((N/leafN) % 3 > 1) i1++;
for(i=0;i<i0;i++) ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
for(i=i0;i<i0+i1;i++) {
ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i, 0, stride+1, 1, VL);
ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i-(1<<stride), 0, stride+1, 1, VL);
}
for(i=0-i2;i<0;i++) ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
//for(i=0;i<N/VL;i++) {
// printf("%td ", p->is[i]);
// if(i % 16 == 15) printf("\n");
//}
p->i0 = i0; p->i1 = i1;
}
/**
 * Recursively records an (input offset * 2, output offset) pair for each
 * leaf transform, following the split-radix N/2, N/4, N/4 decomposition
 * used by the rest of the planner.
 */
void ffts_elaborate_offsets(ptrdiff_t *offsets, int leafN, int N, int ioffset, int ooffset, int stride, int even) {
if((even && N == leafN) || (!even && N <= leafN)) {
offsets[2*(ooffset/leafN)] = ioffset*2;
offsets[2*(ooffset/leafN)+1] = ooffset;
}else if(N > 4) {
ffts_elaborate_offsets(offsets, leafN, N/2, ioffset, ooffset, stride+1, even);
ffts_elaborate_offsets(offsets, leafN, N/4, ioffset+(1<<stride), ooffset+N/2, stride+2, 0);
if(N/4 >= leafN)
ffts_elaborate_offsets(offsets, leafN, N/4, ioffset-(1<<stride), ooffset+3*N/4, stride+2, 0);
}
}
int compare_offsets(const void *a, const void *b) {
return ((ptrdiff_t *)a)[0] - ((ptrdiff_t *)b)[0];
}
uint32_t reverse_bits(uint32_t a, int n) {
uint32_t x = 0;
int i;
for(i=0;i<n;i++) {
if(a & (1 << i)) x |= 1 << (n-i-1);
}
return x;
}
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN) {
ptrdiff_t *offsets = malloc(2 * N/leafN * sizeof(ptrdiff_t));
ffts_elaborate_offsets(offsets, leafN, N, 0, 0, 1, 1);
size_t i;
for(i=0;i<2*N/leafN;i+=2) {
if(offsets[i] < 0) offsets[i] = N + offsets[i];
}
qsort(offsets, N/leafN, 2 * sizeof(ptrdiff_t), compare_offsets);
//elaborate_is(p, N, 0, 0, 1);
p->offsets = malloc(N/leafN * sizeof(ptrdiff_t));
for(i=0;i<N/leafN;i++) {
p->offsets[i] = offsets[i*2+1]*2;
}
//for(i=0;i<N/leafN;i++) {
// printf("%4d %4d\n", p->offsets[i], reverse_bits(p->offsets[i], __builtin_ctzl(2*N)));
//}
free(offsets);
}
/*
int tree_count(int N, int leafN, int offset) {
if(N <= leafN) return 0;
int count = 0;
count += tree_count(N/4, leafN, offset);
count += tree_count(N/8, leafN, offset + N/4);
count += tree_count(N/8, leafN, offset + N/4 + N/8);
count += tree_count(N/4, leafN, offset + N/2);
count += tree_count(N/4, leafN, offset + 3*N/4);
return 1 + count;
}
void elaborate_tree(transform_index_t **p, int N, int leafN, int offset) {
if(N <= leafN) return;
elaborate_tree(p, N/4, leafN, offset);
elaborate_tree(p, N/8, leafN, offset + N/4);
elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
elaborate_tree(p, N/4, leafN, offset + N/2);
elaborate_tree(p, N/4, leafN, offset + 3*N/4);
(*p)[0] = N;
(*p)[1] = offset*2;
(*p)+=2;
}
void ffts_init_tree(ffts_plan_t *p, int N, int leafN) {
int count = tree_count(N, leafN, 0) + 1;
transform_index_t *ps = p->transforms = malloc(count * 2 * sizeof(transform_index_t));
//printf("count = %d\n", count);
elaborate_tree(&ps, N, leafN, 0);
#ifdef __ARM_NEON__
ps -= 2;
#endif
ps[0] = 0;
ps[1] = 0;
//int i;
//for(i=0;i<count;i++) {
// fprintf(stderr, "%lu %lu - %d\n", p->transforms[i*2], p->transforms[i*2+1],
// __builtin_ctzl(p->transforms[i*2]) - 5);
//}
}
*/

@ -1,44 +1,520 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FFTS_PATTERNS_H
#define FFTS_PATTERNS_H
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
#include <stddef.h>
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifndef LEAF_N
#define LEAF_N 8
#endif
#if LEAF_N == 8
static void
ffts_elaborate_offsets_even8(ptrdiff_t *const offsets,
int log_N);
static void
ffts_elaborate_offsets_odd8(ptrdiff_t *const offsets,
int log_N,
int input_offset,
int output_offset,
int stride);
static void
ffts_hardcodedleaf_is_rec_even4(ptrdiff_t **is,
int big_N,
int offset,
int stride,
int VL);
static void
ffts_hardcodedleaf_is_rec_even8(ptrdiff_t **is,
int big_N,
int offset,
int stride,
int VL);
#else
static void
ffts_elaborate_offsets_even(ptrdiff_t *const offsets,
int leaf_N,
int N,
int input_offset,
int output_offset,
int stride);
static void
ffts_elaborate_offsets_odd(ptrdiff_t *const offsets,
int leaf_N,
int N,
int input_offset,
int output_offset,
int stride);
static void
ffts_hardcodedleaf_is_rec_even(ptrdiff_t **is,
int big_N,
int N,
int offset,
int stride,
int VL);
static void
ffts_hardcodedleaf_is_rec_odd(ptrdiff_t **is,
int big_N,
int N,
int offset,
int stride,
int VL);
#endif
static int
ffts_compare_offsets(const void *pa, const void *pb)
{
const ptrdiff_t a = *(const ptrdiff_t*) pa;
const ptrdiff_t b = *(const ptrdiff_t*) pb;
return (a > b) - (a < b);
}
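/* The (a > b) - (a < b) form yields a correct -1/0/+1 result even where the
   old compare_offsets-style "return a - b;" could overflow or be truncated
   when the ptrdiff_t difference does not fit in an int. */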
static void
ffts_permute_addr(int N, int offset, int stride, int *const d)
{
int a[4] = {0,2,1,3};
int i;
for (i = 0; i < 4; i++) {
d[i] = offset + (a[i] << stride);
if (d[i] < 0) {
d[i] += N;
}
}
}
#if LEAF_N == 8
static void
ffts_elaborate_offsets_even8(ptrdiff_t *const offsets, int log_N)
{
int offset = 1 << (log_N - 4);
int stride = 1;
offsets[0] = 0;
offsets[1] = 0;
offsets[2] = offset * 2;
offsets[3] = 8;
offsets[4] = offset;
offsets[5] = 16;
offsets[6] = -offset;
offsets[7] = 24;
for(; log_N > 5; --log_N, stride *= 2) {
ffts_elaborate_offsets_odd8(offsets, log_N - 2,
stride, 1 << (log_N - 1), stride * 4);
ffts_elaborate_offsets_odd8(offsets, log_N - 2,
-stride, 3 * (1 << (log_N - 2)), stride * 4);
}
}
static void
ffts_elaborate_offsets_odd8(ptrdiff_t *const offsets,
int log_N,
int input_offset,
int output_offset,
int stride)
{
if (log_N <= 4) {
offsets[(output_offset / 4) + 0] = input_offset * 2;
offsets[(output_offset / 4) + 1] = output_offset;
if (log_N == 4) {
offsets[(output_offset / 4) + 2] = (input_offset + stride) * 2;
offsets[(output_offset / 4) + 3] = output_offset + 8;
}
} else {
ffts_elaborate_offsets_odd8(offsets, log_N - 1, input_offset,
output_offset, stride * 2);
ffts_elaborate_offsets_odd8(offsets, log_N - 2, input_offset + stride,
output_offset + (1 << (log_N - 1)), stride * 4);
ffts_elaborate_offsets_odd8(offsets, log_N - 2, input_offset - stride,
output_offset + 3 * (1 << (log_N - 2)), stride * 4);
}
}
static void
ffts_hardcodedleaf_is_rec_even4(ptrdiff_t **is,
int big_N,
int offset,
int stride,
int VL)
{
int i, perm[4];
ffts_permute_addr(big_N, offset, stride, perm);
if (!((2 * perm[0]) % (2 * VL))) {
for (i = 0; i < 4; i++) {
(*is)[i] = 2 * perm[i];
}
*is += 4;
}
}
static void
ffts_hardcodedleaf_is_rec_even8(ptrdiff_t **is,
int big_N,
int offset,
int stride,
int VL)
{
int temp;
ffts_hardcodedleaf_is_rec_even4(is, big_N, offset, stride + 1, VL);
temp = offset + (1 << stride);
if (temp < 0) {
temp += big_N;
}
temp *= 2;
if (!(temp % (2 * VL))) {
int i;
(*is)[0] = offset + (1 << stride);
(*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
(*is)[2] = offset - (1 << stride);
(*is)[3] = offset - (1 << stride) + (1 << (stride + 2));
for (i = 0; i < 4; i++) {
if ((*is)[i] < 0) {
(*is)[i] += big_N;
}
}
for (i = 0; i < 4; i++) {
(*is)[i] *= 2;
}
*is += 4;
}
}
#else
static void
ffts_elaborate_offsets_even(ptrdiff_t *const offsets,
int leaf_N,
int N,
int input_offset,
int output_offset,
int stride)
{
if (N == leaf_N) {
offsets[2 * (output_offset / leaf_N) + 0] = input_offset * 2;
offsets[2 * (output_offset / leaf_N) + 1] = output_offset;
} else if (N > 4) {
ffts_elaborate_offsets_even(offsets, leaf_N,
N/2, input_offset, output_offset, stride * 2);
ffts_elaborate_offsets_odd(offsets, leaf_N,
N/4, input_offset + stride, output_offset + N/2, stride * 4);
if (N/4 >= leaf_N) {
ffts_elaborate_offsets_odd(offsets, leaf_N,
N/4, input_offset - stride, output_offset + 3*N/4, stride * 4);
}
}
}
static void
ffts_elaborate_offsets_odd(ptrdiff_t *const offsets,
int leaf_N,
int N,
int input_offset,
int output_offset,
int stride)
{
if (N <= leaf_N) {
offsets[2 * (output_offset / leaf_N) + 0] = input_offset * 2;
offsets[2 * (output_offset / leaf_N) + 1] = output_offset;
} else if (N > 4) {
ffts_elaborate_offsets_odd(offsets, leaf_N, N/2,
input_offset, output_offset, stride * 2);
ffts_elaborate_offsets_odd(offsets, leaf_N, N/4,
input_offset + stride, output_offset + N/2, stride * 4);
if (N/4 >= leaf_N) {
ffts_elaborate_offsets_odd(offsets, leaf_N, N/4,
input_offset - stride, output_offset + 3*N/4, stride * 4);
}
}
}
static void
ffts_hardcodedleaf_is_rec_even(ptrdiff_t **is,
int big_N,
int N,
int offset,
int stride,
int VL)
{
if (N > 4) {
ffts_hardcodedleaf_is_rec_even(is, big_N, N/2, offset, stride + 1, VL);
if (N/4 >= 4) {
ffts_hardcodedleaf_is_rec_odd(
is, big_N, N/4, offset + (1 << stride), stride + 2, VL);
ffts_hardcodedleaf_is_rec_odd(
is, big_N, N/4, offset - (1 << stride), stride + 2, VL);
} else {
int temp = offset + (1 << stride);
if (temp < 0) {
temp += big_N;
}
temp *= 2;
if (!(temp % (2 * VL))) {
int i;
(*is)[0] = offset + (1 << stride);
(*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
(*is)[2] = offset - (1 << stride);
(*is)[3] = offset - (1 << stride) + (1 << (stride + 2));
for (i = 0; i < 4; i++) {
if ((*is)[i] < 0) {
(*is)[i] += big_N;
}
}
for (i = 0; i < 4; i++) {
(*is)[i] *= 2;
}
*is += 4;
}
}
} else if (N == 4) {
int perm[4];
ffts_permute_addr(big_N, offset, stride, perm);
if (!((2 * perm[0]) % (2 * VL))) {
int i;
for (i = 0; i < 4; i++) {
(*is)[i] = 2 * perm[i];
}
*is += 4;
}
}
}
static void
ffts_hardcodedleaf_is_rec_odd(ptrdiff_t **is,
int big_N,
int N,
int offset,
int stride,
int VL)
{
if (N > 4) {
ffts_hardcodedleaf_is_rec_odd(is, big_N, N/2, offset, stride + 1, VL);
if (N/4 >= 4) {
ffts_hardcodedleaf_is_rec_odd(
is, big_N, N/4, offset + (1 << stride), stride + 2, VL);
ffts_hardcodedleaf_is_rec_odd(
is, big_N, N/4, offset - (1 << stride), stride + 2, VL);
} else {
int temp = offset + (1 << stride);
if (temp < 0) {
temp += big_N;
}
temp *= 2;
if (!(temp % (2 * VL))) {
int i;
(*is)[0] = offset + (1 << stride);
(*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
(*is)[2] = offset - (1 << stride);
(*is)[3] = offset - (1 << stride) + (1 << (stride + 2));
for (i = 0; i < 4; i++) {
if ((*is)[i] < 0) {
(*is)[i] += big_N;
}
}
for (i = 0; i < 4; i++) {
(*is)[i] *= 2;
}
*is += 4;
}
}
} else if (N == 4) {
int perm[4];
ffts_permute_addr(big_N, offset, stride, perm);
if (!((2 * perm[0]) % (2 * VL))) {
int i;
for (i = 0; i < 4; i++) {
(*is)[i] = 2 * perm[i];
}
*is += 4;
}
}
}
#endif
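/* Builds the input-index table consumed by the leaf passes.  The N/leaf_N
   leaves are split into i0 "even" leaves, i1 pairs of half-size leaves and
   i2 leaves taken from negative offsets -- the same split the static NEON
   code above reads back as p->i0 / p->i1. */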
static ptrdiff_t*
ffts_init_is(size_t N, size_t leaf_N, int VL)
{
int i, i0, i1, i2;
int stride = ffts_ctzl(N/leaf_N);
ptrdiff_t *is, *pis;
is = malloc(N / VL * sizeof(*is));
if (!is) {
return NULL;
}
i0 = N/leaf_N/3 + 1;
i1 = i2 = N/leaf_N/3;
if ((N/leaf_N) % 3 > 1) {
i1++;
}
pis = is;
#if LEAF_N == 8
for (i = 0; i < i0; i++) {
ffts_hardcodedleaf_is_rec_even8(
&pis, N, i, stride, VL);
}
for (i = i0; i < i0 + i1; i++) {
ffts_hardcodedleaf_is_rec_even4(
&pis, N, i, stride + 1, VL);
ffts_hardcodedleaf_is_rec_even4(
&pis, N, i - (1 << stride), stride + 1, VL);
}
for (i = 0 - i2; i < 0; i++) {
ffts_hardcodedleaf_is_rec_even8(
&pis, N, i, stride, VL);
}
#else
for (i = 0; i < i0; i++) {
ffts_hardcodedleaf_is_rec_even(
&pis, N, leaf_N, i, stride, VL);
}
for (i = i0; i < i0 + i1; i++) {
ffts_hardcodedleaf_is_rec_even(
&pis, N, leaf_N / 2, i, stride + 1, VL);
ffts_hardcodedleaf_is_rec_even(
&pis, N, leaf_N / 2, i - (1 << stride), stride + 1, VL);
}
for (i = 0 - i2; i < 0; i++) {
ffts_hardcodedleaf_is_rec_even(
&pis, N, leaf_N, i, stride, VL);
}
#endif
return is;
}
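/* Computes the output offsets: elaborate the (input, output) pairs, wrap
   negative input offsets back into [0, N), sort the pairs by input offset,
   and keep 2 * output offset for each leaf. */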
static ptrdiff_t*
ffts_init_offsets(size_t N, size_t leaf_N)
{
ptrdiff_t *offsets, *tmp;
size_t i;
offsets = malloc(N/leaf_N * sizeof(*offsets));
if (!offsets) {
return NULL;
}
tmp = malloc(2 * N/leaf_N * sizeof(*tmp));
if (!tmp) {
free(offsets);
return NULL;
}
#if LEAF_N == 8
ffts_elaborate_offsets_even8(tmp, ffts_ctzl(N));
#else
ffts_elaborate_offsets_even(tmp, leaf_N, N, 0, 0, 1);
#endif
for (i = 0; i < 2*N/leaf_N; i += 2) {
if (tmp[i] < 0) {
tmp[i] += N;
}
}
qsort(tmp, N/leaf_N, 2 * sizeof(*tmp), ffts_compare_offsets);
for (i = 0; i < N/leaf_N; i++) {
offsets[i] = 2 * tmp[2*i + 1];
}
free(tmp);
return offsets;
}
#endif /* FFTS_PATTERNS_H */

@ -0,0 +1,448 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
typedef struct _sym_t {
int c;
struct _sym_t *pPrev, *pNext;
struct _seq_rule_t *r;
int offset;
} sym_t;
typedef struct _seq_rule_t {
int c;
sym_t *ss;
struct _seq_rule_t *pPrev, *pNext;
int count;
int length;
} seq_rule_t;
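/* sym_t / seq_rule_t form a doubly linked symbol list plus a list of rules,
   used by seq_extract_hierarchy() below to build a Sequitur-style grammar:
   repeated digrams are replaced by references to shared rules, rules that
   end up used only once are inlined again, and rule lengths are computed
   last.  Presumably this drives instruction-sequence compression in the
   dynamic code generator. */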
void sym_tail_insert(sym_t **ss, sym_t *s)
{
if (!*ss) {
*ss = s;
s->pPrev = s->pNext = NULL;
} else {
while (*ss) {
s->pPrev = *ss;
ss = &(*ss)->pNext;
}
*ss = s;
}
}
sym_t* sym_init(int c)
{
sym_t *s;
s = (sym_t*) malloc(sizeof(*s));
if (!s) {
return NULL;
}
s->c = c;
s->pPrev = s->pNext = NULL;
s->r = NULL;
return s;
}
sym_t* sym_init_from_sym(sym_t *s2)
{
sym_t *s;
s = (sym_t*) malloc(sizeof(*s));
if (!s) {
return NULL;
}
s->c = s2->c;
s->pPrev = s->pNext = NULL;
s->r = s2->r;
s->offset = s2->offset;
return s;
}
seq_rule_t* seq_init_rule(int c)
{
seq_rule_t *G;
G = (seq_rule_t *)malloc(sizeof(*G));
if (!G) {
return NULL;
}
G->c = c;
G->count = 2;
G->ss = NULL;
G->pPrev = NULL;
G->pNext = NULL;
return G;
}
seq_rule_t* seq_grammer_insert_new_rule(seq_rule_t *G, char r, sym_t *a, sym_t *b)
{
sym_t *sa, *sb;
while (G->pNext) {
G = G->pNext;
}
G->pNext = seq_init_rule(r);
if (!G->pNext) {
return NULL;
}
sa = sym_init_from_sym(a);
if (!sa) {
goto cleanup_pnext;
}
sb = sym_init_from_sym(b);
if (!sb) {
goto cleanup_sa;
}
sb->offset = sb->offset - sa->offset;
sa->offset = 0;
sym_tail_insert(&G->pNext->ss, sa);
sym_tail_insert(&G->pNext->ss, sb);
return G->pNext;
cleanup_sa:
free(sa);
cleanup_pnext:
free(G->pNext);
G->pNext = NULL;
return NULL;
}
sym_t* sym_match_digram(sym_t *s, sym_t *term, sym_t *a, sym_t *b)
{
while (s != term) {
if (s->c == a->c && s->pNext->c == b->c &&
s->pNext->offset - s->offset == b->offset-a->offset) {
return s;
}
s = s->pNext;
}
return NULL;
}
seq_rule_t* seq_match_digram(seq_rule_t *R, sym_t *a, sym_t *b)
{
while (R) {
if (R->ss->c == a->c && R->ss->pNext->c == b->c &&
R->ss->pNext->offset - R->ss->offset == b->offset - a->offset) {
return R;
}
R = R->pNext;
}
return NULL;
}
sym_t* sym_tail(sym_t *s)
{
while (s->pNext) {
s = s->pNext;
}
return s;
}
int sym_count(sym_t *s)
{
int count = 0;
while (s) {
count++;
s = s->pNext;
}
return count;
}
sym_t* sym_copylist(sym_t *s)
{
sym_t *head = NULL;
sym_t *prev = head;
while (s) {
sym_t *copy = sym_init_from_sym(s);
if (!copy) {
return NULL;
}
copy->pPrev = prev;
if (prev) {
prev->pNext = copy;
}
if (!head) {
head = copy;
}
prev = copy;
s = s->pNext;
}
return head;
}
void seq_enforce_uniqueness(seq_rule_t *G)
{
seq_rule_t *R = G;//->pNext;
seq_rule_t **ppr = &G->pNext;
while (R) {
if (R == G || R->count > 1) {
sym_t *s = R->ss;
sym_t **pp = &R->ss;
while (s) {
if (s->r && s->r->count == 1) {
sym_t *temp_itr;
*pp = s->r->ss;
temp_itr = s->r->ss;
while (temp_itr) {
temp_itr->offset += s->offset;
temp_itr = temp_itr->pNext;
}
s->r->ss->pPrev = s->pPrev;
if (s->pNext) {
s->pNext->pPrev = sym_tail(s->r->ss);
}
sym_tail(s->r->ss)->pNext = s->pNext;
s = s->r->ss;
continue;
}
pp = &s->pNext;
s = s->pNext;
}
ppr = &R->pNext;
} else {
*ppr = R->pNext;
}
R = R->pNext;
}
}
void seq_merge_small_rules(seq_rule_t *G, int thresh)
{
seq_rule_t *R = G;
while (R) {
if (sym_count(R->ss) <= thresh) {
//printf("count %d > %d for %d\n", sym_count(R->ss), thresh, R->c);
sym_t *s = R->ss;
sym_t **pp = &R->ss;
while (s) {
if (s->r) {
sym_t *copylist;
sym_t *copylist_itr;
s->r->count--;
copylist = sym_copylist(s->r->ss);
if (!copylist) {
return;
}
copylist_itr = copylist;
while (copylist_itr) {
copylist_itr->offset += s->offset;
copylist_itr = copylist_itr->pNext;
}
*pp = copylist;
copylist->pPrev = s->pPrev;
if (s->pNext) {
s->pNext->pPrev = sym_tail(copylist);
}
sym_tail(copylist)->pNext = s->pNext;
pp = &(sym_tail(copylist)->pNext);
s = sym_tail(copylist)->pNext;
continue;
}
pp = &s->pNext;
s = s->pNext;
}
}
R = R->pNext;
}
seq_enforce_uniqueness(G);
}
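/* Walks the top-level symbol list; whenever the current digram (pPrev,
   cursym) matches an existing rule or an earlier digram, it is replaced by
   a rule reference (creating a new rule in the latter case).  Rule
   uniqueness is then enforced and very small rules are merged back in. */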
void seq_extract_hierarchy(seq_rule_t *G)
{
int next_rule = -2;
sym_t *cursym = G->ss;
while (cursym) {
sym_t *m = NULL;
seq_rule_t *mr = NULL;
if (cursym->pPrev && cursym->pPrev->pPrev) {
mr = seq_match_digram(G->pNext, cursym->pPrev, cursym);
if (mr) {
if (cursym->pPrev->r) {
cursym->pPrev->r->count--;
}
if(cursym->r) {
cursym->r->count--;
}
mr->count++;
cursym->pPrev->r = mr;
cursym->pPrev->c = mr->c;
cursym->pPrev->pNext = cursym->pNext;
cursym->pNext->pPrev = cursym->pPrev;
cursym = cursym->pPrev;
}
m = sym_match_digram(G->ss, cursym->pPrev->pPrev, cursym->pPrev, cursym);
if (m) {
seq_rule_t *newr;
if (cursym->pPrev->r) {
cursym->pPrev->r->count--;
}
if (cursym->r) {
cursym->r->count--;
}
newr = seq_grammer_insert_new_rule(G, next_rule, m, m->pNext);
if (!newr) {
return;
}
m->r = newr;
m->c = next_rule;
m->pNext = m->pNext->pNext;
m->pNext->pPrev = m;
cursym->pPrev->r = newr;
cursym->pPrev->c = next_rule;
cursym->pPrev->pNext = cursym->pNext;
cursym->pNext->pPrev = cursym->pPrev;
cursym = cursym->pPrev;
next_rule--;
}
}
if (!m && !mr) {
cursym = cursym->pNext;
}
}
seq_enforce_uniqueness(G);
seq_merge_small_rules(G, 2);
// seq_enforce_uniqueness(G);
}
void seq_compute_lengths(seq_rule_t *G)
{
seq_rule_t *R = G->pNext;
sym_t *s;
int sum;
while (R) {
sum = 0;
s = R->ss;
while (s) {
if (s->c >= 0) {
if (s->offset + s->c > sum) {
sum = s->offset + s->c;
}
}
if (s->c < 0) {
if (s->offset + s->r->length > sum) {
sum = s->offset + s->r->length;
}
}
s = s->pNext;
}
R->length = sum;
R = R->pNext;
}
sum = 0;
s = G->ss;
while (s) {
if (s->c >= 0) {
if (s->offset + s->c > sum) {
sum = s->offset + s->c;
}
}
if (s->c < 0) {
if (s->offset + s->r->length > sum) {
sum = s->offset + s->r->length;
}
}
s = s->pNext;
}
G->length = sum;
}

@ -1,878 +0,0 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.globl _neon_x4
.align 4
_neon_x4:
.globl _neon_x8
.align 4
_neon_x8:
.globl _neon_x8_t
.align 4
_neon_x8_t:
#ifdef __APPLE__
.globl _leaf_ee_init
_leaf_ee_init:
#else
.globl leaf_ee_init
leaf_ee_init:
#endif
#lea L_sse_constants(%rip), %r9
movq 0xe0(%rdi), %r9
xorl %eax, %eax
# eax is loop counter (init to 0)
# rcx is loop max count
# rsi is 'in' base pointer
# rdx is 'out' base pointer
# r8 is offsets pointer
# r9 is constants pointer
# scratch: rax r11 r12
# .align 4, 0x90
# _leaf_ee + 9 needs 16 byte alignment
#ifdef __APPLE__
.globl _leaf_ee
_leaf_ee:
#else
.globl leaf_ee
leaf_ee:
#endif
movaps 32(%r9), %xmm0 #83.5
movaps (%r9), %xmm8 #83.5
LEAF_EE_1:
LEAF_EE_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #83.5
LEAF_EE_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #83.5
movaps %xmm7, %xmm6 #83.5
LEAF_EE_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
movaps %xmm12, %xmm11 #83.5
subps %xmm10, %xmm12 #83.5
addps %xmm10, %xmm11 #83.5
xorps %xmm8, %xmm12 #83.5
LEAF_EE_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #83.5
LEAF_EE_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
addps %xmm9, %xmm6 #83.5
subps %xmm9, %xmm7 #83.5
LEAF_EE_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm13 #83.5
movaps %xmm10, %xmm9 #83.5
LEAF_EE_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm3 #83.5
movaps %xmm6, %xmm5 #83.5
LEAF_EE_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm14 #83.5
movaps %xmm3, %xmm15 #83.5
shufps $177, %xmm12, %xmm12 #83.5
movaps %xmm7, %xmm4 #83.5
movslq (%r8, %rax, 4), %r11 #83.44
subps %xmm13, %xmm10 #83.5
subps %xmm14, %xmm3 #83.5
addps %xmm11, %xmm5 #83.5
subps %xmm11, %xmm6 #83.5
subps %xmm12, %xmm4 #83.5
addps %xmm12, %xmm7 #83.5
addps %xmm13, %xmm9 #83.5
addps %xmm14, %xmm15 #83.5
movaps 16(%r9), %xmm12 #83.5
movaps %xmm9, %xmm1 #83.5
movaps 16(%r9), %xmm11 #83.5
movaps %xmm5, %xmm2 #83.5
mulps %xmm10, %xmm12 #83.5
subps %xmm15, %xmm9 #83.5
addps %xmm15, %xmm1 #83.5
mulps %xmm3, %xmm11 #83.5
addps %xmm1, %xmm2 #83.5
subps %xmm1, %xmm5 #83.5
shufps $177, %xmm10, %xmm10 #83.5
xorps %xmm8, %xmm9 #83.5
shufps $177, %xmm3, %xmm3 #83.5
movaps %xmm6, %xmm1 #83.5
mulps %xmm0, %xmm10 #83.5
movaps %xmm4, %xmm13 #83.5
mulps %xmm0, %xmm3 #83.5
subps %xmm10, %xmm12 #83.5
addps %xmm3, %xmm11 #83.5
movaps %xmm12, %xmm3 #83.5
movaps %xmm7, %xmm14 #83.5
shufps $177, %xmm9, %xmm9 #83.5
subps %xmm11, %xmm12 #83.5
addps %xmm11, %xmm3 #83.5
subps %xmm9, %xmm1 #83.5
addps %xmm9, %xmm6 #83.5
addps %xmm3, %xmm4 #83.5
subps %xmm3, %xmm13 #83.5
xorps %xmm8, %xmm12 #83.5
movaps %xmm2, %xmm3 #83.5
shufps $177, %xmm12, %xmm12 #83.5
movaps %xmm6, %xmm9 #83.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movlhps %xmm4, %xmm3 #83.5
addq $4, %rax
shufps $238, %xmm4, %xmm2 #83.5
movaps %xmm1, %xmm4 #83.5
#movntdq %xmm3, (%rdx,%r11,4) #83.5
subps %xmm12, %xmm7 #83.5
addps %xmm12, %xmm14 #83.5
movlhps %xmm7, %xmm4 #83.5
shufps $238, %xmm7, %xmm1 #83.5
movaps %xmm5, %xmm7 #83.5
movlhps %xmm13, %xmm7 #83.5
movlhps %xmm14, %xmm9 #83.5
shufps $238, %xmm13, %xmm5 #83.5
shufps $238, %xmm14, %xmm6 #83.5
movaps %xmm3, (%rdx,%r11,4) #83.5
movaps %xmm4, 16(%rdx,%r11,4) #83.5
movaps %xmm7, 32(%rdx,%r11,4) #83.5
movaps %xmm9, 48(%rdx,%r11,4) #83.5
movaps %xmm2, (%rdx,%r12,4) #83.5
movaps %xmm1, 16(%rdx,%r12,4) #83.5
movaps %xmm5, 32(%rdx,%r12,4) #83.5
movaps %xmm6, 48(%rdx,%r12,4) #83.5
cmpq %rcx, %rax
jne LEAF_EE_1
# _leaf_oo + 4 needs to be 16 byte aligned
#ifdef __APPLE__
.globl _leaf_oo
_leaf_oo:
#else
.globl leaf_oo
leaf_oo:
#endif
movaps (%r9), %xmm5 #92.7
LEAF_OO_1:
LEAF_OO_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #93.5
movaps %xmm4, %xmm6 #93.5
LEAF_OO_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #93.5
LEAF_OO_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #93.5
addps %xmm7, %xmm6 #93.5
subps %xmm7, %xmm4 #93.5
LEAF_OO_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm8 #93.5
movaps %xmm10, %xmm9 #93.5
LEAF_OO_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm1 #93.5
movaps %xmm6, %xmm3 #93.5
LEAF_OO_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm11 #93.5
movaps %xmm1, %xmm2 #93.5
LEAF_OO_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm14 #93.5
movaps %xmm4, %xmm15 #93.5
LEAF_OO_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #93.5
movaps %xmm14, %xmm13 #93.5
movslq (%r8, %rax, 4), %r11 #83.44
subps %xmm8, %xmm10 #93.5
addps %xmm8, %xmm9 #93.5
addps %xmm11, %xmm2 #93.5
subps %xmm12, %xmm14 #93.5
subps %xmm11, %xmm1 #93.5
addps %xmm12, %xmm13 #93.5
addps %xmm9, %xmm3 #93.5
subps %xmm9, %xmm6 #93.5
xorps %xmm5, %xmm10 #93.5
xorps %xmm5, %xmm14 #93.5
shufps $177, %xmm10, %xmm10 #93.5
movaps %xmm2, %xmm9 #93.5
shufps $177, %xmm14, %xmm14 #93.5
movaps %xmm6, %xmm7 #93.5
movslq 8(%r8, %rax, 4), %r12 #83.59
addq $4, %rax #92.18
addps %xmm10, %xmm4 #93.5
addps %xmm13, %xmm9 #93.5
subps %xmm13, %xmm2 #93.5
subps %xmm10, %xmm15 #93.5
movaps %xmm1, %xmm13 #93.5
movaps %xmm2, %xmm8 #93.5
movlhps %xmm4, %xmm7 #93.5
subps %xmm14, %xmm13 #93.5
addps %xmm14, %xmm1 #93.5
shufps $238, %xmm4, %xmm6 #93.5
movaps %xmm3, %xmm14 #93.5
movaps %xmm9, %xmm4 #93.5
movlhps %xmm15, %xmm14 #93.5
movlhps %xmm13, %xmm4 #93.5
movlhps %xmm1, %xmm8 #93.5
shufps $238, %xmm15, %xmm3 #93.5
shufps $238, %xmm13, %xmm9 #93.5
shufps $238, %xmm1, %xmm2 #93.5
movaps %xmm14, (%rdx,%r11,4) #93.5
movaps %xmm7, 16(%rdx,%r11,4) #93.5
movaps %xmm4, 32(%rdx,%r11,4) #93.5
movaps %xmm8, 48(%rdx,%r11,4) #93.5
movaps %xmm3, (%rdx,%r12,4) #93.5
movaps %xmm6, 16(%rdx,%r12,4) #93.5
movaps %xmm9, 32(%rdx,%r12,4) #93.5
movaps %xmm2, 48(%rdx,%r12,4) #93.5
cmpq %rcx, %rax
jne LEAF_OO_1 # Prob 95% #92.14
#ifdef __APPLE__
.globl _leaf_eo
_leaf_eo:
#else
.globl leaf_eo
leaf_eo:
#endif
LEAF_EO_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #88.5
LEAF_EO_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #88.5
movaps %xmm9, %xmm11 #88.5
LEAF_EO_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm5 #88.5
movaps %xmm7, %xmm6 #88.5
LEAF_EO_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
subps %xmm5, %xmm7 #88.5
addps %xmm4, %xmm11 #88.5
subps %xmm4, %xmm9 #88.5
addps %xmm5, %xmm6 #88.5
movaps (%r9), %xmm3 #88.5
movaps %xmm11, %xmm10 #88.5
xorps %xmm3, %xmm7 #88.5
movaps %xmm9, %xmm8 #88.5
shufps $177, %xmm7, %xmm7 #88.5
addps %xmm6, %xmm10 #88.5
subps %xmm6, %xmm11 #88.5
subps %xmm7, %xmm8 #88.5
addps %xmm7, %xmm9 #88.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movaps %xmm10, %xmm2 #88.5
movslq (%r8, %rax, 4), %r11 #83.44
movaps %xmm11, %xmm1 #88.5
shufps $238, %xmm8, %xmm10 #88.5
shufps $238, %xmm9, %xmm11 #88.5
movaps %xmm10, (%rdx,%r12,4) #88.5
movaps %xmm11, 16(%rdx,%r12,4) #88.5
LEAF_EO_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm15 #88.5
LEAF_EO_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #88.5
movaps %xmm15, %xmm14 #88.5
LEAF_EO_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
addps %xmm12, %xmm14 #88.5
subps %xmm12, %xmm15 #88.5
LEAF_EO_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm13 #88.5
movaps %xmm4, %xmm5 #88.5
movaps %xmm14, %xmm7 #88.5
addps %xmm13, %xmm5 #88.5
subps %xmm13, %xmm4 #88.5
movlhps %xmm8, %xmm2 #88.5
movaps %xmm5, %xmm8 #88.5
movlhps %xmm15, %xmm7 #88.5
xorps %xmm3, %xmm15 #88.5
movaps %xmm5, %xmm6 #88.5
subps %xmm14, %xmm5 #88.5
addps %xmm14, %xmm6 #88.5
movlhps %xmm9, %xmm1 #88.5
movaps %xmm4, %xmm14 #88.5
movlhps %xmm4, %xmm8 #88.5
movaps %xmm1, %xmm12 #88.5
shufps $177, %xmm15, %xmm15 #88.5
movaps 0x30(%r9), %xmm11 #88.5
addq $4, %rax #90.5
subps %xmm15, %xmm14 #88.5
mulps %xmm7, %xmm11 #88.5
addps %xmm15, %xmm4 #88.5
movaps 0x30(%r9), %xmm9 #88.5
movaps 0x40(%r9), %xmm15 #88.5
shufps $177, %xmm7, %xmm7 #88.5
mulps %xmm8, %xmm9 #88.5
mulps %xmm15, %xmm7 #88.5
shufps $177, %xmm8, %xmm8 #88.5
subps %xmm7, %xmm11 #88.5
mulps %xmm15, %xmm8 #88.5
movaps %xmm11, %xmm10 #88.5
addps %xmm8, %xmm9 #88.5
shufps $238, %xmm14, %xmm6 #88.5
subps %xmm9, %xmm11 #88.5
addps %xmm9, %xmm10 #88.5
xorps %xmm3, %xmm11 #88.5
movaps %xmm2, %xmm3 #88.5
shufps $177, %xmm11, %xmm11 #88.5
subps %xmm10, %xmm3 #88.5
addps %xmm10, %xmm2 #88.5
addps %xmm11, %xmm12 #88.5
subps %xmm11, %xmm1 #88.5
shufps $238, %xmm4, %xmm5 #88.5
movaps %xmm5, 48(%rdx,%r12,4) #88.5
movaps %xmm6, 32(%rdx,%r12,4) #88.5
movaps %xmm2, (%rdx,%r11,4) #88.5
movaps %xmm1, 16(%rdx,%r11,4) #88.5
movaps %xmm3, 32(%rdx,%r11,4) #88.5
movaps %xmm12, 48(%rdx,%r11,4) #88.5
#ifdef __APPLE__
.globl _leaf_oe
_leaf_oe:
#else
.globl leaf_oe
leaf_oe:
#endif
movaps (%r9), %xmm0 #59.5
#movaps 0x20(%r9), %xmm1 #59.5
LEAF_OE_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm6 #70.5
LEAF_OE_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm8 #70.5
movaps %xmm6, %xmm10 #70.5
shufps $228, %xmm8, %xmm10 #70.5
movaps %xmm10, %xmm9 #70.5
shufps $228, %xmm6, %xmm8 #70.5
LEAF_OE_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #70.5
LEAF_OE_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
movaps %xmm12, %xmm14 #70.5
movslq (%r8, %rax, 4), %r11 #83.44
addps %xmm8, %xmm9 #70.5
subps %xmm8, %xmm10 #70.5
addps %xmm7, %xmm14 #70.5
subps %xmm7, %xmm12 #70.5
movaps %xmm9, %xmm4 #70.5
movaps %xmm14, %xmm13 #70.5
shufps $238, %xmm10, %xmm4 #70.5
xorps %xmm0, %xmm10 #70.5
shufps $177, %xmm10, %xmm10 #70.5
movaps %xmm12, %xmm11 #70.5
movaps %xmm14, %xmm5 #70.5
addps %xmm9, %xmm13 #70.5
subps %xmm10, %xmm11 #70.5
subps %xmm9, %xmm14 #70.5
shufps $238, %xmm12, %xmm5 #70.5
addps %xmm10, %xmm12 #70.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movlhps %xmm11, %xmm13 #70.5
movaps %xmm13, (%rdx,%r11,4) #70.5
movaps 0x30(%r9), %xmm13 #70.5
movlhps %xmm12, %xmm14 #70.5
movaps 0x40(%r9), %xmm12 #70.5
mulps %xmm5, %xmm13 #70.5
shufps $177, %xmm5, %xmm5 #70.5
mulps %xmm12, %xmm5 #70.5
movaps %xmm14, 16(%rdx,%r11,4) #70.5
subps %xmm5, %xmm13 #70.5
movaps 0x30(%r9), %xmm5 #70.5
mulps %xmm4, %xmm5 #70.5
shufps $177, %xmm4, %xmm4 #70.5
mulps %xmm12, %xmm4 #70.5
LEAF_OE_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #70.5
addps %xmm4, %xmm5 #70.5
LEAF_OE_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
movaps %xmm9, %xmm3 #70.5
LEAF_OE_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm2 #70.5
movaps %xmm7, %xmm6 #70.5
LEAF_OE_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm15 #70.5
movaps %xmm13, %xmm4 #70.5
subps %xmm2, %xmm7 #70.5
addps %xmm15, %xmm3 #70.5
subps %xmm15, %xmm9 #70.5
addps %xmm2, %xmm6 #70.5
subps %xmm5, %xmm13 #70.5
addps %xmm5, %xmm4 #70.5
xorps %xmm0, %xmm7 #70.5
addq $4, %rax #72.5
movaps %xmm3, %xmm2 #70.5
shufps $177, %xmm7, %xmm7 #70.5
movaps %xmm9, %xmm8 #70.5
xorps %xmm0, %xmm13 #70.5
addps %xmm6, %xmm2 #70.5
subps %xmm7, %xmm8 #70.5
subps %xmm6, %xmm3 #70.5
addps %xmm7, %xmm9 #70.5
movaps %xmm2, %xmm10 #70.5
movaps %xmm3, %xmm11 #70.5
shufps $238, %xmm8, %xmm2 #70.5
shufps $238, %xmm9, %xmm3 #70.5
movaps %xmm2, %xmm14 #70.5
shufps $177, %xmm13, %xmm13 #70.5
subps %xmm4, %xmm14 #70.5
addps %xmm4, %xmm2 #70.5
movaps %xmm3, %xmm4 #70.5
subps %xmm13, %xmm3 #70.5
addps %xmm13, %xmm4 #70.5
movlhps %xmm8, %xmm10 #70.5
movlhps %xmm9, %xmm11 #70.5
movaps %xmm10, 32(%rdx,%r11,4) #70.5
movaps %xmm11, 48(%rdx,%r11,4) #70.5
movaps %xmm2, (%rdx,%r12,4) #70.5
movaps %xmm3, 16(%rdx,%r12,4) #70.5
movaps %xmm14, 32(%rdx,%r12,4) #70.5
movaps %xmm4, 48(%rdx,%r12,4) #70.5
#ifdef __APPLE__
.globl _leaf_end
_leaf_end:
#else
.globl leaf_end
leaf_end:
#endif
#ifdef __APPLE__
.globl _x_init
_x_init:
#else
.globl x_init
x_init:
#endif
#movaps L_sse_constants(%rip), %xmm3 #34.3
movaps (%r9), %xmm3 #34.3
movq 0x20(%rdi),%r8
#ifdef __APPLE__
.globl _x4
_x4:
#else
.globl x4
x4:
#endif
movaps 64(%rdx), %xmm0 #34.3
movaps 96(%rdx), %xmm1 #34.3
movaps (%rdx), %xmm7 #34.3
movaps (%r8), %xmm4 #const
movaps %xmm7, %xmm9 #34.3
movaps %xmm4, %xmm6 #34.3
movaps 16(%r8), %xmm2 #const
mulps %xmm0, %xmm6 #34.3
mulps %xmm1, %xmm4 #34.3
shufps $177, %xmm0, %xmm0 #34.3
shufps $177, %xmm1, %xmm1 #34.3
mulps %xmm2, %xmm0 #34.3
mulps %xmm1, %xmm2 #34.3
subps %xmm0, %xmm6 #34.3
addps %xmm2, %xmm4 #34.3
movaps %xmm6, %xmm5 #34.3
subps %xmm4, %xmm6 #34.3
addps %xmm4, %xmm5 #34.3
movaps 32(%rdx), %xmm8 #34.3
xorps %xmm3, %xmm6 #34.3
shufps $177, %xmm6, %xmm6 #34.3
movaps %xmm8, %xmm10 #34.3
movaps 112(%rdx), %xmm12 #34.3
subps %xmm5, %xmm9 #34.3
addps %xmm5, %xmm7 #34.3
addps %xmm6, %xmm10 #34.3
subps %xmm6, %xmm8 #34.3
movaps %xmm7, (%rdx) #34.3
movaps %xmm8, 32(%rdx) #34.3
movaps %xmm9, 64(%rdx) #34.3
movaps %xmm10, 96(%rdx) #34.3
movaps 32(%r8), %xmm14 #const #34.3
movaps 80(%rdx), %xmm11 #34.3
movaps %xmm14, %xmm0 #34.3
movaps 48(%r8), %xmm13 #const #34.3
mulps %xmm11, %xmm0 #34.3
mulps %xmm12, %xmm14 #34.3
shufps $177, %xmm11, %xmm11 #34.3
shufps $177, %xmm12, %xmm12 #34.3
mulps %xmm13, %xmm11 #34.3
mulps %xmm12, %xmm13 #34.3
subps %xmm11, %xmm0 #34.3
addps %xmm13, %xmm14 #34.3
movaps %xmm0, %xmm15 #34.3
subps %xmm14, %xmm0 #34.3
addps %xmm14, %xmm15 #34.3
xorps %xmm3, %xmm0 #34.3
movaps 16(%rdx), %xmm1 #34.3
movaps 48(%rdx), %xmm2 #34.3
movaps %xmm1, %xmm4 #34.3
shufps $177, %xmm0, %xmm0 #34.3
movaps %xmm2, %xmm5 #34.3
addps %xmm15, %xmm1 #34.3
subps %xmm0, %xmm2 #34.3
subps %xmm15, %xmm4 #34.3
addps %xmm0, %xmm5 #34.3
movaps %xmm1, 16(%rdx) #34.3
movaps %xmm2, 48(%rdx) #34.3
movaps %xmm4, 80(%rdx) #34.3
movaps %xmm5, 112(%rdx) #34.3
ret
# _x8_soft + 5 needs to be 16 byte aligned
#ifdef __APPLE__
.globl _x8_soft
_x8_soft:
#else
.globl x8_soft
x8_soft:
#endif
xorl %eax, %eax
movq %rdx, %rbx
movq %r8, %rsi
leaq (%rdx,%rcx,4), %r9
leaq (%r9,%rcx,4), %r10
leaq (%r10,%rcx,4), %r11
leaq (%r11,%rcx,4), %r12
leaq (%r12,%rcx,4), %r13
leaq (%r13,%rcx,4), %r14
leaq (%r14,%rcx,4), %r15
X8_soft_loop:
movaps (%rsi), %xmm9
movaps (%r10,%rax,4), %xmm6
movaps %xmm9, %xmm11
movaps (%r11,%rax,4), %xmm7
movaps 16(%rsi), %xmm8
mulps %xmm6, %xmm11
mulps %xmm7, %xmm9
shufps $177, %xmm6, %xmm6
mulps %xmm8, %xmm6
shufps $177, %xmm7, %xmm7
subps %xmm6, %xmm11
mulps %xmm7, %xmm8
movaps %xmm11, %xmm10
addps %xmm8, %xmm9
movaps 32(%rsi), %xmm15
addps %xmm9, %xmm10
subps %xmm9, %xmm11
movaps (%rbx,%rax,4), %xmm5
movaps %xmm15, %xmm6
movaps (%r12,%rax,4), %xmm12
movaps %xmm5, %xmm2
movaps (%r14,%rax,4), %xmm13
xorps %xmm3, %xmm11 #const
movaps 48(%rsi), %xmm14
subps %xmm10, %xmm2
mulps %xmm12, %xmm6
addps %xmm10, %xmm5
mulps %xmm13, %xmm15
movaps 64(%rsi), %xmm10
movaps %xmm5, %xmm0
shufps $177, %xmm12, %xmm12
shufps $177, %xmm13, %xmm13
mulps %xmm14, %xmm12
mulps %xmm13, %xmm14
subps %xmm12, %xmm6
addps %xmm14, %xmm15
movaps (%r13,%rax,4), %xmm7
movaps %xmm10, %xmm13
movaps (%r15,%rax,4), %xmm8
movaps %xmm6, %xmm12
movaps 80(%rsi), %xmm9
addq $96, %rsi
mulps %xmm7, %xmm13
subps %xmm15, %xmm6
addps %xmm15, %xmm12
mulps %xmm8, %xmm10
subps %xmm12, %xmm0
addps %xmm12, %xmm5
shufps $177, %xmm7, %xmm7
xorps %xmm3, %xmm6 #const
shufps $177, %xmm8, %xmm8
movaps %xmm2, %xmm12
mulps %xmm9, %xmm7
mulps %xmm8, %xmm9
subps %xmm7, %xmm13
addps %xmm9, %xmm10
movaps (%r9,%rax,4), %xmm4
shufps $177, %xmm11, %xmm11
movaps %xmm4, %xmm1
shufps $177, %xmm6, %xmm6
addps %xmm11, %xmm1
subps %xmm11, %xmm4
addps %xmm6, %xmm12
subps %xmm6, %xmm2
movaps %xmm13, %xmm11
movaps %xmm4, %xmm14
movaps %xmm1, %xmm6
subps %xmm10, %xmm13
addps %xmm10, %xmm11
xorps %xmm3, %xmm13 #const
addps %xmm11, %xmm4
subps %xmm11, %xmm14
shufps $177, %xmm13, %xmm13
movaps %xmm5, (%rbx,%rax,4)
movaps %xmm4, (%r9,%rax,4)
movaps %xmm2, (%r10,%rax,4)
subps %xmm13, %xmm1
addps %xmm13, %xmm6
movaps %xmm1, (%r11,%rax,4)
movaps %xmm0, (%r12,%rax,4)
movaps %xmm14, (%r13,%rax,4)
movaps %xmm12, (%r14,%rax,4)
movaps %xmm6, (%r15,%rax,4)
addq $4, %rax
cmpq %rcx, %rax
jne X8_soft_loop
ret
#ifdef __APPLE__
.globl _x8_hard
_x8_hard:
#else
.globl x8_hard
x8_hard:
#endif
movaps (%r9), %xmm5
X8_loop:
movaps (%r8), %xmm9
X8_const_2:
movaps 0xFECA(%rdx,%rax,4), %xmm6
movaps %xmm9, %xmm11
X8_const_3:
movaps 0xFECA(%rdx,%rax,4), %xmm7
movaps 16(%r8), %xmm8
mulps %xmm6, %xmm11
mulps %xmm7, %xmm9
shufps $177, %xmm6, %xmm6
mulps %xmm8, %xmm6
shufps $177, %xmm7, %xmm7
subps %xmm6, %xmm11
mulps %xmm7, %xmm8
movaps %xmm11, %xmm10
addps %xmm8, %xmm9
movaps 32(%r8), %xmm15
addps %xmm9, %xmm10
subps %xmm9, %xmm11
X8_const_0:
movaps 0xFECA(%rdx,%rax,4), %xmm3
movaps %xmm15, %xmm6
X8_const_4:
movaps 0xFECA(%rdx,%rax,4), %xmm12
movaps %xmm3, %xmm2
X8_const_6:
movaps 0xFECA(%rdx,%rax,4), %xmm13
xorps %xmm5, %xmm11
movaps 48(%r8), %xmm14
subps %xmm10, %xmm2
mulps %xmm12, %xmm6
addps %xmm10, %xmm3
mulps %xmm13, %xmm15
movaps 64(%r8), %xmm10
movaps %xmm3, %xmm0
shufps $177, %xmm12, %xmm12
shufps $177, %xmm13, %xmm13
mulps %xmm14, %xmm12
mulps %xmm13, %xmm14
subps %xmm12, %xmm6
addps %xmm14, %xmm15
X8_const_5:
movaps 0xFECA(%rdx,%rax,4), %xmm7
movaps %xmm10, %xmm13
X8_const_7:
movaps 0xFECA(%rdx,%rax,4), %xmm8
movaps %xmm6, %xmm12
movaps 80(%r8), %xmm9
addq $96, %r8
mulps %xmm7, %xmm13
subps %xmm15, %xmm6
addps %xmm15, %xmm12
mulps %xmm8, %xmm10
subps %xmm12, %xmm0
addps %xmm12, %xmm3
shufps $177, %xmm7, %xmm7
xorps %xmm5, %xmm6
shufps $177, %xmm8, %xmm8
movaps %xmm2, %xmm12
mulps %xmm9, %xmm7
mulps %xmm8, %xmm9
subps %xmm7, %xmm13
addps %xmm9, %xmm10
X8_const_1:
movaps 0xFECA(%rdx,%rax,4), %xmm4
shufps $177, %xmm11, %xmm11
movaps %xmm4, %xmm1
shufps $177, %xmm6, %xmm6
addps %xmm11, %xmm1
subps %xmm11, %xmm4
addps %xmm6, %xmm12
subps %xmm6, %xmm2
movaps %xmm13, %xmm11
movaps %xmm4, %xmm14
movaps %xmm1, %xmm6
subps %xmm10, %xmm13
addps %xmm10, %xmm11
xorps %xmm5, %xmm13
addps %xmm11, %xmm4
subps %xmm11, %xmm14
shufps $177, %xmm13, %xmm13
X8_const1_0:
movaps %xmm3, 0xFECA(%rdx,%rax,4)
X8_const1_1:
movaps %xmm4, 0xFECA(%rdx,%rax,4)
X8_const1_2:
movaps %xmm2, 0xFECA(%rdx,%rax,4)
subps %xmm13, %xmm1
addps %xmm13, %xmm6
X8_const1_3:
movaps %xmm1, 0xFECA(%rdx,%rax,4)
X8_const1_4:
movaps %xmm0, 0xFECA(%rdx,%rax,4)
X8_const1_5:
movaps %xmm14, 0xFECA(%rdx,%rax,4)
X8_const1_6:
movaps %xmm12, 0xFECA(%rdx,%rax,4)
X8_const1_7:
movaps %xmm6, 0xFECA(%rdx,%rax,4)
addq $4, %rax
cmpq %rcx, %rax
jne X8_loop
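# The 0xFECA displacements above appear to be placeholder bytes: FFTS seems to
# copy these template routines when a plan is generated and patch the real
# offsets into the copy. The sse_leaf_*_offsets tables below record the byte
# position of each such placeholder within the corresponding leaf routine
# (+0x4 or +0x5 depending on the instruction encoding), which is presumably
# how the generator knows which bytes to rewrite.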
#ifdef __APPLE__
.globl _sse_leaf_ee_offsets
.globl _sse_leaf_oo_offsets
.globl _sse_leaf_eo_offsets
.globl _sse_leaf_oe_offsets
.align 4
_sse_leaf_ee_offsets:
.long LEAF_EE_const_0-_leaf_ee+0x4
.long LEAF_EE_const_1-_leaf_ee+0x5
.long LEAF_EE_const_2-_leaf_ee+0x5
.long LEAF_EE_const_3-_leaf_ee+0x5
.long LEAF_EE_const_4-_leaf_ee+0x5
.long LEAF_EE_const_5-_leaf_ee+0x5
.long LEAF_EE_const_6-_leaf_ee+0x4
.long LEAF_EE_const_7-_leaf_ee+0x5
_sse_leaf_oo_offsets:
.long LEAF_OO_const_0-_leaf_oo+0x4
.long LEAF_OO_const_1-_leaf_oo+0x4
.long LEAF_OO_const_2-_leaf_oo+0x5
.long LEAF_OO_const_3-_leaf_oo+0x5
.long LEAF_OO_const_4-_leaf_oo+0x4
.long LEAF_OO_const_5-_leaf_oo+0x5
.long LEAF_OO_const_6-_leaf_oo+0x5
.long LEAF_OO_const_7-_leaf_oo+0x5
_sse_leaf_eo_offsets:
.long LEAF_EO_const_0-_leaf_eo+0x5
.long LEAF_EO_const_1-_leaf_eo+0x4
.long LEAF_EO_const_2-_leaf_eo+0x4
.long LEAF_EO_const_3-_leaf_eo+0x4
.long LEAF_EO_const_4-_leaf_eo+0x5
.long LEAF_EO_const_5-_leaf_eo+0x5
.long LEAF_EO_const_6-_leaf_eo+0x4
.long LEAF_EO_const_7-_leaf_eo+0x5
_sse_leaf_oe_offsets:
.long LEAF_OE_const_0-_leaf_oe+0x5
.long LEAF_OE_const_1-_leaf_oe+0x4
.long LEAF_OE_const_2-_leaf_oe+0x4
.long LEAF_OE_const_3-_leaf_oe+0x5
.long LEAF_OE_const_4-_leaf_oe+0x5
.long LEAF_OE_const_5-_leaf_oe+0x5
.long LEAF_OE_const_6-_leaf_oe+0x4
.long LEAF_OE_const_7-_leaf_oe+0x4
#else
.globl sse_leaf_ee_offsets
.globl sse_leaf_oo_offsets
.globl sse_leaf_eo_offsets
.globl sse_leaf_oe_offsets
.align 4
sse_leaf_ee_offsets:
.long LEAF_EE_const_0-leaf_ee+0x4
.long LEAF_EE_const_1-leaf_ee+0x5
.long LEAF_EE_const_2-leaf_ee+0x5
.long LEAF_EE_const_3-leaf_ee+0x5
.long LEAF_EE_const_4-leaf_ee+0x5
.long LEAF_EE_const_5-leaf_ee+0x5
.long LEAF_EE_const_6-leaf_ee+0x4
.long LEAF_EE_const_7-leaf_ee+0x5
sse_leaf_oo_offsets:
.long LEAF_OO_const_0-leaf_oo+0x4
.long LEAF_OO_const_1-leaf_oo+0x4
.long LEAF_OO_const_2-leaf_oo+0x5
.long LEAF_OO_const_3-leaf_oo+0x5
.long LEAF_OO_const_4-leaf_oo+0x4
.long LEAF_OO_const_5-leaf_oo+0x5
.long LEAF_OO_const_6-leaf_oo+0x5
.long LEAF_OO_const_7-leaf_oo+0x5
sse_leaf_eo_offsets:
.long LEAF_EO_const_0-leaf_eo+0x5
.long LEAF_EO_const_1-leaf_eo+0x4
.long LEAF_EO_const_2-leaf_eo+0x4
.long LEAF_EO_const_3-leaf_eo+0x4
.long LEAF_EO_const_4-leaf_eo+0x5
.long LEAF_EO_const_5-leaf_eo+0x5
.long LEAF_EO_const_6-leaf_eo+0x4
.long LEAF_EO_const_7-leaf_eo+0x5
sse_leaf_oe_offsets:
.long LEAF_OE_const_0-leaf_oe+0x5
.long LEAF_OE_const_1-leaf_oe+0x4
.long LEAF_OE_const_2-leaf_oe+0x4
.long LEAF_OE_const_3-leaf_oe+0x5
.long LEAF_OE_const_4-leaf_oe+0x5
.long LEAF_OE_const_5-leaf_oe+0x5
.long LEAF_OE_const_6-leaf_oe+0x4
.long LEAF_OE_const_7-leaf_oe+0x4
#endif
#ifdef __APPLE__
.data
#else
.section .data
#endif
.p2align 4
#ifdef __APPLE__
.globl _sse_constants
_sse_constants:
#else
.globl sse_constants
sse_constants:
#endif
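# Layout of the constant rows below (one 16-byte row per .long quartet), for
# reference: the first row alternates 0x00000000 and 0x80000000, a sign-bit
# mask apparently applied with xorps to flip every other lane; 0x3f3504f3 and
# 0xbf3504f3 encode +/- sqrt(2)/2 (~0.70710678f), and 0x3f800000 is 1.0f.
# sse_constants_inv mirrors this with the sign masks on the opposite lanes.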
.long 0x00000000,0x80000000,0x00000000,0x80000000
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
.long 0xbf3504f3,0x3f3504f3,0xbf3504f3,0x3f3504f3
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
.long 0x00000000,0x00000000,0xbf3504f3,0x3f3504f3
#ifdef __APPLE__
.globl _sse_constants_inv
_sse_constants_inv:
#else
.globl sse_constants_inv
sse_constants_inv:
#endif
.long 0x80000000,0x00000000,0x80000000,0x00000000
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
.long 0x3f3504f3,0xbf3504f3,0x3f3504f3,0xbf3504f3
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
.long 0x00000000,0x00000000,0x3f3504f3,0xbf3504f3

@ -1,10 +1,10 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
@ -31,19 +31,15 @@
*/
#ifndef FFTS_TYPES_H
#define FFTS_TYPES_H
#ifndef __TYPES_H__
#define __TYPES_H__
#define __INLINE static inline __attribute__((always_inline))
#if defined(complex)
typedef complex float cdata_t;
#else
typedef float cdata_t[2];
#endif
typedef float data_t;
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
#pragma once
#endif
/* Define complex number as two element array */
typedef float ffts_cpx_32f[2];
typedef double ffts_cpx_64f[2];
#endif /* FFTS_TYPES_H */

@ -43,3 +43,4 @@ void vfp_x8();
void vfp_end();
#endif
// vim: set autoindent noexpandtab tabstop=3 shiftwidth=3:

@ -30,7 +30,7 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
.fpu vfp
@ assumes r0 = out
@ r1 = in ?
@ -41,7 +41,7 @@
@ r2 = const pointer
@ & lr = temps
.align 4
.align 4
#ifdef __APPLE__
.globl _vfp_e
_vfp_e:
@ -50,44 +50,44 @@ _vfp_e:
vfp_e:
#endif
_vfp_e_loop:
vldr s15, [r2, #8]
vldr s2, [r3] @ x0
vldr s0, [r3, #4]
vldr s4, [r4] @ x1
vldr s11, [r2]
vldr s10, [r7] @ x4
vldr s3, [r7, #4]
vldr s8, [r8] @ x5
vldr s1, [r8, #4]
vldr s14, [r9] @ x6
vldr s9, [r9, #4]
vldr s6, [r10] @ x7
vldr s12, [r10, #4]
vldr s15, [r2, #8]
vldr s2, [r3] @ x0
vldr s0, [r3, #4]
vldr s4, [r4] @ x1
vldr s11, [r2]
vldr s10, [r7] @ x4
vldr s3, [r7, #4]
vldr s8, [r8] @ x5
vldr s1, [r8, #4]
vldr s14, [r9] @ x6
vldr s9, [r9, #4]
vldr s6, [r10] @ x7
vldr s12, [r10, #4]
vsub.f32 s18, s3, s1
vsub.f32 s7, s10, s8
vsub.f32 s5, s14, s6
vadd.f32 s6, s14, s6
vldr s24, [r5, #4]
vldr s24, [r5, #4]
vsub.f32 s14, s9, s12
vldr s22, [r6, #4]
vldr s22, [r6, #4]
vadd.f32 s8, s10, s8
vldr s28, [r6] @ x3
vldr s17, [r5] @ x2
vldr s28, [r6] @ x3
vldr s17, [r5] @ x2
vadd.f32 s10, s9, s12
vmul.f32 s13, s18, s15
vmul.f32 s9, s7, s11
vmul.f32 s16, s5, s11
vmul.f32 s18, s18, s11
vmul.f32 s30, s14, s11
vldr s11, [r4, #4]
add r3, r3, #8
add r4, r4, #8
add r5, r5, #8
add r6, r6, #8
add r7, r7, #8
add r8, r8, #8
add r9, r9, #8
add r10, r10, #8
vldr s11, [r4, #4]
add r3, r3, #8
add r4, r4, #8
add r5, r5, #8
add r6, r6, #8
add r7, r7, #8
add r8, r8, #8
add r9, r9, #8
add r10, r10, #8
vmul.f32 s12, s5, s15
vmul.f32 s20, s14, s15
vadd.f32 s5, s2, s4
@ -111,7 +111,7 @@ _vfp_e_loop:
vsub.f32 s12, s30, s12
vadd.f32 s20, s3, s10
vsub.f32 s15, s3, s10
vsub.f32 s3, s26, s1
vsub.f32 s3, s26, s1
vadd.f32 s18, s9, s13
vadd.f32 s10, s14, s4
vadd.f32 s6, s2, s7 @
@ -120,15 +120,15 @@ _vfp_e_loop:
vsub.f32 s4, s14, s4
vsub.f32 s8, s22, s16 @
vadd.f32 s1, s28, s12
ldr lr, [r12], #4
add lr, r0, lr, lsl #2
subs r11, r11, #1
vstr s18, [lr]
ldr lr, [r12], #4
add lr, r0, lr, lsl #2
subs r11, r11, #1
vstr s18, [lr]
vsub.f32 s2, s28, s12
vadd.f32 s12, s22, s16 @
vsub.f32 s16, s3, s24 @
vsub.f32 s13, s9, s13
vstr s26, [lr, #4]
vstr s26, [lr, #4]
vadd.f32 s28, s5, s15 @
vsub.f32 s7, s5, s15 @
vadd.f32 s14, s6, s10
@ -136,26 +136,26 @@ subs r11, r11, #1
vadd.f32 s9, s0, s2 @
vsub.f32 s2, s0, s2 @
vsub.f32 s11, s11, s20
vstr s28, [lr, #16]
vstr s28, [lr, #16]
vadd.f32 s3, s3, s24 @
vstr s16, [lr, #20]
vstr s16, [lr, #20]
vsub.f32 s6, s6, s10
vstr s13, [lr, #32]
vstr s13, [lr, #32]
vsub.f32 s13, s12, s4 @
vsub.f32 s8, s8, s1
vadd.f32 s0, s12, s4 @
vstr s11, [lr, #36]
vstr s7, [lr, #48]
vstr s3, [lr, #52]
vstr s14, [lr, #8]
vstr s5, [lr, #12]
vstr s9, [lr, #24]
vstr s13, [lr, #28]
vstr s6, [lr, #40]
vstr s8, [lr, #44]
vstr s2, [lr, #56]
vstr s0, [lr, #60]
bne _vfp_e_loop
vstr s11, [lr, #36]
vstr s7, [lr, #48]
vstr s3, [lr, #52]
vstr s14, [lr, #8]
vstr s5, [lr, #12]
vstr s9, [lr, #24]
vstr s13, [lr, #28]
vstr s6, [lr, #40]
vstr s8, [lr, #44]
vstr s2, [lr, #56]
vstr s0, [lr, #60]
bne _vfp_e_loop
@ assumes r0 = out
@ r1 = in ?
@ -461,7 +461,6 @@ _vfp_x8_loop:
bne _vfp_x8_loop
bx lr
.align 4
#ifdef __APPLE__
.globl _vfp_end

@ -1,7 +1,7 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# Makefile.in generated by automake 1.14 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@ -15,23 +15,51 @@
@SET_MAKE@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
*) echo "am__make_running_with_option: internal error: invalid" \
"target option '$${target_option-}' specified" >&2; \
exit 1;; \
esac; \
has_opt=no; \
sane_makeflags=$$MAKEFLAGS; \
if $(am__is_gnu_make); then \
sane_makeflags=$$MFLAGS; \
else \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
bs=\\; \
sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
| sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
esac; \
fi; \
skip_next=no; \
strip_trailopt () \
{ \
flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
}; \
for flg in $$sane_makeflags; do \
test $$skip_next = yes && { skip_next=no; continue; }; \
case $$flg in \
*=*|--*) continue;; \
-*I) strip_trailopt 'I'; skip_next=yes;; \
-*I?*) strip_trailopt 'I';; \
-*O) strip_trailopt 'O'; skip_next=yes;; \
-*O?*) strip_trailopt 'O';; \
-*l) strip_trailopt 'l'; skip_next=yes;; \
-*l?*) strip_trailopt 'l';; \
-[dEDm]) skip_next=yes;; \
-[JT]) skip_next=yes;; \
esac; \
case $$flg in \
*$$target_option*) has_opt=yes; break;; \
esac; \
test $$am__dry = yes; \
}
done; \
test $$has_opt = yes
am__make_dryrun = (target_option=n; $(am__make_running_with_option))
am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@ -52,7 +80,7 @@ build_triplet = @build@
host_triplet = @host@
noinst_PROGRAMS = test$(EXEEXT)
subdir = tests
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
$(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
@ -73,19 +101,44 @@ PROGRAMS = $(noinst_PROGRAMS)
am_test_OBJECTS = test.$(OBJEXT)
test_OBJECTS = $(am_test_OBJECTS)
test_DEPENDENCIES = $(top_builddir)/src/libffts.la
AM_V_lt = $(am__v_lt_@AM_V@)
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
am__v_lt_0 = --silent
am__v_lt_1 =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
am__v_P_1 = :
AM_V_GEN = $(am__v_GEN_@AM_V@)
am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
am__v_GEN_0 = @echo " GEN " $@;
am__v_GEN_1 =
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
$(AM_CFLAGS) $(CFLAGS)
AM_V_CC = $(am__v_CC_@AM_V@)
am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
am__v_CC_0 = @echo " CC " $@;
am__v_CC_1 =
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(AM_LDFLAGS) $(LDFLAGS) -o $@
AM_V_CCLD = $(am__v_CCLD_@AM_V@)
am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
am__v_CCLD_0 = @echo " CCLD " $@;
am__v_CCLD_1 =
SOURCES = $(test_SOURCES)
DIST_SOURCES = $(test_SOURCES)
am__can_run_installinfo = \
@ -93,11 +146,29 @@ am__can_run_installinfo = \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
# *not* preserved.
am__uniquify_input = $(AWK) '\
BEGIN { nonempty = 0; } \
{ items[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in items) print i; }; } \
'
# Make sure the list of sources is unique. This is necessary because,
# e.g., the same source file might be shared among _SOURCES variables
# for different programs/libraries.
am__define_uniq_tagged_files = \
list='$(am__tagged_files)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
@ -271,9 +342,10 @@ clean-noinstPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
test$(EXEEXT): $(test_OBJECTS) $(test_DEPENDENCIES) $(EXTRA_test_DEPENDENCIES)
@rm -f test$(EXEEXT)
$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS)
$(AM_V_CCLD)$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@ -284,25 +356,25 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
@ -310,26 +382,15 @@ mostlyclean-libtool:
clean-libtool:
-rm -rf .libs _libs
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
tags: tags-am
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
@ -341,15 +402,11 @@ TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
ctags: ctags-am
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$(am__define_uniq_tagged_files); \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
@ -358,9 +415,10 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: cscopelist-am
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
@ -513,18 +571,19 @@ uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstPROGRAMS cscopelist ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags uninstall uninstall-am
.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstPROGRAMS cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-man install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags tags-am uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.

@ -1,7 +1,7 @@
/*
This file is part of SFFT.
This file is part of FFTS.
Copyright (c) 2012, Anthony M. Blake
All rights reserved.
@ -29,148 +29,164 @@
*/
#include <stdio.h>
#include <math.h>
#include "../include/ffts.h"
#include "../src/ffts_attributes.h"
#ifdef __ARM_NEON__
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
#include "../include/ffts.h"
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define PI 3.1415926535897932384626433832795028841971693993751058209
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795028841971693993751058209
#endif
float impulse_error(int N, int sign, float *data) {
static float impulse_error(int N, int sign, float *data)
{
#ifdef __ANDROID__
double delta_sum = 0.0f;
double sum = 0.0f;
double delta_sum = 0.0f;
double sum = 0.0f;
#else
long double delta_sum = 0.0f;
long double sum = 0.0f;
#endif
long double delta_sum = 0.0f;
long double sum = 0.0f;
#endif
int i;
int i;
for(i=0;i<N;i++) {
for (i = 0; i < N; i++) {
#ifdef __ANDROID__
double re, im;
if(sign < 0) {
re = cos(2 * PI * (double)i / (double)N);
im = -sin(2 * PI * (double)i / (double)N);
}else{
re = cos(2 * PI * (double)i / (double)N);
im = sin(2 * PI * (double)i / (double)N);
}
double re, im;
if (sign < 0) {
re = cos(2 * M_PI * (double) i / (double) N);
im = -sin(2 * M_PI * (double) i / (double) N);
} else {
re = cos(2 * M_PI * (double) i / (double) N);
im = sin(2 * M_PI * (double) i / (double) N);
}
#else
long double re, im;
if(sign < 0) {
re = cosl(2 * PI * (long double)i / (long double)N);
im = -sinl(2 * PI * (long double)i / (long double)N);
}else{
re = cosl(2 * PI * (long double)i / (long double)N);
im = sinl(2 * PI * (long double)i / (long double)N);
}
long double re, im;
if (sign < 0) {
re = cosl(2 * M_PI * (long double) i / (long double) N);
im = -sinl(2 * M_PI * (long double) i / (long double) N);
} else {
re = cosl(2 * M_PI * (long double) i / (long double) N);
im = sinl(2 * M_PI * (long double) i / (long double) N);
}
#endif
sum += re * re + im * im;
re = re - data[2*i];
im = im - data[2*i+1];
delta_sum += re * re + im * im;
sum += re * re + im * im;
re = re - data[2*i];
im = im - data[2*i+1];
delta_sum += re * re + im * im;
}
}
#ifdef __ANDROID__
return sqrt(delta_sum) / sqrt(sum);
return (float) (sqrt(delta_sum) / sqrt(sum));
#else
return sqrtl(delta_sum) / sqrtl(sum);
return (float) (sqrtl(delta_sum) / sqrtl(sum));
#endif
}
int
test_transform(int n, int sign) {
int test_transform(int n, int sign)
{
ffts_plan_t *p;
#ifdef HAVE_SSE
float __attribute__ ((aligned(32))) *input = _mm_malloc(2 * n * sizeof(float), 32);
float __attribute__ ((aligned(32))) *output = _mm_malloc(2 * n * sizeof(float), 32);
#ifdef HAVE_SSE
float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32);
float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32);
#else
float __attribute__ ((aligned(32))) *input = valloc(2 * n * sizeof(float));
float __attribute__ ((aligned(32))) *output = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float));
#endif
int i;
for(i=0;i<n;i++) {
input[2*i] = 0.0f;
input[2*i+1] = 0.0f;
}
input[2] = 1.0f;
ffts_plan_t *p = ffts_init_1d(i, sign);
if(p) {
ffts_execute(p, input, output);
printf(" %3d | %9d | %10E\n", sign, n, impulse_error(n, sign, output));
ffts_free(p);
}else{
printf("Plan unsupported\n");
return 0;
}
return 1;
int i;
for (i = 0; i < n; i++) {
input[2*i + 0] = 0.0f;
input[2*i + 1] = 0.0f;
}
input[2] = 1.0f;
p = ffts_init_1d(i, sign);
if (!p) {
printf("Plan unsupported\n");
return 0;
}
ffts_execute(p, input, output);
printf(" %3d | %9d | %10E\n", sign, n, impulse_error(n, sign, output));
ffts_free(p);
return 1;
}
int
main(int argc, char *argv[]) {
if(argc == 3) {
// test specific transform with test pattern and display output
int n = atoi(argv[1]);
int sign = atoi(argv[2]);
int main(int argc, char *argv[])
{
if (argc == 3) {
ffts_plan_t *p;
int i;
/* test specific transform with test pattern and display output */
int n = atoi(argv[1]);
int sign = atoi(argv[2]);
#ifdef HAVE_SSE
float __attribute__ ((aligned(32))) *input = _mm_malloc(2 * n * sizeof(float), 32);
float __attribute__ ((aligned(32))) *output = _mm_malloc(2 * n * sizeof(float), 32);
float FFTS_ALIGN(32) *input = _mm_malloc(2 * n * sizeof(float), 32);
float FFTS_ALIGN(32) *output = _mm_malloc(2 * n * sizeof(float), 32);
#else
float __attribute__ ((aligned(32))) *input = valloc(2 * n * sizeof(float));
float __attribute__ ((aligned(32))) *output = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *input = valloc(2 * n * sizeof(float));
float FFTS_ALIGN(32) *output = valloc(2 * n * sizeof(float));
#endif
int i;
for(i=0;i<n;i++) {
input[2*i] = i;
input[2*i+1] = 0.0f;
}
// input[2] = 1.0f;
ffts_plan_t *p = ffts_init_1d(i, sign);
if(p) {
ffts_execute(p, input, output);
for(i=0;i<n;i++) printf("%d %d %f %f\n", i, sign, output[2*i], output[2*i+1]);
ffts_free(p);
}else{
printf("Plan unsupported\n");
return 0;
}
#ifdef HAVE_NEON
_mm_free(input);
_mm_free(output);
for (i = 0; i < n; i++) {
input[2*i + 0] = (float) i;
input[2*i + 1] = 0.0f;
}
/* input[2] = 1.0f; */
p = ffts_init_1d(i, sign);
if (!p) {
printf("Plan unsupported\n");
return 0;
}
ffts_execute(p, input, output);
for (i = 0; i < n; i++)
printf("%d %d %f %f\n", i, sign, output[2*i], output[2*i+1]);
ffts_free(p);
#ifdef HAVE_SSE
_mm_free(input);
_mm_free(output);
#else
free(input);
free(output);
free(input);
free(output);
#endif
} else {
int n, power2;
/* test various sizes and display error */
printf(" Sign | Size | L2 Error\n");
printf("------+-----------+-------------\n");
for (n = 1, power2 = 2; n <= 18; n++, power2 <<= 1) {
test_transform(power2, -1);
}
for (n = 1, power2 = 2; n <= 18; n++, power2 <<= 1) {
test_transform(power2, 1);
}
}
}else{
// test various sizes and display error
printf(" Sign | Size | L2 Error\n");
printf("------+-----------+-------------\n");
int n;
for(n=1;n<=18;n++) {
test_transform(pow(2,n), -1);
}
for(n=1;n<=18;n++) {
test_transform(pow(2,n), 1);
}
}
return 0;
return 0;
}
