parent
d8856bdf08
commit
e4e92bf2b0
@ -0,0 +1,462 @@
|
||||
cmake_minimum_required(VERSION 2.8.12 FATAL_ERROR)
|
||||
|
||||
project(ffts C ASM)
|
||||
|
||||
# TODO: to support AutoConfigure building, this should come from a "template" file
|
||||
set(FFTS_MAJOR 0)
|
||||
set(FFTS_MINOR 9)
|
||||
set(FFTS_MICRO 0)
|
||||
|
||||
set(FFTS_VERSION "ffts-${FFTS_MAJOR}.${FFTS_MINOR}.${FFTS_MICRO}")
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
|
||||
|
||||
# CMake's default build type is empty, which means no optimization; default to Release instead
|
||||
# Fall back to an optimized build when the user did not pick a build type.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
endif()
|
||||
|
||||
# common options
|
||||
option(ENABLE_NEON
|
||||
"Enables the use of NEON instructions." OFF
|
||||
)
|
||||
|
||||
option(ENABLE_VFP
|
||||
"Enables the use of VFP instructions." OFF
|
||||
)
|
||||
|
||||
option(DISABLE_DYNAMIC_CODE
|
||||
"Disables the use of dynamic machine code generation." OFF
|
||||
)
|
||||
|
||||
option(GENERATE_POSITION_INDEPENDENT_CODE
|
||||
"Generate position independent code" OFF
|
||||
)
|
||||
|
||||
option(ENABLE_SHARED
|
||||
"Enable building a shared library." OFF
|
||||
)
|
||||
|
||||
option(ENABLE_STATIC
|
||||
"Enable building a static library." ON
|
||||
)
|
||||
|
||||
include(CheckCSourceCompiles)
|
||||
include(CheckCSourceRuns)
|
||||
include(CheckIncludeFile)
|
||||
|
||||
# Ensure defined when building FFTS (as opposed to using it from
|
||||
# another project). Used to export functions from Windows DLL.
|
||||
add_definitions(-DFFTS_BUILD)
|
||||
|
||||
# check existence of various headers
|
||||
# Probe each header and, when it is found, expose the matching HAVE_<NAME>_H
# macro to the C sources (malloc.h -> HAVE_MALLOC_H, sys/mman.h -> HAVE_SYS_MMAN_H).
foreach(ffts_header malloc.h stdint.h stdlib.h string.h sys/mman.h unistd.h)
  # Turn the header path into the result variable name.
  string(REGEX REPLACE "[./]" "_" ffts_have_var "${ffts_header}")
  string(TOUPPER "HAVE_${ffts_have_var}" ffts_have_var)

  check_include_file(${ffts_header} ${ffts_have_var})

  if(${ffts_have_var})
    add_definitions(-D${ffts_have_var})
  endif()
endforeach()
|
||||
|
||||
# backup flags
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# Determine if we are cross-compiling
|
||||
if(NOT CMAKE_CROSSCOMPILING)
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
|
||||
# Determine ARM architecture
|
||||
|
||||
# Try to execute quietly without messages
|
||||
set(CMAKE_REQUIRED_QUIET 1)
|
||||
|
||||
# The test for ARM architecture
|
||||
set(TEST_SOURCE_CODE "int main() { return 0; }")
|
||||
|
||||
# GCC documentation says "native" is only supported on Linux, but let's try
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=native")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_NATIVE_FLAG_SUPPORTED)
|
||||
|
||||
if(NOT GCC_MARCH_NATIVE_FLAG_SUPPORTED)
|
||||
# Fallback trying generic ARMv7
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=armv7-a")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
|
||||
|
||||
if(NOT GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
|
||||
# Fallback trying generic ARMv6
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -march=armv6")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" GCC_MARCH_ARMV6_FLAG_SUPPORTED)
|
||||
|
||||
if(NOT GCC_MARCH_ARMV6_FLAG_SUPPORTED)
|
||||
message(WARNING "FFTS failed to determinate ARM architecture")
|
||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
||||
else()
|
||||
message("FFTS is build using 'march=armv6'")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv6")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv6")
|
||||
endif(NOT GCC_MARCH_ARMV6_FLAG_SUPPORTED)
|
||||
else()
|
||||
message("FFTS is build using 'march=armv7-a'")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv7-a")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a")
|
||||
endif(NOT GCC_MARCH_ARMV7A_FLAG_SUPPORTED)
|
||||
else()
|
||||
message("FFTS is build using 'march=native'")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=native")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
|
||||
endif(NOT GCC_MARCH_NATIVE_FLAG_SUPPORTED)
|
||||
|
||||
# Determine what floating-point hardware (or hardware emulation) is available
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
# The test for ARM NEON support
|
||||
set(TEST_SOURCE_CODE "
|
||||
#include <arm_neon.h>
|
||||
int main()
|
||||
{
|
||||
float32x4_t v;
|
||||
float zeros[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
v = vld1q_f32(zeros);
|
||||
return 0;
|
||||
}"
|
||||
)
|
||||
|
||||
# Test running with -mfpu=neon and -mfloat-abi=hard
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=neon -mfloat-abi=hard")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" NEON_HARDFP_SUPPORTED)
|
||||
|
||||
if(NOT NEON_HARDFP_SUPPORTED)
|
||||
# Test running with -mfpu=neon and -mfloat-abi=softfp
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=neon -mfloat-abi=softfp")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" NEON_SOFTFP_SUPPORTED)
|
||||
|
||||
if(NOT NEON_SOFTFP_SUPPORTED)
|
||||
if(ENABLE_NEON)
|
||||
message(FATAL_ERROR "FFTS cannot enable NEON on this platform")
|
||||
endif(ENABLE_NEON)
|
||||
else()
|
||||
message("FFTS is using 'neon' FPU and 'softfp' float ABI")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon -mfloat-abi=softfp")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mfloat-abi=softfp")
|
||||
set(ENABLE_NEON ON)
|
||||
endif(NOT NEON_SOFTFP_SUPPORTED)
|
||||
else()
|
||||
message("FFTS is using 'neon' FPU and 'hard' float ABI")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon -mfloat-abi=hard")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -mfloat-abi=hard")
|
||||
set(ENABLE_NEON ON)
|
||||
endif(NOT NEON_HARDFP_SUPPORTED)
|
||||
|
||||
# Fallback using VFP if NEON is not supported
|
||||
if(NOT NEON_HARDFP_SUPPORTED AND NOT NEON_SOFTFP_SUPPORTED)
|
||||
# Test for ARM VFP support
|
||||
set(TEST_SOURCE_CODE "
|
||||
double sum(double a, double b)
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
int main()
|
||||
{
|
||||
double s1, s2, v1 = 1.0, v2 = 2.0, v3 = 1.0e-322;
|
||||
s1 = sum(v1, v2);
|
||||
s2 = sum(v3, v3);
|
||||
return 0;
|
||||
}"
|
||||
)
|
||||
|
||||
# Test running with -mfpu=vfp
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfpu=vfp")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" VFP_SUPPORTED)
|
||||
|
||||
if(NOT VFP_SUPPORTED)
|
||||
# Fallback using emulation if VFP is not supported
|
||||
if(ENABLE_VFP)
|
||||
message(FATAL_ERROR "FFTS cannot enable VFP on this platform")
|
||||
endif(ENABLE_VFP)
|
||||
|
||||
message(WARNING "FFTS is using 'soft' FPU")
|
||||
else()
|
||||
message("FFTS is using 'vfp' FPU")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=vfp")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp")
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
set(ENABLE_VFP ON)
|
||||
endif(NOT VFP_SUPPORTED)
|
||||
|
||||
# Test running with -mfloat-abi=hard
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfloat-abi=hard")
|
||||
|
||||
# Use the same test as before
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" HARDFP_SUPPORTED)
|
||||
|
||||
if(NOT HARDFP_SUPPORTED)
|
||||
# Test running with -mfloat-abi=softfp
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -mfloat-abi=softfp")
|
||||
check_c_source_runs("${TEST_SOURCE_CODE}" SOFTFP_SUPPORTED)
|
||||
|
||||
if(NOT SOFTFP_SUPPORTED)
|
||||
# Most likely development libraries are missing
|
||||
message(WARNING "FFTS is using 'soft' float ABI")
|
||||
else()
|
||||
message("FFTS is using 'softfp' float ABI")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfloat-abi=softfp")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=softfp")
|
||||
endif(NOT SOFTFP_SUPPORTED)
|
||||
else()
|
||||
message("FFTS is using 'hard' float ABI")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfloat-abi=hard")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=hard")
|
||||
endif(NOT HARDFP_SUPPORTED)
|
||||
endif(NOT NEON_HARDFP_SUPPORTED AND NOT NEON_SOFTFP_SUPPORTED)
|
||||
else()
|
||||
# enable SSE code generation
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse")
|
||||
endif(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
# check if the platform has support for SSE intrinsics
|
||||
check_include_file(xmmintrin.h HAVE_XMMINTRIN_H)
|
||||
if(HAVE_XMMINTRIN_H)
|
||||
add_definitions(-DHAVE_SSE)
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
endif(HAVE_XMMINTRIN_H)
|
||||
|
||||
# enable SSE2 code generation
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse2")
|
||||
endif(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
# check if the platform has support for SSE2 intrinsics
|
||||
check_include_file(emmintrin.h HAVE_EMMINTRIN_H)
|
||||
if(HAVE_EMMINTRIN_H)
|
||||
add_definitions(-DHAVE_SSE2)
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
endif(HAVE_EMMINTRIN_H)
|
||||
|
||||
# enable SSE3 code generation
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE} -msse3")
|
||||
endif(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
# check if the platform has support for SSE3 intrinsics
|
||||
check_include_file(pmmintrin.h HAVE_PMMINTRIN_H)
|
||||
if(HAVE_PMMINTRIN_H)
|
||||
add_definitions(-DHAVE_PMMINTRIN_H)
|
||||
add_definitions(-DHAVE_SSE3)
|
||||
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
||||
else()
|
||||
# check if the platform has specific intrinsics
|
||||
check_include_file(intrin.h HAVE_INTRIN_H)
|
||||
if(HAVE_INTRIN_H)
|
||||
add_definitions(-DHAVE_INTRIN_H)
|
||||
|
||||
check_c_source_compiles("
|
||||
#include<intrin.h>
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
(void) argv;
|
||||
(void) argc;
|
||||
return _mm_movemask_ps(_mm_moveldup_ps(_mm_set_ss(1.0f)));
|
||||
}" HAVE__MM_MOVELDUP_PS
|
||||
)
|
||||
|
||||
if(HAVE__MM_MOVELDUP_PS)
|
||||
# assume that we have all SSE3 intrinsics
|
||||
add_definitions(-DHAVE_SSE3)
|
||||
endif(HAVE__MM_MOVELDUP_PS)
|
||||
endif(HAVE_INTRIN_H)
|
||||
endif(HAVE_PMMINTRIN_H)
|
||||
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
|
||||
else()
|
||||
# TODO: Add detections for compiler support and headers
|
||||
endif(NOT CMAKE_CROSSCOMPILING)
|
||||
|
||||
# restore flags
|
||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
||||
|
||||
# compiler settings
|
||||
if(MSVC)
|
||||
# enable all warnings but also disable some..
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4 /wd4127")
|
||||
|
||||
# mark debug versions
|
||||
set(CMAKE_DEBUG_POSTFIX "d")
|
||||
|
||||
add_definitions(-D_USE_MATH_DEFINES)
|
||||
elseif(CMAKE_COMPILER_IS_GNUCC)
|
||||
include(CheckCCompilerFlag)
|
||||
include(CheckLibraryExists)
|
||||
|
||||
# enable all warnings
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra")
|
||||
|
||||
# check if we can control visibility of symbols
|
||||
check_c_compiler_flag(-fvisibility=hidden HAVE_GCC_VISIBILITY)
|
||||
if(HAVE_GCC_VISIBILITY)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
|
||||
add_definitions(-DHAVE_GCC_VISIBILITY)
|
||||
endif(HAVE_GCC_VISIBILITY)
|
||||
|
||||
# some systems need libm for the math functions to work
|
||||
check_library_exists(m pow "" HAVE_LIBM)
|
||||
if(HAVE_LIBM)
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES m)
|
||||
list(APPEND FFTS_EXTRA_LIBRARIES m)
|
||||
endif(HAVE_LIBM)
|
||||
|
||||
if(HAVE_PMMINTRIN_H)
|
||||
add_definitions(-msse3)
|
||||
elseif(HAVE_EMMINTRIN_H)
|
||||
add_definitions(-msse2)
|
||||
elseif(HAVE_XMMINTRIN_H)
|
||||
add_definitions(-msse)
|
||||
endif(HAVE_PMMINTRIN_H)
|
||||
endif(MSVC)
|
||||
|
||||
include_directories(include)
|
||||
include_directories(src)
|
||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
set(FFTS_HEADERS
|
||||
include/ffts.h
|
||||
)
|
||||
|
||||
set(FFTS_SOURCES
|
||||
src/ffts_attributes.h
|
||||
src/ffts.c
|
||||
src/ffts_internal.h
|
||||
src/ffts_nd.c
|
||||
src/ffts_nd.h
|
||||
src/ffts_real.h
|
||||
src/ffts_real.c
|
||||
src/ffts_real_nd.c
|
||||
src/ffts_real_nd.h
|
||||
src/ffts_transpose.c
|
||||
src/ffts_transpose.h
|
||||
src/ffts_trig.c
|
||||
src/ffts_trig.h
|
||||
src/ffts_static.c
|
||||
src/ffts_static.h
|
||||
src/macros.h
|
||||
src/patterns.h
|
||||
src/types.h
|
||||
)
|
||||
|
||||
if(ENABLE_NEON)
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/neon.s
|
||||
)
|
||||
|
||||
if(DISABLE_DYNAMIC_CODE)
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/neon_static.s
|
||||
)
|
||||
endif(DISABLE_DYNAMIC_CODE)
|
||||
|
||||
add_definitions(-DHAVE_NEON)
|
||||
elseif(ENABLE_VFP)
|
||||
if(NOT DISABLE_DYNAMIC_CODE)
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/vfp.s
|
||||
)
|
||||
endif(NOT DISABLE_DYNAMIC_CODE)
|
||||
|
||||
add_definitions(-DHAVE_VFP)
|
||||
elseif(HAVE_XMMINTRIN_H)
|
||||
add_definitions(-DHAVE_SSE)
|
||||
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/macros-sse.h
|
||||
)
|
||||
|
||||
if(NOT DISABLE_DYNAMIC_CODE)
|
||||
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/codegen_sse.h
|
||||
)
|
||||
else()
|
||||
message(WARNING "Dynamic code is only supported with x64, disabling dynamic code.")
|
||||
set(DISABLE_DYNAMIC_CODE ON)
|
||||
endif(CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
endif(NOT DISABLE_DYNAMIC_CODE)
|
||||
endif(ENABLE_NEON)
|
||||
|
||||
if(DISABLE_DYNAMIC_CODE)
|
||||
add_definitions(-DDYNAMIC_DISABLED)
|
||||
else()
|
||||
list(APPEND FFTS_SOURCES
|
||||
src/codegen.c
|
||||
src/codegen.h
|
||||
)
|
||||
endif(DISABLE_DYNAMIC_CODE)
|
||||
|
||||
if(GENERATE_POSITION_INDEPENDENT_CODE)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif(GENERATE_POSITION_INDEPENDENT_CODE)
|
||||
|
||||
# Shared library target, built only when ENABLE_SHARED is ON.
if(ENABLE_SHARED)
  add_library(ffts_shared SHARED
    ${FFTS_HEADERS}
    ${FFTS_SOURCES}
  )

  # On unix-like platforms the library is called "libffts.so" and on Windows "ffts.dll"
  set_target_properties(ffts_shared PROPERTIES
    DEFINE_SYMBOL FFTS_SHARED
    OUTPUT_NAME ffts
    VERSION ${FFTS_MAJOR}.${FFTS_MINOR}.${FFTS_MICRO}
  )

  # Link the extra libraries (libm, when detected) into the shared object
  # itself, so it does not carry unresolved math symbols that every consumer
  # would have to satisfy by hand. The list is empty when nothing was detected.
  target_link_libraries(ffts_shared
    ${FFTS_EXTRA_LIBRARIES}
  )
endif()
|
||||
|
||||
# Static library target, built only when ENABLE_STATIC is ON.
if(ENABLE_STATIC)
  add_library(ffts_static STATIC ${FFTS_HEADERS} ${FFTS_SOURCES})

  # On unix-like platforms the library is called "libffts.a"
  if(UNIX)
    set_property(TARGET ffts_static PROPERTY OUTPUT_NAME ffts)
  endif()
endif()
|
||||
|
||||
# The test program needs at least one flavour of the library to link against.
if(ENABLE_STATIC OR ENABLE_SHARED)
  # "ffts" names whichever library the test uses; the static one wins when
  # both flavours are enabled.
  if(ENABLE_STATIC)
    add_library(ffts ALIAS ffts_static)
  else()
    add_library(ffts ALIAS ffts_shared)
  endif()

  add_executable(ffts_test tests/test.c)
  target_link_libraries(ffts_test ffts ${FFTS_EXTRA_LIBRARIES})
endif()
|
@ -1,27 +0,0 @@
|
||||
FFTS -- The Fastest Fourier Transform in the South
|
||||
by Anthony Blake <anthonix@me.com>
|
||||
|
||||
To build for Android, edit and run build_android.sh
|
||||
|
||||
To build for iOS, edit and run build_iphone.sh
|
||||
|
||||
To build for Linux or OS X on x86, run
|
||||
./configure --enable-sse --enable-single --prefix=/usr/local
|
||||
make
|
||||
make install
|
||||
|
||||
FFTS dynamically generates code at runtime. This can be disabled with
|
||||
--disable-dynamic-code
|
||||
|
||||
For JNI targets: --enable-jni will build the jni stuff automatically for
|
||||
the host target, and --enable-shared must also be added manually for it to
|
||||
work.
|
||||
|
||||
If you like FFTS, please show your support by sending a postcard to:
|
||||
|
||||
Anthony Blake
|
||||
Department of Computer Science
|
||||
The University of Waikato
|
||||
Private Bag 3105
|
||||
Hamilton 3240
|
||||
NEW ZEALAND
|
@ -0,0 +1,35 @@
|
||||
# FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
[![Build Status](https://travis-ci.org/linkotec/ffts.svg?branch=master)](https://travis-ci.org/linkotec/ffts)
|
||||
|
||||
To build for Android, edit and run build_android.sh
|
||||
|
||||
To build for iOS, edit and run build_iphone.sh
|
||||
|
||||
To build for Linux or OS X on x86, run
|
||||
./configure --enable-sse --enable-single --prefix=/usr/local
|
||||
make
|
||||
make install
|
||||
|
||||
Alternatively, to build for Windows or Linux with CMake, run
|
||||
mkdir build
|
||||
cd build
|
||||
cmake ..
|
||||
|
||||
FFTS dynamically generates code at runtime. This can be disabled with
|
||||
--disable-dynamic-code (or -DDISABLE_DYNAMIC_CODE=ON when building with CMake)
|
||||
|
||||
Note that 32 bit x86 dynamic machine code generation is not supported at the moment.
|
||||
|
||||
For JNI targets: --enable-jni will build the jni stuff automatically for
|
||||
the host target, and --enable-shared must also be added manually for it to
|
||||
work.
|
||||
|
||||
If you like FFTS, please show your support by sending a postcard to:
|
||||
|
||||
Anthony Blake<br>
|
||||
Department of Computer Science<br>
|
||||
The University of Waikato<br>
|
||||
Private Bag 3105<br>
|
||||
Hamilton 3240<br>
|
||||
NEW ZEALAND
|
@ -0,0 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="gen"/>
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="con" path="com.android.ide.eclipse.adt.ANDROID_FRAMEWORK"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.LIBRARIES"/>
|
||||
<classpathentry exported="true" kind="con" path="com.android.ide.eclipse.adt.DEPENDENCIES"/>
|
||||
<classpathentry kind="output" path="bin/classes"/>
|
||||
</classpath>
|
@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ffts-android</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ResourceManagerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.PreCompilerBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>com.android.ide.eclipse.adt.ApkBuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>com.android.ide.eclipse.adt.AndroidNature</nature>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>src</name>
|
||||
<type>2</type>
|
||||
<locationURI>PARENT-1-PROJECT_LOC/src</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -0,0 +1,4 @@
|
||||
eclipse.preferences.version=1
|
||||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
|
||||
org.eclipse.jdt.core.compiler.compliance=1.6
|
||||
org.eclipse.jdt.core.compiler.source=1.6
|
@ -0,0 +1,2 @@
|
||||
eclipse.preferences.version=1
|
||||
org.eclipse.ltk.core.refactoring.enable.project.refactoring.history=false
|
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
package="nz.waikato.ffts"
|
||||
android:versionCode="1"
|
||||
android:versionName="1.0">
|
||||
<uses-sdk android:minSdkVersion="8" />
|
||||
</manifest>
|
@ -0,0 +1,18 @@
|
||||
# This file is used to override default values used by the Ant build system.
|
||||
#
|
||||
# This file must be checked into Version Control Systems, as it is
|
||||
# integral to the build system of your project.
|
||||
|
||||
# This file is only used by the Ant script.
|
||||
|
||||
# You can use this to override default values such as
|
||||
# 'source.dir' for the location of your java source folder and
|
||||
# 'out.dir' for the location of your output folder.
|
||||
source.dir=../src
|
||||
|
||||
# You can also use it to define how the release builds are signed by declaring
|
||||
# the following properties:
|
||||
# 'key.store' for the location of your keystore and
|
||||
# 'key.alias' for the name of the key to use.
|
||||
# The password will be asked during the build when you use the 'release' target.
|
||||
|
@ -0,0 +1,92 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project name="ffts" default="help">
|
||||
|
||||
<!-- The local.properties file is created and updated by the 'android' tool.
|
||||
It contains the path to the SDK. It should *NOT* be checked into
|
||||
Version Control Systems. -->
|
||||
<property file="local.properties" />
|
||||
|
||||
<!-- The ant.properties file can be created by you. It is only edited by the
|
||||
'android' tool to add properties to it.
|
||||
This is the place to change some Ant specific build properties.
|
||||
Here are some properties you may want to change/update:
|
||||
|
||||
source.dir
|
||||
The name of the source directory. Default is 'src'.
|
||||
out.dir
|
||||
The name of the output directory. Default is 'bin'.
|
||||
|
||||
For other overridable properties, look at the beginning of the rules
|
||||
files in the SDK, at tools/ant/build.xml
|
||||
|
||||
Properties related to the SDK location or the project target should
|
||||
be updated using the 'android' tool with the 'update' action.
|
||||
|
||||
This file is an integral part of the build system for your
|
||||
application and should be checked into Version Control Systems.
|
||||
|
||||
-->
|
||||
<property file="ant.properties" />
|
||||
|
||||
<!-- if sdk.dir was not set from one of the property file, then
|
||||
get it from the ANDROID_HOME env var.
|
||||
This must be done before we load project.properties since
|
||||
the proguard config can use sdk.dir -->
|
||||
<property environment="env" />
|
||||
<condition property="sdk.dir" value="${env.ANDROID_HOME}">
|
||||
<isset property="env.ANDROID_HOME" />
|
||||
</condition>
|
||||
|
||||
<!-- The project.properties file is created and updated by the 'android'
|
||||
tool, as well as ADT.
|
||||
|
||||
This contains project specific properties such as project target, and library
|
||||
dependencies. Lower level build properties are stored in ant.properties
|
||||
(or in .classpath for Eclipse projects).
|
||||
|
||||
This file is an integral part of the build system for your
|
||||
application and should be checked into Version Control Systems. -->
|
||||
<loadproperties srcFile="project.properties" />
|
||||
|
||||
<!-- quick check on sdk.dir -->
|
||||
<fail
|
||||
message="sdk.dir is missing. Make sure to generate local.properties using 'android update project' or to inject it through the ANDROID_HOME environment variable."
|
||||
unless="sdk.dir"
|
||||
/>
|
||||
|
||||
<!--
|
||||
Import per project custom build rules if present at the root of the project.
|
||||
This is the place to put custom intermediary targets such as:
|
||||
-pre-build
|
||||
-pre-compile
|
||||
-post-compile (This is typically used for code obfuscation.
|
||||
Compiled code location: ${out.classes.absolute.dir}
|
||||
If this is not done in place, override ${out.dex.input.absolute.dir})
|
||||
-post-package
|
||||
-post-build
|
||||
-pre-clean
|
||||
-->
|
||||
<import file="custom_rules.xml" optional="true" />
|
||||
|
||||
<!-- Import the actual build file.
|
||||
|
||||
To customize existing targets, there are two options:
|
||||
- Customize only one target:
|
||||
- copy/paste the target into this file, *before* the
|
||||
<import> task.
|
||||
- customize it to your needs.
|
||||
- Customize the whole content of build.xml
|
||||
- copy/paste the content of the rules files (minus the top node)
|
||||
into this file, replacing the <import> task.
|
||||
- customize to your needs.
|
||||
|
||||
***********************
|
||||
****** IMPORTANT ******
|
||||
***********************
|
||||
In all cases you must update the value of version-tag below to read 'custom' instead of an integer,
|
||||
in order to avoid having your file be overridden by tools such as "android update project"
|
||||
-->
|
||||
<!-- version-tag: 1 -->
|
||||
<import file="${sdk.dir}/tools/ant/build.xml" />
|
||||
|
||||
</project>
|
@ -0,0 +1,25 @@
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
TOP=../../..
|
||||
|
||||
# Include the shared library
|
||||
#include $(CLEAR_VARS)
|
||||
#LOCAL_MODULE := ffts
|
||||
#LOCAL_SRC_FILES := ../../../src/.libs/libffts.so
|
||||
#include $(PREBUILT_SHARED_LIBRARY)
|
||||
|
||||
# Include the static library in shared lib
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := ffts
|
||||
LOCAL_SRC_FILES := $(TOP)/java/android/bin/lib/libffts.a
|
||||
LOCAL_EXPORT_C_INCLUDES := $(TOP)/include
|
||||
include $(PREBUILT_STATIC_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_MODULE := ffts_jni
|
||||
LOCAL_CFLAGS := -I$(TOP)/include -I$(TOP)/java/jni -I$(TOP) -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast
|
||||
LOCAL_SRC_FILES := $(TOP)/java/jni/ffts_jni.c
|
||||
LOCAL_LDLIBS := -L$(SYSROOT)/usr/lib -llog
|
||||
LOCAL_STATIC_LIBRARIES := ffts
|
||||
|
||||
include $(BUILD_SHARED_LIBRARY)
|
@ -0,0 +1,2 @@
|
||||
# requires NEON atm
|
||||
APP_ABI := armeabi-v7a
|
@ -0,0 +1,20 @@
|
||||
# To enable ProGuard in your project, edit project.properties
|
||||
# to define the proguard.config property as described in that file.
|
||||
#
|
||||
# Add project specific ProGuard rules here.
|
||||
# By default, the flags in this file are appended to flags specified
|
||||
# in ${sdk.dir}/tools/proguard/proguard-android.txt
|
||||
# You can edit the include path and order by changing the ProGuard
|
||||
# include property in project.properties.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# Add any project specific keep options here:
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
@ -0,0 +1,15 @@
|
||||
# This file is automatically generated by Android Tools.
|
||||
# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
|
||||
#
|
||||
# This file must be checked in Version Control Systems.
|
||||
#
|
||||
# To customize properties used by the Ant build system edit
|
||||
# "ant.properties", and override values to adapt the script to your
|
||||
# project structure.
|
||||
#
|
||||
# To enable ProGuard to shrink and obfuscate your code, uncomment this (available properties: sdk.dir, user.home):
|
||||
#proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt
|
||||
|
||||
android.library=true
|
||||
# Project target.
|
||||
target=android-10
|
@ -0,0 +1,203 @@
|
||||
/*
|
||||
* This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
*
|
||||
* Copyright (c) 2013, Michael Zucchi <notzed@gmail.com>
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of the organization nor the
|
||||
* names of its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
package nz.ac.waikato.ffts;
|
||||
|
||||
import java.nio.FloatBuffer;
|
||||
|
||||
/**
|
||||
* A java wrapper for ffts plans.
|
||||
*
|
||||
* Plans must currently be freed explicitly.
|
||||
*
|
||||
* @author notzed
|
||||
*/
|
||||
public class FFTS {
|
||||
|
||||
/**
|
||||
* C pointer
|
||||
*/
|
||||
private long p;
|
||||
/**
|
||||
* Minimum size of input
|
||||
*/
|
||||
final protected long inSize;
|
||||
/**
|
||||
* Minimum size of output
|
||||
*/
|
||||
final protected long outSize;
|
||||
|
||||
/**
 * Wrap a plan whose source and destination need the same minimum size.
 *
 * @param p      C pointer of the plan
 * @param inSize minimum size used for both the input and the output
 */
private FFTS(long p, long inSize) {
    this(p, inSize, /* outSize */ inSize);
}
|
||||
|
||||
/**
 * Wrap a plan with separate minimum sizes for source and destination.
 *
 * @param p       C pointer of the plan
 * @param inSize  minimum size of the input passed to execute()
 * @param outSize minimum size of the output passed to execute()
 */
private FFTS(long p, long inSize, long outSize) {
    this.p = p;
    this.inSize = inSize;
    // Store the caller's outSize. The parameter used to be ignored in favour
    // of inSize, which made the output-size guard in execute() wrong whenever
    // the two sizes differ (the real transforms).
    this.outSize = outSize;
}
|
||||
/**
|
||||
* The sign to use for a forward transform.
|
||||
*/
|
||||
public static final int FORWARD = -1;
|
||||
/**
|
||||
* The sign to use for a backward transform.
|
||||
*/
|
||||
public static final int BACKWARD = 1;
|
||||
|
||||
/**
|
||||
* Create a FFT plan for a 1-dimensional complex transform.
|
||||
*
|
||||
* The src and dst parameters to execute() use complex data.
|
||||
*
|
||||
* @param sign The direction of the transform.
|
||||
* @param N The size of the transform.
|
||||
* @return The new plan; it must be released explicitly with free().
|
||||
*/
|
||||
public static FFTS complex(int sign, int N) {
|
||||
return new FFTS(complex_1d(N, sign), N * 2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FFT plan for a 2-dimensional complex transform.
|
||||
* @param sign The direction of the transform.
|
||||
* @param N1 The size of the transform.
|
||||
* @param N2 The size of the transform.
|
||||
* @return The new plan; it must be released explicitly with free().
|
||||
*/
|
||||
public static FFTS complex(int sign, int N1, int N2) {
|
||||
return new FFTS(complex_2d(N1, N2, sign), N1 * N2 * 2);
|
||||
}
|
||||
|
||||
public static FFTS complex(int sign, int... Ns) {
|
||||
return new FFTS(complex_nd(Ns, sign), size(Ns) * 2);
|
||||
}
|
||||
|
||||
public static FFTS real(int sign, int N) {
|
||||
return new FFTS(real_1d(N, sign), sign == FORWARD ? N : (N / 2 + 1) * 2, sign == FORWARD ? (N / 2 + 1) * 2 : N);
|
||||
}
|
||||
|
||||
public static FFTS real(int sign, int N1, int N2) {
|
||||
return new FFTS(real_2d(N1, N2, sign), sign == FORWARD ? N1 * N2 : (N1 * N2 / 2 + 1) * 2, sign == FORWARD ? (N1 * N2 / 2 + 1) * 2 : N1 * N2);
|
||||
}
|
||||
|
||||
public static FFTS real(int sign, int... Ns) {
|
||||
return new FFTS(real_nd(Ns, sign), sign == FORWARD ? size(Ns) : (size(Ns) / 2 + 1) * 2, sign == FORWARD ? (size(Ns) / 2 + 1) * 2 : size(Ns));
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute this plan with the given array data.
|
||||
*
|
||||
* @param src
|
||||
* @param dst
|
||||
*/
|
||||
public void execute(float[] src, float[] dst) {
|
||||
execute(src, 0, dst, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute this plan with the given array data.
|
||||
* @param src
|
||||
* @param soff Start offset into src array.
|
||||
* @param dst
|
||||
* @param doff Start offset into dst array.
|
||||
*/
|
||||
public void execute(float[] src, int soff, float[] dst, int doff) {
|
||||
if (src.length - soff < inSize || dst.length - doff < outSize)
|
||||
throw new ArrayIndexOutOfBoundsException();
|
||||
if (p == 0)
|
||||
throw new NullPointerException();
|
||||
|
||||
execute(p, inSize, src, soff, dst, doff);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute this plan with the given nio buffers. The buffers
|
||||
* must be derived from direct buffers.
|
||||
*
|
||||
* The buffer position and limits are ignored.
|
||||
*
|
||||
* @param src
|
||||
* @param dst
|
||||
*/
|
||||
public void execute(FloatBuffer src, FloatBuffer dst) {
|
||||
if (src.capacity() < inSize || dst.capacity() < outSize)
|
||||
throw new ArrayIndexOutOfBoundsException();
|
||||
if (p == 0)
|
||||
throw new NullPointerException();
|
||||
|
||||
execute(p, inSize, src, dst);
|
||||
}
|
||||
|
||||
/**
|
||||
* Free the plan.
|
||||
*/
|
||||
public void free() {
|
||||
if (p == 0)
|
||||
throw new NullPointerException();
|
||||
free(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the number of elements required to store one
|
||||
* set of n-dimensional data.
|
||||
*/
|
||||
protected static long size(int[] Ns) {
|
||||
long s = Ns[0];
|
||||
for (int i = 1; i < Ns.length; i++)
|
||||
s *= Ns[i];
|
||||
return s;
|
||||
}
|
||||
|
||||
// Runs once when the class is initialised: loads the "ffts_jni" native
// library (presumably the one implementing the native methods declared
// in this class -- confirm against the JNI sources).
static {
|
||||
System.loadLibrary("ffts_jni");
|
||||
}
|
||||
|
||||
/*
|
||||
* Native interface
|
||||
*/
|
||||
protected static native long complex_1d(int N, int sign);
|
||||
|
||||
protected static native long complex_2d(int N1, int N2, int sign);
|
||||
|
||||
protected static native long complex_nd(int[] Ns, int sign);
|
||||
|
||||
protected static native long real_1d(int N, int sign);
|
||||
|
||||
protected static native long real_2d(int N1, int N2, int sign);
|
||||
|
||||
protected static native long real_nd(int[] Ns, int sign);
|
||||
|
||||
protected static native void execute(long p, long size, float[] src, int soff, float[] dst, int doff);
|
||||
|
||||
protected static native void execute(long p, long size, FloatBuffer src, FloatBuffer dst);
|
||||
|
||||
protected static native void free(long p);
|
||||
}
|
@ -0,0 +1,144 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_check_class.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_CHECK_CLASS
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# AX_CHECK_CLASS tests the existence of a given Java class, either in a
|
||||
# jar or in a '.class' file.
|
||||
#
|
||||
# *Warning*: its success or failure can depend on a proper setting of the
|
||||
# CLASSPATH env. variable.
|
||||
#
|
||||
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||
# on other. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission. The
|
||||
# general documentation, as well as the sample configure.in, is included
|
||||
# in the AX_PROG_JAVA macro.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 7
|
||||
|
||||
AU_ALIAS([AC_CHECK_CLASS], [AX_CHECK_CLASS])
|
||||
AC_DEFUN([AX_CHECK_CLASS],[
dnl Arguments: CLASS, optional ACTION-IF-FOUND, optional ACTION-IF-NOT-FOUND.
dnl Result variables: ac_cv_class_NAME, HAVE_NAME and HAVE_LAST_CLASS, where
dnl NAME is CLASS with every dot replaced by an underscore.
dnl NOTE(review): AC_CACHE_VAL is keyed on ax_cv_class_NAME while the body
dnl assigns ac_cv_class_NAME, so the two names never meet; left unchanged.
AC_REQUIRE([AX_PROG_JAVA])
ac_var_name=`echo $1 | sed 's/\./_/g'`
dnl Normally I would use AC_CACHE_CHECK here but since the variable name is
dnl dynamic I need an extra level of extraction
AC_MSG_CHECKING([for $1 class])
AC_CACHE_VAL(ax_cv_class_$ac_var_name, [
if test x$ac_cv_prog_uudecode_base64 = xyes; then
dnl The uuencoded payload below is the compiled form of this program:
dnl /**
dnl  * Test.java: used to test dynamicaly if a class exists.
dnl  */
dnl public class Test
dnl {
dnl
dnl public static void
dnl main( String[] argv )
dnl {
dnl     Class lib;
dnl     if (argv.length < 1)
dnl      {
dnl             System.err.println ("Missing argument");
dnl             System.exit (77);
dnl      }
dnl     try
dnl      {
dnl             lib = Class.forName (argv[0]);
dnl      }
dnl     catch (ClassNotFoundException e)
dnl      {
dnl             System.exit (1);
dnl      }
dnl     lib = null;
dnl     System.exit (0);
dnl }
dnl
dnl }
cat << \EOF > Test.uue
begin-base64 644 Test.class
yv66vgADAC0AKQcAAgEABFRlc3QHAAQBABBqYXZhL2xhbmcvT2JqZWN0AQAE
bWFpbgEAFihbTGphdmEvbGFuZy9TdHJpbmc7KVYBAARDb2RlAQAPTGluZU51
bWJlclRhYmxlDAAKAAsBAANlcnIBABVMamF2YS9pby9QcmludFN0cmVhbTsJ
AA0ACQcADgEAEGphdmEvbGFuZy9TeXN0ZW0IABABABBNaXNzaW5nIGFyZ3Vt
ZW50DAASABMBAAdwcmludGxuAQAVKExqYXZhL2xhbmcvU3RyaW5nOylWCgAV
ABEHABYBABNqYXZhL2lvL1ByaW50U3RyZWFtDAAYABkBAARleGl0AQAEKEkp
VgoADQAXDAAcAB0BAAdmb3JOYW1lAQAlKExqYXZhL2xhbmcvU3RyaW5nOylM
amF2YS9sYW5nL0NsYXNzOwoAHwAbBwAgAQAPamF2YS9sYW5nL0NsYXNzBwAi
AQAgamF2YS9sYW5nL0NsYXNzTm90Rm91bmRFeGNlcHRpb24BAAY8aW5pdD4B
AAMoKVYMACMAJAoAAwAlAQAKU291cmNlRmlsZQEACVRlc3QuamF2YQAhAAEA
AwAAAAAAAgAJAAUABgABAAcAAABtAAMAAwAAACkqvgSiABCyAAwSD7YAFBBN
uAAaKgMyuAAeTKcACE0EuAAaAUwDuAAasQABABMAGgAdACEAAQAIAAAAKgAK
AAAACgAAAAsABgANAA4ADgATABAAEwASAB4AFgAiABgAJAAZACgAGgABACMA
JAABAAcAAAAhAAEAAQAAAAUqtwAmsQAAAAEACAAAAAoAAgAAAAQABAAEAAEA
JwAAAAIAKA==
====
EOF
                if $UUDECODE Test.uue; then
                        :
                else
                        echo "configure: __oline__: uudecode had trouble decoding base 64 file 'Test.uue'" >&AS_MESSAGE_LOG_FD
                        echo "configure: failed file was:" >&AS_MESSAGE_LOG_FD
                        cat Test.uue >&AS_MESSAGE_LOG_FD
                        ac_cv_prog_uudecode_base64=no
                fi
        rm -f Test.uue
        if AC_TRY_COMMAND($JAVA $JAVAFLAGS Test $1) >/dev/null 2>&1; then
                eval "ac_cv_class_$ac_var_name=yes"
        else
                eval "ac_cv_class_$ac_var_name=no"
        fi
        rm -f Test.class
else
        AX_TRY_COMPILE_JAVA([$1], , [eval "ac_cv_class_$ac_var_name=yes"],
                [eval "ac_cv_class_$ac_var_name=no"])
fi
eval "ac_var_val=$`eval echo ac_cv_class_$ac_var_name`"
dnl Publish the yes/no result as HAVE_NAME. The previous form read a variable
dnl named after the result value instead of the class, so it was always empty.
eval "HAVE_$ac_var_name=$ac_var_val"
HAVE_LAST_CLASS=$ac_var_val
if test x$ac_var_val = xyes; then
        ifelse([$2], , :, [$2])
else
        ifelse([$3], , :, [$3])
fi
])
dnl for some reason the above statment didn't fall though here?
dnl do scripts have variable scoping?
eval "ac_var_val=$`eval echo ac_cv_class_$ac_var_name`"
AC_MSG_RESULT($ac_var_val)
])
|
@ -0,0 +1,101 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_check_java_plugin.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_CHECK_JAVA_PLUGIN(<shell-variable>)
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# This macro sets <shell-variable> to empty on failure and to a compatible
|
||||
# version of plugin.jar otherwise. Directories searched are /usr/java/*
|
||||
# and /usr/local/java/*, which are assumed to be j{dk,re} installations.
|
||||
# Apply the shell variable as you see fit. If sun changes things so
|
||||
# <jre>/lib/plugin.jar is not the magic file it will stop working.
|
||||
#
|
||||
# This macro assumes that unzip, zipinfo or pkzipc is available (and can
|
||||
# list the contents of the jar archive). The first two are assumed to work
|
||||
# similarly enough to the infozip versions. The pkzipc version is
|
||||
# assumed to work if I understand the documentation on pkware's site but
|
||||
# YMMV. I do not have access to pkware's version to test it.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Duncan Simpson <dps@simpson.demon.co.uk>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 6
|
||||
|
||||
AU_ALIAS([DPS_CHECK_PLUGIN], [AX_CHECK_JAVA_PLUGIN])
|
||||
AC_DEFUN([AX_CHECK_JAVA_PLUGIN],
[AC_REQUIRE([AC_PROG_AWK])
AC_REQUIRE([AC_PROG_FGREP])
dnl The single argument names the shell variable to substitute: it ends up
dnl holding the path of a usable plugin.jar, or empty when none is found.
dnl
dnl The case patterns below match on a directory separator, so ZIPINFO has to
dnl hold the full path of the first lister found. AC_PATH_PROGS provides that;
dnl the former two-argument AC_CHECK_PROG call never produced such a value, so
dnl the check was always skipped.
AC_PATH_PROGS(ZIPINFO,[zipinfo unzip pkzipc])
AC_MSG_CHECKING([for the java plugin])
case "x$ZIPINFO" in
[*/zipinfo)]
        zipinf="zipinfo -1" ;;
[*/unzip)]
        zipinf="unzip -l";;
[*/pkzipc)]
        zipinf="unzipc -view";;
[x*)]
        AC_MSG_RESULT([skiped, none of zipinfo, unzip and pkzipc found])
        AC_SUBST($1,[])
        zipinf="";;
esac
dnl The pkzipc arm used to assign a misspelled variable, leaving zipinf unset.
dnl NOTE(review): the arm still runs a command called unzipc although the
dnl program found is pkzipc; confirm the intended command name.
if test "x$zipinf" != "x"; then
jplugin=""
for jhome in `ls -dr /usr/java/* /usr/local/java/* 2> /dev/null`; do
for jfile in lib/plugin.jar jre/lib/plugin.jar; do
if test "x$jplugin" = "x" && test -f "$jhome/$jfile"; then
eval "$zipinf $jhome/$jfile | $AWK '{ print \$NF; }' | $FGREP netscape/javascript/JSObject" >/dev/null 2>/dev/null
if test $? -eq 0; then
dnl Some version of gcj (and javac) refuse to work with some files
dnl that pass this test. To stop this problem make sure that the compiler
dnl still works with this jar file in the classpath
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
public class Test {
}
EOF
if eval "$JAVAC -classpath $jhome/$jfile Test.java 2>/dev/null >/dev/null" && test -f Test.class; then
jplugin="$jhome/$jfile"
fi
rm -f Test.java Test.class
fi; fi; done; done
if test "x$jplugin" != "x"; then
AC_SUBST($1,$jplugin)
AC_MSG_RESULT($jplugin)
else
AC_MSG_RESULT([java plugin not found])
AC_SUBST($1,[])
fi
fi
])
|
@ -0,0 +1,85 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_java_check_class.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_JAVA_CHECK_CLASS(<class>,<action-if-found>,<action-if-not-found>)
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Test if a Java class is available. Based on AX_PROG_JAVAC_WORKS. This
|
||||
# version uses a cache variable which is both compiler, options and
|
||||
# classpath dependent (so if you switch from javac to gcj it correctly
|
||||
# notices and redoes the test).
|
||||
#
|
||||
# The macro tries to compile a minimal program importing <class>. Some
|
||||
# newer compilers moan about the failure to use this but fail or produce a
|
||||
# class file anyway. All moaning is sunk to /dev/null since I only wanted
|
||||
# to know if the class could be imported. This is a recommended followup
|
||||
# to AX_CHECK_JAVA_PLUGIN with classpath appropriately adjusted.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Duncan Simpson <dps@simpson.demon.co.uk>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 8
|
||||
|
||||
AU_ALIAS([DPS_JAVA_CHECK_CLASS], [AX_JAVA_CHECK_CLASS])
|
||||
AC_DEFUN([AX_JAVA_CHECK_CLASS],[
dnl Arguments: CLASS, ACTION-IF-FOUND, ACTION-IF-NOT-FOUND.
dnl Compiles a tiny program that imports CLASS and caches the yes/no answer
dnl in a variable whose name includes the compiler and the CLASSPATH.
dnl
dnl SED is used below to trim the CLASSPATH, so make sure it is set even when
dnl the including configure script never ran AC_PROG_SED itself.
AC_REQUIRE([AC_PROG_SED])
m4_define([cache_val],[m4_translit(ax_cv_have_java_class_$1, " ." ,"__")])
if test "x$CLASSPATH" != "x"; then
xtra=" with classpath ${CLASSPATH}"
xopts=`echo ${CLASSPATH} | ${SED} 's/^ *://'`
xopts="-classpath $xopts"
else xtra=""; xopts=""; fi
cache_var="cache_val"AS_TR_SH([_Jc_${JAVAC}_Cp_${CLASSPATH}])
AC_CACHE_CHECK([if the $1 class is avialable$xtra], [$cache_var], [
JAVA_TEST=Test.java
CLASS_TEST=Test.class
cat << \EOF > $JAVA_TEST
/* [#]xline __oline__ "configure" */
import $1;
public class Test {
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $xopts $JAVA_TEST) >/dev/null 2>&1; then
  eval "${cache_var}=yes"
else
  eval "${cache_var}=no"
  echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
  cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
fi
rm -f $JAVA_TEST $CLASS_TEST
])
dnl Run the matching action; the trailing true/false sets the exit status
dnl left behind by the expanded macro.
if eval 'test "x$'${cache_var}'" = "xyes"'; then
$2
true; else
$3
false; fi])
|
@ -0,0 +1,115 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_prog_java.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PROG_JAVA
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Here is a summary of the main macros:
|
||||
#
|
||||
# AX_PROG_JAVAC: finds a Java compiler.
|
||||
#
|
||||
# AX_PROG_JAVA: finds a Java virtual machine.
|
||||
#
|
||||
# AX_CHECK_CLASS: finds if we have the given class (beware of CLASSPATH!).
|
||||
#
|
||||
# AX_CHECK_RQRD_CLASS: finds if we have the given class and stops
|
||||
# otherwise.
|
||||
#
|
||||
# AX_TRY_COMPILE_JAVA: attempt to compile user given source.
|
||||
#
|
||||
# AX_TRY_RUN_JAVA: attempt to compile and run user given source.
|
||||
#
|
||||
# AX_JAVA_OPTIONS: adds Java configure options.
|
||||
#
|
||||
# AX_PROG_JAVA tests an existing Java virtual machine. It uses the
|
||||
# environment variable JAVA then tests in sequence various common Java
|
||||
# virtual machines. For political reasons, it starts with the free ones.
|
||||
# You *must* call [AX_PROG_JAVAC] before.
|
||||
#
|
||||
# If you want to force a specific VM:
|
||||
#
|
||||
# - at the configure.in level, set JAVA=yourvm before calling AX_PROG_JAVA
|
||||
#
|
||||
# (but after AC_INIT)
|
||||
#
|
||||
# - at the configure level, setenv JAVA
|
||||
#
|
||||
# You can use the JAVA variable in your Makefile.in, with @JAVA@.
|
||||
#
|
||||
# *Warning*: its success or failure can depend on a proper setting of the
|
||||
# CLASSPATH env. variable.
|
||||
#
|
||||
# TODO: allow to exclude virtual machines (rationale: most Java programs
|
||||
# cannot run with some VM like kaffe).
|
||||
#
|
||||
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||
# on other. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission.
|
||||
#
|
||||
# A Web page, with a link to the latest CVS snapshot is at
|
||||
# <http://www.internatif.org/bortzmeyer/autoconf-Java/>.
|
||||
#
|
||||
# This is a sample configure.in Process this file with autoconf to produce
|
||||
# a configure script.
|
||||
#
|
||||
# AC_INIT(UnTag.java)
|
||||
#
|
||||
# dnl Checks for programs.
|
||||
# AC_CHECK_CLASSPATH
|
||||
# AX_PROG_JAVAC
|
||||
# AX_PROG_JAVA
|
||||
#
|
||||
# dnl Checks for classes
|
||||
# AX_CHECK_RQRD_CLASS(org.xml.sax.Parser)
|
||||
# AX_CHECK_RQRD_CLASS(com.jclark.xml.sax.Driver)
|
||||
#
|
||||
# AC_OUTPUT(Makefile)
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 8
|
||||
|
||||
AU_ALIAS([AC_PROG_JAVA], [AX_PROG_JAVA])
|
||||
AC_DEFUN([AX_PROG_JAVA],[
dnl Sets JAVA to a Java virtual machine unless the caller already set it,
dnl aborts configure when none is found, then runs AX_PROG_JAVA_WORKS.
if test x$JAVAPREFIX = x; then
        test x$JAVA = x && AC_CHECK_PROGS(JAVA, kaffe java)
else
dnl The third argument of AC_CHECK_PROGS is the value used when nothing is
dnl found; the search path is the fourth. JAVAPREFIX used to be passed as the
dnl third, so it never influenced where the VM was looked up. It is now
dnl searched first (both PREFIX/bin and PREFIX itself), with the normal PATH
dnl kept as a fallback so setups that relied on PATH keep working.
        test x$JAVA = x && AC_CHECK_PROGS(JAVA, kaffe java, , [$JAVAPREFIX/bin$PATH_SEPARATOR$JAVAPREFIX$PATH_SEPARATOR$PATH])
fi
test x$JAVA = x && AC_MSG_ERROR([no acceptable Java virtual machine found in \$PATH])
AX_PROG_JAVA_WORKS
AC_PROVIDE([$0])dnl
])
|
@ -0,0 +1,104 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_prog_java_cc.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PROG_JAVA_CC
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Finds the appropriate java compiler on your path. By preference the java
|
||||
# compiler is gcj, then javac, then jikes.
|
||||
#
|
||||
# The macro can take one argument specifying a space separated list of
|
||||
# java compiler names.
|
||||
#
|
||||
# For example:
|
||||
#
|
||||
# AX_PROG_JAVA_CC(javac gcj)
|
||||
#
|
||||
# The macro also sets the compiler options variable: JAVA_CC_OPTS to
|
||||
# something sensible:
|
||||
#
|
||||
# - for GCJ it sets it to: @GCJ_OPTS@
|
||||
# (if GCJ_OPTS is not yet defined then it is set to "-C")
|
||||
#
|
||||
# - no other compiler has applicable options yet
|
||||
#
|
||||
# Here's an example configure.in:
|
||||
#
|
||||
# AC_INIT(Makefile.in)
|
||||
# AX_PROG_JAVA_CC()
|
||||
# AC_OUTPUT(Makefile)
|
||||
# dnl End.
|
||||
#
|
||||
# And here's the start of the Makefile.in:
|
||||
#
|
||||
# PROJECT_ROOT := @srcdir@
|
||||
# # Tool definitions.
|
||||
# JAVAC := @JAVA_CC@
|
||||
# JAVAC_OPTS := @JAVA_CC_OPTS@
|
||||
# JAR_TOOL := @jar_tool@
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Nic Ferrier <nferrier@tapsellferrier.co.uk>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 4
|
||||
|
||||
# AX_PROG_JAVA_CC([COMPILER ...])
|
||||
# --------------------------
|
||||
# COMPILER ... is a space separated list of java compilers to search for.
|
||||
# This just gives the user an opportunity to specify an alternative
|
||||
# search list for the java compiler.
|
||||
AU_ALIAS([AC_PROG_JAVA_CC], [AX_PROG_JAVA_CC])
|
||||
AC_DEFUN([AX_PROG_JAVA_CC],
[AC_ARG_VAR([JAVA_CC], [java compiler command])dnl
AC_ARG_VAR([JAVA_CC_FLAGS], [java compiler flags])dnl
dnl With an argument, search exactly the tools it lists. Without one, try
dnl gcj, then javac, then jikes, stopping at the first that sets JAVA_CC.
m4_ifval([$1],
         [AC_CHECK_TOOLS(JAVA_CC, [$1])],
         [AC_CHECK_TOOL(JAVA_CC, gcj)
          if test -z "$JAVA_CC"; then
            AC_CHECK_TOOL(JAVA_CC, javac)
          fi
          if test -z "$JAVA_CC"; then
            AC_CHECK_TOOL(JAVA_CC, jikes)
          fi
         ])

dnl Only a compiler named exactly gcj gets default options.
if test "x$JAVA_CC" = "xgcj"; then
  if test -z "$GCJ_OPTS"; then
    AC_SUBST(GCJ_OPTS,-C)
  fi
  AC_SUBST(JAVA_CC_OPTS, @GCJ_OPTS@,
           [Define the compilation options for GCJ])
fi
dnl Nothing found at all is fatal.
if test -z "$JAVA_CC"; then
  AC_MSG_ERROR([no acceptable java compiler found in \$PATH])
fi
])# AX_PROG_JAVA_CC
|
@ -0,0 +1,134 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_prog_java_works.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PROG_JAVA_WORKS
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# Internal use ONLY.
|
||||
#
|
||||
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||
# on other. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission. The
|
||||
# general documentation, as well as the sample configure.in, is included
|
||||
# in the AX_PROG_JAVA macro.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 2 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||
# gives unlimited permission to copy, distribute and modify the configure
|
||||
# scripts that are the output of Autoconf when processing the Macro. You
|
||||
# need not follow the terms of the GNU General Public License when using
|
||||
# or distributing such scripts, even though portions of the text of the
|
||||
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||
# all other use of the material that constitutes the Autoconf Macro.
|
||||
#
|
||||
# This special exception to the GPL applies to versions of the Autoconf
|
||||
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||
# modified version of the Autoconf Macro, you may extend this special
|
||||
# exception to the GPL to apply to your modified version as well.
|
||||
|
||||
#serial 8
|
||||
|
||||
AU_ALIAS([AC_PROG_JAVA_WORKS], [AX_PROG_JAVA_WORKS])
|
||||
AC_DEFUN([AX_PROG_JAVA_WORKS], [
dnl Verifies that the virtual machine in JAVA can run a trivial class and
dnl caches the answer in ac_cv_prog_java_works. The class comes from a
dnl uuencoded payload when uudecode can handle base 64; otherwise it is
dnl compiled with JAVAC.
  AC_PATH_PROG(UUDECODE, uudecode, [no])
  if test x$UUDECODE != xno; then
    AC_CACHE_CHECK([if uudecode can decode base 64 file], ac_cv_prog_uudecode_base64, [
dnl The payload below is the compiled form of this program:
dnl /**
dnl  * Test.java: used to test if java compiler works.
dnl  */
dnl public class Test
dnl {
dnl
dnl public static void
dnl main( String[] argv )
dnl {
dnl     System.exit (0);
dnl }
dnl
dnl }
cat << \EOF > Test.uue
begin-base64 644 Test.class
yv66vgADAC0AFQcAAgEABFRlc3QHAAQBABBqYXZhL2xhbmcvT2JqZWN0AQAE
bWFpbgEAFihbTGphdmEvbGFuZy9TdHJpbmc7KVYBAARDb2RlAQAPTGluZU51
bWJlclRhYmxlDAAKAAsBAARleGl0AQAEKEkpVgoADQAJBwAOAQAQamF2YS9s
YW5nL1N5c3RlbQEABjxpbml0PgEAAygpVgwADwAQCgADABEBAApTb3VyY2VG
aWxlAQAJVGVzdC5qYXZhACEAAQADAAAAAAACAAkABQAGAAEABwAAACEAAQAB
AAAABQO4AAyxAAAAAQAIAAAACgACAAAACgAEAAsAAQAPABAAAQAHAAAAIQAB
AAEAAAAFKrcAErEAAAABAAgAAAAKAAIAAAAEAAQABAABABMAAAACABQ=
====
EOF
      if $UUDECODE Test.uue; then
        ac_cv_prog_uudecode_base64=yes
      else
        echo "configure: __oline__: uudecode had trouble decoding base 64 file 'Test.uue'" >&AS_MESSAGE_LOG_FD
        echo "configure: failed file was:" >&AS_MESSAGE_LOG_FD
        cat Test.uue >&AS_MESSAGE_LOG_FD
        ac_cv_prog_uudecode_base64=no
      fi
      rm -f Test.uue])
  fi
dnl Without a decoded class file the compiler has to produce one, so make
dnl sure a working JAVAC has been looked for.
  if test x$ac_cv_prog_uudecode_base64 != xyes; then
    rm -f Test.class
    AC_MSG_WARN([I have to compile Test.class from scratch])
    if test x$ac_cv_prog_javac_works = xno; then
      AC_MSG_ERROR([Cannot compile java source. $JAVAC does not work properly])
    fi
    if test x$ac_cv_prog_javac_works = x; then
      AX_PROG_JAVAC
    fi
  fi
  AC_CACHE_CHECK(if $JAVA works, ac_cv_prog_java_works, [
    JAVA_TEST=Test.java
    CLASS_TEST=Test.class
    TEST=Test
changequote(, )dnl
cat << \EOF > $JAVA_TEST
/* [#]line __oline__ "configure" */
public class Test {
public static void main (String args[]) {
        System.exit (0);
} }
EOF
changequote([, ])dnl
    if test x$ac_cv_prog_uudecode_base64 != xyes; then
      if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $JAVA_TEST) && test -s $CLASS_TEST; then
        :
      else
        echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
        cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
        AC_MSG_ERROR(The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?))
      fi
    fi
    if AC_TRY_COMMAND($JAVA $JAVAFLAGS $TEST) >/dev/null 2>&1; then
      ac_cv_prog_java_works=yes
    else
      echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
      cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
      AC_MSG_ERROR(The Java VM $JAVA failed (see config.log, check the CLASSPATH?))
    fi
    rm -fr $JAVA_TEST $CLASS_TEST Test.uue
  ])
  AC_PROVIDE([$0])dnl
]
)
|
@ -0,0 +1,52 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_prog_javadoc.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PROG_JAVADOC
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# AX_PROG_JAVADOC tests for an existing javadoc generator. It uses the
|
||||
# environment variable JAVADOC then tests in sequence various common
|
||||
#   javadoc generators.
|
||||
#
|
||||
# If you want to force a specific compiler:
|
||||
#
|
||||
# - at the configure.in level, set JAVADOC=yourgenerator before calling
|
||||
# AX_PROG_JAVADOC
|
||||
#
|
||||
# - at the configure level, setenv JAVADOC
|
||||
#
|
||||
# You can use the JAVADOC variable in your Makefile.in, with @JAVADOC@.
|
||||
#
|
||||
# Note: This macro depends on the autoconf M4 macros for Java programs. It
|
||||
# is VERY IMPORTANT that you download that whole set, some macros depend
|
||||
#   on others. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission.
|
||||
#
|
||||
# The general documentation of those macros, as well as the sample
|
||||
# configure.in, is included in the AX_PROG_JAVA macro.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Egon Willighagen <e.willighagen@science.ru.nl>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 7
|
||||
|
||||
AU_ALIAS([AC_PROG_JAVADOC], [AX_PROG_JAVADOC])
|
||||
AC_DEFUN([AX_PROG_JAVADOC],[
dnl A JAVADOC value preset by the user always wins; the search below only
dnl runs while JAVADOC is still empty.
if test "x$JAVAPREFIX" = x; then
        test "x$JAVADOC" = x && AC_CHECK_PROGS(JAVADOC, javadoc)
else
dnl The search path is the FOURTH argument of AC_CHECK_PROGS. The third is
dnl the value assigned when nothing is found, so passing JAVAPREFIX there
dnl made a failed lookup look like a success.
dnl NOTE(review): JAVAPREFIX is searched as given; confirm whether callers
dnl expect JAVAPREFIX/bin instead.
        test "x$JAVADOC" = x && AC_CHECK_PROGS(JAVADOC, javadoc, , $JAVAPREFIX)
fi
test "x$JAVADOC" = x && AC_MSG_ERROR([no acceptable javadoc generator found in \$PATH])
AC_PROVIDE([$0])dnl
])
|
@ -0,0 +1,43 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_prog_javah.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_PROG_JAVAH
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
# AX_PROG_JAVAH tests the availability of the javah header generator and
|
||||
# looks for the jni.h header file. If available, JAVAH is set to the full
|
||||
# path of javah and CPPFLAGS is updated accordingly.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Luc Maisonobe <luc@spaceroots.org>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 5
|
||||
|
||||
AU_ALIAS([AC_PROG_JAVAH], [AX_PROG_JAVAH])
|
||||
AC_DEFUN([AX_PROG_JAVAH],[
AC_REQUIRE([AC_CANONICAL_SYSTEM])dnl
AC_REQUIRE([AC_PROG_CPP])dnl
AC_PATH_PROG([JAVAH], [javah])
dnl Only probe for jni.h when javah was located. If the header is not found
dnl with the current CPPFLAGS, retry with include directories derived from
dnl the javah path; the extra flags are kept only when the retry succeeds.
if test x"`eval 'echo $ac_cv_path_JAVAH'`" != x ; then
  AC_TRY_CPP([#include <jni.h>],,[
    ac_save_CPPFLAGS="$CPPFLAGS"
changequote(, )dnl
    ac_dir=`echo $ac_cv_path_JAVAH | sed 's,\(.*\)/[^/]*/[^/]*$,\1/include,'`
    ac_machdep=`echo $build_os | sed 's,[-0-9].*,,' | sed 's,cygwin,win32,'`
changequote([, ])dnl
    CPPFLAGS="$ac_save_CPPFLAGS -I$ac_dir -I$ac_dir/$ac_machdep"
    AC_TRY_CPP([#include <jni.h>],
               [ac_save_CPPFLAGS="$CPPFLAGS"],
               [AC_MSG_WARN([unable to include <jni.h>])])
    CPPFLAGS="$ac_save_CPPFLAGS"])
fi])
|
@ -0,0 +1,55 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_try_compile_java.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_TRY_COMPILE_JAVA
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
#   AX_TRY_COMPILE_JAVA attempts to compile user-given source.
|
||||
#
|
||||
# *Warning*: its success or failure can depend on a proper setting of the
|
||||
# CLASSPATH env. variable.
|
||||
#
|
||||
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||
#   on others. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission. The
|
||||
# general documentation, as well as the sample configure.in, is included
|
||||
# in the AX_PROG_JAVA macro.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 7
|
||||
|
||||
AU_ALIAS([AC_TRY_COMPILE_JAVA], [AX_TRY_COMPILE_JAVA])
|
||||
AC_DEFUN([AX_TRY_COMPILE_JAVA],[
AC_REQUIRE([AX_PROG_JAVAC])dnl
dnl Argument 1 is an optional class or package to import, argument 2 is the
dnl body of class Test, argument 3 runs on success and argument 4 on failure.
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
m4_ifval([$1], [import $1;])
public class Test {
[$2]
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS Test.java) && test -s Test.class
then
dnl Don't remove the temporary files here, so they can be examined.
  m4_ifval([$3], [$3], [:])
else
  echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
  cat Test.java >&AS_MESSAGE_LOG_FD
m4_ifval([$4], [  rm -fr Test*
  $4
])dnl
fi
rm -fr Test*])
|
@ -0,0 +1,56 @@
|
||||
# ===========================================================================
|
||||
# http://www.gnu.org/software/autoconf-archive/ax_try_run_java.html
|
||||
# ===========================================================================
|
||||
#
|
||||
# SYNOPSIS
|
||||
#
|
||||
# AX_TRY_RUN_JAVA
|
||||
#
|
||||
# DESCRIPTION
|
||||
#
|
||||
#   AX_TRY_RUN_JAVA attempts to compile and run user-given source.
|
||||
#
|
||||
# *Warning*: its success or failure can depend on a proper setting of the
|
||||
# CLASSPATH env. variable.
|
||||
#
|
||||
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||
#   on others. Unfortunately, the autoconf archive does not support the
|
||||
# concept of set of macros, so I had to break it for submission. The
|
||||
# general documentation, as well as the sample configure.in, is included
|
||||
# in the AX_PROG_JAVA macro.
|
||||
#
|
||||
# LICENSE
|
||||
#
|
||||
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
|
||||
#
|
||||
# Copying and distribution of this file, with or without modification, are
|
||||
# permitted in any medium without royalty provided the copyright notice
|
||||
# and this notice are preserved. This file is offered as-is, without any
|
||||
# warranty.
|
||||
|
||||
#serial 1
|
||||
|
||||
AU_ALIAS([AC_TRY_RUN_JAVA], [AX_TRY_RUN_JAVA])
|
||||
AC_DEFUN([AX_TRY_RUN_JAVA],[
AC_REQUIRE([AX_PROG_JAVAC])dnl
AC_REQUIRE([AX_PROG_JAVA])dnl
dnl Argument 1 is an optional class or package to import, argument 2 is the
dnl body of class Test, argument 3 runs on success and argument 4 on failure.
cat << \EOF > Test.java
/* [#]line __oline__ "configure" */
dnl Java has no include statement; the keyword is import, as already used
dnl by AX_TRY_COMPILE_JAVA.
ifelse([$1], , , [import $1;])
public class Test {
[$2]
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS Test.java) && test -s Test.class && ($JAVA $JAVAFLAGS Test; exit) 2>/dev/null
then
dnl Don't remove the temporary files here, so they can be examined.
  ifelse([$3], , :, [$3])
else
  echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
  cat Test.java >&AS_MESSAGE_LOG_FD
ifelse([$4], , , [  rm -fr Test*
  $4
])dnl
fi
rm -fr Test*])
|
@ -0,0 +1,6 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.deps
|
||||
/.libs
|
||||
/*.la
|
||||
/*.lo
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,21 @@
|
||||
Copyright (c) 2001, 2002, 2003 Ximian, Inc and the individuals listed
|
||||
on the ChangeLog entries.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@ -0,0 +1,11 @@
|
||||
# Every architecture directory is shipped in the distribution tarball.
DIST_SUBDIRS = x86 ppc sparc arm arm64 s390x amd64 ia64 mips

AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)

# arm needs to build some stuff even in JIT mode
if ARM
SUBDIRS = $(arch_target)
endif

EXTRA_DIST = ChangeLog
|
||||
|
@ -0,0 +1,7 @@
|
||||
mono_arch
|
||||
=========
|
||||
|
||||
Part of Mono project, https://github.com/mono
|
||||
|
||||
These are C macros that are useful when generating native code on various platforms.
|
||||
This code is MIT X11 licensed.
|
@ -0,0 +1 @@
|
||||
/arm-wmmx.h -crlf
|
@ -0,0 +1,15 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.deps
|
||||
/.libs
|
||||
/*.o
|
||||
/*.la
|
||||
/*.lo
|
||||
/*.lib
|
||||
/*.obj
|
||||
/*.exe
|
||||
/*.dll
|
||||
/arm_dpimacros.h
|
||||
/arm_fpamacros.h
|
||||
/arm_vfpmacros.h
|
||||
/fixeol.sh
|
@ -0,0 +1,27 @@
|
||||
|
||||
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)

noinst_LTLIBRARIES = libmonoarch-arm.la

# Headers generated by the shell scripts below; they must exist before any
# source that includes them is compiled.
BUILT_SOURCES = arm_dpimacros.h arm_vfpmacros.h

libmonoarch_arm_la_SOURCES = $(BUILT_SOURCES) \
	arm-codegen.c \
	arm-codegen.h \
	arm-dis.c \
	arm-dis.h

# Each generator runs from the source directory and writes to a temporary
# "$@t" file that is renamed afterwards, so an interrupted run never leaves a
# truncated header behind. "&&" (rather than ";") stops the generator from
# running in the wrong directory when the cd fails.
arm_dpimacros.h: dpiops.sh mov_macros.th dpi_macros.th cmp_macros.th
	(cd $(srcdir) && bash ./dpiops.sh) > $@t
	mv $@t $@

arm_vfpmacros.h: vfpops.sh vfpm_macros.th vfp_macros.th
	(cd $(srcdir) && bash ./vfpops.sh) > $@t
	mv $@t $@

CLEANFILES = $(BUILT_SOURCES)

EXTRA_DIST = dpiops.sh mov_macros.th dpi_macros.th cmp_macros.th \
	vfpm_macros.th vfp_macros.th arm-vfp-codegen.h vfpops.sh
|
||||
|
@ -0,0 +1,193 @@
|
||||
/*
|
||||
* arm-codegen.c
|
||||
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
|
||||
*/
|
||||
|
||||
#include "arm-codegen.h"
|
||||
|
||||
|
||||
/*
 * Emit the standard prologue at p and return the advanced emit pointer.
 *
 * Sequence: copy SP into IP, push A1-A4 (the argument registers), push IP
 * and LR, then subtract local_size from SP. A local_size that fits in eight
 * bits is subtracted as an immediate; a larger one is first materialised in
 * IP, after which IP is reloaded from the stack.
 */
arminstr_t* arm_emit_std_prologue(arminstr_t* p, unsigned int local_size) {
	ARM_MOV_REG_REG(p, ARMREG_IP, ARMREG_SP);

	/* save args */
	ARM_PUSH(p, (1 << ARMREG_A1) | (1 << ARMREG_A2) | (1 << ARMREG_A3) | (1 << ARMREG_A4));

	ARM_PUSH(p, (1U << ARMREG_IP) | (1U << ARMREG_LR));

	/* nothing to reserve */
	if (local_size == 0) {
		return p;
	}

	if (local_size <= 0xFF) {
		ARM_SUB_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
		return p;
	}

	/* TODO: optimize */
	p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
	ARM_SUB_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
	/* IP was clobbered above: reload the value pushed earlier */
	ARM_ADD_REG_IMM8(p, ARMREG_IP, ARMREG_IP, sizeof(armword_t));
	ARM_LDR_REG_REG(p, ARMREG_IP, ARMREG_SP, ARMREG_IP);

	return p;
}
|
||||
|
||||
/*
 * Emit the standard epilogue at p and return the advanced emit pointer.
 *
 * Adds local_size back to SP (as an 8-bit immediate when it fits, otherwise
 * through IP) and then pops SP, PC and the registers selected by the low ten
 * bits of pop_regs.
 */
arminstr_t* arm_emit_std_epilogue(arminstr_t* p, unsigned int local_size, int pop_regs) {
	if (local_size != 0 && local_size <= 0xFF) {
		ARM_ADD_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
	} else if (local_size != 0) {
		/* TODO: optimize */
		p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
		ARM_ADD_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
	}

	ARM_POP_NWB(p, (1 << ARMREG_SP) | (1 << ARMREG_PC) | (pop_regs & 0x3FF));

	return p;
}
|
||||
|
||||
|
||||
/* do not push A1-A4 */
|
||||
arminstr_t* arm_emit_lean_prologue(arminstr_t* p, unsigned int local_size, int push_regs) {
|
||||
ARM_MOV_REG_REG(p, ARMREG_IP, ARMREG_SP);
|
||||
/* push_regs upto R10 will be saved */
|
||||
ARM_PUSH(p, (1U << ARMREG_IP) | (1U << ARMREG_LR) | (push_regs & 0x3FF));
|
||||
|
||||
if (local_size != 0) {
|
||||
if ((local_size & (~0xFF)) == 0) {
|
||||
ARM_SUB_REG_IMM8(p, ARMREG_SP, ARMREG_SP, local_size);
|
||||
} else {
|
||||
/* TODO: optimize */
|
||||
p = arm_mov_reg_imm32(p, ARMREG_IP, local_size);
|
||||
ARM_SUB_REG_REG(p, ARMREG_SP, ARMREG_SP, ARMREG_IP);
|
||||
/* restore IP from stack */
|
||||
ARM_ADD_REG_IMM8(p, ARMREG_IP, ARMREG_IP, sizeof(armword_t));
|
||||
ARM_LDR_REG_REG(p, ARMREG_IP, ARMREG_SP, ARMREG_IP);
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Bit scan forward. */
|
||||
int arm_bsf(armword_t val) {
|
||||
int i;
|
||||
armword_t mask;
|
||||
|
||||
if (val == 0) return 0;
|
||||
for (i=1, mask=1; (i <= 8 * sizeof(armword_t)) && ((val & mask) == 0); ++i, mask<<=1);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns 1 when val has at most one bit set, 0 otherwise.
 * Note that zero is reported as a power of two as well.
 */
int arm_is_power_of_2(armword_t val) {
	/* clearing the lowest set bit leaves nothing behind for 0 and for 2^n */
	armword_t remainder = val & (val - 1);

	return remainder == 0;
}
|
||||
|
||||
|
||||
/*
|
||||
* returns:
|
||||
* 1 - unable to represent
|
||||
* positive even number - MOV-representable
|
||||
* negative even number - MVN-representable
|
||||
*/
|
||||
int calc_arm_mov_const_shift(armword_t val) {
|
||||
armword_t mask;
|
||||
int res = 1, shift;
|
||||
|
||||
for (shift=0; shift < 32; shift+=2) {
|
||||
mask = ARM_SCALE(0xFF, shift);
|
||||
if ((val & (~mask)) == 0) {
|
||||
res = shift;
|
||||
break;
|
||||
}
|
||||
if (((~val) & (~mask)) == 0) {
|
||||
res = -shift - 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Returns 1 when val has at most one bit set or is MOV-representable
 * according to calc_arm_mov_const_shift, 0 otherwise. Values that are only
 * MVN-representable are rejected.
 */
int is_arm_const(armword_t val) {
	int shift;

	if (arm_is_power_of_2(val)) {
		return 1;
	}

	shift = calc_arm_mov_const_shift(val);

	/* negative means MVN-only, 1 means not representable at all */
	return (shift >= 0) && (shift != 1);
}
|
||||
|
||||
|
||||
/*
 * Count how many 8-bit chunks, each starting at an even bit position, have
 * to be cleared to reduce val to zero. Returns 0 for val == 0.
 */
int arm_const_steps(armword_t val) {
	int shift, steps = 0;

	while (val != 0) {
		/* even bit position at or just below the lowest set bit */
		shift = (arm_bsf(val) - 1) & (~1);
		/*
		 * The mask is built from an unsigned constant: shifting the signed
		 * int 0xFF left by 24 or more overflows, which is undefined
		 * behaviour. Assumes armword_t is no wider than unsigned int
		 * (TODO confirm in arm-codegen.h).
		 */
		val &= ~(0xFFU << shift);
		++steps;
	}

	return steps;
}
|
||||
|
||||
|
||||
/*
|
||||
* ARM cannot load arbitrary 32-bit constants directly into registers;
|
||||
* widely used work-around for this is to store constants into a
|
||||
* PC-addressable pool and use LDR instruction with PC-relative address
|
||||
* to load constant into register. Easiest way to implement this is to
|
||||
* embed constant inside a function with unconditional branch around it.
|
||||
* The above method is not used at the moment.
|
||||
* This routine always emits sequence of instructions to generate
|
||||
* requested constant. In the worst case it takes 4 instructions to
|
||||
* synthesize a constant - 1 MOV and 3 subsequent ORRs.
|
||||
*/
|
||||
arminstr_t* arm_mov_reg_imm32_cond(arminstr_t* p, int reg, armword_t imm32, int cond) {
|
||||
int mov_op;
|
||||
int step_op;
|
||||
int snip;
|
||||
int shift = calc_arm_mov_const_shift(imm32);
|
||||
|
||||
if ((shift & 0x80000001) != 1) {
|
||||
if (shift >= 0) {
|
||||
ARM_MOV_REG_IMM_COND(p, reg, imm32 >> ((32 - shift) & 31), shift, cond);
|
||||
} else {
|
||||
ARM_MVN_REG_IMM_COND(p, reg, (imm32 ^ (~0)) >> ((32 + 2 + shift) & 31), (-shift - 2), cond);
|
||||
}
|
||||
} else {
|
||||
mov_op = ARMOP_MOV;
|
||||
step_op = ARMOP_ORR;
|
||||
|
||||
if (arm_const_steps(imm32) > arm_const_steps(~imm32)) {
|
||||
mov_op = ARMOP_MVN;
|
||||
step_op = ARMOP_SUB;
|
||||
imm32 = ~imm32;
|
||||
}
|
||||
|
||||
shift = (arm_bsf(imm32) - 1) & (~1);
|
||||
snip = imm32 & (0xFF << shift);
|
||||
ARM_EMIT(p, ARM_DEF_DPI_IMM_COND((unsigned)snip >> shift, (32 - shift) >> 1, reg, 0, 0, mov_op, cond));
|
||||
|
||||
while ((imm32 ^= snip) != 0) {
|
||||
shift = (arm_bsf(imm32) - 1) & (~1);
|
||||
snip = imm32 & (0xFF << shift);
|
||||
ARM_EMIT(p, ARM_DEF_DPI_IMM_COND((unsigned)snip >> shift, (32 - shift) >> 1, reg, reg, 0, step_op, cond));
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Unconditional variant of arm_mov_reg_imm32_cond: emits the instructions
 * that load imm32 into reg with condition ARMCOND_AL and returns the
 * advanced emit pointer.
 */
arminstr_t* arm_mov_reg_imm32(arminstr_t* p, int reg, armword_t imm32) {
	arminstr_t* next = arm_mov_reg_imm32_cond(p, reg, imm32, ARMCOND_AL);

	return next;
}
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,509 @@
|
||||
/*
|
||||
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
|
||||
*/
|
||||
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "arm-dis.h"
|
||||
#include "arm-codegen.h"
|
||||
|
||||
|
||||
/* Lazily allocated disassembler used by the _armdis_* convenience wrappers. */
static ARMDis* gdisasm = NULL;

/* Non-zero: print a1-a4, fp, ip, sp, lr, pc instead of plain rN names. */
static int use_reg_alias = 1;

/*
 * Lookup tables. The storage class is written first ("const static" is an
 * obsolescent ordering) and the pointers themselves are const because
 * nothing in this file modifies the tables.
 */

/* Condition-code suffixes; entry 14 is empty so that condition prints no suffix. */
static const char* const cond[] = {
	"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
	"hi", "ls", "ge", "lt", "gt", "le", "", "nv"
};

/* Data-processing mnemonics, indexed by opcode. */
static const char* const ops[] = {
	"and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc",
	"tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn"
};

static const char* const shift_types[] = {"lsl", "lsr", "asr", "ror"};

/* Multiply mnemonics; "?" marks opcodes that dump_mul reports as unknown. */
static const char* const mul_ops[] = {
	"mul", "mla", "?", "?", "umull", "umlal", "smull", "smlal"
};

/* Register names used by dump_reg when use_reg_alias is set. */
static const char* const reg_alias[] = {
	"a1", "a2", "a3", "a4",
	"r4", "r5", "r6", "r7", "r8", "r9", "r10",
	"fp", "ip", "sp", "lr", "pc"
};

/* PSR field names used by dump_msr. */
static const char* const msr_fld[] = {"f", "c", "x", "?", "s"};
|
||||
|
||||
|
||||
/* private functions prototypes (to keep compiler happy) */
|
||||
void chk_out(ARMDis* dis);
|
||||
void dump_reg(ARMDis* dis, int reg);
|
||||
void dump_creg(ARMDis* dis, int creg);
|
||||
void dump_reglist(ARMDis* dis, int reg_list);
|
||||
void init_gdisasm(void);
|
||||
|
||||
void dump_br(ARMDis* dis, ARMInstr i);
|
||||
void dump_cdp(ARMDis* dis, ARMInstr i);
|
||||
void dump_cdt(ARMDis* dis, ARMInstr i);
|
||||
void dump_crt(ARMDis* dis, ARMInstr i);
|
||||
void dump_dpi(ARMDis* dis, ARMInstr i);
|
||||
void dump_hxfer(ARMDis* dis, ARMInstr i);
|
||||
void dump_mrs(ARMDis* dis, ARMInstr i);
|
||||
void dump_mrt(ARMDis* dis, ARMInstr i);
|
||||
void dump_msr(ARMDis* dis, ARMInstr i);
|
||||
void dump_mul(ARMDis* dis, ARMInstr i);
|
||||
void dump_swi(ARMDis* dis, ARMInstr i);
|
||||
void dump_swp(ARMDis* dis, ARMInstr i);
|
||||
void dump_wxfer(ARMDis* dis, ARMInstr i);
|
||||
void dump_clz(ARMDis* dis, ARMInstr i);
|
||||
|
||||
|
||||
/*
|
||||
void out(ARMDis* dis, const char* format, ...) {
|
||||
va_list arglist;
|
||||
va_start(arglist, format);
|
||||
fprintf(dis->dis_out, format, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/* Make sure dis has an output stream: a NULL dis_out falls back to stdout. */
void chk_out(ARMDis* dis) {
	if (dis == NULL) {
		return;
	}

	if (dis->dis_out == NULL) {
		dis->dis_out = stdout;
	}
}
|
||||
|
||||
|
||||
/*
 * Direct the output of dis to f. Passing f == NULL selects stdout.
 * A NULL dis is ignored.
 */
void armdis_set_output(ARMDis* dis, FILE* f) {
	if (dis == NULL) {
		return;
	}

	dis->dis_out = (f != NULL) ? f : stdout;
}
|
||||
|
||||
/* Return the stream dis currently writes to, or NULL when dis is NULL. */
FILE* armdis_get_output(ARMDis* dis) {
	if (dis == NULL) {
		return NULL;
	}

	return dis->dis_out;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Print the name of register reg (only the low four bits are used).
 * Registers 0-3 and 11-15 are printed through reg_alias when use_reg_alias
 * is set; everything else is printed as rN. dis must not be NULL.
 */
void dump_reg(ARMDis* dis, int reg) {
	const int idx = reg & 0xF;
	const int has_alias = (idx <= 3) || (idx >= 11);

	if (use_reg_alias && has_alias) {
		fprintf(dis->dis_out, "%s", reg_alias[idx]);
	} else {
		fprintf(dis->dis_out, "r%d", idx);
	}
}
|
||||
|
||||
/*
 * Print coprocessor register creg as cN (only the low four bits are used).
 * A NULL dis is ignored.
 */
void dump_creg(ARMDis* dis, int creg) {
	if (dis == NULL) {
		return;
	}

	fprintf(dis->dis_out, "c%d", creg & 0xF);
}
|
||||
|
||||
/*
 * Print the registers selected by the low 16 bits of reg_list.
 *
 * Consecutive registers are grouped: a run of three or more is printed as
 * "first-last", a run of exactly two as "first, last". Groups are separated
 * by ", ". dis must not be NULL.
 */
void dump_reglist(ARMDis* dis, int reg_list) {
	int first, last;
	int groups = 0;

	for (first = 0; first < 16; first = last + 1) {
		last = first;

		if ((reg_list & (1 << first)) == 0) {
			continue;
		}

		if (groups++ != 0) {
			fprintf(dis->dis_out, ", ");
		}
		dump_reg(dis, first);

		/* extend the run over every directly following selected register */
		while ((last + 1 < 16) && ((reg_list & (1 << (last + 1))) != 0)) {
			++last;
		}

		if (last - first > 1) {
			fprintf(dis->dis_out, "-");
			dump_reg(dis, last);
		} else if (last - first == 1) {
			fprintf(dis->dis_out, ", ");
			dump_reg(dis, last);
		}
	}
}
|
||||
|
||||
|
||||
/*
 * Print a branch instruction: mnemonic, raw offset field, the address of
 * the instruction itself (dis->pi) and the computed branch target.
 *
 * The offset field is treated as a signed 24-bit word count, matching the
 * "<< 8 >> 6" the original code applied; the target is the instruction
 * address + 8 + 4 * offset.
 */
void dump_br(ARMDis* dis, ARMInstr i) {
	/*
	 * Sign-extend by hand: shifting the promoted field left by 8 overflows
	 * a signed int whenever bit 23 is set (every backward branch).
	 */
	long words = (long)(i.br.offset & 0x7FFFFF) - (long)(i.br.offset & 0x800000);
	/*
	 * Do the address arithmetic in size_t and print with %p: casting the
	 * pointer to int truncates it on hosts with 64-bit pointers.
	 */
	size_t target = (size_t)dis->pi + 4*2 + (size_t)(words * 4);

	fprintf(dis->dis_out, "b%s%s\t%x\t; %p -> %p",
	        (i.br.link == 1) ? "l" : "",
	        cond[i.br.cond], (unsigned)i.br.offset, dis->pi, (void*)target);
}
|
||||
|
||||
|
||||
/*
 * Print a data-processing instruction: mnemonic, condition, optional "s"
 * suffix, then Rd, Rn and the second operand.
 *
 * Rd and the "s" suffix are omitted for opcodes ARMOP_TST..ARMOP_CMN, and
 * Rn is omitted for ARMOP_MOV / ARMOP_MVN. dis must not be NULL.
 */
void dump_dpi(ARMDis* dis, ARMInstr i) {
	const int is_compare = (i.dpi.all.opcode >= ARMOP_TST) && (i.dpi.all.opcode <= ARMOP_CMN);
	const int is_move = (i.dpi.all.opcode == ARMOP_MOV) || (i.dpi.all.opcode == ARMOP_MVN);

	fprintf(dis->dis_out, "%s%s", ops[i.dpi.all.opcode], cond[i.dpi.all.cond]);

	if (!is_compare && (i.dpi.all.s != 0)) {
		fprintf(dis->dis_out, "s");
	}

	fprintf(dis->dis_out, "\t");

	if (!is_compare) {
		/* for comparison operations Rd is ignored */
		dump_reg(dis, i.dpi.all.rd);
		fprintf(dis->dis_out, ", ");
	}

	if (!is_move) {
		/* for MOV/MVN Rn is ignored */
		dump_reg(dis, i.dpi.all.rn);
		fprintf(dis->dis_out, ", ");
	}

	if (i.dpi.all.type == 1) {
		/* immediate operand, followed by its value in hex as a comment */
		if (i.dpi.op2_imm.rot == 0) {
			fprintf(dis->dis_out, "#%d\t; 0x%x", i.dpi.op2_imm.imm, i.dpi.op2_imm.imm);
		} else {
			fprintf(dis->dis_out, "#%d, %d\t; 0x%x", i.dpi.op2_imm.imm, i.dpi.op2_imm.rot << 1,
			        ARM_SCALE(i.dpi.op2_imm.imm, (i.dpi.op2_imm.rot << 1)) );
		}
		return;
	}

	/* register operand */
	if (i.dpi.op2_reg.tag == 0) {
		/* op2 is reg shift by imm */
		dump_reg(dis, i.dpi.op2_reg_imm.r2.rm);
		if (i.dpi.op2_reg_imm.imm.shift != 0) {
			fprintf(dis->dis_out, " %s #%d", shift_types[i.dpi.op2_reg_imm.r2.type], i.dpi.op2_reg_imm.imm.shift);
		}
	} else {
		/* op2 is reg shift by reg */
		dump_reg(dis, i.dpi.op2_reg_reg.r2.rm);
		fprintf(dis->dis_out, " %s ", shift_types[i.dpi.op2_reg_reg.r2.type]);
		dump_reg(dis, i.dpi.op2_reg_reg.reg.rs);
	}
}
|
||||
|
||||
/*
 * Print a word/byte load or store: ldr/str with condition, optional "b" and
 * "t" suffixes, then "Rd, [Rn" and the offset operand. The bracket closes
 * right after Rn when the p field is 0, otherwise after the offset, where a
 * "!" is appended when wb is set. dis must not be NULL.
 */
void dump_wxfer(ARMDis* dis, ARMInstr i) {
	const char* mnemonic = (i.wxfer.all.ls == 0) ? "str" : "ldr";
	const char* byte_sfx = (i.wxfer.all.b == 0) ? "" : "b";
	const char* t_sfx = (i.wxfer.all.ls != 0 && i.wxfer.all.wb != 0) ? "t" : "";
	const int bracket_after_offset = (i.wxfer.all.p != 0);

	fprintf(dis->dis_out, "%s%s%s%s\t", mnemonic, cond[i.generic.cond], byte_sfx, t_sfx);
	dump_reg(dis, i.wxfer.all.rd);
	fprintf(dis->dis_out, ", [");
	dump_reg(dis, i.wxfer.all.rn);
	fprintf(dis->dis_out, "%s, ", bracket_after_offset ? "" : "]");

	if (i.wxfer.all.type == 0) {
		/* immediate offset; the u field selects the sign */
		fprintf(dis->dis_out, "#%s%d", (i.wxfer.all.u == 0) ? "-" : "", i.wxfer.all.op2_imm);
	} else {
		/* register offset, optionally shifted by an immediate */
		dump_reg(dis, i.wxfer.op2_reg_imm.r2.rm);
		if (i.wxfer.op2_reg_imm.imm.shift != 0) {
			fprintf(dis->dis_out, " %s #%d", shift_types[i.wxfer.op2_reg_imm.r2.type], i.wxfer.op2_reg_imm.imm.shift);
		}
	}

	if (bracket_after_offset) {
		/* close pre-index instr, also check for write-back */
		fprintf(dis->dis_out, "]%s", (i.wxfer.all.wb != 0) ? "!" : "");
	}
}
|
||||
|
||||
/*
 * Print a halfword / signed load or store: ldr/str with condition, optional
 * "s", then "h" or "b", followed by "Rd, [Rn" and the offset, which is an
 * immediate assembled from imm_hi and rm when type is non-zero and register
 * rm otherwise. dis must not be NULL.
 */
void dump_hxfer(ARMDis* dis, ARMInstr i) {
	const char* mnemonic = (i.hxfer.ls == 0) ? "str" : "ldr";
	const char* sign_sfx = (i.hxfer.s != 0) ? "s" : "";
	const char* size_sfx = (i.hxfer.h != 0) ? "h" : "b";
	const int bracket_after_offset = (i.hxfer.p != 0);

	fprintf(dis->dis_out, "%s%s%s%s\t", mnemonic, cond[i.generic.cond], sign_sfx, size_sfx);
	dump_reg(dis, i.hxfer.rd);
	fprintf(dis->dis_out, ", [");
	dump_reg(dis, i.hxfer.rn);
	fprintf(dis->dis_out, "%s, ", bracket_after_offset ? "" : "]");

	if (i.hxfer.type != 0) {
		/* immediate offset: high nibble in imm_hi, low nibble in the rm field */
		fprintf(dis->dis_out, "#%s%d", (i.hxfer.u == 0) ? "-" : "", (i.hxfer.imm_hi << 4) | i.hxfer.rm);
	} else {
		dump_reg(dis, i.hxfer.rm);
	}

	if (bracket_after_offset) {
		/* close pre-index instr, also check for write-back */
		fprintf(dis->dis_out, "]%s", (i.hxfer.wb != 0) ? "!" : "");
	}
}
|
||||
|
||||
|
||||
/*
 * Print a multiple-register transfer: stm/ldm with condition, a direction
 * letter ("d" or "i") and an ordering letter ("a" or "b"), the base
 * register with optional "!", and the register list in braces.
 * dis must not be NULL.
 */
void dump_mrt(ARMDis* dis, ARMInstr i) {
	const char* mnemonic = (i.mrt.ls == 0) ? "stm" : "ldm";
	const char* direction = (i.mrt.u == 0) ? "d" : "i";
	const char* ordering = (i.mrt.p == 0) ? "a" : "b";
	const char* writeback = (i.mrt.wb != 0) ? "!" : "";

	fprintf(dis->dis_out, "%s%s%s%s\t", mnemonic, cond[i.mrt.cond], direction, ordering);
	dump_reg(dis, i.mrt.rn);
	fprintf(dis->dis_out, "%s, {", writeback);
	dump_reglist(dis, i.mrt.reg_list);
	fprintf(dis->dis_out, "}");
}
|
||||
|
||||
|
||||
/*
 * Print a swap instruction as "swp<cond>[b] Rd, Rm, [Rn]".
 * dis must not be NULL.
 */
void dump_swp(ARMDis* dis, ARMInstr i) {
	const char* byte_sfx = (i.swp.b != 0) ? "b" : "";

	fprintf(dis->dis_out, "swp%s%s ", cond[i.swp.cond], byte_sfx);

	dump_reg(dis, i.swp.rd);
	fprintf(dis->dis_out, ", ");

	dump_reg(dis, i.swp.rm);
	fprintf(dis->dis_out, ", [");

	dump_reg(dis, i.swp.rn);
	fprintf(dis->dis_out, "]");
}
|
||||
|
||||
|
||||
/*
 * Print a multiply instruction. The mnemonic, condition and optional "s"
 * are always printed first; the operand list depends on the opcode:
 *   MUL:             Rd, Rm, Rs
 *   MLA:             Rd, Rm, Rs, Rn
 *   UMULL .. SMLAL:  Rd, Rn, Rm, Rs
 * Any other opcode is reported as raw data. dis must not be NULL.
 */
void dump_mul(ARMDis* dis, ARMInstr i) {
	const char* flags_sfx = (i.mul.s != 0) ? "s" : "";

	fprintf(dis->dis_out, "%s%s%s\t", mul_ops[i.mul.opcode], cond[i.mul.cond], flags_sfx);

	switch (i.mul.opcode) {
	case ARMOP_MUL:
	case ARMOP_MLA:
		dump_reg(dis, i.mul.rd);
		fprintf(dis->dis_out, ", ");
		dump_reg(dis, i.mul.rm);
		fprintf(dis->dis_out, ", ");
		dump_reg(dis, i.mul.rs);
		if (i.mul.opcode == ARMOP_MLA) {
			/* the accumulate form carries one more operand */
			fprintf(dis->dis_out, ", ");
			dump_reg(dis, i.mul.rn);
		}
		break;
	case ARMOP_UMULL:
	case ARMOP_UMLAL:
	case ARMOP_SMULL:
	case ARMOP_SMLAL:
		dump_reg(dis, i.mul.rd);
		fprintf(dis->dis_out, ", ");
		dump_reg(dis, i.mul.rn);
		fprintf(dis->dis_out, ", ");
		dump_reg(dis, i.mul.rm);
		fprintf(dis->dis_out, ", ");
		dump_reg(dis, i.mul.rs);
		break;
	default:
		fprintf(dis->dis_out, "DCD 0x%x\t; <unknown>", i.raw);
		break;
	}
}
|
||||
|
||||
|
||||
/*
 * Print a coprocessor data operation as
 * "cdp<cond> p<cpn>, <op>, cRd, cRn, cRm[, <op2>]"; op2 is only printed
 * when it is non-zero. dis must not be NULL.
 */
void dump_cdp(ARMDis* dis, ARMInstr i) {
	fprintf(dis->dis_out, "cdp%s\tp%d, %d, ", cond[i.generic.cond], i.cdp.cpn, i.cdp.op);

	dump_creg(dis, i.cdp.crd);
	fprintf(dis->dis_out, ", ");

	dump_creg(dis, i.cdp.crn);
	fprintf(dis->dis_out, ", ");

	dump_creg(dis, i.cdp.crm);

	if (i.cdp.op2 == 0) {
		return;
	}

	fprintf(dis->dis_out, ", %d", i.cdp.op2);
}
|
||||
|
||||
|
||||
/*
 * Print a coprocessor data transfer as
 * "stc|ldc<cond>[l] p<cpn>, cRd, [Rn]..." with an optional ", #offs".
 * When the p field is 0 the bracket closes right after Rn; otherwise it
 * closes after the offset, followed by "!" when wb is set.
 * dis must not be NULL.
 */
void dump_cdt(ARMDis* dis, ARMInstr i) {
	fprintf(dis->dis_out, "%s%s%s\tp%d, ", (i.cdt.ls == 0) ? "stc" : "ldc",
	        cond[i.generic.cond], (i.cdt.n != 0) ? "l" : "", i.cdt.cpn);
	dump_creg(dis, i.cdt.crd);
	/*
	 * Open the address bracket here: both paths below print the closing
	 * "]", but the opening "[" was never emitted.
	 */
	fprintf(dis->dis_out, ", [");
	dump_reg(dis, i.cdt.rn);

	if (i.cdt.p == 0) {
		fprintf(dis->dis_out, "]");
	}

	if (i.cdt.offs != 0) {
		fprintf(dis->dis_out, ", #%d", i.cdt.offs);
	}

	if (i.cdt.p != 0) {
		fprintf(dis->dis_out, "]%s", (i.cdt.wb != 0) ? "!" : "");
	}
}
|
||||
|
||||
|
||||
/*
 * Print a coprocessor register transfer as
 * "<mnemonic><cond> p<cpn>, <op1>, Rd, cRn, cRm[, <op2>]"; op2 is only
 * printed when it is non-zero. dis must not be NULL.
 *
 * NOTE(review): "mrc" is chosen when the ls field is 0 and "mcr" otherwise,
 * which looks inverted next to dump_cdt's stc/ldc choice - confirm against
 * the ls bit definition before relying on the mnemonic.
 */
void dump_crt(ARMDis* dis, ARMInstr i) {
	const char* mnemonic = (i.crt.ls == 0) ? "mrc" : "mcr";

	fprintf(dis->dis_out, "%s%s\tp%d, %d, ", mnemonic, cond[i.generic.cond], i.crt.cpn, i.crt.op1);

	dump_reg(dis, i.crt.rd);
	fprintf(dis->dis_out, ", ");

	dump_creg(dis, i.crt.crn);
	fprintf(dis->dis_out, ", ");

	dump_creg(dis, i.crt.crm);

	if (i.crt.op2 == 0) {
		return;
	}

	fprintf(dis->dis_out, ", %d", i.crt.op2);
}
|
||||
|
||||
|
||||
/*
 * Print a move-to-status-register instruction as
 * "msr<cond> <s|c>psr_<field>, <operand>".
 *
 * For the register form the field name comes from msr_fld; for the
 * immediate form the field is always printed as "f".
 * dis must not be NULL.
 */
void dump_msr(ARMDis* dis, ARMInstr i) {
	/*
	 * The field name is appended directly after "psr_". The previous
	 * format string ended in ", ", which produced "cpsr_, f, ..." instead
	 * of "cpsr_f, ...".
	 */
	fprintf(dis->dis_out, "msr%s\t%spsr_", cond[i.generic.cond],
	        (i.msr.all.sel == 0) ? "s" : "c");
	if (i.msr.all.type == 0) {
		/* reg */
		const unsigned int fld = i.msr.all.fld;
		/* msr_fld has only a handful of entries: never index past its end */
		const char* fld_name = (fld < sizeof(msr_fld) / sizeof(msr_fld[0])) ? msr_fld[fld] : "?";

		fprintf(dis->dis_out, "%s, ", fld_name);
		dump_reg(dis, i.msr.all.rm);
	} else {
		/* imm */
		/* NOTE(review): dump_dpi decodes its immediate with ARM_SCALE; confirm a plain left shift is intended here. */
		fprintf(dis->dis_out, "f, #%d", i.msr.op2_imm.imm << i.msr.op2_imm.rot);
	}
}
|
||||
|
||||
|
||||
/*
 * Print a move-from-status-register instruction as
 * "mrs<cond> Rd, <s|c>psr". dis must not be NULL.
 */
void dump_mrs(ARMDis* dis, ARMInstr i) {
	const char* psr_prefix = (i.mrs.sel == 0) ? "s" : "c";

	fprintf(dis->dis_out, "mrs%s\t", cond[i.generic.cond]);
	dump_reg(dis, i.mrs.rd);
	fprintf(dis->dis_out, ", %spsr", psr_prefix);
}
|
||||
|
||||
|
||||
/* Print a software interrupt as "swi<cond> <num>". dis must not be NULL. */
void dump_swi(ARMDis* dis, ARMInstr i) {
	const char* cond_sfx = cond[i.generic.cond];

	fprintf(dis->dis_out, "swi%s\t%d", cond_sfx, i.swi.num);
}
|
||||
|
||||
|
||||
/*
 * Print a count-leading-zeros instruction as "clz<cond> Rd, Rm".
 * dis must not be NULL.
 */
void dump_clz(ARMDis* dis, ARMInstr i) {
	/* print the condition suffix like every other dump_* routine does */
	fprintf(dis->dis_out, "clz%s\t", cond[i.generic.cond]);
	dump_reg(dis, i.clz.rd);
	fprintf(dis->dis_out, ", ");
	dump_reg(dis, i.clz.rm);
	/*
	 * No trailing newline here: armdis_decode terminates every line itself,
	 * so printing one in this routine left a blank line after each clz.
	 */
}
|
||||
|
||||
|
||||
|
||||
/*
 * Disassemble the instructions stored at p and print one line per
 * instruction ("<address>:\t<raw word>\t<text>") to the output of dis.
 *
 * dis  - disassembler state; NULL makes the call a no-op
 * p    - start of the instruction buffer; NULL prints nothing
 * size - buffer size in BYTES; a trailing partial instruction is ignored
 *        and a non-positive size prints nothing
 */
void armdis_decode(ARMDis* dis, void* p, int size) {
	int i, count;
	arminstr_t* pi = (arminstr_t*)p;
	ARMInstr instr;

	if (dis == NULL) return;

	chk_out(dis);

	if (p == NULL) return;

	/*
	 * Divide as signed ints: "size /= sizeof(...)" converted a negative
	 * size to a huge unsigned value before dividing.
	 */
	count = size / (int)sizeof(arminstr_t);

	for (i=0; i<count; ++i) {
		/* %p requires a void pointer */
		fprintf(dis->dis_out, "%p:\t%08x\t", (void*)pi, *pi);
		/* remembered so that dump_br can compute the branch target */
		dis->pi = pi;
		instr.raw = *pi++;

		/* the first matching mask/tag pair wins, so the order matters */
		if ((instr.raw & ARM_BR_MASK) == ARM_BR_TAG) {
			dump_br(dis, instr);
		} else if ((instr.raw & ARM_SWP_MASK) == ARM_SWP_TAG) {
			dump_swp(dis, instr);
		} else if ((instr.raw & ARM_MUL_MASK) == ARM_MUL_TAG) {
			dump_mul(dis, instr);
		} else if ((instr.raw & ARM_CLZ_MASK) == ARM_CLZ_TAG) {
			dump_clz(dis, instr);
		} else if ((instr.raw & ARM_WXFER_MASK) == ARM_WXFER_TAG) {
			dump_wxfer(dis, instr);
		} else if ((instr.raw & ARM_HXFER_MASK) == ARM_HXFER_TAG) {
			dump_hxfer(dis, instr);
		} else if ((instr.raw & ARM_DPI_MASK) == ARM_DPI_TAG) {
			dump_dpi(dis, instr);
		} else if ((instr.raw & ARM_MRT_MASK) == ARM_MRT_TAG) {
			dump_mrt(dis, instr);
		} else if ((instr.raw & ARM_CDP_MASK) == ARM_CDP_TAG) {
			dump_cdp(dis, instr);
		} else if ((instr.raw & ARM_CDT_MASK) == ARM_CDT_TAG) {
			dump_cdt(dis, instr);
		} else if ((instr.raw & ARM_CRT_MASK) == ARM_CRT_TAG) {
			dump_crt(dis, instr);
		} else if ((instr.raw & ARM_MSR_MASK) == ARM_MSR_TAG) {
			dump_msr(dis, instr);
		} else if ((instr.raw & ARM_MRS_MASK) == ARM_MRS_TAG) {
			dump_mrs(dis, instr);
		} else if ((instr.raw & ARM_SWI_MASK) == ARM_SWI_TAG) {
			dump_swi(dis, instr);
		} else {
			fprintf(dis->dis_out, "DCD 0x%x\t; <unknown>", instr.raw);
		}

		fprintf(dis->dis_out, "\n");
	}
}
|
||||
|
||||
|
||||
/*
 * Open dump_name for writing and make it the output of dis. When the file
 * cannot be opened the output falls back to stdout (see armdis_set_output).
 * Nothing happens when dis or dump_name is NULL.
 */
void armdis_open(ARMDis* dis, const char* dump_name) {
	if (dis == NULL || dump_name == NULL) {
		return;
	}

	armdis_set_output(dis, fopen(dump_name, "w"));
}
|
||||
|
||||
|
||||
/*
 * Close the output stream of dis unless it is stdout or stderr, and reset
 * it to NULL so that the next use falls back to stdout.
 * A NULL dis is ignored, like in the other armdis_* entry points.
 */
void armdis_close(ARMDis* dis) {
	/* armdis_dump(NULL, ...) reaches this point, so guard the dereference */
	if (dis == NULL) {
		return;
	}

	if (dis->dis_out != NULL && dis->dis_out != stdout && dis->dis_out != stderr) {
		fclose(dis->dis_out);
		dis->dis_out = NULL;
	}
}
|
||||
|
||||
|
||||
/*
 * Convenience wrapper: open dump_name as the output of dis, disassemble
 * size bytes of instructions starting at p into it, then close the output.
 */
void armdis_dump(ARMDis* dis, const char* dump_name, void* p, int size) {
	/* 1. select the output file */
	armdis_open(dis, dump_name);

	/* 2. write the listing */
	armdis_decode(dis, p, size);

	/* 3. release the file again */
	armdis_close(dis);
}
|
||||
|
||||
|
||||
/*
 * Put a disassembler object into its initial state by resetting its output.
 * A NULL `dis` is ignored.
 */
void armdis_init(ARMDis* dis)
{
	if (dis == NULL) {
		return;
	}

	/* set to stdout */
	armdis_set_output(dis, NULL);
}
|
||||
|
||||
|
||||
|
||||
|
||||
void init_gdisasm() {
|
||||
if (gdisasm == NULL) {
|
||||
gdisasm = (ARMDis*)malloc(sizeof(ARMDis));
|
||||
armdis_init(gdisasm);
|
||||
}
|
||||
}
|
||||
|
||||
void _armdis_set_output(FILE* f) {
|
||||
init_gdisasm();
|
||||
armdis_set_output(gdisasm, f);
|
||||
}
|
||||
|
||||
FILE* _armdis_get_output() {
|
||||
init_gdisasm();
|
||||
return armdis_get_output(gdisasm);
|
||||
}
|
||||
|
||||
void _armdis_decode(void* p, int size) {
|
||||
init_gdisasm();
|
||||
armdis_decode(gdisasm, p, size);
|
||||
}
|
||||
|
||||
void _armdis_open(const char* dump_name) {
|
||||
init_gdisasm();
|
||||
armdis_open(gdisasm, dump_name);
|
||||
}
|
||||
|
||||
void _armdis_close() {
|
||||
init_gdisasm();
|
||||
armdis_close(gdisasm);
|
||||
}
|
||||
|
||||
void _armdis_dump(const char* dump_name, void* p, int size) {
|
||||
init_gdisasm();
|
||||
armdis_dump(gdisasm, dump_name, p, size);
|
||||
}
|
||||
|
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
|
||||
*/
|
||||
|
||||
#ifndef ARM_DIS
|
||||
#define ARM_DIS
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* State carried by one disassembler instance. */
typedef struct _ARMDis {
	/* stream the disassembly listing is printed to */
	FILE* dis_out;
	/* NOTE(review): opaque pointer; its use is not visible here — confirm */
	void* pi;
} ARMDis;
|
||||
|
||||
|
||||
void _armdis_set_output(FILE* f);
|
||||
FILE* _armdis_get_output(void);
|
||||
void _armdis_decode(void* p, int size);
|
||||
void _armdis_open(const char* dump_name);
|
||||
void _armdis_close(void);
|
||||
void _armdis_dump(const char* dump_name, void* p, int size);
|
||||
|
||||
|
||||
void armdis_init(ARMDis* dis);
|
||||
void armdis_set_output(ARMDis* dis, FILE* f);
|
||||
FILE* armdis_get_output(ARMDis* dis);
|
||||
void armdis_decode(ARMDis* dis, void* p, int size);
|
||||
void armdis_open(ARMDis* dis, const char* dump_name);
|
||||
void armdis_close(ARMDis* dis);
|
||||
void armdis_dump(ARMDis* dis, const char* dump_name, void* p, int size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ARM_DIS */
|
@ -0,0 +1,247 @@
|
||||
//
|
||||
// Copyright 2011 Xamarin Inc
|
||||
//
|
||||
|
||||
#ifndef __MONO_ARM_VFP_CODEGEN_H__
|
||||
#define __MONO_ARM_VFP_CODEGEN_H__
|
||||
|
||||
#include "arm-codegen.h"
|
||||
|
||||
enum {
|
||||
/* VFP registers */
|
||||
ARM_VFP_F0,
|
||||
ARM_VFP_F1,
|
||||
ARM_VFP_F2,
|
||||
ARM_VFP_F3,
|
||||
ARM_VFP_F4,
|
||||
ARM_VFP_F5,
|
||||
ARM_VFP_F6,
|
||||
ARM_VFP_F7,
|
||||
ARM_VFP_F8,
|
||||
ARM_VFP_F9,
|
||||
ARM_VFP_F10,
|
||||
ARM_VFP_F11,
|
||||
ARM_VFP_F12,
|
||||
ARM_VFP_F13,
|
||||
ARM_VFP_F14,
|
||||
ARM_VFP_F15,
|
||||
ARM_VFP_F16,
|
||||
ARM_VFP_F17,
|
||||
ARM_VFP_F18,
|
||||
ARM_VFP_F19,
|
||||
ARM_VFP_F20,
|
||||
ARM_VFP_F21,
|
||||
ARM_VFP_F22,
|
||||
ARM_VFP_F23,
|
||||
ARM_VFP_F24,
|
||||
ARM_VFP_F25,
|
||||
ARM_VFP_F26,
|
||||
ARM_VFP_F27,
|
||||
ARM_VFP_F28,
|
||||
ARM_VFP_F29,
|
||||
ARM_VFP_F30,
|
||||
ARM_VFP_F31,
|
||||
|
||||
ARM_VFP_D0 = ARM_VFP_F0,
|
||||
ARM_VFP_D1 = ARM_VFP_F2,
|
||||
ARM_VFP_D2 = ARM_VFP_F4,
|
||||
ARM_VFP_D3 = ARM_VFP_F6,
|
||||
ARM_VFP_D4 = ARM_VFP_F8,
|
||||
ARM_VFP_D5 = ARM_VFP_F10,
|
||||
ARM_VFP_D6 = ARM_VFP_F12,
|
||||
ARM_VFP_D7 = ARM_VFP_F14,
|
||||
ARM_VFP_D8 = ARM_VFP_F16,
|
||||
ARM_VFP_D9 = ARM_VFP_F18,
|
||||
ARM_VFP_D10 = ARM_VFP_F20,
|
||||
ARM_VFP_D11 = ARM_VFP_F22,
|
||||
ARM_VFP_D12 = ARM_VFP_F24,
|
||||
ARM_VFP_D13 = ARM_VFP_F26,
|
||||
ARM_VFP_D14 = ARM_VFP_F28,
|
||||
ARM_VFP_D15 = ARM_VFP_F30,
|
||||
|
||||
ARM_VFP_COPROC_SINGLE = 10,
|
||||
ARM_VFP_COPROC_DOUBLE = 11,
|
||||
|
||||
#define ARM_VFP_OP(p,q,r,s) (((p) << 23) | ((q) << 21) | ((r) << 20) | ((s) << 6))
|
||||
#define ARM_VFP_OP2(Fn,N) (ARM_VFP_OP (1,1,1,1) | ((Fn) << 16) | ((N) << 7))
|
||||
|
||||
ARM_VFP_MUL = ARM_VFP_OP (0,1,0,0),
|
||||
ARM_VFP_NMUL = ARM_VFP_OP (0,1,0,1),
|
||||
ARM_VFP_ADD = ARM_VFP_OP (0,1,1,0),
|
||||
ARM_VFP_SUB = ARM_VFP_OP (0,1,1,1),
|
||||
ARM_VFP_DIV = ARM_VFP_OP (1,0,0,0),
|
||||
|
||||
ARM_VFP_CPY = ARM_VFP_OP2 (0,0),
|
||||
ARM_VFP_ABS = ARM_VFP_OP2 (0,1),
|
||||
ARM_VFP_NEG = ARM_VFP_OP2 (1,0),
|
||||
ARM_VFP_SQRT = ARM_VFP_OP2 (1,1),
|
||||
ARM_VFP_CMP = ARM_VFP_OP2 (4,0),
|
||||
ARM_VFP_CMPE = ARM_VFP_OP2 (4,1),
|
||||
ARM_VFP_CMPZ = ARM_VFP_OP2 (5,0),
|
||||
ARM_VFP_CMPEZ = ARM_VFP_OP2 (5,1),
|
||||
ARM_VFP_CVT = ARM_VFP_OP2 (7,1),
|
||||
ARM_VFP_UITO = ARM_VFP_OP2 (8,0),
|
||||
ARM_VFP_SITO = ARM_VFP_OP2 (8,1),
|
||||
ARM_VFP_TOUI = ARM_VFP_OP2 (12,0),
|
||||
ARM_VFP_TOSI = ARM_VFP_OP2 (13,0),
|
||||
ARM_VFP_TOUIZ = ARM_VFP_OP2 (12,1),
|
||||
ARM_VFP_TOSIZ = ARM_VFP_OP2 (13,1),
|
||||
|
||||
ARM_VFP_SID = 0,
|
||||
ARM_VFP_SCR = 1 << 1,
|
||||
ARM_VFP_EXC = 8 << 1
|
||||
};
|
||||
|
||||
/*
 * Build the instruction word for a three-register VFP operation.
 *
 * cond - condition code, placed by ARM_DEF_COND()
 * cp   - coprocessor number, shifted to bit 8
 * op   - pre-shifted opcode bits (an ARM_VFP_* value), OR-ed in as is
 * Fd, Fn, Fm - register numbers; each is split into its upper bits
 *        (at bit 12 / 16 / 0) and its lowest bit (at bit 22 / 7 / 5)
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators without precedence surprises.
 */
#define ARM_DEF_VFP_DYADIC(cond,cp,op,Fd,Fn,Fm)	\
	((14 << 24)			|	\
	 ((cp) << 8)			|	\
	 (op)				|	\
	 (((Fd) >> 1) << 12)		|	\
	 (((Fd) & 1) << 22)		|	\
	 (((Fn) >> 1) << 16)		|	\
	 (((Fn) & 1) << 7)		|	\
	 (((Fm) >> 1) << 0)		|	\
	 (((Fm) & 1) << 5)		|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
/*
 * Build the instruction word for a two-register VFP operation.
 *
 * Same layout as ARM_DEF_VFP_DYADIC without the Fn operand: Fd is split
 * across bits 12.. and bit 22, Fm across bits 0.. and bit 5.
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators without precedence surprises.
 */
#define ARM_DEF_VFP_MONADIC(cond,cp,op,Fd,Fm)	\
	((14 << 24)			|	\
	 ((cp) << 8)			|	\
	 (op)				|	\
	 (((Fd) >> 1) << 12)		|	\
	 (((Fd) & 1) << 22)		|	\
	 (((Fm) >> 1) << 0)		|	\
	 (((Fm) & 1) << 5)		|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
/*
 * Build the instruction word for a VFP load/store.
 *
 * cond    - condition code, placed by ARM_DEF_COND()
 * cp      - coprocessor number (bit 8)
 * post    - bit 24
 * ls      - load/store selector (bit 20)
 * wback   - write-back flag (bit 21)
 * basereg - base register (bit 16)
 * Fd      - VFP register, split into upper bits (bit 12) and lowest bit (bit 22)
 * offset  - byte offset; its magnitude divided by 4 goes into the low bits
 *           and bit 23 is set when the offset is non-negative
 *
 * The original listed the write-back term twice; OR-ing it once is
 * equivalent. The expansion is parenthesized as a whole.
 */
#define ARM_DEF_VFP_LSF(cond,cp,post,ls,wback,basereg,Fd,offset)	\
	(((offset) >= 0 ? (offset) >> 2 : (-(offset)) >> 2)	|	\
	 (6 << 25)					|	\
	 ((cp) << 8)					|	\
	 (((Fd) >> 1) << 12)				|	\
	 (((Fd) & 1) << 22)				|	\
	 ((basereg) << 16)				|	\
	 ((ls) << 20)					|	\
	 ((wback) << 21)				|	\
	 (((offset) >= 0) << 23)			|	\
	 ((post) << 24)					|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
/*
 * Build the instruction word for a transfer between an ARM register and a
 * VFP register.
 *
 * cond - condition code, placed by ARM_DEF_COND()
 * cp   - coprocessor number (bit 8)
 * op   - opcode field (bit 21)
 * L    - direction bit (bit 20)
 * Fn   - VFP register, split into upper bits (bit 16) and lowest bit (bit 7)
 * Rd   - ARM register (bit 12)
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators without precedence surprises.
 */
#define ARM_DEF_VFP_CPT(cond,cp,op,L,Fn,Rd)	\
	((14 << 24)			|	\
	 (1 << 4)			|	\
	 ((cp) << 8)			|	\
	 ((op) << 21)			|	\
	 ((L) << 20)			|	\
	 ((Rd) << 12)			|	\
	 (((Fn) >> 1) << 16)		|	\
	 (((Fn) & 1) << 7)		|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
/* FP load and stores */
|
||||
#define ARM_FLDS_COND(p,freg,base,offset,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_SINGLE,1,ARMOP_LDR,0,(base),(freg),(offset)))
|
||||
#define ARM_FLDS(p,freg,base,offset) \
|
||||
ARM_FLDS_COND(p,freg,base,offset,ARMCOND_AL)
|
||||
|
||||
#define ARM_FLDD_COND(p,freg,base,offset,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,1,ARMOP_LDR,0,(base),(freg),(offset)))
|
||||
#define ARM_FLDD(p,freg,base,offset) \
|
||||
ARM_FLDD_COND(p,freg,base,offset,ARMCOND_AL)
|
||||
|
||||
#define ARM_FSTS_COND(p,freg,base,offset,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_SINGLE,1,ARMOP_STR,0,(base),(freg),(offset)))
|
||||
#define ARM_FSTS(p,freg,base,offset) \
|
||||
ARM_FSTS_COND(p,freg,base,offset,ARMCOND_AL)
|
||||
|
||||
#define ARM_FSTD_COND(p,freg,base,offset,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,1,ARMOP_STR,0,(base),(freg),(offset)))
|
||||
#define ARM_FSTD(p,freg,base,offset) \
|
||||
ARM_FSTD_COND(p,freg,base,offset,ARMCOND_AL)
|
||||
|
||||
#define ARM_FLDMD_COND(p,first_reg,nregs,base,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,0,ARMOP_LDR,0,(base),(first_reg),((nregs) * 2) << 2))
|
||||
|
||||
#define ARM_FLDMD(p,first_reg,nregs,base) \
|
||||
ARM_FLDMD_COND(p,first_reg,nregs,base,ARMCOND_AL)
|
||||
|
||||
#define ARM_FSTMD_COND(p,first_reg,nregs,base,cond) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_LSF((cond),ARM_VFP_COPROC_DOUBLE,0,ARMOP_STR,0,(base),(first_reg),((nregs) * 2) << 2))
|
||||
|
||||
#define ARM_FSTMD(p,first_reg,nregs,base) \
|
||||
ARM_FSTMD_COND(p,first_reg,nregs,base,ARMCOND_AL)
|
||||
|
||||
#include <mono/arch/arm/arm_vfpmacros.h>
|
||||
|
||||
/* coprocessor register transfer */
|
||||
#define ARM_FMSR(p,freg,reg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,0,0,(freg),(reg)))
|
||||
#define ARM_FMRS(p,reg,freg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,0,1,(freg),(reg)))
|
||||
|
||||
#define ARM_FMDLR(p,freg,reg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,0,0,(freg),(reg)))
|
||||
#define ARM_FMRDL(p,reg,freg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,0,1,(freg),(reg)))
|
||||
#define ARM_FMDHR(p,freg,reg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,1,0,(freg),(reg)))
|
||||
#define ARM_FMRDH(p,reg,freg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,1,1,(freg),(reg)))
|
||||
|
||||
#define ARM_FMXR(p,freg,reg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,7,0,(freg),(reg)))
|
||||
#define ARM_FMRX(p,reg,fcreg) \
|
||||
ARM_EMIT((p), ARM_DEF_VFP_CPT(ARMCOND_AL,ARM_VFP_COPROC_SINGLE,7,1,(fcreg),(reg)))
|
||||
|
||||
#define ARM_FMSTAT(p) \
|
||||
ARM_FMRX((p),ARMREG_R15,ARM_VFP_SCR)
|
||||
|
||||
/*
 * Build the instruction word for a two-ARM-register to coprocessor transfer.
 *
 * cond - condition code, placed by ARM_DEF_COND()
 * cp   - coprocessor number (bit 8)
 * rn   - ARM register at bit 16
 * rd   - ARM register at bit 12
 * Fm   - upper bits of the VFP register number (bit 0)
 * M    - lowest bit of the VFP register number (bit 5)
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators without precedence surprises.
 */
#define ARM_DEF_MCRR(cond,cp,rn,rd,Fm,M)	\
	(((Fm) << 0)			|	\
	 (1 << 4)			|	\
	 ((M) << 5)			|	\
	 ((cp) << 8)			|	\
	 ((rd) << 12)			|	\
	 ((rn) << 16)			|	\
	 ((2) << 21)			|	\
	 (12 << 24)			|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
#define ARM_FMDRR(p,rd,rn,dm) \
|
||||
ARM_EMIT((p), ARM_DEF_MCRR(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,(rn),(rd),(dm) >> 1, (dm) & 1))
|
||||
|
||||
/*
 * Build the instruction word for a double-precision VFP register to
 * two-ARM-register transfer.
 *
 * cond - condition code, placed by ARM_DEF_COND()
 * cp   - coprocessor number (bit 8)
 * rn   - ARM register at bit 16
 * rd   - ARM register at bit 12
 * Dm   - upper bits of the VFP register number (bit 0)
 * D    - lowest bit of the VFP register number, placed at bit 5 in the same
 *        way ARM_DEF_MCRR places its M argument. The original accepted D
 *        but never encoded it; for the ARM_VFP_Dn register aliases (all
 *        even numbers) D is 0, so the emitted word is unchanged.
 *
 * The whole expansion is parenthesized so the macro can be combined with
 * other operators without precedence surprises.
 */
#define ARM_DEF_FMRRD(cond,cp,rn,rd,Dm,D)	\
	(((Dm) << 0)			|	\
	 (1 << 4)			|	\
	 ((D) << 5)			|	\
	 ((cp) << 8)			|	\
	 ((rd) << 12)			|	\
	 ((rn) << 16)			|	\
	 ((0xc5) << 20)			|	\
	 ARM_DEF_COND(cond))
|
||||
|
||||
#define ARM_FMRRD(p,rd,rn,dm) \
|
||||
ARM_EMIT((p), ARM_DEF_FMRRD(ARMCOND_AL,ARM_VFP_COPROC_DOUBLE,(rn),(rd),(dm) >> 1, (dm) & 1))
|
||||
|
||||
/* Instruction word for FUITOS; Dd/D and Fm/M are the upper bits and lowest bit of the destination and source register numbers. Parenthesized as a whole. */
#define ARM_DEF_FUITOS(cond,Dd,D,Fm,M) (((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xa) << 8) | ((1) << 6) | ((M) << 5) | ((Fm) << 0))
|
||||
|
||||
#define ARM_FUITOS(p,dreg,sreg) \
|
||||
ARM_EMIT((p), ARM_DEF_FUITOS (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
|
||||
|
||||
/* Instruction word for FUITOD; Dd/D and Fm/M are the upper bits and lowest bit of the destination and source register numbers. Parenthesized as a whole. */
#define ARM_DEF_FUITOD(cond,Dd,D,Fm,M) (((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xb) << 8) | ((1) << 6) | ((M) << 5) | ((Fm) << 0))
|
||||
|
||||
#define ARM_FUITOD(p,dreg,sreg) \
|
||||
ARM_EMIT((p), ARM_DEF_FUITOD (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
|
||||
|
||||
/* Instruction word for FSITOS; identical to ARM_DEF_FUITOS plus bit 7. Parenthesized as a whole. */
#define ARM_DEF_FSITOS(cond,Dd,D,Fm,M) (((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xa) << 8) | ((1) << 7) | ((1) << 6) | ((M) << 5) | ((Fm) << 0))
|
||||
|
||||
#define ARM_FSITOS(p,dreg,sreg) \
|
||||
ARM_EMIT((p), ARM_DEF_FSITOS (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
|
||||
|
||||
/* Instruction word for FSITOD; identical to ARM_DEF_FUITOD plus bit 7. Parenthesized as a whole. */
#define ARM_DEF_FSITOD(cond,Dd,D,Fm,M) (((cond) << 28) | ((0x1d) << 23) | ((D) << 22) | ((0x3) << 20) | ((8) << 16) | ((Dd) << 12) | ((0xb) << 8) | ((1) << 7) | ((1) << 6) | ((M) << 5) | ((Fm) << 0))
|
||||
|
||||
#define ARM_FSITOD(p,dreg,sreg) \
|
||||
ARM_EMIT((p), ARM_DEF_FSITOD (ARMCOND_AL, (dreg) >> 1, (dreg) & 1, (sreg) >> 1, (sreg) & 1))
|
||||
|
||||
#endif /* __MONO_ARM_VFP_CODEGEN_H__ */
|
||||
|
@ -0,0 +1,177 @@
|
||||
/*
|
||||
* ARM CodeGen
|
||||
* XScale WirelessMMX extensions
|
||||
* Copyright 2002 Wild West Software
|
||||
*/
|
||||
|
||||
#ifndef __WMMX_H__
|
||||
#define __WMMX_H__ 1
|
||||
|
||||
#if 0
|
||||
#include <arm-codegen.h>
|
||||
#endif
|
||||
|
||||
#if defined(ARM_IASM)
|
||||
# define WM_ASM(_expr) ARM_IASM(_expr)
|
||||
#else
|
||||
# define WM_ASM(_expr) __emit (_expr)
|
||||
#endif
|
||||
|
||||
#if defined(ARM_EMIT)
|
||||
# define WM_EMIT(p, i) ARM_EMIT(p, i)
|
||||
#else
|
||||
# define WM_EMIT(p, i)
|
||||
#endif
|
||||
|
||||
enum {
|
||||
WM_CC_EQ = 0x0,
|
||||
WM_CC_NE = 0x1,
|
||||
WM_CC_CS = 0x2,
|
||||
WM_CC_HS = WM_CC_CS,
|
||||
WM_CC_CC = 0x3,
|
||||
WM_CC_LO = WM_CC_CC,
|
||||
WM_CC_MI = 0x4,
|
||||
WM_CC_PL = 0x5,
|
||||
WM_CC_VS = 0x6,
|
||||
WM_CC_VC = 0x7,
|
||||
WM_CC_HI = 0x8,
|
||||
WM_CC_LS = 0x9,
|
||||
WM_CC_GE = 0xA,
|
||||
WM_CC_LT = 0xB,
|
||||
WM_CC_GT = 0xC,
|
||||
WM_CC_LE = 0xD,
|
||||
WM_CC_AL = 0xE,
|
||||
WM_CC_NV = 0xF,
|
||||
WM_CC_SHIFT = 28
|
||||
};
|
||||
|
||||
/*
 * WM_DEF_CC(_cc): place a condition code in the instruction's condition field.
 * Defers to ARM_DEF_COND when the ARM codegen header provides it; otherwise
 * masks the code to 4 bits and shifts it by WM_CC_SHIFT. The argument is
 * parenthesized so expressions such as `a | b` are masked as a whole.
 */
#if defined(ARM_DEF_COND)
# define WM_DEF_CC(_cc) ARM_DEF_COND(_cc)
#else
# define WM_DEF_CC(_cc) (((_cc) & 0xF) << WM_CC_SHIFT)
#endif
|
||||
|
||||
|
||||
enum {
|
||||
WM_R0 = 0x0,
|
||||
WM_R1 = 0x1,
|
||||
WM_R2 = 0x2,
|
||||
WM_R3 = 0x3,
|
||||
WM_R4 = 0x4,
|
||||
WM_R5 = 0x5,
|
||||
WM_R6 = 0x6,
|
||||
WM_R7 = 0x7,
|
||||
WM_R8 = 0x8,
|
||||
WM_R9 = 0x9,
|
||||
WM_R10 = 0xA,
|
||||
WM_R11 = 0xB,
|
||||
WM_R12 = 0xC,
|
||||
WM_R13 = 0xD,
|
||||
WM_R14 = 0xE,
|
||||
WM_R15 = 0xF,
|
||||
|
||||
WM_wR0 = 0x0,
|
||||
WM_wR1 = 0x1,
|
||||
WM_wR2 = 0x2,
|
||||
WM_wR3 = 0x3,
|
||||
WM_wR4 = 0x4,
|
||||
WM_wR5 = 0x5,
|
||||
WM_wR6 = 0x6,
|
||||
WM_wR7 = 0x7,
|
||||
WM_wR8 = 0x8,
|
||||
WM_wR9 = 0x9,
|
||||
WM_wR10 = 0xA,
|
||||
WM_wR11 = 0xB,
|
||||
WM_wR12 = 0xC,
|
||||
WM_wR13 = 0xD,
|
||||
WM_wR14 = 0xE,
|
||||
WM_wR15 = 0xF
|
||||
};
|
||||
|
||||
|
||||
/*
 * Qualifiers:
 * B - 8-bit (Byte) SIMD
 * H - 16-bit (HalfWord) SIMD
 * W - 32-bit (Word) SIMD
 * D - 64-bit (Double)
 */
|
||||
enum {
|
||||
WM_B = 0,
|
||||
WM_H = 1,
|
||||
WM_D = 2
|
||||
};
|
||||
|
||||
/*
|
||||
* B.2.3 Transfers From Coprocessor Register (MRC)
|
||||
* Table B-5
|
||||
*/
|
||||
enum {
|
||||
WM_TMRC_OP2 = 0,
|
||||
WM_TMRC_CPNUM = 1,
|
||||
|
||||
WM_TMOVMSK_OP2 = 1,
|
||||
WM_TMOVMSK_CPNUM = 0,
|
||||
|
||||
WM_TANDC_OP2 = 1,
|
||||
WM_TANDC_CPNUM = 1,
|
||||
|
||||
WM_TORC_OP2 = 2,
|
||||
WM_TORC_CPNUM = 1,
|
||||
|
||||
WM_TEXTRC_OP2 = 3,
|
||||
WM_TEXTRC_CPNUM = 1,
|
||||
|
||||
WM_TEXTRM_OP2 = 3,
|
||||
WM_TEXTRM_CPNUM = 0
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* TANDC<B,H,W>{Cond} R15
|
||||
* Performs AND across the fields of the SIMD PSR register (wCASF) and sends the result
|
||||
* to CPSR; can be performed after a Byte, Half-word or Word operation that sets the flags.
|
||||
* NOTE: R15 is omitted from the macro declaration;
|
||||
*/
|
||||
/* Instruction word for TANDC: condition field + qualifier at bit 0x16 + fixed opcode bits. Parenthesized as a whole so it composes safely inside larger expressions. */
#define DEF_WM_TNADC_CC(_q, _cc) (WM_DEF_CC((_cc)) + ((_q) << 0x16) + 0xE13F130)
|
||||
|
||||
#define _WM_TNADC_CC(_q, _cc) WM_ASM(DEF_WM_TNADC_CC(_q, _cc))
|
||||
#define ARM_WM_TNADC_CC(_p, _q, _cc) WM_EMIT(_p, DEF_WM_TNADC_CC(_q, _cc))
|
||||
|
||||
/* inline assembly */
|
||||
#define _WM_TNADC(_q) _WM_TNADC_CC((_q), WM_CC_AL)
|
||||
#define _WM_TNADCB() _WM_TNADC(WM_B)
|
||||
#define _WM_TNADCH() _WM_TNADC(WM_H)
|
||||
#define _WM_TNADCD() _WM_TNADC(WM_D)
|
||||
|
||||
/* codegen */
|
||||
#define ARM_WM_TNADC(_p, _q) ARM_WM_TNADC_CC((_p), (_q), WM_CC_AL)
|
||||
#define ARM_WM_TNADCB(_p) ARM_WM_TNADC(_p, WM_B)
|
||||
#define ARM_WM_TNADCH(_p) ARM_WM_TNADC(_p, WM_H)
|
||||
#define ARM_WM_TNADCD(_p) ARM_WM_TNADC(_p, WM_D)
|
||||
|
||||
|
||||
/*
|
||||
* TBCST<B,H,W>{Cond} wRd, Rn
|
||||
* Broadcasts a value from the ARM Source reg (Rn) to every SIMD position
|
||||
* in the WMMX Destination reg (wRd).
|
||||
*/
|
||||
/*
 * Instruction word for TBCST: condition field + qualifier at bit 6 +
 * wRd at bit 16 + Rn at bit 12 + fixed opcode bits.
 * Parenthesized as a whole so it composes safely inside larger expressions.
 */
#define DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn) \
	(WM_DEF_CC((_cc)) + ((_q) << 6) + ((_wrd) << 16) + ((_rn) << 12) + 0xE200010)
|
||||
|
||||
#define _WM_TBCST_CC(_q, _cc, _wrd, _rn) WM_ASM(DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn))
|
||||
#define ARM_WM_TBCST_CC(_p, _q, _cc, _wrd, _rn) WM_EMIT(_p, DEF_WM_TBCST_CC(_q, _cc, _wrd, _rn))
|
||||
|
||||
/* inline */
|
||||
#define _WM_TBCST(_q, _wrd, _rn) _WM_TBCST_CC(_q, WM_CC_AL, _wrd, _rn)
|
||||
/* Per-qualifier shorthands: forward the qualifier AND both register operands to _WM_TBCST, which takes three arguments. */
#define _WM_TBCSTB(_wrd, _rn) _WM_TBCST(WM_B, _wrd, _rn)
#define _WM_TBCSTH(_wrd, _rn) _WM_TBCST(WM_H, _wrd, _rn)
#define _WM_TBCSTD(_wrd, _rn) _WM_TBCST(WM_D, _wrd, _rn)
|
||||
|
||||
/* codegen */
|
||||
#define ARM_WM_TBCST(_p, _q, _wrd, _rn) ARM_WM_TBCST_CC(_p, _q, WM_CC_AL, _wrd, _rn)
|
||||
/* Per-qualifier codegen shorthands: forward to the codegen macro ARM_WM_TBCST (not the inline-assembly _WM_TBCST) with the buffer pointer, qualifier and both register operands. */
#define ARM_WM_TBCSTB(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_B, _wrd, _rn)
#define ARM_WM_TBCSTH(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_H, _wrd, _rn)
#define ARM_WM_TBCSTD(_p, _wrd, _rn) ARM_WM_TBCST(_p, WM_D, _wrd, _rn)
|
||||
|
||||
|
||||
#endif /* __WMMX_H__ */
|
@ -0,0 +1,56 @@
|
||||
/* PSR := <Op> Rn, (imm8 ROR 2*rot) */
|
||||
#define ARM_<Op>_REG_IMM_COND(p, rn, imm8, rot, cond) \
|
||||
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, 0, rn, imm8, rot, cond)
|
||||
#define ARM_<Op>_REG_IMM(p, rn, imm8, rot) \
|
||||
ARM_<Op>_REG_IMM_COND(p, rn, imm8, rot, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM_COND(rn, imm8, rot, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, 0, rn, imm8, rot, cond)
|
||||
#define _<Op>_REG_IMM(rn, imm8, rot) \
|
||||
_<Op>_REG_IMM_COND(rn, imm8, rot, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* PSR := <Op> Rn, imm8 */
|
||||
#define ARM_<Op>_REG_IMM8_COND(p, rn, imm8, cond) \
|
||||
ARM_<Op>_REG_IMM_COND(p, rn, imm8, 0, cond)
|
||||
#define ARM_<Op>_REG_IMM8(p, rn, imm8) \
|
||||
ARM_<Op>_REG_IMM8_COND(p, rn, imm8, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM8_COND(rn, imm8, cond) \
|
||||
_<Op>_REG_IMM_COND(rn, imm8, 0, cond)
|
||||
#define _<Op>_REG_IMM8(rn, imm8) \
|
||||
_<Op>_REG_IMM8_COND(rn, imm8, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* PSR := <Op> Rn, Rm */
|
||||
#define ARM_<Op>_REG_REG_COND(p, rn, rm, cond) \
|
||||
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, 0, rn, rm, cond)
|
||||
#define ARM_<Op>_REG_REG(p, rn, rm) \
|
||||
ARM_<Op>_REG_REG_COND(p, rn, rm, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_REG_COND(rn, rm, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, 0, rn, rm, cond)
|
||||
#define _<Op>_REG_REG(rn, rm) \
|
||||
_<Op>_REG_REG_COND(rn, rm, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* PSR := <Op> Rn, (Rm <shift_type> imm8) */
|
||||
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, 0, rn, rm, shift_type, imm_shift, cond)
|
||||
#define ARM_<Op>_REG_IMMSHIFT(p, rn, rm, shift_type, imm_shift) \
|
||||
ARM_<Op>_REG_IMMSHIFT_COND(p, rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMMSHIFT_COND(rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, 0, rn, rm, shift_type, imm_shift, cond)
|
||||
#define _<Op>_REG_IMMSHIFT(rn, rm, shift_type, imm_shift) \
|
||||
_<Op>_REG_IMMSHIFT_COND(rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
@ -0,0 +1,112 @@
|
||||
/* -- <Op> -- */
|
||||
|
||||
/* Rd := Rn <Op> (imm8 ROR rot) ; rot is power of 2 */
|
||||
#define ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, rot, cond) \
|
||||
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, rd, rn, imm8, rot, cond)
|
||||
#define ARM_<Op>_REG_IMM(p, rd, rn, imm8, rot) \
|
||||
ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, rot, ARMCOND_AL)
|
||||
#define ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, rot, cond) \
|
||||
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, rd, rn, imm8, rot, cond)
|
||||
#define ARM_<Op>S_REG_IMM(p, rd, rn, imm8, rot) \
|
||||
ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, rot, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM_COND(rd, rn, imm8, rot, cond) \
|
||||
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, rd, rn, imm8, rot, cond)
|
||||
#define _<Op>_REG_IMM(rd, rn, imm8, rot) \
|
||||
_<Op>_REG_IMM_COND(rd, rn, imm8, rot, ARMCOND_AL)
|
||||
#define _<Op>S_REG_IMM_COND(rd, rn, imm8, rot, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, rd, rn, imm8, rot, cond)
|
||||
#define _<Op>S_REG_IMM(rd, rn, imm8, rot) \
|
||||
_<Op>S_REG_IMM_COND(rd, rn, imm8, rot, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rn <Op> imm8 */
|
||||
#define ARM_<Op>_REG_IMM8_COND(p, rd, rn, imm8, cond) \
|
||||
ARM_<Op>_REG_IMM_COND(p, rd, rn, imm8, 0, cond)
|
||||
#define ARM_<Op>_REG_IMM8(p, rd, rn, imm8) \
|
||||
ARM_<Op>_REG_IMM8_COND(p, rd, rn, imm8, ARMCOND_AL)
|
||||
#define ARM_<Op>S_REG_IMM8_COND(p, rd, rn, imm8, cond) \
|
||||
ARM_<Op>S_REG_IMM_COND(p, rd, rn, imm8, 0, cond)
|
||||
#define ARM_<Op>S_REG_IMM8(p, rd, rn, imm8) \
|
||||
ARM_<Op>S_REG_IMM8_COND(p, rd, rn, imm8, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM8_COND(rd, rn, imm8, cond) \
|
||||
_<Op>_REG_IMM_COND(rd, rn, imm8, 0, cond)
|
||||
#define _<Op>_REG_IMM8(rd, rn, imm8) \
|
||||
_<Op>_REG_IMM8_COND(rd, rn, imm8, ARMCOND_AL)
|
||||
#define _<Op>S_REG_IMM8_COND(rd, rn, imm8, cond) \
|
||||
_<Op>S_REG_IMM_COND(rd, rn, imm8, 0, cond)
|
||||
#define _<Op>S_REG_IMM8(rd, rn, imm8) \
|
||||
_<Op>S_REG_IMM8_COND(rd, rn, imm8, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rn <Op> Rm */
|
||||
#define ARM_<Op>_REG_REG_COND(p, rd, rn, rm, cond) \
|
||||
ARM_DPIOP_REG_REG_COND(p, ARMOP_<Op>, rd, rn, rm, cond)
|
||||
#define ARM_<Op>_REG_REG(p, rd, rn, rm) \
|
||||
ARM_<Op>_REG_REG_COND(p, rd, rn, rm, ARMCOND_AL)
|
||||
#define ARM_<Op>S_REG_REG_COND(p, rd, rn, rm, cond) \
|
||||
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, rd, rn, rm, cond)
|
||||
#define ARM_<Op>S_REG_REG(p, rd, rn, rm) \
|
||||
ARM_<Op>S_REG_REG_COND(p, rd, rn, rm, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_REG_COND(rd, rn, rm, cond) \
|
||||
ARM_IASM_DPIOP_REG_REG_COND(ARMOP_<Op>, rd, rn, rm, cond)
|
||||
#define _<Op>_REG_REG(rd, rn, rm) \
|
||||
_<Op>_REG_REG_COND(rd, rn, rm, ARMCOND_AL)
|
||||
#define _<Op>S_REG_REG_COND(rd, rn, rm, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, rd, rn, rm, cond)
|
||||
#define _<Op>S_REG_REG(rd, rn, rm) \
|
||||
_<Op>S_REG_REG_COND(rd, rn, rm, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rn <Op> (Rm <shift_type> imm_shift) */
|
||||
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_DPIOP_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
|
||||
#define ARM_<Op>_REG_IMMSHIFT(p, rd, rn, rm, shift_type, imm_shift) \
|
||||
ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
#define ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
|
||||
#define ARM_<Op>S_REG_IMMSHIFT(p, rd, rn, rm, shift_type, imm_shift) \
|
||||
ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_IASM_DPIOP_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
|
||||
#define _<Op>_REG_IMMSHIFT(rd, rn, rm, shift_type, imm_shift) \
|
||||
_<Op>_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
#define _<Op>S_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, imm_shift, cond)
|
||||
#define _<Op>S_REG_IMMSHIFT(rd, rn, rm, shift_type, imm_shift) \
|
||||
_<Op>S_REG_IMMSHIFT_COND(rd, rn, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rn <Op> (Rm <shift_type> Rs) */
/* The bodies pass the `shift_type` parameter on; the original referred to an undefined `shift_t`. */
#define ARM_<Op>_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, cond) \
	ARM_DPIOP_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define ARM_<Op>_REG_REGSHIFT(p, rd, rn, rm, shift_type, rs) \
	ARM_<Op>_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, ARMCOND_AL)
#define ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, cond) \
	ARM_DPIOP_S_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define ARM_<Op>S_REG_REGSHIFT(p, rd, rn, rm, shift_type, rs) \
	ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rn, rm, shift_type, rs, ARMCOND_AL)

#ifndef ARM_NOIASM
#define _<Op>_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, cond) \
	ARM_IASM_DPIOP_REG_REGSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define _<Op>_REG_REGSHIFT(rd, rn, rm, shift_type, rs) \
	_<Op>_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, ARMCOND_AL)
#define _<Op>S_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, cond) \
	ARM_IASM_DPIOP_S_REG_REGSHIFT_COND(ARMOP_<Op>, rd, rn, rm, shift_type, rs, cond)
#define _<Op>S_REG_REGSHIFT(rd, rn, rm, shift_type, rs) \
	_<Op>S_REG_REGSHIFT_COND(rd, rn, rm, shift_type, rs, ARMCOND_AL)
#endif
|
||||
|
||||
|
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
#
# Expand the DPI macro templates (*.th) once per opcode and write the
# resulting macro definitions to standard output.

OPCODES="AND EOR SUB RSB ADD ADC SBC RSC ORR BIC"
CMP_OPCODES="TST TEQ CMP CMN"
MOV_OPCODES="MOV MVN"

# gen OPCODE_LIST TEMPLATE
#   $1: opcode list (whitespace separated)
#   $2: template base name; the file "$2.th" is read
# Prints the template once per opcode with every <Op> replaced by the opcode.
gen() {
	# $1 is intentionally unquoted so it splits into one word per opcode.
	for i in $1; do
		sed "s/<Op>/$i/g" "$2.th"
	done
}

# printf is used instead of "echo -e": under a plain POSIX /bin/sh, echo may
# print the "-e" literally and leave the "\n" escapes unexpanded.
printf '%s\n\n' "/* Macros for DPI ops, auto-generated from template */"

printf '\n%s\n\n' "/* mov/mvn */"
gen "$MOV_OPCODES" mov_macros

printf '\n%s\n\n' "/* DPIs, arithmetic and logical */"
gen "$OPCODES" dpi_macros

printf '\n\n\n'

printf '\n%s\n\n' "/* DPIs, comparison */"
gen "$CMP_OPCODES" cmp_macros

printf '\n%s\n\n' "/* end generated */"
|
@ -0,0 +1,121 @@
|
||||
/* Rd := imm8 ROR rot */
|
||||
#define ARM_<Op>_REG_IMM_COND(p, reg, imm8, rot, cond) \
|
||||
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, rot, cond)
|
||||
#define ARM_<Op>_REG_IMM(p, reg, imm8, rot) \
|
||||
ARM_<Op>_REG_IMM_COND(p, reg, imm8, rot, ARMCOND_AL)
|
||||
/* S */
|
||||
#define ARM_<Op>S_REG_IMM_COND(p, reg, imm8, rot, cond) \
|
||||
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, rot, cond)
|
||||
#define ARM_<Op>S_REG_IMM(p, reg, imm8, rot) \
|
||||
ARM_<Op>S_REG_IMM_COND(p, reg, imm8, rot, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM_COND(reg, imm8, rot, cond) \
|
||||
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, rot, cond)
|
||||
#define _<Op>_REG_IMM(reg, imm8, rot) \
|
||||
_<Op>_REG_IMM_COND(reg, imm8, rot, ARMCOND_AL)
|
||||
/* S */
|
||||
#define _<Op>S_REG_IMM_COND(reg, imm8, rot, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, rot, cond)
|
||||
#define _<Op>S_REG_IMM(reg, imm8, rot) \
|
||||
_<Op>S_REG_IMM_COND(reg, imm8, rot, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := imm8 */
|
||||
#define ARM_<Op>_REG_IMM8_COND(p, reg, imm8, cond) \
|
||||
ARM_DPIOP_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, 0, cond)
|
||||
#define ARM_<Op>_REG_IMM8(p, reg, imm8) \
|
||||
ARM_<Op>_REG_IMM8_COND(p, reg, imm8, ARMCOND_AL)
|
||||
/* S */
|
||||
#define ARM_<Op>S_REG_IMM8_COND(p, reg, imm8, cond) \
|
||||
ARM_DPIOP_S_REG_IMM8ROT_COND(p, ARMOP_<Op>, reg, 0, imm8, 0, cond)
|
||||
#define ARM_<Op>S_REG_IMM8(p, reg, imm8) \
|
||||
ARM_<Op>S_REG_IMM8_COND(p, reg, imm8, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMM8_COND(reg, imm8, cond) \
|
||||
ARM_IASM_DPIOP_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, 0, cond)
|
||||
#define _<Op>_REG_IMM8(reg, imm8) \
|
||||
_<Op>_REG_IMM8_COND(reg, imm8, ARMCOND_AL)
|
||||
/* S */
|
||||
#define _<Op>S_REG_IMM8_COND(reg, imm8, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMM8ROT_COND(ARMOP_<Op>, reg, 0, imm8, 0, cond)
|
||||
#define _<Op>S_REG_IMM8(reg, imm8) \
|
||||
_<Op>S_REG_IMM8_COND(reg, imm8, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rm */
|
||||
#define ARM_<Op>_REG_REG_COND(p, rd, rm, cond) \
|
||||
ARM_DPIOP_REG_REG_COND(p, ARMOP_<Op>, rd, 0, rm, cond)
|
||||
#define ARM_<Op>_REG_REG(p, rd, rm) \
|
||||
ARM_<Op>_REG_REG_COND(p, rd, rm, ARMCOND_AL)
|
||||
/* S */
|
||||
#define ARM_<Op>S_REG_REG_COND(p, rd, rm, cond) \
|
||||
ARM_DPIOP_S_REG_REG_COND(p, ARMOP_<Op>, rd, 0, rm, cond)
|
||||
#define ARM_<Op>S_REG_REG(p, rd, rm) \
|
||||
ARM_<Op>S_REG_REG_COND(p, rd, rm, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_REG_COND(rd, rm, cond) \
|
||||
ARM_IASM_DPIOP_REG_REG_COND(ARMOP_<Op>, rd, 0, rm, cond)
|
||||
#define _<Op>_REG_REG(rd, rm) \
|
||||
_<Op>_REG_REG_COND(rd, rm, ARMCOND_AL)
|
||||
/* S */
|
||||
#define _<Op>S_REG_REG_COND(rd, rm, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_REG_COND(ARMOP_<Op>, rd, 0, rm, cond)
|
||||
#define _<Op>S_REG_REG(rd, rm) \
|
||||
_<Op>S_REG_REG_COND(rd, rm, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
/* Rd := Rm <shift_type> imm_shift */
|
||||
#define ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, cond) \
|
||||
ARM_DPIOP_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
|
||||
#define ARM_<Op>_REG_IMMSHIFT(p, rd, rm, shift_type, imm_shift) \
|
||||
ARM_<Op>_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
/* S */
|
||||
#define ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, cond) \
|
||||
ARM_DPIOP_S_REG_IMMSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
|
||||
#define ARM_<Op>S_REG_IMMSHIFT(p, rd, rm, shift_type, imm_shift) \
|
||||
ARM_<Op>S_REG_IMMSHIFT_COND(p, rd, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, cond) \
|
||||
ARM_IASM_DPIOP_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
|
||||
#define _<Op>_REG_IMMSHIFT(rd, rm, shift_type, imm_shift) \
|
||||
_<Op>_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
/* S */
|
||||
#define _<Op>S_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_IMMSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, imm_shift, cond)
|
||||
#define _<Op>S_REG_IMMSHIFT(rd, rm, shift_type, imm_shift) \
|
||||
_<Op>S_REG_IMMSHIFT_COND(rd, rm, shift_type, imm_shift, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* Rd := (Rm <shift_type> Rs) */
|
||||
#define ARM_<Op>_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, cond) \
|
||||
ARM_DPIOP_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
|
||||
#define ARM_<Op>_REG_REGSHIFT(p, rd, rm, shift_type, rs) \
|
||||
ARM_<Op>_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, ARMCOND_AL)
|
||||
/* S */
|
||||
#define ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, cond) \
|
||||
ARM_DPIOP_S_REG_REGSHIFT_COND(p, ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
|
||||
#define ARM_<Op>S_REG_REGSHIFT(p, rd, rm, shift_type, rs) \
|
||||
ARM_<Op>S_REG_REGSHIFT_COND(p, rd, rm, shift_type, rs, ARMCOND_AL)
|
||||
|
||||
#ifndef ARM_NOIASM
|
||||
#define _<Op>_REG_REGSHIFT_COND(rd, rm, shift_type, rs, cond) \
|
||||
ARM_IASM_DPIOP_REG_REGSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
|
||||
#define _<Op>_REG_REGSHIFT(rd, rm, shift_type, rs) \
|
||||
_<Op>_REG_REGSHIFT_COND(rd, rm, shift_type, rs, ARMCOND_AL)
|
||||
/* S */
|
||||
#define _<Op>S_REG_REGSHIFT_COND(rd, rm, shift_type, rs, cond) \
|
||||
ARM_IASM_DPIOP_S_REG_REGSHIFT_COND(ARMOP_<Op>, rd, 0, rm, shift_type, rs, cond)
|
||||
#define _<Op>S_REG_REGSHIFT(rd, rm, shift_type, rs) \
|
||||
_<Op>S_REG_REGSHIFT_COND(rd, rm, shift_type, rs, ARMCOND_AL)
|
||||
#endif
|
||||
|
||||
|
@ -0,0 +1,710 @@
|
||||
/*
|
||||
* Create trampolines to invoke arbitrary functions.
|
||||
* Copyright (c) 2002 Sergey Chaban <serge@wildwestsoftware.com>
|
||||
*
|
||||
* Contributions by Malte Hildingson
|
||||
*/
|
||||
|
||||
#include "arm-codegen.h"
|
||||
#include "arm-dis.h"
|
||||
|
||||
#if defined(_WIN32_WCE) || defined (UNDER_CE)
|
||||
# include <windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if !defined(PLATFORM_MACOSX)
|
||||
#include <errno.h>
|
||||
|
||||
#include "mono/metadata/class.h"
|
||||
#include "mono/metadata/tabledefs.h"
|
||||
#include "mono/interpreter/interp.h"
|
||||
#include "mono/metadata/appdomain.h"
|
||||
|
||||
|
||||
#if 0
|
||||
# define ARM_DUMP_DISASM 1
|
||||
#endif
|
||||
|
||||
/* prototypes for private functions (to avoid compiler warnings) */
|
||||
void flush_icache (void);
|
||||
void* alloc_code_buff (int num_instr);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* The resulting function takes the form:
|
||||
* void func (void (*callme)(), void *retval, void *this_obj, stackval *arguments);
|
||||
* NOTE: all args passed in ARM registers (A1-A4),
|
||||
* then copied to R4-R7 (see definitions below).
|
||||
*/
|
||||
|
||||
#define REG_FUNC_ADDR ARMREG_R4
|
||||
#define REG_RETVAL ARMREG_R5
|
||||
#define REG_THIS ARMREG_R6
|
||||
#define REG_ARGP ARMREG_R7
|
||||
|
||||
|
||||
#define ARG_SIZE sizeof(stackval)
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Called after code has been emitted into a buffer.
 * On _WIN32 builds this asks the OS to flush the instruction cache for the
 * whole process; on every other build it currently emits nothing (the
 * coprocessor-based variant below is compiled out).
 */
void flush_icache ()
{
#if !defined(_WIN32)
# if 0
	asm ("mov r0, r0");
	asm ("mov r0, #0");
	asm ("mcr p15, 0, r0, c7, c7, 0");
# else
	/* TODO: use (movnv pc, rx) method */
# endif
#else
	FlushInstructionCache(GetCurrentProcess(), NULL, 0);
#endif
}
|
||||
|
||||
|
||||
void* alloc_code_buff (int num_instr)
|
||||
{
|
||||
void* code_buff;
|
||||
int code_size = num_instr * sizeof(arminstr_t);
|
||||
|
||||
#if defined(_WIN32) || defined(UNDER_CE)
|
||||
int old_prot = 0;
|
||||
|
||||
code_buff = malloc(code_size);
|
||||
VirtualProtect(code_buff, code_size, PAGE_EXECUTE_READWRITE, &old_prot);
|
||||
#else
|
||||
int page_size = sysconf(_SC_PAGESIZE);
|
||||
int new_code_size;
|
||||
|
||||
new_code_size = code_size + page_size - 1;
|
||||
code_buff = malloc(new_code_size);
|
||||
code_buff = (void *) (((int) code_buff + page_size - 1) & ~(page_size - 1));
|
||||
|
||||
if (mprotect(code_buff, code_size, PROT_READ|PROT_WRITE|PROT_EXEC) != 0) {
|
||||
g_critical (G_GNUC_PRETTY_FUNCTION
|
||||
": mprotect error: %s", g_strerror (errno));
|
||||
}
|
||||
#endif
|
||||
|
||||
return code_buff;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Refer to ARM Procedure Call Standard (APCS) for more info.
|
||||
*/
|
||||
/*
 * Emit a thunk of the form
 *   void func (void (*callme)(), void *retval, void *this_obj, stackval *arguments);
 * that loads the arguments described by sig from the stackval array into
 * argument registers / outgoing stack slots, calls callme, and stores the
 * result through retval.
 *
 * sig:         signature of the callee.
 * string_ctor: when non-zero the callee's R0 result is always stored
 *              through retval as a word.
 *
 * Returns the generated code, or NULL if no code buffer could be allocated.
 *
 * The function makes two passes over the parameters: the first sizes the
 * code buffer and outgoing stack area and records, per parameter, how many
 * argument registers were still free (reg_alloc); the second emits the
 * loads/stores in reverse parameter order.
 */
MonoPIFunc mono_arch_create_trampoline (MonoMethodSignature *sig, gboolean string_ctor)
{
	MonoType* param;
	MonoPIFunc code_buff;
	arminstr_t* p;
	guint32 code_size, stack_size;
	guint32 simple_type;
	int i, hasthis, aregs, regc, avail, stack_offs;
	int in_reg, argreg;
	int this_loaded = 0;
	guchar reg_alloc [ARM_NUM_ARG_REGS];

	/* pessimistic estimation for prologue/epilogue size */
	code_size = 16 + 16;
	/* push/pop work regs */
	code_size += 2;
	/* call */
	code_size += 2;
	/* handle retval */
	code_size += 2;

	stack_size = 0;
	hasthis = sig->hasthis ? 1 : 0;

	/* argument registers left once [this] has taken A1 */
	aregs = ARM_NUM_ARG_REGS - hasthis;

	for (i = 0, regc = aregs; i < sig->param_count; ++i) {
		param = sig->params [i];

		/* keep track of argument sizes */
		if (i < ARM_NUM_ARG_REGS) reg_alloc [i] = 0;

		/*
		 * Only parameters with index < aregs are ever treated as register
		 * arguments by the emission pass below, and reg_alloc has only
		 * ARM_NUM_ARG_REGS entries, so never hand out a register (or write
		 * reg_alloc) beyond that index.
		 */
		avail = (i < aregs) ? regc : 0;

		if (param->byref) {
			if (avail > 0) {
				code_size += 1;
				reg_alloc [i] = regc;
				--regc;
			} else {
				code_size += 2;
				stack_size += sizeof(gpointer);
			}
		} else {
			simple_type = param->type;
enum_calc_size:
			switch (simple_type) {
			case MONO_TYPE_BOOLEAN:
			case MONO_TYPE_CHAR:
			case MONO_TYPE_I1:
			case MONO_TYPE_U1:
			case MONO_TYPE_I2:
			case MONO_TYPE_U2:
			case MONO_TYPE_I4:
			case MONO_TYPE_U4:
			case MONO_TYPE_I:
			case MONO_TYPE_U:
			case MONO_TYPE_PTR:
			case MONO_TYPE_R4:
			case MONO_TYPE_SZARRAY:
			case MONO_TYPE_CLASS:
			case MONO_TYPE_OBJECT:
			case MONO_TYPE_STRING:
				if (avail > 0) {
					/* register arg */
					code_size += 1;
					reg_alloc [i] = regc;
					--regc;
				} else {
					/* stack arg */
					code_size += 2;
					stack_size += 4;
				}
				break;
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
			case MONO_TYPE_R8:
				/* keep track of argument sizes */
				if (avail > 1) {
					/* fits into registers, two LDRs */
					code_size += 2;
					reg_alloc [i] = regc;
					regc -= 2;
				} else if (avail > 0) {
					/* first half fits into register, one LDR */
					code_size += 1;
					reg_alloc [i] = regc;
					--regc;
					/* the rest on the stack, LDR/STR */
					code_size += 2;
					stack_size += 4;
				} else {
					/* stack arg, 4 instrs - 2x(LDR/STR) */
					code_size += 4;
					stack_size += 2 * 4;
				}
				break;
			case MONO_TYPE_VALUETYPE:
				if (param->data.klass->enumtype) {
					simple_type = param->data.klass->enum_basetype->type;
					goto enum_calc_size;
				}

				if (mono_class_value_size(param->data.klass, NULL) != 4) {
					g_error("can only marshal enums, not generic structures (size: %d)", mono_class_value_size(param->data.klass, NULL));
				}
				if (avail > 0) {
					/* register arg: LDR pointer, LDR value */
					code_size += 2;
					reg_alloc [i] = regc;
					--regc;
				} else {
					/* stack arg: LDR pointer, LDR value, STR */
					code_size += 3;
					stack_size += 4;
				}
				break;
			default :
				break;
			}
		}
	}

	code_buff = (MonoPIFunc)alloc_code_buff(code_size);
	if (code_buff == NULL)
		return NULL;
	p = (arminstr_t*)code_buff;

	/* prologue */
	p = arm_emit_lean_prologue(p, stack_size,
		/* save workset (r4-r7) */
		(1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));


	/* copy args into workset */
	/* callme - always present */
	ARM_MOV_REG_REG(p, ARMREG_R4, ARMREG_A1);
	/* retval */
	if (sig->ret->byref || string_ctor || (sig->ret->type != MONO_TYPE_VOID)) {
		ARM_MOV_REG_REG(p, ARMREG_R5, ARMREG_A2);
	}
	/* this_obj */
	if (sig->hasthis) {
		this_loaded = 0;
		if (stack_size == 0) {
			/* no stack transfers will use R0 as scratch, so A1 can be set now */
			ARM_MOV_REG_REG(p, ARMREG_A1, ARMREG_A3);
			this_loaded = 1;
		} else {
			ARM_MOV_REG_REG(p, ARMREG_R6, ARMREG_A3);
		}
	}
	/* args */
	if (sig->param_count != 0) {
		ARM_MOV_REG_REG(p, ARMREG_R7, ARMREG_A4);
	}

	stack_offs = stack_size;

	/* handle arguments */
	/* in reverse order so we could use r0 (arg1) for memory transfers */
	for (i = sig->param_count; --i >= 0;) {
		param = sig->params [i];

		/*
		 * reg_alloc[i] holds the number of registers that were free when
		 * parameter i was assigned, so its first register is
		 * A1 + hasthis + (aregs - reg_alloc[i]).  The same mapping is used
		 * for every kind of parameter.
		 */
		in_reg = (i < aregs && reg_alloc [i] > 0);
		argreg = in_reg ? ARMREG_A1 + hasthis + (aregs - reg_alloc [i]) : ARMREG_R0;

		if (param->byref) {
			if (in_reg) {
				ARM_LDR_IMM(p, argreg, REG_ARGP, i*ARG_SIZE);
			} else {
				stack_offs -= sizeof(armword_t);
				ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
				ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
			}
		} else {
			simple_type = param->type;
enum_marshal:
			switch (simple_type) {
			case MONO_TYPE_BOOLEAN:
			case MONO_TYPE_CHAR:
			case MONO_TYPE_I1:
			case MONO_TYPE_U1:
			case MONO_TYPE_I2:
			case MONO_TYPE_U2:
			case MONO_TYPE_I4:
			case MONO_TYPE_U4:
			case MONO_TYPE_I:
			case MONO_TYPE_U:
			case MONO_TYPE_PTR:
			case MONO_TYPE_R4:
			case MONO_TYPE_SZARRAY:
			case MONO_TYPE_CLASS:
			case MONO_TYPE_OBJECT:
			case MONO_TYPE_STRING:
				if (in_reg) {
					/* pass in register */
					ARM_LDR_IMM(p, argreg, REG_ARGP, i*ARG_SIZE);
				} else {
					stack_offs -= sizeof(armword_t);
					ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
					ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
				}
				break;
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
			case MONO_TYPE_R8:
				if (in_reg) {
					if (reg_alloc [i] > 1) {
						/* pass in registers */
						ARM_LDR_IMM(p, argreg, REG_ARGP, i*ARG_SIZE);
						ARM_LDR_IMM(p, argreg + 1, REG_ARGP, i*ARG_SIZE + 4);
					} else {
						/* second word goes to the stack, first word to the last register */
						stack_offs -= sizeof(armword_t);
						ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
						ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
						ARM_LDR_IMM(p, argreg, REG_ARGP, i*ARG_SIZE);
					}
				} else {
					/* two words transferred on the stack */
					stack_offs -= 2*sizeof(armword_t);
					ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE);
					ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
					ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i*ARG_SIZE + 4);
					ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs + 4);
				}
				break;
			case MONO_TYPE_VALUETYPE:
				if (param->data.klass->enumtype) {
					/* it's an enum value, proceed based on its base type */
					simple_type = param->data.klass->enum_basetype->type;
					goto enum_marshal;
				} else {
					/* the stackval slot holds a pointer to the 4-byte value */
					if (in_reg) {
						ARM_LDR_IMM(p, argreg, REG_ARGP, i * ARG_SIZE);
						ARM_LDR_IMM(p, argreg, argreg, 0);
					} else {
						stack_offs -= sizeof(armword_t);
						ARM_LDR_IMM(p, ARMREG_R0, REG_ARGP, i * ARG_SIZE);
						ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R0, 0);
						ARM_STR_IMM(p, ARMREG_R0, ARMREG_SP, stack_offs);
					}
				}
				break;

			default:
				break;
			}
		}
	}

	if (sig->hasthis && !this_loaded) {
		/* [this] always passed in A1, regardless of sig->call_convention */
		ARM_MOV_REG_REG(p, ARMREG_A1, REG_THIS);
	}

	/* call [func] */
	ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
	ARM_MOV_REG_REG(p, ARMREG_PC, REG_FUNC_ADDR);

	/* handle retval */
	if (sig->ret->byref || string_ctor) {
		ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
	} else {
		simple_type = sig->ret->type;
enum_retvalue:
		switch (simple_type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			ARM_STRB_IMM(p, ARMREG_R0, REG_RETVAL, 0);
			break;
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
			ARM_STRH_IMM(p, ARMREG_R0, REG_RETVAL, 0);
			break;
		/*
		 * A 32-bit integer and integer-equivalent return value
		 * is returned in R0.
		 * Single-precision floating-point values are returned in R0.
		 */
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_R4:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_STRING:
			ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
			break;
		/*
		 * A 64-bit integer is returned in R0 and R1.
		 * Double-precision floating-point values are returned in R0 and R1.
		 */
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
		case MONO_TYPE_R8:
			ARM_STR_IMM(p, ARMREG_R0, REG_RETVAL, 0);
			ARM_STR_IMM(p, ARMREG_R1, REG_RETVAL, 4);
			break;
		case MONO_TYPE_VALUETYPE:
			if (sig->ret->data.klass->enumtype) {
				simple_type = sig->ret->data.klass->enum_basetype->type;
				goto enum_retvalue;
			}
			break;
		case MONO_TYPE_VOID:
			break;
		default:
			break;
		}
	}

	p = arm_emit_std_epilogue(p, stack_size,
		/* restore R4-R7 */
		(1 << ARMREG_R4) | (1 << ARMREG_R5) | (1 << ARMREG_R6) | (1 << ARMREG_R7));

	flush_icache();

#ifdef ARM_DUMP_DISASM
	_armdis_decode((arminstr_t*)code_buff, ((guint8*)p) - ((guint8*)code_buff));
#endif

	return code_buff;
}
|
||||
|
||||
|
||||
|
||||
#define MINV_OFFS(member) G_STRUCT_OFFSET(MonoInvocation, member)
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Returns a pointer to a native function that can be used to
|
||||
* call the specified method.
|
||||
* The function created will receive the arguments according
|
||||
* to the call convention specified in the method.
|
||||
* This function works by creating a MonoInvocation structure,
|
||||
* filling the fields in and calling ves_exec_method on it.
|
||||
* Still need to figure out how to handle the exception stuff
|
||||
* across the managed/unmanaged boundary.
|
||||
*/
|
||||
/*
 * Emit a native entry point for method.
 *
 * Layout of the generated buffer: a branch over the data words, the four
 * bytes 'M','o','n','o', the MonoMethod pointer, and a two-entry call table
 * (stackval_from_data, ves_exec_method), followed by the code.  The code
 * builds a MonoInvocation on the stack, converts each incoming argument to
 * a stackval via stackval_from_data, calls ves_exec_method and moves the
 * result into R0 (and R1 for 64-bit values).
 *
 * Returns the start of the buffer (also registered in the root domain's
 * JIT info table), or NULL if no code buffer could be allocated.
 *
 * NOTE(review): the buffer is a fixed 128 instructions; nothing here checks
 * that signatures with many parameters fit — confirm against callers.
 */
void* mono_arch_create_method_pointer (MonoMethod* method)
{
	MonoMethodSignature* sig;
	guchar* p, * p_method, * p_stackval_from_data, * p_exec;
	void* code_buff;
	int i, stack_size, arg_pos, arg_add, stackval_pos, offs;
	int areg, reg_args, shift, pos;
	MonoJitInfo *ji;

	code_buff = alloc_code_buff(128);
	if (code_buff == NULL)
		return NULL;
	p = (guchar*)code_buff;

	sig = method->signature;

	ARM_B(p, 3);

	/* embed magic number followed by method pointer */
	*p++ = 'M';
	*p++ = 'o';
	*p++ = 'n';
	*p++ = 'o';
	/* method ptr */
	*(void**)p = method;
	p_method = p;
	p += 4;

	/* call table */
	*(void**)p = stackval_from_data;
	p_stackval_from_data = p;
	p += 4;
	*(void**)p = ves_exec_method;
	p_exec = p;
	p += 4;

	stack_size = sizeof(MonoInvocation) + ARG_SIZE*(sig->param_count + 1) + ARM_NUM_ARG_REGS*2*sizeof(armword_t);

	/* prologue */
	p = (guchar*)arm_emit_lean_prologue((arminstr_t*)p, stack_size,
	    (1 << ARMREG_R4) |
	    (1 << ARMREG_R5) |
	    (1 << ARMREG_R6) |
	    (1 << ARMREG_R7));

	/* R7 - ptr to stack args */
	ARM_MOV_REG_REG(p, ARMREG_R7, ARMREG_IP);

	/*
	 * Initialize MonoInvocation fields, first the ones known now.
	 */
	ARM_MOV_REG_IMM8(p, ARMREG_R4, 0);
	ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(ex));
	ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(ex_handler));
	ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(parent));

	/*
	 * minv.obj must be written while R4 still holds 0: the next step
	 * reloads R4 with the method pointer.
	 */
	if (sig->hasthis) {
		/* [this] in A1 */
		ARM_STR_IMM(p, ARMREG_A1, ARMREG_SP, MINV_OFFS(obj));
	} else {
		/* else set minv.obj to NULL */
		ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(obj));
	}

	/* Set the method pointer. */
	ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, -(int)(p - p_method + sizeof(arminstr_t)*2));
	ARM_STR_IMM(p, ARMREG_R4, ARMREG_SP, MINV_OFFS(method));

	/* copy args from registers to stack */
	areg = ARMREG_A1 + sig->hasthis;
	arg_pos = -(int)(ARM_NUM_ARG_REGS - sig->hasthis) * 2 * sizeof(armword_t);
	arg_add = 0;
	for (i = 0; i < sig->param_count; ++i) {
		if (areg >= ARM_NUM_ARG_REGS) break;
		ARM_STR_IMM(p, areg, ARMREG_R7, arg_pos);
		++areg;
		if (!sig->params[i]->byref) {
			switch (sig->params[i]->type) {
			case MONO_TYPE_I8:
			case MONO_TYPE_U8:
			case MONO_TYPE_R8:
				if (areg >= ARM_NUM_ARG_REGS) {
					/* load second half of 64-bit arg */
					ARM_LDR_IMM(p, ARMREG_R4, ARMREG_R7, 0);
					ARM_STR_IMM(p, ARMREG_R4, ARMREG_R7, arg_pos + sizeof(armword_t));
					/* the first caller-stack word is consumed by this arg */
					arg_add = sizeof(armword_t);
				} else {
					/* second half is already the register */
					ARM_STR_IMM(p, areg, ARMREG_R7, arg_pos + sizeof(armword_t));
					++areg;
				}
				break;
			case MONO_TYPE_VALUETYPE:
				/* assert */
			default:
				break;
			}
		}
		arg_pos += 2 * sizeof(armword_t);
	}
	/* number of args passed in registers */
	reg_args = i;

	/*
	 * Calc and save stack args ptr,
	 * args follow MonoInvocation struct on the stack.
	 */
	ARM_ADD_REG_IMM8(p, ARMREG_R1, ARMREG_SP, sizeof(MonoInvocation));
	ARM_STR_IMM(p, ARMREG_R1, ARMREG_SP, MINV_OFFS(stack_args));

	/* convert method args to stackvals */
	arg_pos = -(int)(ARM_NUM_ARG_REGS - sig->hasthis) * 2 * sizeof(armword_t);
	stackval_pos = sizeof(MonoInvocation);
	for (i = 0; i < sig->param_count; ++i) {
		/* A3 = address of the raw argument */
		if (i < reg_args) {
			ARM_SUB_REG_IMM8(p, ARMREG_A3, ARMREG_R7, -arg_pos);
			arg_pos += 2 * sizeof(armword_t);
		} else {
			if (arg_pos < 0) arg_pos = 0;
			pos = arg_pos + arg_add;
			if (pos <= 0xFF) {
				ARM_ADD_REG_IMM8(p, ARMREG_A3, ARMREG_R7, pos);
			} else {
				if (is_arm_const((armword_t)pos)) {
					shift = calc_arm_mov_const_shift((armword_t)pos);
					ARM_ADD_REG_IMM(p, ARMREG_A3, ARMREG_R7, pos >> ((32 - shift) & 31), shift >> 1);
				} else {
					p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R6, (armword_t)pos);
					/* destination is A3 like the other two forms; A2 is set below */
					ARM_ADD_REG_REG(p, ARMREG_A3, ARMREG_R7, ARMREG_R6);
				}
			}
			arg_pos += sizeof(armword_t);
			if (!sig->params[i]->byref) {
				switch (sig->params[i]->type) {
				case MONO_TYPE_I8:
				case MONO_TYPE_U8:
				case MONO_TYPE_R8:
					arg_pos += sizeof(armword_t);
					break;
				case MONO_TYPE_VALUETYPE:
					/* assert */
				default:
					break;
				}
			}
		}

		/* A2 = result */
		if (stackval_pos <= 0xFF) {
			ARM_ADD_REG_IMM8(p, ARMREG_A2, ARMREG_SP, stackval_pos);
		} else {
			if (is_arm_const((armword_t)stackval_pos)) {
				shift = calc_arm_mov_const_shift((armword_t)stackval_pos);
				ARM_ADD_REG_IMM(p, ARMREG_A2, ARMREG_SP, stackval_pos >> ((32 - shift) & 31), shift >> 1);
			} else {
				p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R6, (armword_t)stackval_pos);
				ARM_ADD_REG_REG(p, ARMREG_A2, ARMREG_SP, ARMREG_R6);
			}
		}

		/* A1 = type */
		p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_A1, (armword_t)sig->params [i]);

		stackval_pos += ARG_SIZE;

		offs = -(p + 2*sizeof(arminstr_t) - p_stackval_from_data);
		/* load function address */
		ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, offs);
		/* call stackval_from_data */
		ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
		ARM_MOV_REG_REG(p, ARMREG_PC, ARMREG_R4);
	}

	/* store retval ptr: R5 = SP + stackval_pos (R4 holds a function address here) */
	p = (guchar*)arm_mov_reg_imm32((arminstr_t*)p, ARMREG_R5, (armword_t)stackval_pos);
	ARM_ADD_REG_REG(p, ARMREG_R5, ARMREG_SP, ARMREG_R5);
	ARM_STR_IMM(p, ARMREG_R5, ARMREG_SP, MINV_OFFS(retval));

	/*
	 * Call the method.
	 */
	/* A1 = MonoInvocation ptr */
	ARM_MOV_REG_REG(p, ARMREG_A1, ARMREG_SP);
	offs = -(p + 2*sizeof(arminstr_t) - p_exec);
	/* load function address */
	ARM_LDR_IMM(p, ARMREG_R4, ARMREG_PC, offs);
	/* call ves_exec */
	ARM_MOV_REG_REG(p, ARMREG_LR, ARMREG_PC);
	ARM_MOV_REG_REG(p, ARMREG_PC, ARMREG_R4);


	/*
	 * Move retval into reg.
	 */
	if (sig->ret->byref) {
		ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
	} else {
		switch (sig->ret->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			ARM_LDRB_IMM(p, ARMREG_R0, ARMREG_R5, 0);
			break;
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
			ARM_LDRH_IMM(p, ARMREG_R0, ARMREG_R5, 0);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_R4:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_STRING:
			/* word-sized results, same set the trampoline stores as a word */
			ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
			break;
		case MONO_TYPE_I8:
		case MONO_TYPE_U8:
		case MONO_TYPE_R8:
			ARM_LDR_IMM(p, ARMREG_R0, ARMREG_R5, 0);
			ARM_LDR_IMM(p, ARMREG_R1, ARMREG_R5, 4);
			break;
		case MONO_TYPE_VOID:
		default:
			break;
		}
	}


	p = (guchar*)arm_emit_std_epilogue((arminstr_t*)p, stack_size,
	    (1 << ARMREG_R4) |
	    (1 << ARMREG_R5) |
	    (1 << ARMREG_R6) |
	    (1 << ARMREG_R7));

	flush_icache();

#ifdef ARM_DUMP_DISASM
	_armdis_decode((arminstr_t*)code_buff, ((guint8*)p) - ((guint8*)code_buff));
#endif

	/* record the generated range so it can be looked up by address */
	ji = g_new0(MonoJitInfo, 1);
	ji->method = method;
	ji->code_size = ((guint8 *) p) - ((guint8 *) code_buff);
	ji->code_start = (gpointer) code_buff;

	mono_jit_info_table_add(mono_get_root_domain (), ji);

	return code_buff;
}
|
||||
|
||||
|
||||
/*
|
||||
* mono_create_method_pointer () will insert a pointer to the MonoMethod
|
||||
* so that the interp can easily get at the data: this function will retrieve
|
||||
* the method from the code stream.
|
||||
*/
|
||||
MonoMethod* mono_method_pointer_get (void* code)
|
||||
{
|
||||
unsigned char* c = code;
|
||||
/* check out magic number that follows unconditional branch */
|
||||
if (c[4] == 'M' &&
|
||||
c[5] == 'o' &&
|
||||
c[6] == 'n' &&
|
||||
c[7] == 'o') return ((MonoMethod**)code)[2];
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
@ -0,0 +1,15 @@
|
||||
/* -- <Op> -- */


/*
 * Dyadic VFP operation: Fd := Fn <Op> Fm
 * The D forms select ARM_VFP_COPROC_DOUBLE, the S forms ARM_VFP_COPROC_SINGLE;
 * the forms without _COND use ARMCOND_AL.
 */
#define ARM_VFP_<Op>D_COND(p, rd, rn, rm, cond) \
	ARM_EMIT((p), ARM_DEF_VFP_DYADIC(cond,ARM_VFP_COPROC_DOUBLE,ARM_VFP_<Op>,rd,rn,rm))
#define ARM_VFP_<Op>D(p, rd, rn, rm) \
	ARM_VFP_<Op>D_COND(p, rd, rn, rm, ARMCOND_AL)

#define ARM_VFP_<Op>S_COND(p, rd, rn, rm, cond) \
	ARM_EMIT((p), ARM_DEF_VFP_DYADIC(cond,ARM_VFP_COPROC_SINGLE,ARM_VFP_<Op>,rd,rn,rm))
#define ARM_VFP_<Op>S(p, rd, rn, rm) \
	ARM_VFP_<Op>S_COND(p, rd, rn, rm, ARMCOND_AL)
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
/* -- <Op> -- */


/*
 * Monadic VFP operation: Fd := <Op> Fm
 * The D forms select ARM_VFP_COPROC_DOUBLE, the S forms ARM_VFP_COPROC_SINGLE;
 * the forms without _COND use ARMCOND_AL.
 */

#define ARM_<Op>D_COND(p,dreg,sreg,cond) \
	ARM_EMIT((p), ARM_DEF_VFP_MONADIC((cond),ARM_VFP_COPROC_DOUBLE,ARM_VFP_<Op>,(dreg),(sreg)))
#define ARM_<Op>D(p,dreg,sreg) \
	ARM_<Op>D_COND(p,dreg,sreg,ARMCOND_AL)

#define ARM_<Op>S_COND(p,dreg,sreg,cond) \
	ARM_EMIT((p), ARM_DEF_VFP_MONADIC((cond),ARM_VFP_COPROC_SINGLE,ARM_VFP_<Op>,(dreg),(sreg)))
#define ARM_<Op>S(p,dreg,sreg) \
	ARM_<Op>S_COND(p,dreg,sreg,ARMCOND_AL)
|
||||
|
||||
|
@ -0,0 +1,24 @@
|
||||
#!/bin/sh
# Writes the VFP macro header to stdout by expanding the <Op> placeholder in
# the vfp_macros.th (dyadic) and vfpm_macros.th (monadic) templates, which
# are read from the current directory, once per opcode.

DYADIC="ADD SUB MUL NMUL DIV"
MONADIC="CPY ABS NEG SQRT CMP CMPE CMPZ CMPEZ CVT UITO SITO TOUI TOSI TOUIZ TOSIZ"

# $1: opcode list (space separated; split on purpose)
# $2: template name without the .th suffix
gen() {
	for op in $1; do
		sed "s/<Op>/$op/g" "$2.th"
	done
}

# printf is used instead of "echo -e": under a plain POSIX sh, echo may not
# understand -e and would print it literally into the generated header.
printf '/* Macros for VFP ops, auto-generated from template */\n\n'

printf '\n/* dyadic */\n\n'
gen "$DYADIC" vfp_macros

printf '\n/* monadic */\n\n'
gen "$MONADIC" vfpm_macros

printf '\n\n\n'

printf '\n/* end generated */\n\n'
|
@ -0,0 +1,6 @@
|
||||
/
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/*.o
|
||||
/*.lo
|
||||
/.deps
|
@ -0,0 +1,3 @@
|
||||
#include "../../../../mono-extensions/mono/arch/arm64/arm64-codegen.h"
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
@ -0,0 +1,3 @@
|
||||
EXTRA_DIST = ia64-codegen.h
|
||||
|
||||
|
@ -0,0 +1,861 @@
|
||||
/*
|
||||
* codegen.c: Tests for the IA64 code generation macros
|
||||
*/
|
||||
|
||||
#include <glib.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#define IA64_SIMPLE_EMIT_BUNDLE
|
||||
|
||||
#include <mono/arch/ia64/ia64-codegen.h>
|
||||
|
||||
void
|
||||
mono_disassemble_code (guint8 *code, int size, char *id)
|
||||
{
|
||||
int i;
|
||||
FILE *ofd;
|
||||
const char *tmp = g_get_tmp_dir ();
|
||||
const char *objdump_args = g_getenv ("MONO_OBJDUMP_ARGS");
|
||||
char *as_file;
|
||||
char *o_file;
|
||||
char *cmd;
|
||||
|
||||
as_file = g_strdup_printf ("%s/test.s", tmp);
|
||||
|
||||
if (!(ofd = fopen (as_file, "w")))
|
||||
g_assert_not_reached ();
|
||||
|
||||
for (i = 0; id [i]; ++i) {
|
||||
if (!isalnum (id [i]))
|
||||
fprintf (ofd, "_");
|
||||
else
|
||||
fprintf (ofd, "%c", id [i]);
|
||||
}
|
||||
fprintf (ofd, ":\n");
|
||||
|
||||
for (i = 0; i < size; ++i)
|
||||
fprintf (ofd, ".byte %d\n", (unsigned int) code [i]);
|
||||
|
||||
fclose (ofd);
|
||||
|
||||
#ifdef __ia64__
|
||||
#define DIS_CMD "objdump -d"
|
||||
#define AS_CMD "as"
|
||||
#else
|
||||
#define DIS_CMD "ia64-linux-gnu-objdump -d"
|
||||
#define AS_CMD "ia64-linux-gnu-as"
|
||||
#endif
|
||||
|
||||
o_file = g_strdup_printf ("%s/test.o", tmp);
|
||||
cmd = g_strdup_printf (AS_CMD " %s -o %s", as_file, o_file);
|
||||
system (cmd);
|
||||
g_free (cmd);
|
||||
if (!objdump_args)
|
||||
objdump_args = "";
|
||||
|
||||
cmd = g_strdup_printf (DIS_CMD " %s %s", objdump_args, o_file);
|
||||
system (cmd);
|
||||
g_free (cmd);
|
||||
|
||||
g_free (o_file);
|
||||
g_free (as_file);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
Ia64CodegenState code;
|
||||
|
||||
guint8 *buf = g_malloc0 (40960);
|
||||
|
||||
ia64_codegen_init (code, buf);
|
||||
|
||||
ia64_add (code, 1, 2, 3);
|
||||
ia64_add1 (code, 1, 2, 3);
|
||||
ia64_sub (code, 1, 2, 3);
|
||||
ia64_sub1 (code, 1, 2, 3);
|
||||
ia64_addp4 (code, 1, 2, 3);
|
||||
ia64_and (code, 1, 2, 3);
|
||||
ia64_andcm (code, 1, 2, 3);
|
||||
ia64_or (code, 1, 2, 3);
|
||||
ia64_xor (code, 1, 2, 3);
|
||||
ia64_shladd (code, 1, 2, 3, 4);
|
||||
ia64_shladdp4 (code, 1, 2, 3, 4);
|
||||
ia64_sub_imm (code, 1, 0x7f, 2);
|
||||
ia64_sub_imm (code, 1, -1, 2);
|
||||
ia64_and_imm (code, 1, -128, 2);
|
||||
ia64_andcm_imm (code, 1, -128, 2);
|
||||
ia64_or_imm (code, 1, -128, 2);
|
||||
ia64_xor_imm (code, 1, -128, 2);
|
||||
ia64_adds_imm (code, 1, 8191, 2);
|
||||
ia64_adds_imm (code, 1, -8192, 2);
|
||||
ia64_adds_imm (code, 1, 1234, 2);
|
||||
ia64_adds_imm (code, 1, -1234, 2);
|
||||
ia64_addp4_imm (code, 1, -1234, 2);
|
||||
ia64_addl_imm (code, 1, 1234, 2);
|
||||
ia64_addl_imm (code, 1, -1234, 2);
|
||||
ia64_addl_imm (code, 1, 2097151, 2);
|
||||
ia64_addl_imm (code, 1, -2097152, 2);
|
||||
|
||||
ia64_cmp_lt (code, 1, 2, 1, 2);
|
||||
ia64_cmp_ltu (code, 1, 2, 1, 2);
|
||||
ia64_cmp_eq (code, 1, 2, 1, 2);
|
||||
ia64_cmp_lt_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp_ltu_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp_eq_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp_eq_and (code, 1, 2, 1, 2);
|
||||
ia64_cmp_eq_or (code, 1, 2, 1, 2);
|
||||
ia64_cmp_eq_or_andcm (code, 1, 2, 1, 2);
|
||||
ia64_cmp_ne_and (code, 1, 2, 1, 2);
|
||||
ia64_cmp_ne_or (code, 1, 2, 1, 2);
|
||||
ia64_cmp_ne_or_andcm (code, 1, 2, 1, 2);
|
||||
|
||||
ia64_cmp4_lt (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_ltu (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_eq (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_lt_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_ltu_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_eq_unc (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_eq_and (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_eq_or (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_eq_or_andcm (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_ne_and (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_ne_or (code, 1, 2, 1, 2);
|
||||
ia64_cmp4_ne_or_andcm (code, 1, 2, 1, 2);
|
||||
|
||||
ia64_cmp_gt_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp_gt_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp_gt_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp_le_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp_le_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp_le_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp_ge_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp_ge_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp_ge_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp_lt_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp_lt_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp_lt_or_andcm (code, 1, 2, 0, 2);
|
||||
|
||||
ia64_cmp4_gt_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_gt_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_gt_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_le_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_le_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_le_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_ge_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_ge_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_ge_or_andcm (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_lt_and (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_lt_or (code, 1, 2, 0, 2);
|
||||
ia64_cmp4_lt_or_andcm (code, 1, 2, 0, 2);
|
||||
|
||||
ia64_cmp_lt_imm (code, 1, 2, 127, 2);
|
||||
ia64_cmp_lt_imm (code, 1, 2, -128, 2);
|
||||
|
||||
ia64_cmp_lt_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_ltu_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_eq_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_lt_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_ltu_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_eq_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_eq_and_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_eq_or_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_eq_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_ne_and_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_ne_or_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp_ne_or_andcm_imm (code, 1, 2, -128, 2);
|
||||
|
||||
ia64_cmp4_lt_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_ltu_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_eq_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_lt_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_ltu_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_eq_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_eq_and_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_eq_or_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_eq_unc_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_ne_and_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_ne_or_imm (code, 1, 2, -128, 2);
|
||||
ia64_cmp4_ne_or_andcm_imm (code, 1, 2, -128, 2);
|
||||
|
||||
ia64_padd1 (code, 1, 2, 3);
|
||||
ia64_padd2 (code, 1, 2, 3);
|
||||
ia64_padd4 (code, 1, 2, 3);
|
||||
ia64_padd1_sss (code, 1, 2, 3);
|
||||
ia64_padd2_sss (code, 1, 2, 3);
|
||||
ia64_padd1_uuu (code, 1, 2, 3);
|
||||
ia64_padd2_uuu (code, 1, 2, 3);
|
||||
ia64_padd1_uus (code, 1, 2, 3);
|
||||
ia64_padd2_uus (code, 1, 2, 3);
|
||||
|
||||
ia64_psub1 (code, 1, 2, 3);
|
||||
ia64_psub2 (code, 1, 2, 3);
|
||||
ia64_psub4 (code, 1, 2, 3);
|
||||
ia64_psub1_sss (code, 1, 2, 3);
|
||||
ia64_psub2_sss (code, 1, 2, 3);
|
||||
ia64_psub1_uuu (code, 1, 2, 3);
|
||||
ia64_psub2_uuu (code, 1, 2, 3);
|
||||
ia64_psub1_uus (code, 1, 2, 3);
|
||||
ia64_psub2_uus (code, 1, 2, 3);
|
||||
|
||||
ia64_pavg1 (code, 1, 2, 3);
|
||||
ia64_pavg2 (code, 1, 2, 3);
|
||||
ia64_pavg1_raz (code, 1, 2, 3);
|
||||
ia64_pavg2_raz (code, 1, 2, 3);
|
||||
ia64_pavgsub1 (code, 1, 2, 3);
|
||||
ia64_pavgsub2 (code, 1, 2, 3);
|
||||
ia64_pcmp1_eq (code, 1, 2, 3);
|
||||
ia64_pcmp2_eq (code, 1, 2, 3);
|
||||
ia64_pcmp4_eq (code, 1, 2, 3);
|
||||
ia64_pcmp1_gt (code, 1, 2, 3);
|
||||
ia64_pcmp2_gt (code, 1, 2, 3);
|
||||
ia64_pcmp4_gt (code, 1, 2, 3);
|
||||
|
||||
ia64_pshladd2 (code, 1, 2, 3, 4);
|
||||
ia64_pshradd2 (code, 1, 2, 3, 4);
|
||||
|
||||
ia64_pmpyshr2 (code, 1, 2, 3, 0);
|
||||
ia64_pmpyshr2_u (code, 1, 2, 3, 0);
|
||||
ia64_pmpyshr2 (code, 1, 2, 3, 7);
|
||||
ia64_pmpyshr2_u (code, 1, 2, 3, 7);
|
||||
ia64_pmpyshr2 (code, 1, 2, 3, 15);
|
||||
ia64_pmpyshr2_u (code, 1, 2, 3, 15);
|
||||
ia64_pmpyshr2 (code, 1, 2, 3, 16);
|
||||
ia64_pmpyshr2_u (code, 1, 2, 3, 16);
|
||||
|
||||
ia64_pmpy2_r (code, 1, 2, 3);
|
||||
ia64_pmpy2_l (code, 1, 2, 3);
|
||||
ia64_mix1_r (code, 1, 2, 3);
|
||||
ia64_mix2_r (code, 1, 2, 3);
|
||||
ia64_mix4_r (code, 1, 2, 3);
|
||||
ia64_mix1_l (code, 1, 2, 3);
|
||||
ia64_mix2_l (code, 1, 2, 3);
|
||||
ia64_mix4_l (code, 1, 2, 3);
|
||||
ia64_pack2_uss (code, 1, 2, 3);
|
||||
ia64_pack2_sss (code, 1, 2, 3);
|
||||
ia64_pack4_sss (code, 1, 2, 3);
|
||||
ia64_unpack1_h (code, 1, 2, 3);
|
||||
ia64_unpack2_h (code, 1, 2, 3);
|
||||
ia64_unpack4_h (code, 1, 2, 3);
|
||||
ia64_unpack1_l (code, 1, 2, 3);
|
||||
ia64_unpack2_l (code, 1, 2, 3);
|
||||
ia64_unpack4_l (code, 1, 2, 3);
|
||||
ia64_pmin1_u (code, 1, 2, 3);
|
||||
ia64_pmax1_u (code, 1, 2, 3);
|
||||
ia64_pmin2 (code, 1, 2, 3);
|
||||
ia64_pmax2 (code, 1, 2, 3);
|
||||
ia64_psad1 (code, 1, 2, 3);
|
||||
|
||||
ia64_mux1 (code, 1, 2, IA64_MUX1_BRCST);
|
||||
ia64_mux1 (code, 1, 2, IA64_MUX1_MIX);
|
||||
ia64_mux1 (code, 1, 2, IA64_MUX1_SHUF);
|
||||
ia64_mux1 (code, 1, 2, IA64_MUX1_ALT);
|
||||
ia64_mux1 (code, 1, 2, IA64_MUX1_REV);
|
||||
|
||||
ia64_mux2 (code, 1, 2, 0x8d);
|
||||
|
||||
ia64_pshr2 (code, 1, 2, 3);
|
||||
ia64_pshr4 (code, 1, 2, 3);
|
||||
ia64_shr (code, 1, 2, 3);
|
||||
ia64_pshr2_u (code, 1, 2, 3);
|
||||
ia64_pshr4_u (code, 1, 2, 3);
|
||||
ia64_shr_u (code, 1, 2, 3);
|
||||
|
||||
ia64_pshr2_imm (code, 1, 2, 20);
|
||||
ia64_pshr4_imm (code, 1, 2, 20);
|
||||
ia64_pshr2_u_imm (code, 1, 2, 20);
|
||||
ia64_pshr4_u_imm (code, 1, 2, 20);
|
||||
|
||||
ia64_pshl2 (code, 1, 2, 3);
|
||||
ia64_pshl4 (code, 1, 2, 3);
|
||||
ia64_shl (code, 1, 2, 3);
|
||||
|
||||
ia64_pshl2_imm (code, 1, 2, 20);
|
||||
ia64_pshl4_imm (code, 1, 2, 20);
|
||||
|
||||
ia64_popcnt (code, 1, 2);
|
||||
|
||||
ia64_shrp (code, 1, 2, 3, 62);
|
||||
|
||||
ia64_extr_u (code, 1, 2, 62, 61);
|
||||
ia64_extr (code, 1, 2, 62, 61);
|
||||
|
||||
ia64_dep_z (code, 1, 2, 62, 61);
|
||||
|
||||
ia64_dep_z_imm (code, 1, 127, 62, 61);
|
||||
ia64_dep_z_imm (code, 1, -128, 62, 61);
|
||||
ia64_dep_imm (code, 1, 0, 2, 62, 61);
|
||||
ia64_dep_imm (code, 1, -1, 2, 62, 61);
|
||||
ia64_dep (code, 1, 2, 3, 10, 15);
|
||||
|
||||
ia64_tbit_z (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_tbit_z (code, 1, 2, 3, 63);
|
||||
ia64_tbit_z_unc (code, 1, 2, 3, 63);
|
||||
ia64_tbit_z_and (code, 1, 2, 3, 63);
|
||||
ia64_tbit_nz_and (code, 1, 2, 3, 63);
|
||||
ia64_tbit_z_or (code, 1, 2, 3, 63);
|
||||
ia64_tbit_nz_or (code, 1, 2, 3, 63);
|
||||
ia64_tbit_z_or_andcm (code, 1, 2, 3, 63);
|
||||
ia64_tbit_nz_or_andcm (code, 1, 2, 3, 63);
|
||||
|
||||
ia64_tnat_z (code, 1, 2, 3);
|
||||
ia64_tnat_z_unc (code, 1, 2, 3);
|
||||
ia64_tnat_z_and (code, 1, 2, 3);
|
||||
ia64_tnat_nz_and (code, 1, 2, 3);
|
||||
ia64_tnat_z_or (code, 1, 2, 3);
|
||||
ia64_tnat_nz_or (code, 1, 2, 3);
|
||||
ia64_tnat_z_or_andcm (code, 1, 2, 3);
|
||||
ia64_tnat_nz_or_andcm (code, 1, 2, 3);
|
||||
|
||||
ia64_nop_i (code, 0x1234);
|
||||
ia64_hint_i (code, 0x1234);
|
||||
|
||||
ia64_break_i (code, 0x1234);
|
||||
|
||||
ia64_chk_s_i (code, 1, 0);
|
||||
ia64_chk_s_i (code, 1, -1);
|
||||
ia64_chk_s_i (code, 1, 1);
|
||||
|
||||
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_NONE, 0);
|
||||
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_SPTK, 0);
|
||||
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_DPTK, 0);
|
||||
ia64_mov_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_DPTK, IA64_BR_IH_IMP);
|
||||
ia64_mov_ret_to_br_hint (code, 1, 1, -1, IA64_MOV_TO_BR_WH_NONE, 0);
|
||||
|
||||
ia64_mov_from_br (code, 1, 1);
|
||||
|
||||
ia64_mov_to_pred (code, 1, 0xfe);
|
||||
|
||||
ia64_mov_to_pred_rot_imm (code, 0xff0000);
|
||||
|
||||
ia64_mov_from_ip (code, 1);
|
||||
ia64_mov_from_pred (code, 1);
|
||||
|
||||
ia64_mov_to_ar_i (code, 1, 1);
|
||||
|
||||
ia64_mov_to_ar_imm_i (code, 1, 127);
|
||||
|
||||
ia64_mov_from_ar_i (code, 1, 1);
|
||||
|
||||
ia64_zxt1 (code, 1, 2);
|
||||
ia64_zxt2 (code, 1, 2);
|
||||
ia64_zxt4 (code, 1, 2);
|
||||
ia64_sxt1 (code, 1, 2);
|
||||
ia64_sxt2 (code, 1, 2);
|
||||
ia64_sxt4 (code, 1, 2);
|
||||
|
||||
ia64_czx1_l (code, 1, 2);
|
||||
ia64_czx2_l (code, 1, 2);
|
||||
ia64_czx1_r (code, 1, 2);
|
||||
ia64_czx2_r (code, 1, 2);
|
||||
|
||||
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NONE);
|
||||
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NT1);
|
||||
ia64_ld1_hint (code, 1, 2, IA64_LD_HINT_NTA);
|
||||
|
||||
ia64_ld1_hint (code, 1, 2, 0);
|
||||
ia64_ld2_hint (code, 1, 2, 0);
|
||||
ia64_ld4_hint (code, 1, 2, 0);
|
||||
ia64_ld8_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ld1_s_hint (code, 1, 2, 0);
|
||||
ia64_ld2_s_hint (code, 1, 2, 0);
|
||||
ia64_ld4_s_hint (code, 1, 2, 0);
|
||||
ia64_ld8_s_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ld1_a_hint (code, 1, 2, 0);
|
||||
ia64_ld2_a_hint (code, 1, 2, 0);
|
||||
ia64_ld4_a_hint (code, 1, 2, 0);
|
||||
ia64_ld8_a_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ld1_sa_hint (code, 1, 2, 0);
|
||||
ia64_ld2_sa_hint (code, 1, 2, 0);
|
||||
ia64_ld4_sa_hint (code, 1, 2, 0);
|
||||
ia64_ld8_sa_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ld1_bias_hint (code, 1, 2, 0);
|
||||
ia64_ld2_bias_hint (code, 1, 2, 0);
|
||||
ia64_ld4_bias_hint (code, 1, 2, 0);
|
||||
ia64_ld8_bias_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ld1_inc_hint (code, 1, 2, 3, IA64_LD_HINT_NONE);
|
||||
|
||||
ia64_ld1_inc_imm_hint (code, 1, 2, 255, IA64_LD_HINT_NONE);
|
||||
ia64_ld1_inc_imm_hint (code, 1, 2, -256, IA64_LD_HINT_NONE);
|
||||
|
||||
ia64_st1_hint (code, 1, 2, IA64_ST_HINT_NTA);
|
||||
|
||||
ia64_st1_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st2_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st4_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st8_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st1_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st2_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st4_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st8_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st8_spill_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st16_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
ia64_st16_rel_hint (code, 1, 2, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st1_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st2_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st4_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st8_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st1_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st2_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st4_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
ia64_st8_rel_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_st8_spill_inc_imm_hint (code, 1, 2, 255, IA64_ST_HINT_NONE);
|
||||
|
||||
ia64_ldfs_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_s_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_s_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_s_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_s_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_a_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_a_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_a_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_a_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_sa_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_sa_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_sa_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_sa_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_c_clr_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_c_clr_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_c_clr_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_c_clr_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_c_nc_hint (code, 1, 2, 0);
|
||||
ia64_ldfd_c_nc_hint (code, 1, 2, 0);
|
||||
ia64_ldf8_c_nc_hint (code, 1, 2, 0);
|
||||
ia64_ldfe_c_nc_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldf_fill_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_ldfs_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_s_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_s_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_s_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_s_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_a_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_a_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_a_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_a_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfd_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldf8_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfe_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldf_fill_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfs_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfs_s_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_s_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_s_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_s_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfs_a_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_a_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_a_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_a_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfs_sa_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_sa_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_sa_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_sa_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfs_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_c_clr_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfs_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfd_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldf8_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_ldfe_c_nc_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldf_fill_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_stfs_hint (code, 1, 2, 0);
|
||||
ia64_stfd_hint (code, 1, 2, 0);
|
||||
ia64_stf8_hint (code, 1, 2, 0);
|
||||
ia64_stfe_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_stf_spill_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_stfs_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_stfd_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_stf8_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
ia64_stfe_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_stf_spill_inc_imm_hint (code, 1, 2, 255, 0);
|
||||
|
||||
ia64_ldfps_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_s_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_s_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_s_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_a_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_a_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_a_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_sa_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_sa_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_sa_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_c_clr_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_c_clr_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_c_clr_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_c_nc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_c_nc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_c_nc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_s_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_s_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_s_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_a_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_a_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_a_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_sa_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_c_clr_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_ldfps_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfpd_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
ia64_ldfp8_c_nc_inc_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_lfetch_hint (code, 1, 0);
|
||||
ia64_lfetch_excl_hint (code, 1, 0);
|
||||
ia64_lfetch_fault_hint (code, 1, 0);
|
||||
ia64_lfetch_fault_excl_hint (code, 1, 0);
|
||||
|
||||
ia64_lfetch_hint (code, 1, IA64_LFHINT_NT1);
|
||||
ia64_lfetch_hint (code, 1, IA64_LFHINT_NT2);
|
||||
ia64_lfetch_hint (code, 1, IA64_LFHINT_NTA);
|
||||
|
||||
ia64_lfetch_inc_hint (code, 1, 2, 0);
|
||||
ia64_lfetch_excl_inc_hint (code, 1, 2, 0);
|
||||
ia64_lfetch_fault_inc_hint (code, 1, 2, 0);
|
||||
ia64_lfetch_fault_excl_inc_hint (code, 1, 2, 0);
|
||||
|
||||
ia64_lfetch_inc_imm_hint (code, 1, 255, 0);
|
||||
ia64_lfetch_excl_inc_imm_hint (code, 1, 255, 0);
|
||||
ia64_lfetch_fault_inc_imm_hint (code, 1, 255, 0);
|
||||
ia64_lfetch_fault_excl_inc_imm_hint (code, 1, 255, 0);
|
||||
|
||||
ia64_cmpxchg1_acq_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg2_acq_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg4_acq_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg8_acq_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg1_rel_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg2_rel_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg4_rel_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg8_rel_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg16_acq_hint (code, 1, 2, 3, 0);
|
||||
ia64_cmpxchg16_rel_hint (code, 1, 2, 3, 0);
|
||||
ia64_xchg1_hint (code, 1, 2, 3, 0);
|
||||
ia64_xchg2_hint (code, 1, 2, 3, 0);
|
||||
ia64_xchg4_hint (code, 1, 2, 3, 0);
|
||||
ia64_xchg8_hint (code, 1, 2, 3, 0);
|
||||
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, -16, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, -8, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, -4, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, -1, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, 1, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, 4, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, 8, 0);
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, 16, 0);
|
||||
|
||||
ia64_fetchadd4_acq_hint (code, 1, 2, 16, 0);
|
||||
ia64_fetchadd8_acq_hint (code, 1, 2, 16, 0);
|
||||
ia64_fetchadd4_rel_hint (code, 1, 2, 16, 0);
|
||||
ia64_fetchadd8_rel_hint (code, 1, 2, 16, 0);
|
||||
|
||||
ia64_setf_sig (code, 1, 2);
|
||||
ia64_setf_exp (code, 1, 2);
|
||||
ia64_setf_s (code, 1, 2);
|
||||
ia64_setf_d (code, 1, 2);
|
||||
|
||||
ia64_getf_sig (code, 1, 2);
|
||||
ia64_getf_exp (code, 1, 2);
|
||||
ia64_getf_s (code, 1, 2);
|
||||
ia64_getf_d (code, 1, 2);
|
||||
|
||||
ia64_chk_s_m (code, 1, 0);
|
||||
ia64_chk_s_m (code, 1, 1);
|
||||
ia64_chk_s_m (code, 1, -1);
|
||||
|
||||
ia64_chk_s_float_m (code, 1, 0);
|
||||
|
||||
ia64_chk_a_nc (code, 1, 0);
|
||||
ia64_chk_a_nc (code, 1, 1);
|
||||
ia64_chk_a_nc (code, 1, -1);
|
||||
|
||||
ia64_chk_a_nc (code, 1, 0);
|
||||
ia64_chk_a_clr (code, 1, 0);
|
||||
|
||||
ia64_chk_a_nc_float (code, 1, 0);
|
||||
ia64_chk_a_clr_float (code, 1, 0);
|
||||
|
||||
ia64_invala (code);
|
||||
ia64_fwb (code);
|
||||
ia64_mf (code);
|
||||
ia64_mf_a (code);
|
||||
ia64_srlz_d (code);
|
||||
ia64_stlz_i (code);
|
||||
ia64_sync_i (code);
|
||||
|
||||
ia64_flushrs (code);
|
||||
ia64_loadrs (code);
|
||||
|
||||
ia64_invala_e (code, 1);
|
||||
ia64_invala_e_float (code, 1);
|
||||
|
||||
ia64_fc (code, 1);
|
||||
ia64_fc_i (code, 1);
|
||||
|
||||
ia64_mov_to_ar_m (code, 1, 1);
|
||||
|
||||
ia64_mov_to_ar_imm_m (code, 1, 127);
|
||||
|
||||
ia64_mov_from_ar_m (code, 1, 1);
|
||||
|
||||
ia64_mov_to_cr (code, 1, 2);
|
||||
|
||||
ia64_mov_from_cr (code, 1, 2);
|
||||
|
||||
ia64_alloc (code, 1, 3, 4, 5, 0);
|
||||
ia64_alloc (code, 1, 3, 4, 5, 8);
|
||||
|
||||
ia64_mov_to_psr_l (code, 1);
|
||||
ia64_mov_to_psr_um (code, 1);
|
||||
|
||||
ia64_mov_from_psr (code, 1);
|
||||
ia64_mov_from_psr_um (code, 1);
|
||||
|
||||
ia64_break_m (code, 0x1234);
|
||||
ia64_nop_m (code, 0x1234);
|
||||
ia64_hint_m (code, 0x1234);
|
||||
|
||||
ia64_br_cond_hint (code, 0, 0, 0, 0);
|
||||
ia64_br_wexit_hint (code, 0, 0, 0, 0);
|
||||
ia64_br_wtop_hint (code, 0, 0, 0, 0);
|
||||
|
||||
ia64_br_cloop_hint (code, 0, 0, 0, 0);
|
||||
ia64_br_cexit_hint (code, 0, 0, 0, 0);
|
||||
ia64_br_ctop_hint (code, 0, 0, 0, 0);
|
||||
|
||||
ia64_br_call_hint (code, 1, 0, 0, 0, 0);
|
||||
|
||||
ia64_br_cond_reg_hint (code, 1, 0, 0, 0);
|
||||
ia64_br_ia_reg_hint (code, 1, 0, 0, 0);
|
||||
ia64_br_ret_reg_hint (code, 1, 0, 0, 0);
|
||||
|
||||
ia64_br_call_reg_hint (code, 1, 2, 0, 0, 0);
|
||||
|
||||
ia64_cover (code);
|
||||
ia64_clrrrb (code);
|
||||
ia64_clrrrb_pr (code);
|
||||
ia64_rfi (code);
|
||||
ia64_bsw_0 (code);
|
||||
ia64_bsw_1 (code);
|
||||
ia64_epc (code);
|
||||
|
||||
ia64_break_b (code, 0x1234);
|
||||
ia64_nop_b (code, 0x1234);
|
||||
ia64_hint_b (code, 0x1234);
|
||||
|
||||
ia64_break_x (code, 0x2123456789ABCDEFULL);
|
||||
|
||||
ia64_movl (code, 1, 0x123456789ABCDEF0LL);
|
||||
|
||||
ia64_brl_cond_hint (code, 0, 0, 0, 0);
|
||||
ia64_brl_cond_hint (code, -1, 0, 0, 0);
|
||||
|
||||
ia64_brl_call_hint (code, 1, 0, 0, 0, 0);
|
||||
ia64_brl_call_hint (code, 1, -1, 0, 0, 0);
|
||||
|
||||
ia64_nop_x (code, 0x2123456789ABCDEFULL);
|
||||
ia64_hint_x (code, 0x2123456789ABCDEFULL);
|
||||
|
||||
ia64_movl_pred (code, 1, 1, 0x123456789ABCDEF0LL);
|
||||
|
||||
/* FLOATING-POINT */
|
||||
ia64_fma_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fma_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fma_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fpma_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fms_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fms_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fms_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fpms_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fnma_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fnma_s_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fnma_d_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
ia64_fpnma_sf_pred (code, 1, 1, 2, 3, 4, 2);
|
||||
|
||||
ia64_xma_l_pred (code, 1, 1, 2, 3, 4);
|
||||
ia64_xma_h_pred (code, 1, 1, 2, 3, 4);
|
||||
ia64_xma_hu_pred (code, 1, 1, 2, 3, 4);
|
||||
|
||||
ia64_fselect_pred (code, 1, 1, 2, 3, 4);
|
||||
|
||||
ia64_fcmp_eq_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_lt_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_le_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_unord_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_eq_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_lt_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_le_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fcmp_unord_unc_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
|
||||
ia64_fclass_m_pred (code, 1, 1, 2, 3, 0x1ff);
|
||||
ia64_fclass_m_unc_pred (code, 1, 1, 2, 3, 0x1ff);
|
||||
|
||||
ia64_frcpa_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
ia64_fprcpa_sf_pred (code, 1, 1, 2, 3, 4, 0);
|
||||
|
||||
ia64_frsqrta_sf_pred (code, 1, 1, 2, 4, 0);
|
||||
ia64_fprsqrta_sf_pred (code, 1, 1, 2, 4, 0);
|
||||
|
||||
ia64_fmin_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fman_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_famin_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_famax_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpmin_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpman_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpamin_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpamax_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_eq_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_lt_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_le_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_unord_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_neq_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_nlt_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_nle_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
ia64_fpcmp_ord_sf_pred (code, 1, 2, 3, 4, 0);
|
||||
|
||||
ia64_fmerge_s_pred (code, 1, 2, 3, 4);
|
||||
ia64_fmerge_ns_pred (code, 1, 2, 3, 4);
|
||||
ia64_fmerge_se_pred (code, 1, 2, 3, 4);
|
||||
ia64_fmix_lr_pred (code, 1, 2, 3, 4);
|
||||
ia64_fmix_r_pred (code, 1, 2, 3, 4);
|
||||
ia64_fmix_l_pred (code, 1, 2, 3, 4);
|
||||
ia64_fsxt_r_pred (code, 1, 2, 3, 4);
|
||||
ia64_fsxt_l_pred (code, 1, 2, 3, 4);
|
||||
ia64_fpack_pred (code, 1, 2, 3, 4);
|
||||
ia64_fswap_pred (code, 1, 2, 3, 4);
|
||||
ia64_fswap_nl_pred (code, 1, 2, 3, 4);
|
||||
ia64_fswap_nr_pred (code, 1, 2, 3, 4);
|
||||
ia64_fand_pred (code, 1, 2, 3, 4);
|
||||
ia64_fandcm_pred (code, 1, 2, 3, 4);
|
||||
ia64_for_pred (code, 1, 2, 3, 4);
|
||||
ia64_fxor_pred (code, 1, 2, 3, 4);
|
||||
ia64_fpmerge_s_pred (code, 1, 2, 3, 4);
|
||||
ia64_fpmerge_ns_pred (code, 1, 2, 3, 4);
|
||||
ia64_fpmerge_se_pred (code, 1, 2, 3, 4);
|
||||
|
||||
ia64_fcvt_fx_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fcvt_fxu_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fcvt_fx_trunc_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fcvt_fxu_trunc_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fpcvt_fx_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fpcvt_fxu_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fpcvt_fx_trunc_sf_pred ((code), 1, 2, 3, 0);
|
||||
ia64_fpcvt_fxu_trunc_sf_pred ((code), 1, 2, 3, 0);
|
||||
|
||||
ia64_fcvt_xf_pred ((code), 1, 2, 3);
|
||||
|
||||
ia64_fsetc_sf_pred ((code), 1, 0x33, 0x33, 3);
|
||||
|
||||
ia64_fclrf_sf_pred ((code), 1, 3);
|
||||
|
||||
ia64_fchkf_sf_pred ((code), 1, -1, 3);
|
||||
|
||||
ia64_break_f_pred ((code), 1, 0x1234);
|
||||
|
||||
ia64_movl (code, 31, -123456);
|
||||
|
||||
ia64_codegen_close (code);
|
||||
|
||||
#if 0
|
||||
/* disassembly */
|
||||
{
|
||||
guint8 *buf = code.buf;
|
||||
int template;
|
||||
guint64 dw1, dw2;
|
||||
guint64 ins1, ins2, ins3;
|
||||
|
||||
ia64_break_i (code, 0x1234);
|
||||
|
||||
ia64_codegen_close (code);
|
||||
|
||||
dw1 = ((guint64*)buf) [0];
|
||||
dw2 = ((guint64*)buf) [1];
|
||||
|
||||
template = ia64_bundle_template (buf);
|
||||
ins1 = ia64_bundle_ins1 (buf);
|
||||
ins2 = ia64_bundle_ins2 (buf);
|
||||
ins3 = ia64_bundle_ins3 (buf);
|
||||
|
||||
code.buf = buf;
|
||||
ia64_emit_bundle_template (&code, template, ins1, ins2, ins3);
|
||||
|
||||
g_assert (dw1 == ((guint64*)buf) [0]);
|
||||
g_assert (dw2 == ((guint64*)buf) [1]);
|
||||
}
|
||||
#endif
|
||||
|
||||
mono_disassemble_code (buf, 40960, "code");
|
||||
|
||||
return 0;
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,6 @@
|
||||
/
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/*.o
|
||||
/*.lo
|
||||
/.deps
|
@ -0,0 +1,8 @@
|
||||
|
||||
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
|
||||
|
||||
noinst_LTLIBRARIES = libmonoarch-mips.la
|
||||
|
||||
libmonoarch_mips_la_SOURCES = mips-codegen.h
|
||||
|
||||
noinst_PROGRAMS = test
|
@ -0,0 +1,435 @@
|
||||
#ifndef __MIPS_CODEGEN_H__
|
||||
#define __MIPS_CODEGEN_H__
|
||||
/*
|
||||
* Copyright (c) 2004 Novell, Inc
|
||||
* Author: Paolo Molaro (lupus@ximian.com)
|
||||
*
|
||||
*/
|
||||
|
||||
/* registers */
|
||||
enum {
|
||||
mips_zero,
|
||||
mips_at, /* assembler temp */
|
||||
mips_v0, /* return values */
|
||||
mips_v1,
|
||||
mips_a0, /* 4 - func arguments */
|
||||
mips_a1,
|
||||
mips_a2,
|
||||
mips_a3,
|
||||
/* The names given to the next eight enumerators depend on _MIPS_SIM. NOTE(review): there is no #else branch, so for any value other than _ABIO32/_ABIN32 these eight entries are omitted and mips_s0 onward are numbered 8 lower than the comments below state - confirm whether other ABIs need to be handled. */
#if _MIPS_SIM == _ABIO32
|
||||
mips_t0, /* 8 temporaries */
|
||||
mips_t1,
|
||||
mips_t2,
|
||||
mips_t3,
|
||||
mips_t4,
|
||||
mips_t5,
|
||||
mips_t6,
|
||||
mips_t7,
|
||||
#elif _MIPS_SIM == _ABIN32
|
||||
mips_a4, /* 4 more argument registers */
|
||||
mips_a5,
|
||||
mips_a6,
|
||||
mips_a7,
|
||||
mips_t0, /* 4 temporaries */
|
||||
mips_t1,
|
||||
mips_t2,
|
||||
mips_t3,
|
||||
#endif
|
||||
mips_s0, /* 16 callee saved */
|
||||
mips_s1,
|
||||
mips_s2,
|
||||
mips_s3,
|
||||
mips_s4,
|
||||
mips_s5,
|
||||
mips_s6,
|
||||
mips_s7,
|
||||
mips_t8, /* 24 temps */
|
||||
mips_t9, /* 25 temp / pic call-through register */
|
||||
mips_k0, /* 26 kernel-reserved */
|
||||
mips_k1,
|
||||
mips_gp, /* 28 */
|
||||
mips_sp, /* stack pointer */
|
||||
mips_fp, /* frame pointer */
|
||||
mips_ra /* return address */
|
||||
};
|
||||
|
||||
/* we treat the register file as containing just doubles... */
|
||||
enum {
|
||||
mips_f0, /* return regs */
|
||||
mips_f1,
|
||||
mips_f2,
|
||||
mips_f3,
|
||||
mips_f4, /* temps */
|
||||
mips_f5,
|
||||
mips_f6,
|
||||
mips_f7,
|
||||
mips_f8,
|
||||
mips_f9,
|
||||
mips_f10,
|
||||
mips_f11,
|
||||
mips_f12, /* first arg */
|
||||
mips_f13,
|
||||
mips_f14, /* second arg */
|
||||
mips_f15,
|
||||
mips_f16, /* temps */
|
||||
mips_f17,
|
||||
mips_f18,
|
||||
mips_f19,
|
||||
mips_f20, /* callee saved */
|
||||
mips_f21,
|
||||
mips_f22,
|
||||
mips_f23,
|
||||
mips_f24,
|
||||
mips_f25,
|
||||
mips_f26,
|
||||
mips_f27,
|
||||
mips_f28,
|
||||
mips_f29,
|
||||
mips_f30,
|
||||
mips_f31
|
||||
};
|
||||
|
||||
/* prefetch hints */
|
||||
enum {
|
||||
MIPS_FOR_LOAD,
|
||||
MIPS_FOR_STORE,
|
||||
MIPS_FOR_LOAD_STREAMED = 4,
|
||||
MIPS_FOR_STORE_STREAMED,
|
||||
MIPS_FOR_LOAD_RETAINED,
|
||||
MIPS_FOR_STORE_RETAINED
|
||||
};
|
||||
|
||||
/* coprocessors */
|
||||
enum {
|
||||
MIPS_COP0,
|
||||
MIPS_COP1,
|
||||
MIPS_COP2,
|
||||
MIPS_COP3
|
||||
};
|
||||
|
||||
enum {
|
||||
MIPS_FMT_SINGLE = 16,
|
||||
MIPS_FMT_DOUBLE = 17,
|
||||
MIPS_FMT_WORD = 20,
|
||||
MIPS_FMT_LONG = 21,
|
||||
MIPS_FMT3_SINGLE = 0,
|
||||
MIPS_FMT3_DOUBLE = 1
|
||||
};
|
||||
|
||||
/* fpu rounding mode */
|
||||
enum {
|
||||
MIPS_ROUND_TO_NEAREST,
|
||||
MIPS_ROUND_TO_ZERO,
|
||||
MIPS_ROUND_TO_POSINF,
|
||||
MIPS_ROUND_TO_NEGINF,
|
||||
MIPS_ROUND_MASK = 3
|
||||
};
|
||||
|
||||
/* fpu enable/cause flags, cc */
|
||||
enum {
|
||||
MIPS_FPU_C_MASK = 1 << 23,
|
||||
MIPS_INEXACT = 1,
|
||||
MIPS_UNDERFLOW = 2,
|
||||
MIPS_OVERFLOW = 4,
|
||||
MIPS_DIVZERO = 8,
|
||||
MIPS_INVALID = 16,
|
||||
MIPS_NOTIMPL = 32,
|
||||
MIPS_FPU_FLAGS_OFFSET = 2,
|
||||
MIPS_FPU_ENABLES_OFFSET = 7,
|
||||
MIPS_FPU_CAUSES_OFFSET = 12
|
||||
};
|
||||
|
||||
/* fpu condition values - see manual entry for C.cond.fmt instructions */
|
||||
enum {
|
||||
MIPS_FPU_F,
|
||||
MIPS_FPU_UN,
|
||||
MIPS_FPU_EQ,
|
||||
MIPS_FPU_UEQ,
|
||||
MIPS_FPU_OLT,
|
||||
MIPS_FPU_ULT,
|
||||
MIPS_FPU_OLE,
|
||||
MIPS_FPU_ULE,
|
||||
MIPS_FPU_SF,
|
||||
MIPS_FPU_NGLE,
|
||||
MIPS_FPU_SEQ,
|
||||
MIPS_FPU_NGL,
|
||||
MIPS_FPU_LT,
|
||||
MIPS_FPU_NGE,
|
||||
MIPS_FPU_LE,
|
||||
MIPS_FPU_NGT
|
||||
};
|
||||
|
||||
#if SIZEOF_REGISTER == 4
|
||||
|
||||
#define MIPS_SW mips_sw
|
||||
#define MIPS_LW mips_lw
|
||||
#define MIPS_ADDU mips_addu
|
||||
#define MIPS_ADDIU mips_addiu
|
||||
#define MIPS_SWC1 mips_swc1
|
||||
#define MIPS_LWC1 mips_lwc1
|
||||
#define MIPS_MOVE mips_move
|
||||
|
||||
#elif SIZEOF_REGISTER == 8
|
||||
|
||||
#define MIPS_SW mips_sd
|
||||
#define MIPS_LW mips_ld
|
||||
#define MIPS_ADDU mips_daddu
|
||||
#define MIPS_ADDIU mips_daddiu
|
||||
#define MIPS_SWC1 mips_sdc1
|
||||
#define MIPS_LWC1 mips_ldc1
|
||||
#define MIPS_MOVE mips_dmove
|
||||
|
||||
#else
|
||||
#error Unknown SIZEOF_REGISTER
|
||||
#endif
|
||||
|
||||
/*
 * Store the 32-bit word x at the location c points to, then advance c by
 * one 32-bit word while keeping c's own pointer type (via typeof).
 *
 * c must be a modifiable pointer lvalue; it is evaluated more than once.
 * x is parenthesized so that any expression can be passed as the word
 * without being re-associated by the assignment.
 */
#define mips_emit32(c,x) do {	\
		*((guint32 *) (void *)(c)) = (x);	\
		(c) = (typeof(c))(((guint32 *)(void *)(c)) + 1);	\
	} while (0)
|
||||
|
||||
/*
 * Instruction-word builders. Each one ORs its fields into a single 32-bit
 * word and emits it through mips_emit32:
 *   mips_format_i      op<<26 | rs<<21 | rt<<16 | low 16 bits of imm
 *   mips_format_j      op<<26 | low 26 bits of imm
 *   mips_format_r      op<<26 | rs<<21 | rt<<16 | rd<<11 | sa<<6 | func
 *   mips_format_divmul op<<26 | src1<<21 | src2<<16 | fun
 *
 * The opcode is widened to guint32 before shifting: opcodes of 32 and above
 * (used by the load/store macros) shifted left by 26 do not fit in a signed
 * int, and shifting into the sign bit is undefined behaviour in C.
 * Register and shift-amount fields are not range-checked or masked.
 */
#define mips_format_i(code,op,rs,rt,imm) mips_emit32 ((code), ((((guint32)(op))<<26)|((rs)<<21)|((rt)<<16)|((imm)&0xffff)))
#define mips_format_j(code,op,imm) mips_emit32 ((code), ((((guint32)(op))<<26)|((imm)&0x03ffffff)))
#define mips_format_r(code,op,rs,rt,rd,sa,func) mips_emit32 ((code), ((((guint32)(op))<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((sa)<<6)|(func)))
#define mips_format_divmul(code,op,src1,src2,fun) mips_emit32 ((code), ((((guint32)(op))<<26)|((src1)<<21)|((src2)<<16)|(fun)))
|
||||
|
||||
#define mips_is_imm16(val) ((gint)(gshort)(gint)(val) == (gint)(val))
|
||||
|
||||
/*
 * Load always using a lui/addiu pair (for later patching).
 *
 * The lui immediate is the upper half of v, plus one when bit 15 of v is
 * set (presumably to cancel addiu sign-extending the lower half - confirm
 * against the ISA manual). The addiu then adds the lower 16 bits of v.
 * Both instructions are emitted unconditionally; v is evaluated several
 * times.
 */
#define mips_load(c,D,v) do {	\
		mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16) + ((((guint32)(v))>>15) & 1));	\
		mips_addiu ((c), (D), (D), ((guint32)(v)) & 0xffff);	\
	} while (0)
|
||||
|
||||
/*
 * Load constant - no patch-up, so the shortest sequence is emitted:
 *   - v passes mips_is_imm16: a single addiu from mips_zero;
 *   - otherwise: lui with the upper half of v (plus one when bit 15 of v
 *     is set), followed by addiu of the lower half only when that half is
 *     non-zero.
 * v is evaluated several times.
 */
#define mips_load_const(c,D,v) do {	\
		if (mips_is_imm16 ((v))) {	\
			mips_addiu ((c), (D), mips_zero, ((guint32)(v)) & 0xffff);	\
		} else {	\
			mips_lui ((c), (D), mips_zero, (((guint32)(v))>>16) + ((((guint32)(v))>>15) & 1));	\
			if (((guint32)(v)) & 0xffff)	\
				mips_addiu ((c), (D), (D), ((guint32)(v)) & 0xffff);	\
		}	\
	} while (0)
|
||||
|
||||
/* arithmetic ops */
|
||||
#define mips_add(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,32)
|
||||
#define mips_addi(c,dest,src1,imm) mips_format_i(c,8,src1,dest,imm)
|
||||
#define mips_addu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,33)
|
||||
#define mips_addiu(c,dest,src1,imm) mips_format_i(c,9,src1,dest,imm)
|
||||
#define mips_dadd(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,44)
|
||||
#define mips_daddi(c,dest,src1,imm) mips_format_i(c,24,src1,dest,imm)
|
||||
#define mips_daddu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,45)
|
||||
#define mips_daddiu(c,dest,src1,imm) mips_format_i(c,25,src1,dest,imm)
|
||||
#define mips_dsub(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,46)
|
||||
#define mips_dsubu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,47)
|
||||
#define mips_mul(c,dest,src1,src2) mips_format_r(c,28,src1,src2,dest,0,2)
|
||||
#define mips_sub(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,34)
|
||||
#define mips_subu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,35)
|
||||
|
||||
/* div and mul ops */
|
||||
#define mips_ddiv(c,src1,src2) mips_format_divmul(c,0,src1,src2,30)
|
||||
#define mips_ddivu(c,src1,src2) mips_format_divmul(c,0,src1,src2,31)
|
||||
#define mips_div(c,src1,src2) mips_format_divmul(c,0,src1,src2,26)
|
||||
#define mips_divu(c,src1,src2) mips_format_divmul(c,0,src1,src2,27)
|
||||
#define mips_dmult(c,src1,src2) mips_format_divmul(c,0,src1,src2,28)
|
||||
#define mips_dmultu(c,src1,src2) mips_format_divmul(c,0,src1,src2,29)
|
||||
#define mips_mult(c,src1,src2) mips_format_divmul(c,0,src1,src2,24)
|
||||
#define mips_multu(c,src1,src2) mips_format_divmul(c,0,src1,src2,25)
|
||||
|
||||
/* shift ops */
|
||||
#define mips_dsll(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,56)
|
||||
#define mips_dsll32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,60)
|
||||
#define mips_dsllv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,20)
|
||||
#define mips_dsra(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,59)
|
||||
#define mips_dsra32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,63)
|
||||
#define mips_dsrav(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,23)
|
||||
#define mips_dsrl(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,58)
|
||||
#define mips_dsrl32(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,62)
|
||||
#define mips_dsrlv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,22)
|
||||
#define mips_sll(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,0)
|
||||
#define mips_sllv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,4)
|
||||
#define mips_sra(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,3)
|
||||
#define mips_srav(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,7)
|
||||
#define mips_srl(c,dest,src1,imm) mips_format_r(c,0,0,src1,dest,imm,2)
|
||||
#define mips_srlv(c,dest,src1,src2) mips_format_r(c,0,src2,src1,dest,0,6)
|
||||
|
||||
/* logical ops */
|
||||
#define mips_and(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,36)
|
||||
#define mips_andi(c,dest,src1,imm) mips_format_i(c,12,src1,dest,imm)
|
||||
#define mips_nor(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,39)
|
||||
#define mips_or(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,37)
|
||||
#define mips_ori(c,dest,src1,uimm) mips_format_i(c,13,src1,dest,uimm)
|
||||
#define mips_xor(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,38)
|
||||
#define mips_xori(c,dest,src1,uimm) mips_format_i(c,14,src1,dest,uimm)
|
||||
|
||||
/* compares */
|
||||
#define mips_slt(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,42)
|
||||
#define mips_slti(c,dest,src1,imm) mips_format_i(c,10,src1,dest,imm)
|
||||
#define mips_sltiu(c,dest,src1,imm) mips_format_i(c,11,src1,dest,imm)
|
||||
#define mips_sltu(c,dest,src1,src2) mips_format_r(c,0,src1,src2,dest,0,43)
|
||||
/* missing traps: teq, teqi, tge, tgei, tgeiu, tgeu, tlt, tlti, tltiu, tltu, tne, tnei, */
|
||||
|
||||
/* conditional branches */
|
||||
#define mips_beq(c,src1,src2,offset) mips_format_i(c,4,src1,src2,offset)
|
||||
#define mips_beql(c,src1,src2,offset) mips_format_i(c,20,src1,src2,offset)
|
||||
#define mips_bgez(c,src1,offset) mips_format_i(c,1,src1,1,offset)
|
||||
#define mips_bgezal(c,src1,offset) mips_format_i(c,1,src1,17,offset)
|
||||
#define mips_bgezall(c,src1,offset) mips_format_i(c,1,src1,19,offset)
|
||||
#define mips_bgezl(c,src1,offset) mips_format_i(c,1,src1,3,offset)
|
||||
#define mips_bgtz(c,src1,offset) mips_format_i(c,7,src1,0,offset)
|
||||
#define mips_bgtzl(c,src1,offset) mips_format_i(c,23,src1,0,offset)
|
||||
#define mips_blez(c,src1,offset) mips_format_i(c,6,src1,0,offset)
|
||||
#define mips_blezl(c,src1,offset) mips_format_i(c,22,src1,0,offset)
|
||||
#define mips_bltz(c,src1,offset) mips_format_i(c,1,src1,0,offset)
|
||||
#define mips_bltzal(c,src1,offset) mips_format_i(c,1,src1,16,offset)
|
||||
#define mips_bltzall(c,src1,offset) mips_format_i(c,1,src1,18,offset)
|
||||
#define mips_bltzl(c,src1,offset) mips_format_i(c,1,src1,2,offset)
|
||||
#define mips_bne(c,src1,src2,offset) mips_format_i(c,5,src1,src2,offset)
|
||||
#define mips_bnel(c,src1,src2,offset) mips_format_i(c,21,src1,src2,offset)
|
||||
|
||||
/* uncond branches and calls */
|
||||
#define mips_jump(c,target) mips_format_j(c,2,target)
|
||||
#define mips_jumpl(c,target) mips_format_j(c,3,target)
|
||||
#define mips_jalr(c,src1,retreg) mips_format_r(c,0,src1,0,retreg,0,9)
|
||||
#define mips_jr(c,src1) mips_emit32(c,((src1)<<21)|8)
|
||||
|
||||
/* loads and stores */
|
||||
#define mips_lb(c,dest,base,offset) mips_format_i(c,32,base,dest,offset)
|
||||
#define mips_lbu(c,dest,base,offset) mips_format_i(c,36,base,dest,offset)
|
||||
#define mips_ld(c,dest,base,offset) mips_format_i(c,55,base,dest,offset)
|
||||
#define mips_ldl(c,dest,base,offset) mips_format_i(c,26,base,dest,offset)
|
||||
#define mips_ldr(c,dest,base,offset) mips_format_i(c,27,base,dest,offset)
|
||||
#define mips_lh(c,dest,base,offset) mips_format_i(c,33,base,dest,offset)
|
||||
#define mips_lhu(c,dest,base,offset) mips_format_i(c,37,base,dest,offset)
|
||||
#define mips_ll(c,dest,base,offset) mips_format_i(c,48,base,dest,offset)
|
||||
#define mips_lld(c,dest,base,offset) mips_format_i(c,52,base,dest,offset)
|
||||
#define mips_lui(c,dest,base,uimm) mips_format_i(c,15,base,dest,uimm)
|
||||
#define mips_lw(c,dest,base,offset) mips_format_i(c,35,base,dest,offset)
|
||||
#define mips_lwl(c,dest,base,offset) mips_format_i(c,34,base,dest,offset)
|
||||
#define mips_lwr(c,dest,base,offset) mips_format_i(c,38,base,dest,offset)
|
||||
#define mips_lwu(c,dest,base,offset) mips_format_i(c,39,base,dest,offset)
|
||||
|
||||
#define mips_sb(c,src,base,offset) mips_format_i(c,40,base,src,offset)
|
||||
#define mips_sc(c,src,base,offset) mips_format_i(c,56,base,src,offset)
|
||||
#define mips_scd(c,src,base,offset) mips_format_i(c,60,base,src,offset)
|
||||
#define mips_sd(c,src,base,offset) mips_format_i(c,63,base,src,offset)
|
||||
#define mips_sdl(c,src,base,offset) mips_format_i(c,44,base,src,offset)
|
||||
#define mips_sdr(c,src,base,offset) mips_format_i(c,45,base,src,offset)
|
||||
#define mips_sh(c,src,base,offset) mips_format_i(c,41,base,src,offset)
|
||||
#define mips_sw(c,src,base,offset) mips_format_i(c,43,base,src,offset)
|
||||
/* SWL (store word left): major opcode 42. The previous value 50 is LWC2. */
#define mips_swl(c,src,base,offset) mips_format_i(c,42,base,src,offset)
|
||||
/* SWR (store word right): major opcode 46. The previous value 54 is LDC2. */
#define mips_swr(c,src,base,offset) mips_format_i(c,46,base,src,offset)
|
||||
|
||||
/* misc and coprocessor ops */
|
||||
#define mips_move(c,dest,src) mips_addu(c,dest,src,mips_zero)
|
||||
#define mips_dmove(c,dest,src) mips_daddu(c,dest,src,mips_zero)
|
||||
#define mips_nop(c) mips_or(c,mips_at,mips_at,0)
|
||||
#define mips_break(c,code) mips_emit32(c, ((code)<<6)|13)
|
||||
#define mips_mfhi(c,dest) mips_format_r(c,0,0,0,dest,0,16)
|
||||
#define mips_mflo(c,dest) mips_format_r(c,0,0,0,dest,0,18)
|
||||
#define mips_mthi(c,src) mips_format_r(c,0,src,0,0,0,17)
|
||||
#define mips_mtlo(c,src) mips_format_r(c,0,src,0,0,0,19)
|
||||
#define mips_movn(c,dest,src,test) mips_format_r(c,0,src,test,dest,0,11)
|
||||
#define mips_movz(c,dest,src,test) mips_format_r(c,0,src,test,dest,0,10)
|
||||
#define mips_pref(c,hint,base,offset) mips_format_i(c,51,base,hint,offset)
|
||||
#define mips_prefidx(c,hint,base,idx) mips_format_r(c,19,base,idx,hint,0,15)
|
||||
#define mips_sync(c,stype) mips_emit32(c, ((stype)<<6)|15)
|
||||
#define mips_syscall(c,code) mips_emit32(c, ((code)<<6)|12)
|
||||
|
||||
#define mips_cop(c,cop,fun) mips_emit32(c, ((16|(cop))<<26)|(fun))
|
||||
#define mips_ldc(c,cop,dest,base,offset) mips_format_i(c,(52|(cop)),base,dest,offset)
|
||||
#define mips_lwc(c,cop,dest,base,offset) mips_format_i(c,(48|(cop)),base,dest,offset)
|
||||
#define mips_sdc(c,cop,src,base,offset) mips_format_i(c,(60|(cop)),base,src,offset)
|
||||
#define mips_swc(c,cop,src,base,offset) mips_format_i(c,(56|(cop)),base,src,offset)
|
||||
#define mips_cfc1(c,dest,src) mips_format_r(c,17,2,dest,src,0,0)
|
||||
#define mips_ctc1(c,dest,src) mips_format_r(c,17,6,dest,src,0,0)
|
||||
|
||||
/* fpu ops */
|
||||
#define mips_fabss(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,5)
|
||||
#define mips_fabsd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,5)
|
||||
#define mips_fadds(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,0)
|
||||
#define mips_faddd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,0)
|
||||
#define mips_fdivs(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,3)
|
||||
#define mips_fdivd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,3)
|
||||
#define mips_fmuls(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,2)
|
||||
#define mips_fmuld(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,2)
|
||||
#define mips_fnegs(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,7)
|
||||
#define mips_fnegd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,7)
|
||||
#define mips_fsqrts(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,4)
|
||||
#define mips_fsqrtd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,4)
|
||||
#define mips_fsubs(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,dest,1)
|
||||
#define mips_fsubd(c,dest,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,dest,1)
|
||||
#define mips_madds(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,32|MIPS_FMT_SINGLE)
|
||||
#define mips_maddd(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,32|MIPS_FMT_DOUBLE)
|
||||
#define mips_nmadds(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,48|MIPS_FMT_SINGLE)
|
||||
#define mips_nmaddd(c,dest,src1,src2,srcadd) mips_format_r(c,19,srcadd,src2,src1,dest,48|MIPS_FMT_DOUBLE)
|
||||
#define mips_msubs(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,40|MIPS_FMT_SINGLE)
|
||||
#define mips_msubd(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,40|MIPS_FMT_DOUBLE)
|
||||
#define mips_nmsubs(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,56|MIPS_FMT_SINGLE)
|
||||
#define mips_nmsubd(c,dest,src1,src2,srcsub) mips_format_r(c,19,srcsub,src2,src1,dest,56|MIPS_FMT_DOUBLE)
|
||||
|
||||
/* fp compare and branch */
|
||||
#define mips_fcmps(c,cond,src1,src2) mips_format_r(c,17,MIPS_FMT_SINGLE,src2,src1,0,(3<<4)|(cond))
|
||||
#define mips_fcmpd(c,cond,src1,src2) mips_format_r(c,17,MIPS_FMT_DOUBLE,src2,src1,0,(3<<4)|(cond))
|
||||
#define mips_fbfalse(c,offset) mips_format_i(c,17,8,0,offset)
|
||||
#define mips_fbfalsel(c,offset) mips_format_i(c,17,8,2,offset)
|
||||
#define mips_fbtrue(c,offset) mips_format_i(c,17,8,1,offset)
|
||||
#define mips_fbtruel(c,offset) mips_format_i(c,17,8,3,offset)
|
||||
|
||||
/* fp convert */
|
||||
#define mips_ceills(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,10)
|
||||
#define mips_ceilld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,10)
|
||||
#define mips_ceilws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,14)
|
||||
#define mips_ceilwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,14)
|
||||
#define mips_cvtds(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,33)
|
||||
#define mips_cvtdw(c,dest,src) mips_format_r(c,17,MIPS_FMT_WORD,0,src,dest,33)
|
||||
#define mips_cvtdl(c,dest,src) mips_format_r(c,17,MIPS_FMT_LONG,0,src,dest,33)
|
||||
#define mips_cvtls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,37)
|
||||
#define mips_cvtld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,37)
|
||||
#define mips_cvtsd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,32)
|
||||
#define mips_cvtsw(c,dest,src) mips_format_r(c,17,MIPS_FMT_WORD,0,src,dest,32)
|
||||
#define mips_cvtsl(c,dest,src) mips_format_r(c,17,MIPS_FMT_LONG,0,src,dest,32)
|
||||
#define mips_cvtws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,36)
|
||||
#define mips_cvtwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,36)
|
||||
#define mips_floorls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,11)
|
||||
#define mips_floorld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,11)
|
||||
#define mips_floorws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,15)
|
||||
#define mips_floorwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,15)
|
||||
#define mips_roundls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,8)
|
||||
#define mips_roundld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,8)
|
||||
#define mips_roundws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,12)
|
||||
#define mips_roundwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,12)
|
||||
#define mips_truncls(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,9)
|
||||
#define mips_truncld(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,9)
|
||||
#define mips_truncws(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,13)
|
||||
#define mips_truncwd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,13)
|
||||
|
||||
/* fp moves, loads */
|
||||
#define mips_fmovs(c,dest,src) mips_format_r(c,17,MIPS_FMT_SINGLE,0,src,dest,6)
|
||||
#define mips_fmovd(c,dest,src) mips_format_r(c,17,MIPS_FMT_DOUBLE,0,src,dest,6)
|
||||
#define mips_mfc1(c,dest,src) mips_format_r(c,17,0,dest,src,0,0)
|
||||
#define mips_mtc1(c,dest,src) mips_format_r(c,17,4,src,dest,0,0)
|
||||
/* DMFC1: copy 64-bit FPR `src` into GPR `dest`. Same field layout as
 * mips_mfc1 (GPR in the rt slot, FPR in the rd slot) with sub-opcode 1. */
#define mips_dmfc1(c,dest,src) mips_format_r(c,17,1,dest,src,0,0)
|
||||
/* DMTC1: copy 64-bit GPR `src` into FPR `dest`. Same field layout as
 * mips_mtc1 (GPR in the rt slot, FPR in the rd slot) with sub-opcode 5;
 * sub-opcode 1 is DMFC1. */
#define mips_dmtc1(c,dest,src) mips_format_r(c,17,5,src,dest,0,0)
|
||||
#define mips_ldc1(c,dest,base,offset) mips_ldc(c,1,dest,base,offset)
|
||||
#define mips_ldxc1(c,dest,base,idx) mips_format_r(c,19,base,idx,0,dest,1)
|
||||
#define mips_lwc1(c,dest,base,offset) mips_lwc(c,1,dest,base,offset)
|
||||
#define mips_lwxc1(c,dest,base,idx) mips_format_r(c,19,base,idx,0,dest,0)
|
||||
#define mips_sdc1(c,src,base,offset) mips_sdc(c,1,src,base,offset)
|
||||
#define mips_sdxc1(c,src,base,idx) mips_format_r(c,19,base,idx,src,0,9)
|
||||
#define mips_swc1(c,src,base,offset) mips_swc(c,1,src,base,offset)
|
||||
#define mips_swxc1(c,src,base,idx) mips_format_r(c,19,base,idx,src,0,8)
|
||||
|
||||
#endif /* __MIPS_CODEGEN_H__ */
|
||||
|
@ -0,0 +1,159 @@
|
||||
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#define NO_MIPS_JIT_DEBUG
|
||||
|
||||
#include "mips-codegen.h"
|
||||
#include "mono/metadata/class.h"
|
||||
|
||||
/* don't run the resulting program, it will destroy your computer,
|
||||
* just objdump -d it to check that we generated the correct assembly.
|
||||
*/
|
||||
|
||||
int main (int argc, char *argv[]) {
|
||||
guint32 *code, * p;
|
||||
|
||||
code = p = (guint32 *) malloc (sizeof (guint32) * 1024);
|
||||
|
||||
mips_add (p, 3, 4, 5);
|
||||
mips_addi (p, 3, 4, 5);
|
||||
mips_addu (p, 3, 4, 5);
|
||||
mips_addiu (p, 3, 4, 5);
|
||||
mips_sub (p, 3, 4, 5);
|
||||
mips_subu (p, 3, 4, 5);
|
||||
mips_dadd (p, 3, 4, 5);
|
||||
mips_daddi (p, 3, 4, 5);
|
||||
mips_daddu (p, 3, 4, 5);
|
||||
mips_daddiu (p, 3, 4, 5);
|
||||
mips_dsub (p, 3, 4, 5);
|
||||
mips_dsubu (p, 3, 4, 5);
|
||||
|
||||
mips_mult (p, 6, 7);
|
||||
mips_multu (p, 6, 7);
|
||||
mips_div (p, 6, 7);
|
||||
mips_divu (p, 6, 7);
|
||||
mips_dmult (p, 6, 7);
|
||||
mips_dmultu (p, 6, 7);
|
||||
mips_ddiv (p, 6, 7);
|
||||
mips_ddivu (p, 6, 7);
|
||||
|
||||
mips_sll (p, 3, 4, 5);
|
||||
mips_sllv (p, 3, 4, 5);
|
||||
mips_sra (p, 3, 4, 5);
|
||||
mips_srav (p, 3, 4, 5);
|
||||
mips_srl (p, 3, 4, 5);
|
||||
mips_srlv (p, 3, 4, 5);
|
||||
mips_dsll (p, 3, 4, 5);
|
||||
mips_dsll32 (p, 3, 4, 5);
|
||||
mips_dsllv (p, 3, 4, 5);
|
||||
mips_dsra (p, 3, 4, 5);
|
||||
mips_dsra32 (p, 3, 4, 5);
|
||||
mips_dsrav (p, 3, 4, 5);
|
||||
mips_dsrl (p, 3, 4, 5);
|
||||
mips_dsrl32 (p, 3, 4, 5);
|
||||
mips_dsrlv (p, 3, 4, 5);
|
||||
|
||||
mips_and (p, 8, 9, 10);
|
||||
mips_andi (p, 8, 9, 10);
|
||||
mips_nor (p, 8, 9, 10);
|
||||
mips_or (p, 8, 9, 10);
|
||||
mips_ori (p, 8, 9, 10);
|
||||
mips_xor (p, 8, 9, 10);
|
||||
mips_xori (p, 8, 9, 10);
|
||||
|
||||
mips_slt (p, 8, 9, 10);
|
||||
mips_slti (p, 8, 9, 10);
|
||||
mips_sltu (p, 8, 9, 10);
|
||||
mips_sltiu (p, 8, 9, 10);
|
||||
|
||||
mips_beq (p, 8, 9, 0xff1f);
|
||||
mips_beql (p, 8, 9, 0xff1f);
|
||||
mips_bne (p, 8, 9, 0xff1f);
|
||||
mips_bnel (p, 8, 9, 0xff1f);
|
||||
mips_bgez (p, 11, 0xff1f);
|
||||
mips_bgezal (p, 11, 0xff1f);
|
||||
mips_bgezall (p, 11, 0xff1f);
|
||||
mips_bgezl (p, 11, 0xff1f);
|
||||
mips_bgtz (p, 11, 0xff1f);
|
||||
mips_bgtzl (p, 11, 0xff1f);
|
||||
mips_blez (p, 11, 0xff1f);
|
||||
mips_blezl (p, 11, 0xff1f);
|
||||
mips_bltz (p, 11, 0xff1f);
|
||||
mips_bltzal (p, 11, 0xff1f);
|
||||
mips_bltzall (p, 11, 0xff1f);
|
||||
mips_bltzl (p, 11, 0xff1f);
|
||||
|
||||
mips_jump (p, 0xff1f);
|
||||
mips_jumpl (p, 0xff1f);
|
||||
mips_jalr (p, 12, mips_ra);
|
||||
mips_jr (p, 12);
|
||||
|
||||
mips_lb (p, 13, 14, 128);
|
||||
mips_lbu (p, 13, 14, 128);
|
||||
mips_ld (p, 13, 14, 128);
|
||||
mips_ldl (p, 13, 14, 128);
|
||||
mips_ldr (p, 13, 14, 128);
|
||||
mips_lh (p, 13, 14, 128);
|
||||
mips_lhu (p, 13, 14, 128);
|
||||
mips_ll (p, 13, 14, 128);
|
||||
mips_lld (p, 13, 14, 128);
|
||||
mips_lui (p, 13, 14, 128);
|
||||
mips_lw (p, 13, 14, 128);
|
||||
mips_lwl (p, 13, 14, 128);
|
||||
mips_lwr (p, 13, 14, 128);
|
||||
mips_lwu (p, 13, 14, 128);
|
||||
mips_sb (p, 13, 14, 128);
|
||||
mips_sc (p, 13, 14, 128);
|
||||
mips_scd (p, 13, 14, 128);
|
||||
mips_sd (p, 13, 14, 128);
|
||||
mips_sdl (p, 13, 14, 128);
|
||||
mips_sdr (p, 13, 14, 128);
|
||||
mips_sh (p, 13, 14, 128);
|
||||
mips_sw (p, 13, 14, 128);
|
||||
mips_swl (p, 13, 14, 128);
|
||||
mips_swr (p, 13, 14, 128);
|
||||
|
||||
mips_move (p, 15, 16);
|
||||
mips_nop (p);
|
||||
mips_break (p, 0);
|
||||
mips_sync (p, 0);
|
||||
mips_mfhi (p, 17);
|
||||
mips_mflo (p, 17);
|
||||
mips_mthi (p, 17);
|
||||
mips_mtlo (p, 17);
|
||||
|
||||
mips_fabsd (p, 16, 18);
|
||||
mips_fnegd (p, 16, 18);
|
||||
mips_fsqrtd (p, 16, 18);
|
||||
mips_faddd (p, 16, 18, 20);
|
||||
mips_fdivd (p, 16, 18, 20);
|
||||
mips_fmuld (p, 16, 18, 20);
|
||||
mips_fsubd (p, 16, 18, 20);
|
||||
|
||||
mips_fcmpd (p, MIPS_FPU_EQ, 18, 20);
|
||||
mips_fbfalse (p, 0xff1f);
|
||||
mips_fbfalsel (p, 0xff1f);
|
||||
mips_fbtrue (p, 0xff1f);
|
||||
mips_fbtruel (p, 0xff1f);
|
||||
|
||||
mips_ceilwd (p, 20, 22);
|
||||
mips_ceilld (p, 20, 22);
|
||||
mips_floorwd (p, 20, 22);
|
||||
mips_floorld (p, 20, 22);
|
||||
mips_roundwd (p, 20, 22);
|
||||
mips_roundld (p, 20, 22);
|
||||
mips_truncwd (p, 20, 22);
|
||||
mips_truncld (p, 20, 22);
|
||||
mips_cvtdw (p, 20, 22);
|
||||
mips_cvtds (p, 20, 22);
|
||||
mips_cvtdl (p, 20, 22);
|
||||
mips_cvtld (p, 20, 22);
|
||||
mips_cvtsd (p, 20, 22);
|
||||
mips_cvtwd (p, 20, 22);
|
||||
|
||||
mips_fmovd (p, 20, 22);
|
||||
printf ("size: %d\n", p - code);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.libs
|
||||
/.deps
|
||||
/*.la
|
||||
/*.lo
|
||||
/test
|
@ -0,0 +1 @@
|
||||
EXTRA_DIST = ppc-codegen.h
|
@ -0,0 +1,953 @@
|
||||
/*
|
||||
Authors:
|
||||
Radek Doulik
|
||||
Christopher Taylor <ct_AT_clemson_DOT_edu>
|
||||
Andreas Faerber <andreas.faerber@web.de>
|
||||
|
||||
Copyright (C) 2001 Radek Doulik
|
||||
Copyright (C) 2007-2008 Andreas Faerber
|
||||
|
||||
for testing do the following: ./test | as -o test.o
|
||||
*/
|
||||
|
||||
#ifndef __MONO_PPC_CODEGEN_H__
|
||||
#define __MONO_PPC_CODEGEN_H__
|
||||
#include <glib.h>
|
||||
#include <assert.h>
|
||||
|
||||
typedef enum {
|
||||
ppc_r0 = 0,
|
||||
ppc_r1,
|
||||
ppc_sp = ppc_r1,
|
||||
ppc_r2,
|
||||
ppc_r3,
|
||||
ppc_r4,
|
||||
ppc_r5,
|
||||
ppc_r6,
|
||||
ppc_r7,
|
||||
ppc_r8,
|
||||
ppc_r9,
|
||||
ppc_r10,
|
||||
ppc_r11,
|
||||
ppc_r12,
|
||||
ppc_r13,
|
||||
ppc_r14,
|
||||
ppc_r15,
|
||||
ppc_r16,
|
||||
ppc_r17,
|
||||
ppc_r18,
|
||||
ppc_r19,
|
||||
ppc_r20,
|
||||
ppc_r21,
|
||||
ppc_r22,
|
||||
ppc_r23,
|
||||
ppc_r24,
|
||||
ppc_r25,
|
||||
ppc_r26,
|
||||
ppc_r27,
|
||||
ppc_r28,
|
||||
ppc_r29,
|
||||
ppc_r30,
|
||||
ppc_r31
|
||||
} PPCIntRegister;
|
||||
|
||||
typedef enum {
|
||||
ppc_f0 = 0,
|
||||
ppc_f1,
|
||||
ppc_f2,
|
||||
ppc_f3,
|
||||
ppc_f4,
|
||||
ppc_f5,
|
||||
ppc_f6,
|
||||
ppc_f7,
|
||||
ppc_f8,
|
||||
ppc_f9,
|
||||
ppc_f10,
|
||||
ppc_f11,
|
||||
ppc_f12,
|
||||
ppc_f13,
|
||||
ppc_f14,
|
||||
ppc_f15,
|
||||
ppc_f16,
|
||||
ppc_f17,
|
||||
ppc_f18,
|
||||
ppc_f19,
|
||||
ppc_f20,
|
||||
ppc_f21,
|
||||
ppc_f22,
|
||||
ppc_f23,
|
||||
ppc_f24,
|
||||
ppc_f25,
|
||||
ppc_f26,
|
||||
ppc_f27,
|
||||
ppc_f28,
|
||||
ppc_f29,
|
||||
ppc_f30,
|
||||
ppc_f31
|
||||
} PPCFloatRegister;
|
||||
|
||||
typedef enum {
|
||||
ppc_lr = 256,
|
||||
ppc_ctr = 256 + 32,
|
||||
ppc_xer = 32
|
||||
} PPCSpecialRegister;
|
||||
|
||||
enum {
|
||||
/* B0 operand for branches */
|
||||
PPC_BR_DEC_CTR_NONZERO_FALSE = 0,
|
||||
PPC_BR_LIKELY = 1, /* can be or'ed with the conditional variants */
|
||||
PPC_BR_DEC_CTR_ZERO_FALSE = 2,
|
||||
PPC_BR_FALSE = 4,
|
||||
PPC_BR_DEC_CTR_NONZERO_TRUE = 8,
|
||||
PPC_BR_DEC_CTR_ZERO_TRUE = 10,
|
||||
PPC_BR_TRUE = 12,
|
||||
PPC_BR_DEC_CTR_NONZERO = 16,
|
||||
PPC_BR_DEC_CTR_ZERO = 18,
|
||||
PPC_BR_ALWAYS = 20,
|
||||
/* B1 operand for branches */
|
||||
PPC_BR_LT = 0,
|
||||
PPC_BR_GT = 1,
|
||||
PPC_BR_EQ = 2,
|
||||
PPC_BR_SO = 3
|
||||
};
|
||||
|
||||
enum {
|
||||
PPC_TRAP_LT = 1,
|
||||
PPC_TRAP_GT = 2,
|
||||
PPC_TRAP_EQ = 4,
|
||||
PPC_TRAP_LT_UN = 8,
|
||||
PPC_TRAP_GT_UN = 16,
|
||||
PPC_TRAP_LE = 1 + PPC_TRAP_EQ,
|
||||
PPC_TRAP_GE = 2 + PPC_TRAP_EQ,
|
||||
PPC_TRAP_LE_UN = 8 + PPC_TRAP_EQ,
|
||||
PPC_TRAP_GE_UN = 16 + PPC_TRAP_EQ
|
||||
};
|
||||
|
||||
#define ppc_emit32(c,x) do { *((guint32 *) (c)) = GUINT32_TO_BE (x); (c) = (gpointer)((guint8 *)(c) + sizeof (guint32));} while (0)
|
||||
|
||||
#define ppc_is_imm16(val) ((((val)>> 15) == 0) || (((val)>> 15) == -1))
|
||||
#define ppc_is_uimm16(val) ((glong)(val) >= 0L && (glong)(val) <= 65535L)
|
||||
/* High-adjusted upper 16 bits of val: the upper halfword, plus one when bit 15
 * of val is set, truncated to 16 bits. The argument is parenthesized so that
 * expressions such as (a | b) expand correctly. */
#define ppc_ha(val) ((((val) >> 16) + (((val) & 0x8000) ? 1 : 0)) & 0xffff)
|
||||
|
||||
#define ppc_load32(c,D,v) G_STMT_START { \
|
||||
ppc_lis ((c), (D), (guint32)(v) >> 16); \
|
||||
ppc_ori ((c), (D), (D), (guint32)(v) & 0xffff); \
|
||||
} G_STMT_END
|
||||
|
||||
/* Macros to load/store pointer sized quantities */
|
||||
|
||||
#if defined(__mono_ppc64__) && !defined(__mono_ilp32__)
|
||||
|
||||
#define ppc_ldptr(c,D,d,A) ppc_ld ((c), (D), (d), (A))
|
||||
#define ppc_ldptr_update(c,D,d,A) ppc_ldu ((c), (D), (d), (A))
|
||||
#define ppc_ldptr_indexed(c,D,A,B) ppc_ldx ((c), (D), (A), (B))
|
||||
#define ppc_ldptr_update_indexed(c,D,A,B) ppc_ldux ((c), (D), (A), (B))
|
||||
|
||||
#define ppc_stptr(c,S,d,A) ppc_std ((c), (S), (d), (A))
|
||||
#define ppc_stptr_update(c,S,d,A) ppc_stdu ((c), (S), (d), (A))
|
||||
#define ppc_stptr_indexed(c,S,A,B) ppc_stdx ((c), (S), (A), (B))
|
||||
#define ppc_stptr_update_indexed(c,S,A,B) ppc_stdux ((c), (S), (A), (B))
|
||||
|
||||
#else
|
||||
|
||||
/* Same as ppc32 */
|
||||
#define ppc_ldptr(c,D,d,A) ppc_lwz ((c), (D), (d), (A))
|
||||
#define ppc_ldptr_update(c,D,d,A) ppc_lwzu ((c), (D), (d), (A))
|
||||
#define ppc_ldptr_indexed(c,D,A,B) ppc_lwzx ((c), (D), (A), (B))
|
||||
#define ppc_ldptr_update_indexed(c,D,A,B) ppc_lwzux ((c), (D), (A), (B))
|
||||
|
||||
#define ppc_stptr(c,S,d,A) ppc_stw ((c), (S), (d), (A))
|
||||
#define ppc_stptr_update(c,S,d,A) ppc_stwu ((c), (S), (d), (A))
|
||||
#define ppc_stptr_indexed(c,S,A,B) ppc_stwx ((c), (S), (A), (B))
|
||||
#define ppc_stptr_update_indexed(c,S,A,B) ppc_stwux ((c), (S), (A), (B))
|
||||
|
||||
#endif
|
||||
|
||||
/* Macros to load pointer sized immediates */
|
||||
#define ppc_load_ptr(c,D,v) ppc_load ((c),(D),(gsize)(v))
|
||||
#define ppc_load_ptr_sequence(c,D,v) ppc_load_sequence ((c),(D),(gsize)(v))
|
||||
|
||||
/* Macros to load/store regsize quantities */
|
||||
|
||||
#ifdef __mono_ppc64__
|
||||
#define ppc_ldr(c,D,d,A) ppc_ld ((c), (D), (d), (A))
|
||||
#define ppc_ldr_indexed(c,D,A,B) ppc_ldx ((c), (D), (A), (B))
|
||||
#define ppc_str(c,S,d,A) ppc_std ((c), (S), (d), (A))
|
||||
#define ppc_str_update(c,S,d,A) ppc_stdu ((c), (S), (d), (A))
|
||||
#define ppc_str_indexed(c,S,A,B) ppc_stdx ((c), (S), (A), (B))
|
||||
#define ppc_str_update_indexed(c,S,A,B) ppc_stdux ((c), (S), (A), (B))
|
||||
#else
|
||||
#define ppc_ldr(c,D,d,A) ppc_lwz ((c), (D), (d), (A))
|
||||
#define ppc_ldr_indexed(c,D,A,B) ppc_lwzx ((c), (D), (A), (B))
|
||||
#define ppc_str(c,S,d,A) ppc_stw ((c), (S), (d), (A))
|
||||
#define ppc_str_update(c,S,d,A) ppc_stwu ((c), (S), (d), (A))
|
||||
#define ppc_str_indexed(c,S,A,B) ppc_stwx ((c), (S), (A), (B))
|
||||
#define ppc_str_update_indexed(c,S,A,B) ppc_stwux ((c), (S), (A), (B))
|
||||
#endif
|
||||
|
||||
#define ppc_str_multiple(c,S,d,A) ppc_store_multiple_regs((c),(S),(d),(A))
|
||||
#define ppc_ldr_multiple(c,D,d,A) ppc_load_multiple_regs((c),(D),(d),(A))
|
||||
|
||||
/* PPC32 macros */
|
||||
|
||||
#ifndef __mono_ppc64__
|
||||
|
||||
#define ppc_load_sequence(c,D,v) ppc_load32 ((c), (D), (guint32)(v))
|
||||
|
||||
#define PPC_LOAD_SEQUENCE_LENGTH 8
|
||||
|
||||
#define ppc_load(c,D,v) G_STMT_START { \
|
||||
if (ppc_is_imm16 ((guint32)(v))) { \
|
||||
ppc_li ((c), (D), (guint16)(guint32)(v)); \
|
||||
} else { \
|
||||
ppc_load32 ((c), (D), (guint32)(v)); \
|
||||
} \
|
||||
} G_STMT_END
|
||||
|
||||
#define ppc_load_func(c,D,V) ppc_load_sequence ((c), (D), (V))
|
||||
|
||||
#define ppc_load_multiple_regs(c,D,d,A) ppc_lmw ((c), (D), (d), (A))
|
||||
|
||||
#define ppc_store_multiple_regs(c,S,d,A) ppc_stmw ((c), (S), (d), (A))
|
||||
|
||||
#define ppc_compare(c,cfrD,A,B) ppc_cmp((c), (cfrD), 0, (A), (B))
|
||||
#define ppc_compare_reg_imm(c,cfrD,A,B) ppc_cmpi((c), (cfrD), 0, (A), (B))
|
||||
#define ppc_compare_log(c,cfrD,A,B) ppc_cmpl((c), (cfrD), 0, (A), (B))
|
||||
|
||||
#define ppc_shift_left(c,A,S,B) ppc_slw((c), (S), (A), (B))
|
||||
#define ppc_shift_left_imm(c,A,S,n) ppc_slwi((c), (A), (S), (n))
|
||||
|
||||
#define ppc_shift_right_imm(c,A,S,B) ppc_srwi((c), (A), (S), (B))
|
||||
#define ppc_shift_right_arith_imm(c,A,S,B) ppc_srawi((c), (A), (S), (B))
|
||||
|
||||
#define ppc_multiply(c,D,A,B) ppc_mullw((c), (D), (A), (B))
|
||||
|
||||
#define ppc_clear_right_imm(c,A,S,n) ppc_clrrwi((c), (A), (S), (n))
|
||||
|
||||
#endif
|
||||
|
||||
#define ppc_opcode(c) ((c) >> 26)
|
||||
#define ppc_split_5_1_1(x) (((x) >> 5) & 0x1)
|
||||
#define ppc_split_5_1_5(x) ((x) & 0x1F)
|
||||
#define ppc_split_5_1(x) ((ppc_split_5_1_5(x) << 1) | ppc_split_5_1_1(x))
|
||||
|
||||
#define ppc_break(c) ppc_tw((c),31,0,0)
|
||||
#define ppc_addi(c,D,A,i) ppc_emit32 (c, (14 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
|
||||
#define ppc_addis(c,D,A,i) ppc_emit32 (c, (15 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
|
||||
#define ppc_li(c,D,v) ppc_addi (c, D, 0, (guint16)(v))
|
||||
#define ppc_lis(c,D,v) ppc_addis (c, D, 0, (guint16)(v))
|
||||
#define ppc_lwz(c,D,d,A) ppc_emit32 (c, (32 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_lhz(c,D,d,A) ppc_emit32 (c, (40 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_lbz(c,D,d,A) ppc_emit32 (c, (34 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_stw(c,S,d,A) ppc_emit32 (c, (36 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_sth(c,S,d,A) ppc_emit32 (c, (44 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_stb(c,S,d,A) ppc_emit32 (c, (38 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_stwu(c,s,d,A) ppc_emit32 (c, (37 << 26) | ((s) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_or(c,a,s,b) ppc_emit32 (c, (31 << 26) | ((s) << 21) | ((a) << 16) | ((b) << 11) | 888)
|
||||
#define ppc_mr(c,a,s) ppc_or (c, a, s, s)
|
||||
#define ppc_ori(c,S,A,ui) ppc_emit32 (c, (24 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(ui))
|
||||
#define ppc_nop(c) ppc_ori (c, 0, 0, 0)
|
||||
#define ppc_mfspr(c,D,spr) ppc_emit32 (c, (31 << 26) | ((D) << 21) | ((spr) << 11) | (339 << 1))
|
||||
#define ppc_mflr(c,D) ppc_mfspr (c, D, ppc_lr)
|
||||
#define ppc_mtspr(c,spr,S) ppc_emit32 (c, (31 << 26) | ((S) << 21) | ((spr) << 11) | (467 << 1))
|
||||
#define ppc_mtlr(c,S) ppc_mtspr (c, ppc_lr, S)
|
||||
#define ppc_mtctr(c,S) ppc_mtspr (c, ppc_ctr, S)
|
||||
#define ppc_mtxer(c,S) ppc_mtspr (c, ppc_xer, S)
|
||||
|
||||
#define ppc_b(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2))
|
||||
#define ppc_bl(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 1)
|
||||
#define ppc_ba(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 2)
|
||||
#define ppc_bla(c,li) ppc_emit32 (c, (18 << 26) | ((li) << 2) | 3)
|
||||
#define ppc_blrl(c) ppc_emit32 (c, 0x4e800021)
|
||||
#define ppc_blr(c) ppc_emit32 (c, 0x4e800020)
|
||||
|
||||
#define ppc_lfs(c,D,d,A) ppc_emit32 (c, (48 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_lfd(c,D,d,A) ppc_emit32 (c, (50 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
|
||||
#define ppc_stfs(c,S,d,a) ppc_emit32 (c, (52 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
|
||||
#define ppc_stfd(c,S,d,a) ppc_emit32 (c, (54 << 26) | ((S) << 21) | ((a) << 16) | (guint16)(d))
|
||||
|
||||
/***********************************************************************
|
||||
The macros below were tapped out by Christopher Taylor <ct_AT_clemson_DOT_edu>
|
||||
from 18 November 2002 to 19 December 2002.
|
||||
|
||||
Special thanks to rodo, lupus, dietmar, miguel, and duncan for patience,
|
||||
and motivation.
|
||||
|
||||
The macros found in this file are based on the assembler instructions found
|
||||
in Motorola and Digital DNA's:
|
||||
|
||||
"Programming Environments Manual For 32-bit Implementations of the PowerPC Architecture"
|
||||
|
||||
MPCFPE32B/AD
|
||||
12/2001
|
||||
REV2
|
||||
|
||||
see pages 326 - 524 for detailed information regarding each instruction
|
||||
|
||||
Also see the "Ximian Copyright Agreement, 2002" for more information regarding
|
||||
my and Ximian's copyright to this code. ;)
|
||||
*************************************************************************/
|
||||
|
||||
/* add[o][.]: extended opcode 266 under primary opcode 31. OE and Rc are
 * parenthesized so that expression arguments do not regroup the encoding. */
#define ppc_addx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (266 << 1) | (Rc))
|
||||
#define ppc_add(c,D,A,B) ppc_addx(c,D,A,B,0,0)
|
||||
#define ppc_addd(c,D,A,B) ppc_addx(c,D,A,B,0,1)
|
||||
#define ppc_addo(c,D,A,B) ppc_addx(c,D,A,B,1,0)
|
||||
#define ppc_addod(c,D,A,B) ppc_addx(c,D,A,B,1,1)
|
||||
|
||||
/* addc[o][.]: extended opcode 10 under primary opcode 31. OE and Rc are
 * parenthesized so that expression arguments do not regroup the encoding. */
#define ppc_addcx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (10 << 1) | (Rc))
|
||||
#define ppc_addc(c,D,A,B) ppc_addcx(c,D,A,B,0,0)
|
||||
#define ppc_addcd(c,D,A,B) ppc_addcx(c,D,A,B,0,1)
|
||||
#define ppc_addco(c,D,A,B) ppc_addcx(c,D,A,B,1,0)
|
||||
#define ppc_addcod(c,D,A,B) ppc_addcx(c,D,A,B,1,1)
|
||||
|
||||
/* adde[o][.]: extended opcode 138 under primary opcode 31. OE and Rc are
 * parenthesized so that expression arguments do not regroup the encoding. */
#define ppc_addex(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (138 << 1) | (Rc))
|
||||
#define ppc_adde(c,D,A,B) ppc_addex(c,D,A,B,0,0)
|
||||
#define ppc_added(c,D,A,B) ppc_addex(c,D,A,B,0,1)
|
||||
#define ppc_addeo(c,D,A,B) ppc_addex(c,D,A,B,1,0)
|
||||
#define ppc_addeod(c,D,A,B) ppc_addex(c,D,A,B,1,1)
|
||||
|
||||
#define ppc_addic(c,D,A,i) ppc_emit32(c, (12 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
|
||||
#define ppc_addicd(c,D,A,i) ppc_emit32(c, (13 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(i))
|
||||
|
||||
/* addme[o][.]: extended opcode 234 under primary opcode 31; the B field is
 * zero. RC is parenthesized so an expression argument cannot swallow the
 * rest of the encoding. */
#define ppc_addmex(c,D,A,OE,RC) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (234 << 1) | (RC))
|
||||
#define ppc_addme(c,D,A) ppc_addmex(c,D,A,0,0)
|
||||
#define ppc_addmed(c,D,A) ppc_addmex(c,D,A,0,1)
|
||||
#define ppc_addmeo(c,D,A) ppc_addmex(c,D,A,1,0)
|
||||
#define ppc_addmeod(c,D,A) ppc_addmex(c,D,A,1,1)
|
||||
|
||||
/* addze[o][.]: extended opcode 202 under primary opcode 31; the B field is
 * zero. RC is parenthesized so an expression argument cannot swallow the
 * rest of the encoding. */
#define ppc_addzex(c,D,A,OE,RC) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (202 << 1) | (RC))
|
||||
#define ppc_addze(c,D,A) ppc_addzex(c,D,A,0,0)
|
||||
#define ppc_addzed(c,D,A) ppc_addzex(c,D,A,0,1)
|
||||
#define ppc_addzeo(c,D,A) ppc_addzex(c,D,A,1,0)
|
||||
#define ppc_addzeod(c,D,A) ppc_addzex(c,D,A,1,1)
|
||||
|
||||
/* and[.]: extended opcode 28 under primary opcode 31. RC is parenthesized so
 * an expression argument cannot swallow the rest of the encoding. */
#define ppc_andx(c,S,A,B,RC) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (28 << 1) | (RC))
|
||||
#define ppc_and(c,S,A,B) ppc_andx(c,S,A,B,0)
|
||||
#define ppc_andd(c,S,A,B) ppc_andx(c,S,A,B,1)
|
||||
|
||||
/* andc[.]: extended opcode 60 under primary opcode 31. RC is parenthesized so
 * an expression argument cannot swallow the rest of the encoding. */
#define ppc_andcx(c,S,A,B,RC) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (60 << 1) | (RC))
|
||||
#define ppc_andc(c,S,A,B) ppc_andcx(c,S,A,B,0)
|
||||
#define ppc_andcd(c,S,A,B) ppc_andcx(c,S,A,B,1)
|
||||
|
||||
#define ppc_andid(c,S,A,ui) ppc_emit32(c, (28 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
|
||||
#define ppc_andisd(c,S,A,ui) ppc_emit32(c, (29 << 26) | ((S) << 21 ) | ((A) << 16) | ((guint16)(ui)))
|
||||
|
||||
/* Conditional branch, primary opcode 16. BO selects the condition kind, BI
 * the condition bit, BD the word displacement, AA absolute addressing and LK
 * link. Every argument is parenthesized so that or'ed BO values (for example
 * PPC_BR_TRUE | PPC_BR_LIKELY) are shifted as a whole. BD is not masked:
 * callers must pass a value that fits the displacement field. */
#define ppc_bcx(c,BO,BI,BD,AA,LK) ppc_emit32(c, (16 << 26) | ((BO) << 21) | ((BI) << 16) | ((BD) << 2) | ((AA) << 1) | (LK))
|
||||
#define ppc_bc(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,0,0)
|
||||
#define ppc_bca(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,1,0)
|
||||
#define ppc_bcl(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,0,1)
|
||||
#define ppc_bcla(c,BO,BI,BD) ppc_bcx(c,BO,BI,BD,1,1)
|
||||
|
||||
/* bcctr[l]: extended opcode 528 under primary opcode 19. BO, BI and LK are
 * parenthesized so that or'ed BO values are shifted as a whole. */
#define ppc_bcctrx(c,BO,BI,LK) ppc_emit32(c, (19 << 26) | ((BO) << 21) | ((BI) << 16) | (0 << 11) | (528 << 1) | (LK))
|
||||
#define ppc_bcctr(c,BO,BI) ppc_bcctrx(c,BO,BI,0)
|
||||
#define ppc_bcctrl(c,BO,BI) ppc_bcctrx(c,BO,BI,1)
|
||||
|
||||
#define ppc_bnectrp(c,BO,BI) ppc_bcctr(c,BO,BI)
|
||||
/* Link-setting counterpart of ppc_bnectrp: expands to ppc_bcctrl (LK = 1).
 * It previously expanded to ppc_bcctr, identical to the non-link variant.
 * NOTE(review): assumes the "l" in the name means "set link" - confirm. */
#define ppc_bnectrlp(c,BO,BI) ppc_bcctrl(c,BO,BI)
|
||||
|
||||
#define ppc_bclrx(c,BO,BI,BH,LK) ppc_emit32(c, (19 << 26) | ((BO) << 21 )| ((BI) << 16) | (0 << 13) | ((BH) << 11) | (16 << 1) | (LK))
|
||||
#define ppc_bclr(c,BO,BI,BH) ppc_bclrx(c,BO,BI,BH,0)
|
||||
#define ppc_bclrl(c,BO,BI,BH) ppc_bclrx(c,BO,BI,BH,1)
|
||||
|
||||
#define ppc_bnelrp(c,BO,BI) ppc_bclr(c,BO,BI,0)
|
||||
/* Link-setting counterpart of ppc_bnelrp: expands to ppc_bclrl (LK = 1) with
 * BH = 0. It previously expanded to ppc_bclr, identical to the non-link variant.
 * NOTE(review): assumes the "l" in the name means "set link" - confirm. */
#define ppc_bnelrlp(c,BO,BI) ppc_bclrl(c,BO,BI,0)
|
||||
|
||||
#define ppc_cmp(c,cfrD,L,A,B) ppc_emit32(c, (31 << 26) | ((cfrD) << 23) | (0 << 22) | ((L) << 21) | ((A) << 16) | ((B) << 11) | (0 << 1) | 0)
|
||||
/* cmpi: primary opcode 11; B is the 16-bit immediate. Arguments are
 * parenthesized to match ppc_cmp / ppc_cmpl. */
#define ppc_cmpi(c,cfrD,L,A,B) ppc_emit32(c, (11 << 26) | ((cfrD) << 23) | (0 << 22) | ((L) << 21) | ((A) << 16) | (guint16)(B))
|
||||
#define ppc_cmpl(c,cfrD,L,A,B) ppc_emit32(c, (31 << 26) | ((cfrD) << 23) | (0 << 22) | ((L) << 21) | ((A) << 16) | ((B) << 11) | (32 << 1) | 0)
|
||||
#define ppc_cmpli(c,cfrD,L,A,B) ppc_emit32(c, (10 << 26) | (cfrD << 23) | (0 << 22) | (L << 21) | (A << 16) | (guint16)(B))
|
||||
#define ppc_cmpw(c,cfrD,A,B) ppc_cmp(c, (cfrD), 0, (A), (B))
|
||||
|
||||
/* cntlzw / cntlzw.: primary opcode 31, extended opcode 26.  Arguments
   are parenthesized so expression arguments keep their precedence. */
#define ppc_cntlzwx(c,S,A,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (0 << 11) | (26 << 1) | (Rc))
#define ppc_cntlzw(c,S,A) ppc_cntlzwx(c,S,A,0)
#define ppc_cntlzwd(c,S,A) ppc_cntlzwx(c,S,A,1)
|
||||
|
||||
/* Condition-register logical operations: primary opcode 19; the
   extended opcode selects the operation.  D, A and B are placed at
   bits 21, 16 and 11 and are parenthesized so expression arguments are
   shifted as a whole. */
#define ppc_crand(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (257 << 1) | 0)
#define ppc_crandc(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (129 << 1) | 0)
#define ppc_creqv(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (289 << 1) | 0)
#define ppc_crnand(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (225 << 1) | 0)
#define ppc_crnor(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (33 << 1) | 0)
#define ppc_cror(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (449 << 1) | 0)
#define ppc_crorc(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (417 << 1) | 0)
#define ppc_crxor(c,D,A,B) ppc_emit32(c, (19 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (193 << 1) | 0)
|
||||
|
||||
/* Data-cache block operations: primary opcode 31 with bits 21..25
   zero; the extended opcode selects the operation.  A and B are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_dcba(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (758 << 1) | 0)
#define ppc_dcbf(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (86 << 1) | 0)
#define ppc_dcbi(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (470 << 1) | 0)
#define ppc_dcbst(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (54 << 1) | 0)
#define ppc_dcbt(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (278 << 1) | 0)
#define ppc_dcbtst(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (246 << 1) | 0)
#define ppc_dcbz(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (1014 << 1) | 0)
|
||||
|
||||
/* divw (extended opcode 491) and divwu (extended opcode 459), primary
   opcode 31.  Suffix "o" passes OE = 1, suffix "d" passes Rc = 1.
   Arguments are parenthesized so expression arguments are safe. */
#define ppc_divwx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (491 << 1) | (Rc))
#define ppc_divw(c,D,A,B) ppc_divwx(c,D,A,B,0,0)
#define ppc_divwd(c,D,A,B) ppc_divwx(c,D,A,B,0,1)
#define ppc_divwo(c,D,A,B) ppc_divwx(c,D,A,B,1,0)
#define ppc_divwod(c,D,A,B) ppc_divwx(c,D,A,B,1,1)

#define ppc_divwux(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (459 << 1) | (Rc))
#define ppc_divwu(c,D,A,B) ppc_divwux(c,D,A,B,0,0)
#define ppc_divwud(c,D,A,B) ppc_divwux(c,D,A,B,0,1)
#define ppc_divwuo(c,D,A,B) ppc_divwux(c,D,A,B,1,0)
#define ppc_divwuod(c,D,A,B) ppc_divwux(c,D,A,B,1,1)
|
||||
|
||||
/* eciwx (extended opcode 310), ecowx (438) and eieio (854); all use
   primary opcode 31.  Register arguments are parenthesized. */
#define ppc_eciwx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (310 << 1) | 0)
#define ppc_ecowx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (438 << 1) | 0)
#define ppc_eieio(c) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | (0 << 11) | (854 << 1) | 0)
|
||||
|
||||
/* eqv (extended opcode 284), extsb (954) and extsh (922), primary
   opcode 31.  The first register argument A is encoded at bit 16 and
   S at bit 21; suffix "d" passes Rc = 1.  Arguments are parenthesized
   so expression arguments are shifted as a whole. */
#define ppc_eqvx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (284 << 1) | (Rc))
#define ppc_eqv(c,A,S,B) ppc_eqvx(c,A,S,B,0)
#define ppc_eqvd(c,A,S,B) ppc_eqvx(c,A,S,B,1)

#define ppc_extsbx(c,A,S,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (0 << 11) | (954 << 1) | (Rc))
#define ppc_extsb(c,A,S) ppc_extsbx(c,A,S,0)
#define ppc_extsbd(c,A,S) ppc_extsbx(c,A,S,1)

#define ppc_extshx(c,A,S,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (0 << 11) | (922 << 1) | (Rc))
#define ppc_extsh(c,A,S) ppc_extshx(c,A,S,0)
#define ppc_extshd(c,A,S) ppc_extshx(c,A,S,1)
|
||||
|
||||
/* fabs / fabs.: primary opcode 63, extended opcode 264.  Arguments are
   parenthesized so expression arguments keep their precedence. */
#define ppc_fabsx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (264 << 1) | (Rc))
#define ppc_fabs(c,D,B) ppc_fabsx(c,D,B,0)
#define ppc_fabsd(c,D,B) ppc_fabsx(c,D,B,1)
|
||||
|
||||
/* fadd (primary opcode 63) and fadds (primary opcode 59), both with
   extended opcode 21.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_faddx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (21 << 1) | (Rc))
#define ppc_fadd(c,D,A,B) ppc_faddx(c,D,A,B,0)
#define ppc_faddd(c,D,A,B) ppc_faddx(c,D,A,B,1)

#define ppc_faddsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (21 << 1) | (Rc))
#define ppc_fadds(c,D,A,B) ppc_faddsx(c,D,A,B,0)
#define ppc_faddsd(c,D,A,B) ppc_faddsx(c,D,A,B,1)
|
||||
|
||||
/* fcmpo (extended opcode 32) and fcmpu (extended opcode 0), primary
   opcode 63.  crfD selects the target CR field at bits 23..25.
   Arguments are parenthesized so expression arguments are safe. */
#define ppc_fcmpo(c,crfD,A,B) ppc_emit32(c, (63 << 26) | ((crfD) << 23) | (0 << 21) | ((A) << 16) | ((B) << 11) | (32 << 1) | 0)
#define ppc_fcmpu(c,crfD,A,B) ppc_emit32(c, (63 << 26) | ((crfD) << 23) | (0 << 21) | ((A) << 16) | ((B) << 11) | (0 << 1) | 0)
|
||||
|
||||
/* fctiw (extended opcode 14) and fctiwz (extended opcode 15), primary
   opcode 63.  Suffix "d" passes Rc = 1.  Arguments are parenthesized. */
#define ppc_fctiwx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (14 << 1) | (Rc))
#define ppc_fctiw(c,D,B) ppc_fctiwx(c,D,B,0)
#define ppc_fctiwd(c,D,B) ppc_fctiwx(c,D,B,1)

#define ppc_fctiwzx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (15 << 1) | (Rc))
#define ppc_fctiwz(c,D,B) ppc_fctiwzx(c,D,B,0)
#define ppc_fctiwzd(c,D,B) ppc_fctiwzx(c,D,B,1)
|
||||
|
||||
/* fdiv (primary opcode 63) and fdivs (primary opcode 59), both with
   extended opcode 18.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fdivx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (18 << 1) | (Rc))
#define ppc_fdiv(c,D,A,B) ppc_fdivx(c,D,A,B,0)
#define ppc_fdivd(c,D,A,B) ppc_fdivx(c,D,A,B,1)

#define ppc_fdivsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (18 << 1) | (Rc))
#define ppc_fdivs(c,D,A,B) ppc_fdivsx(c,D,A,B,0)
#define ppc_fdivsd(c,D,A,B) ppc_fdivsx(c,D,A,B,1)
|
||||
|
||||
/* fmadd (primary opcode 63) and fmadds (primary opcode 59), both with
   extended opcode 29.  B is encoded at bit 11 and C at bit 6.  Note
   that the argument order here is (D, A, B, C).  Suffix "d" passes
   Rc = 1.  Arguments are parenthesized so expression arguments are
   shifted as a whole. */
#define ppc_fmaddx(c,D,A,B,C,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (29 << 1) | (Rc))
#define ppc_fmadd(c,D,A,B,C) ppc_fmaddx(c,D,A,B,C,0)
#define ppc_fmaddd(c,D,A,B,C) ppc_fmaddx(c,D,A,B,C,1)

#define ppc_fmaddsx(c,D,A,B,C,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (29 << 1) | (Rc))
#define ppc_fmadds(c,D,A,B,C) ppc_fmaddsx(c,D,A,B,C,0)
#define ppc_fmaddsd(c,D,A,B,C) ppc_fmaddsx(c,D,A,B,C,1)
|
||||
|
||||
/* fmr / fmr.: primary opcode 63, extended opcode 72.  Arguments are
   parenthesized so expression arguments keep their precedence. */
#define ppc_fmrx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (72 << 1) | (Rc))
#define ppc_fmr(c,D,B) ppc_fmrx(c,D,B,0)
#define ppc_fmrd(c,D,B) ppc_fmrx(c,D,B,1)
|
||||
|
||||
/* fmsub (primary opcode 63) and fmsubs (primary opcode 59), both with
   extended opcode 28.  Argument order is (D, A, C, B): C is encoded at
   bit 6 and B at bit 11.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fmsubx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (28 << 1) | (Rc))
#define ppc_fmsub(c,D,A,C,B) ppc_fmsubx(c,D,A,C,B,0)
#define ppc_fmsubd(c,D,A,C,B) ppc_fmsubx(c,D,A,C,B,1)

#define ppc_fmsubsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (28 << 1) | (Rc))
#define ppc_fmsubs(c,D,A,C,B) ppc_fmsubsx(c,D,A,C,B,0)
#define ppc_fmsubsd(c,D,A,C,B) ppc_fmsubsx(c,D,A,C,B,1)
|
||||
|
||||
/* fmul (primary opcode 63) and fmuls (primary opcode 59), both with
   extended opcode 25.  The second source operand C is encoded at
   bit 6; bits 11..15 are zero.  Suffix "d" passes Rc = 1.  Arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_fmulx(c,D,A,C,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((C) << 6) | (25 << 1) | (Rc))
#define ppc_fmul(c,D,A,C) ppc_fmulx(c,D,A,C,0)
#define ppc_fmuld(c,D,A,C) ppc_fmulx(c,D,A,C,1)

#define ppc_fmulsx(c,D,A,C,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((C) << 6) | (25 << 1) | (Rc))
#define ppc_fmuls(c,D,A,C) ppc_fmulsx(c,D,A,C,0)
#define ppc_fmulsd(c,D,A,C) ppc_fmulsx(c,D,A,C,1)
|
||||
|
||||
/* fnabs (extended opcode 136) and fneg (extended opcode 40), primary
   opcode 63.  Suffix "d" passes Rc = 1.  Arguments are parenthesized. */
#define ppc_fnabsx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (136 << 1) | (Rc))
#define ppc_fnabs(c,D,B) ppc_fnabsx(c,D,B,0)
#define ppc_fnabsd(c,D,B) ppc_fnabsx(c,D,B,1)

#define ppc_fnegx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (40 << 1) | (Rc))
#define ppc_fneg(c,D,B) ppc_fnegx(c,D,B,0)
#define ppc_fnegd(c,D,B) ppc_fnegx(c,D,B,1)
|
||||
|
||||
/* fnmadd (primary opcode 63) and fnmadds (primary opcode 59), both with
   extended opcode 31.  Argument order is (D, A, C, B): C is encoded at
   bit 6 and B at bit 11.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fnmaddx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (31 << 1) | (Rc))
#define ppc_fnmadd(c,D,A,C,B) ppc_fnmaddx(c,D,A,C,B,0)
#define ppc_fnmaddd(c,D,A,C,B) ppc_fnmaddx(c,D,A,C,B,1)

#define ppc_fnmaddsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (31 << 1) | (Rc))
#define ppc_fnmadds(c,D,A,C,B) ppc_fnmaddsx(c,D,A,C,B,0)
#define ppc_fnmaddsd(c,D,A,C,B) ppc_fnmaddsx(c,D,A,C,B,1)
|
||||
|
||||
/* fnmsub (primary opcode 63) and fnmsubs (primary opcode 59), both with
   extended opcode 30.  Argument order is (D, A, C, B): C is encoded at
   bit 6 and B at bit 11.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fnmsubx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (30 << 1) | (Rc))
#define ppc_fnmsub(c,D,A,C,B) ppc_fnmsubx(c,D,A,C,B,0)
#define ppc_fnmsubd(c,D,A,C,B) ppc_fnmsubx(c,D,A,C,B,1)

#define ppc_fnmsubsx(c,D,A,C,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (30 << 1) | (Rc))
#define ppc_fnmsubs(c,D,A,C,B) ppc_fnmsubsx(c,D,A,C,B,0)
#define ppc_fnmsubsd(c,D,A,C,B) ppc_fnmsubsx(c,D,A,C,B,1)
|
||||
|
||||
/* fres (primary opcode 59, extended opcode 24), frsp (63, 12) and
   frsqrte (63, 26).  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fresx(c,D,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (0 << 6) | (24 << 1) | (Rc))
#define ppc_fres(c,D,B) ppc_fresx(c,D,B,0)
#define ppc_fresd(c,D,B) ppc_fresx(c,D,B,1)

#define ppc_frspx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (12 << 1) | (Rc))
#define ppc_frsp(c,D,B) ppc_frspx(c,D,B,0)
#define ppc_frspd(c,D,B) ppc_frspx(c,D,B,1)

#define ppc_frsqrtex(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (0 << 6) | (26 << 1) | (Rc))
#define ppc_frsqrte(c,D,B) ppc_frsqrtex(c,D,B,0)
#define ppc_frsqrted(c,D,B) ppc_frsqrtex(c,D,B,1)
|
||||
|
||||
/* fsel / fsel.: primary opcode 63, extended opcode 23.  Argument order
   is (D, A, C, B): C is encoded at bit 6 and B at bit 11.  Arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_fselx(c,D,A,C,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((C) << 6) | (23 << 1) | (Rc))
#define ppc_fsel(c,D,A,C,B) ppc_fselx(c,D,A,C,B,0)
#define ppc_fseld(c,D,A,C,B) ppc_fselx(c,D,A,C,B,1)
|
||||
|
||||
/* fsqrt (primary opcode 63) and fsqrts (primary opcode 59), both with
   extended opcode 22.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fsqrtx(c,D,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (0 << 6) | (22 << 1) | (Rc))
#define ppc_fsqrt(c,D,B) ppc_fsqrtx(c,D,B,0)
#define ppc_fsqrtd(c,D,B) ppc_fsqrtx(c,D,B,1)

#define ppc_fsqrtsx(c,D,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (0 << 6) | (22 << 1) | (Rc))
#define ppc_fsqrts(c,D,B) ppc_fsqrtsx(c,D,B,0)
#define ppc_fsqrtsd(c,D,B) ppc_fsqrtsx(c,D,B,1)
|
||||
|
||||
/* fsub (primary opcode 63) and fsubs (primary opcode 59), both with
   extended opcode 20.  Suffix "d" passes Rc = 1.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_fsubx(c,D,A,B,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (20 << 1) | (Rc))
#define ppc_fsub(c,D,A,B) ppc_fsubx(c,D,A,B,0)
#define ppc_fsubd(c,D,A,B) ppc_fsubx(c,D,A,B,1)

#define ppc_fsubsx(c,D,A,B,Rc) ppc_emit32(c, (59 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 6) | (20 << 1) | (Rc))
#define ppc_fsubs(c,D,A,B) ppc_fsubsx(c,D,A,B,0)
#define ppc_fsubsd(c,D,A,B) ppc_fsubsx(c,D,A,B,1)
|
||||
|
||||
/* icbi: primary opcode 31, extended opcode 982.  A and B are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_icbi(c,A,B) ppc_emit32(c, (31 << 26) | (0 << 21) | ((A) << 16) | ((B) << 11) | (982 << 1) | 0)

/* isync: primary opcode 19, extended opcode 150; takes no operands. */
#define ppc_isync(c) ppc_emit32(c, (19 << 26) | (0 << 11) | (150 << 1) | 0)
|
||||
|
||||
/* lbzu (primary opcode 35), lbzux (31/119) and lbzx (31/87).
   The displacement d is truncated to 16 bits.  It is written as
   (guint16)(d) because a bare (guint16)d cast would bind only to the
   first operand of an expression argument such as "off + 4". */
#define ppc_lbzu(c,D,d,A) ppc_emit32(c, (35 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lbzux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (119 << 1) | 0)
#define ppc_lbzx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (87 << 1) | 0)
|
||||
|
||||
/* Floating-point loads: lfdu (primary opcode 51), lfdux (31/631),
   lfdx (31/599), lfsu (49), lfsux (31/567), lfsx (31/535).
   The displacement d is truncated to 16 bits; it is written as
   (guint16)(d) so the cast applies to the whole argument expression. */
#define ppc_lfdu(c,D,d,A) ppc_emit32(c, (51 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lfdux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (631 << 1) | 0)
#define ppc_lfdx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (599 << 1) | 0)

#define ppc_lfsu(c,D,d,A) ppc_emit32(c, (49 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lfsux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (567 << 1) | 0)
#define ppc_lfsx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (535 << 1) | 0)
|
||||
|
||||
/* Halfword loads: lha (primary opcode 42), lhau (43), lhaux (31/375),
   lhax (31/343), lhbrx (31/790), lhzu (41), lhzux (31/311),
   lhzx (31/279).  The displacement d is truncated to 16 bits; it is
   written as (guint16)(d) so the cast applies to the whole argument
   expression, and register arguments are parenthesized. */
#define ppc_lha(c,D,d,A) ppc_emit32(c, (42 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lhau(c,D,d,A) ppc_emit32(c, (43 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lhaux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (375 << 1) | 0)
#define ppc_lhax(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (343 << 1) | 0)
#define ppc_lhbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (790 << 1) | 0)
#define ppc_lhzu(c,D,d,A) ppc_emit32(c, (41 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))

#define ppc_lhzux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (311 << 1) | 0)
#define ppc_lhzx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (279 << 1) | 0)
|
||||
|
||||
/* lmw (primary opcode 46), lswi (31/597), lswx (31/533),
   lwarx (31/20) and lwbrx (31/534).  For lswi the NB argument is
   encoded at bit 11.  The displacement d is written as (guint16)(d) so
   the cast applies to the whole argument expression, and the other
   arguments are parenthesized. */
#define ppc_lmw(c,D,d,A) ppc_emit32(c, (46 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))

#define ppc_lswi(c,D,A,NB) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((NB) << 11) | (597 << 1) | 0)
#define ppc_lswx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (533 << 1) | 0)
#define ppc_lwarx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (20 << 1) | 0)
#define ppc_lwbrx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (534 << 1) | 0)
|
||||
|
||||
/* lwzu (primary opcode 33), lwzux (31/55) and lwzx (31/23).
   The displacement d is written as (guint16)(d) so the cast applies to
   the whole argument expression; register arguments are parenthesized. */
#define ppc_lwzu(c,D,d,A) ppc_emit32(c, (33 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_lwzux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (55 << 1) | 0)
#define ppc_lwzx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (23 << 1) | 0)
|
||||
|
||||
/* mcrf (primary opcode 19, extended opcode 0), mcrfs (63/64) and
   mcrxr (31/512).  crfD is encoded at bits 23..25 and crfS at bits
   18..20.  Arguments are parenthesized so expression arguments are
   shifted as a whole. */
#define ppc_mcrf(c,crfD,crfS) ppc_emit32(c, (19 << 26) | ((crfD) << 23) | (0 << 21) | ((crfS) << 18) | 0)
#define ppc_mcrfs(c,crfD,crfS) ppc_emit32(c, (63 << 26) | ((crfD) << 23) | (0 << 21) | ((crfS) << 18) | (0 << 16) | (64 << 1) | 0)
#define ppc_mcrxr(c,crfD) ppc_emit32(c, (31 << 26) | ((crfD) << 23) | (0 << 16) | (512 << 1) | 0)
|
||||
|
||||
/* Move-from instructions: mfcr (31/19), mffs (63/583), mfmsr (31/83),
   mfsr (31/595), mfsrin (31/659) and mftb (31/371).
   ppc_mftb places TBR at bit 11 exactly as given; no field swapping is
   performed here.  Arguments are parenthesized so expression arguments
   are shifted as a whole. */
#define ppc_mfcr(c,D) ppc_emit32(c, (31 << 26) | ((D) << 21) | (0 << 16) | (19 << 1) | 0)
#define ppc_mffsx(c,D,Rc) ppc_emit32(c, (63 << 26) | ((D) << 21) | (0 << 16) | (583 << 1) | (Rc))
#define ppc_mffs(c,D) ppc_mffsx(c,D,0)
#define ppc_mffsd(c,D) ppc_mffsx(c,D,1)
#define ppc_mfmsr(c,D) ppc_emit32(c, (31 << 26) | ((D) << 21) | (0 << 16) | (83 << 1) | 0)
#define ppc_mfsr(c,D,SR) ppc_emit32(c, (31 << 26) | ((D) << 21) | (0 << 20) | ((SR) << 16) | (0 << 11) | (595 << 1) | 0)
#define ppc_mfsrin(c,D,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (659 << 1) | 0)
#define ppc_mftb(c,D,TBR) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((TBR) << 11) | (371 << 1) | 0)
|
||||
|
||||
/* mtcrf: primary opcode 31, extended opcode 144.  CRM is encoded at
   bits 12..19 and S at bits 21..25.  Arguments are parenthesized so
   expression arguments are shifted as a whole. */
#define ppc_mtcrf(c,CRM,S) ppc_emit32(c, (31 << 26) | ((S) << 21) | (0 << 20) | ((CRM) << 12) | (0 << 11) | (144 << 1) | 0)
|
||||
|
||||
/* mtfsb0 (extended opcode 70) and mtfsb1 (extended opcode 38), primary
   opcode 63.  CRB is encoded at bit 21; suffix "d" passes Rc = 1.
   Arguments are parenthesized so expression arguments are safe. */
#define ppc_mtfsb0x(c,CRB,Rc) ppc_emit32(c, (63 << 26) | ((CRB) << 21) | (0 << 11) | (70 << 1) | (Rc))
#define ppc_mtfsb0(c,CRB) ppc_mtfsb0x(c,CRB,0)
#define ppc_mtfsb0d(c,CRB) ppc_mtfsb0x(c,CRB,1)

#define ppc_mtfsb1x(c,CRB,Rc) ppc_emit32(c, (63 << 26) | ((CRB) << 21) | (0 << 11) | (38 << 1) | (Rc))
#define ppc_mtfsb1(c,CRB) ppc_mtfsb1x(c,CRB,0)
#define ppc_mtfsb1d(c,CRB) ppc_mtfsb1x(c,CRB,1)
|
||||
|
||||
/* mtfsf / mtfsf.: primary opcode 63, extended opcode 711.
   FM is an 8-bit field mask that sits directly above the reserved bit
   at position 16, i.e. it is shifted left by 17.  The previous shift
   of 22 let an 8-bit mask overflow into the primary opcode.  FM is
   masked to 8 bits so an oversized value cannot corrupt other fields. */
#define ppc_mtfsfx(c,FM,B,Rc) ppc_emit32(c, (63 << 26) | (0 << 25) | (((FM) & 0xff) << 17) | (0 << 16) | ((B) << 11) | (711 << 1) | (Rc))
#define ppc_mtfsf(c,FM,B) ppc_mtfsfx(c,FM,B,0)
#define ppc_mtfsfd(c,FM,B) ppc_mtfsfx(c,FM,B,1)
|
||||
|
||||
/* mtfsfi / mtfsfi.: primary opcode 63, extended opcode 134.  crfD is
   encoded at bits 23..25 and IMM at bits 12..15.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_mtfsfix(c,crfD,IMM,Rc) ppc_emit32(c, (63 << 26) | ((crfD) << 23) | (0 << 16) | ((IMM) << 12) | (0 << 11) | (134 << 1) | (Rc))
#define ppc_mtfsfi(c,crfD,IMM) ppc_mtfsfix(c,crfD,IMM,0)
#define ppc_mtfsfid(c,crfD,IMM) ppc_mtfsfix(c,crfD,IMM,1)
|
||||
|
||||
/* mtmsr (31/146), mtsr (31/210) and mtsrin (31/242).  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_mtmsr(c, S) ppc_emit32(c, (31 << 26) | ((S) << 21) | (0 << 11) | (146 << 1) | 0)

#define ppc_mtsr(c,SR,S) ppc_emit32(c, (31 << 26) | ((S) << 21) | (0 << 20) | ((SR) << 16) | (0 << 11) | (210 << 1) | 0)
#define ppc_mtsrin(c,S,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | (0 << 16) | ((B) << 11) | (242 << 1) | 0)
|
||||
|
||||
/* mulhw (extended opcode 75) and mulhwu (extended opcode 11), primary
   opcode 31.  Suffix "d" passes Rc = 1.  Arguments are parenthesized
   so expression arguments are shifted as a whole. */
#define ppc_mulhwx(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (75 << 1) | (Rc))
#define ppc_mulhw(c,D,A,B) ppc_mulhwx(c,D,A,B,0)
#define ppc_mulhwd(c,D,A,B) ppc_mulhwx(c,D,A,B,1)

#define ppc_mulhwux(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (11 << 1) | (Rc))
#define ppc_mulhwu(c,D,A,B) ppc_mulhwux(c,D,A,B,0)
#define ppc_mulhwud(c,D,A,B) ppc_mulhwux(c,D,A,B,1)
|
||||
|
||||
/* mulli: primary opcode 7 with a 16-bit immediate.  Register arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_mulli(c,D,A,SIMM) ppc_emit32(c, (7 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(SIMM))

/* mullw: primary opcode 31, extended opcode 235.  Suffix "o" passes
   OE = 1, suffix "d" passes Rc = 1. */
#define ppc_mullwx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (235 << 1) | (Rc))
#define ppc_mullw(c,D,A,B) ppc_mullwx(c,D,A,B,0,0)
#define ppc_mullwd(c,D,A,B) ppc_mullwx(c,D,A,B,0,1)
#define ppc_mullwo(c,D,A,B) ppc_mullwx(c,D,A,B,1,0)
#define ppc_mullwod(c,D,A,B) ppc_mullwx(c,D,A,B,1,1)
|
||||
|
||||
/* nand / nand.: primary opcode 31, extended opcode 476.  The first
   register argument A is encoded at bit 16 and S at bit 21.  Arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_nandx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (476 << 1) | (Rc))
#define ppc_nand(c,A,S,B) ppc_nandx(c,A,S,B,0)
#define ppc_nandd(c,A,S,B) ppc_nandx(c,A,S,B,1)
|
||||
|
||||
/* neg: primary opcode 31, extended opcode 104.  Suffix "o" passes
   OE = 1, suffix "d" passes Rc = 1.  Arguments are parenthesized so
   expression arguments are shifted as a whole. */
#define ppc_negx(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (104 << 1) | (Rc))
#define ppc_neg(c,D,A) ppc_negx(c,D,A,0,0)
#define ppc_negd(c,D,A) ppc_negx(c,D,A,0,1)
#define ppc_nego(c,D,A) ppc_negx(c,D,A,1,0)
#define ppc_negod(c,D,A) ppc_negx(c,D,A,1,1)
|
||||
|
||||
/* nor / nor.: primary opcode 31, extended opcode 124.  The first
   register argument A is encoded at bit 16 and S at bit 21.  Arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_norx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (124 << 1) | (Rc))
#define ppc_nor(c,A,S,B) ppc_norx(c,A,S,B,0)
#define ppc_nord(c,A,S,B) ppc_norx(c,A,S,B,1)

/* not: nor with S used for both source operands (S is expanded twice). */
#define ppc_not(c,A,S) ppc_norx(c,A,S,S,0)
|
||||
|
||||
/* or (extended opcode 444) and orc (extended opcode 412), primary
   opcode 31.  The first register argument A is encoded at bit 16 and S
   at bit 21; suffix "d" passes Rc = 1.  Arguments are parenthesized so
   expression arguments are shifted as a whole. */
#define ppc_orx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (444 << 1) | (Rc))
#define ppc_ord(c,A,S,B) ppc_orx(c,A,S,B,1)

#define ppc_orcx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (412 << 1) | (Rc))
#define ppc_orc(c,A,S,B) ppc_orcx(c,A,S,B,0)
#define ppc_orcd(c,A,S,B) ppc_orcx(c,A,S,B,1)
|
||||
|
||||
/* oris: primary opcode 25 with a 16-bit immediate.  Register arguments
   are parenthesized so expression arguments are shifted as a whole. */
#define ppc_oris(c,A,S,UIMM) ppc_emit32(c, (25 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(UIMM))

/* rfi: primary opcode 19, extended opcode 50; takes no operands. */
#define ppc_rfi(c) ppc_emit32(c, (19 << 26) | (0 << 11) | (50 << 1) | 0)
|
||||
|
||||
/* rlwimi / rlwimi.: primary opcode 20.  SH, MB and ME are encoded at
   bits 11, 6 and 1.  Arguments are parenthesized so expression
   arguments (e.g. "32 - n") are shifted as a whole. */
#define ppc_rlwimix(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (20 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | ((MB) << 6) | ((ME) << 1) | (Rc))
#define ppc_rlwimi(c,A,S,SH,MB,ME) ppc_rlwimix(c,A,S,SH,MB,ME,0)
#define ppc_rlwimid(c,A,S,SH,MB,ME) ppc_rlwimix(c,A,S,SH,MB,ME,1)
|
||||
|
||||
/* rlwinm / rlwinm.: primary opcode 21.  SH, MB and ME are 5-bit fields
   encoded at bits 11, 6 and 1. */
#define ppc_rlwinmx(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (21 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | ((MB) << 6) | ((ME) << 1) | (Rc))
#define ppc_rlwinm(c,A,S,SH,MB,ME) ppc_rlwinmx(c,A,S,SH,MB,ME,0)
#define ppc_rlwinmd(c,A,S,SH,MB,ME) ppc_rlwinmx(c,A,S,SH,MB,ME,1)

/* Extended mnemonics built on rlwinm.  Shift amounts computed as
   "32 - n" or "b + n" are reduced modulo 32: a computed value of 32
   (n == 0, or b + n == 32) would otherwise not fit the 5-bit SH field
   and would set the lowest bit of the A field. */
#define ppc_extlwi(c,A,S,n,b) ppc_rlwinm(c,A,S, (b), 0, (n) - 1)
#define ppc_extrwi(c,A,S,n,b) ppc_rlwinm(c,A,S, ((b) + (n)) & 31, 32 - (n), 31)
#define ppc_rotlwi(c,A,S,n) ppc_rlwinm(c,A,S, (n), 0, 31)
#define ppc_rotrwi(c,A,S,n) ppc_rlwinm(c,A,S, (32 - (n)) & 31, 0, 31)
#define ppc_slwi(c,A,S,n) ppc_rlwinm(c,A,S, (n), 0, 31 - (n))
#define ppc_srwi(c,A,S,n) ppc_rlwinm(c,A,S, (32 - (n)) & 31, (n), 31)
#define ppc_clrlwi(c,A,S,n) ppc_rlwinm(c,A,S, 0, (n), 31)
#define ppc_clrrwi(c,A,S,n) ppc_rlwinm(c,A,S, 0, 0, 31 - (n))
#define ppc_clrlslwi(c,A,S,b,n) ppc_rlwinm(c,A,S, (n), (b) - (n), 31 - (n))
|
||||
|
||||
/* rlwnm / rlwnm.: primary opcode 23.  The SH argument is encoded at
   bit 11, MB at bit 6 and ME at bit 1.  Arguments are parenthesized so
   expression arguments are shifted as a whole. */
#define ppc_rlwnmx(c,A,S,SH,MB,ME,Rc) ppc_emit32(c, (23 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | ((MB) << 6) | ((ME) << 1) | (Rc))
#define ppc_rlwnm(c,A,S,SH,MB,ME) ppc_rlwnmx(c,A,S,SH,MB,ME,0)
#define ppc_rlwnmd(c,A,S,SH,MB,ME) ppc_rlwnmx(c,A,S,SH,MB,ME,1)

/* sc: primary opcode 17 with bit 1 set; takes no operands. */
#define ppc_sc(c) ppc_emit32(c, (17 << 26) | (0 << 2) | (1 << 1) | 0)
|
||||
|
||||
/* Register shifts, primary opcode 31: slw (extended opcode 24),
   sraw (792), srawi (824) and srw (536).  Suffix "d" passes Rc = 1.
   Note that ppc_slwx takes (S, A, B) whereas the other families take
   (A, S, ...); in every case S is encoded at bit 21 and A at bit 16.
   Arguments are parenthesized so expression arguments are shifted as a
   whole. */
#define ppc_slwx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (24 << 1) | (Rc))
#define ppc_slw(c,S,A,B) ppc_slwx(c,S,A,B,0)
#define ppc_slwd(c,S,A,B) ppc_slwx(c,S,A,B,1)

#define ppc_srawx(c,A,S,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (792 << 1) | (Rc))
#define ppc_sraw(c,A,S,B) ppc_srawx(c,A,S,B,0)
#define ppc_srawd(c,A,S,B) ppc_srawx(c,A,S,B,1)

#define ppc_srawix(c,A,S,SH,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | (824 << 1) | (Rc))
#define ppc_srawi(c,A,S,B) ppc_srawix(c,A,S,B,0)
#define ppc_srawid(c,A,S,B) ppc_srawix(c,A,S,B,1)

#define ppc_srwx(c,A,S,SH,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((SH) << 11) | (536 << 1) | (Rc))
#define ppc_srw(c,A,S,B) ppc_srwx(c,A,S,B,0)
#define ppc_srwd(c,A,S,B) ppc_srwx(c,A,S,B,1)
|
||||
|
||||
/* Byte stores: stbu (primary opcode 39), stbux (31/247) and
   stbx (31/215).  Register arguments are parenthesized so expression
   arguments are shifted as a whole. */
#define ppc_stbu(c,S,d,A) ppc_emit32(c, (39 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))

#define ppc_stbux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (247 << 1) | 0)
#define ppc_stbx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (215 << 1) | 0)
|
||||
|
||||
/* Floating-point stores: stfdu (primary opcode 55), stfdx (31/727),
   stfiwx (31/983), stfsu (53), stfsux (31/695) and stfsx (31/663).
   Register arguments are parenthesized so expression arguments are
   shifted as a whole. */
#define ppc_stfdu(c,S,d,A) ppc_emit32(c, (55 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))

#define ppc_stfdx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (727 << 1) | 0)
#define ppc_stfiwx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (983 << 1) | 0)

#define ppc_stfsu(c,S,d,A) ppc_emit32(c, (53 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stfsux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (695 << 1) | 0)
#define ppc_stfsx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (663 << 1) | 0)
|
||||
/* Halfword, multiple, string and word stores.  D-form stores use
   primary opcodes 45 (sthu) and 47 (stmw); the remaining macros use
   primary opcode 31 with the extended opcode shown in each definition.
   ppc_stwcxd hard-codes the Rc bit to 1.  The displacement in ppc_stmw
   is written as (guint16)(d) so the cast applies to the whole argument
   expression, and register arguments are parenthesized. */
#define ppc_sthbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (918 << 1) | 0)
#define ppc_sthu(c,S,d,A) ppc_emit32(c, (45 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_sthux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (439 << 1) | 0)
#define ppc_sthx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (407 << 1) | 0)
#define ppc_stmw(c,S,d,A) ppc_emit32(c, (47 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(d))
#define ppc_stswi(c,S,A,NB) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((NB) << 11) | (725 << 1) | 0)
#define ppc_stswx(c,S,A,NB) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((NB) << 11) | (661 << 1) | 0)
#define ppc_stwbrx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (662 << 1) | 0)
#define ppc_stwcxd(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (150 << 1) | 1)
#define ppc_stwux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (183 << 1) | 0)
#define ppc_stwx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (151 << 1) | 0)
|
||||
|
||||
/* subf: primary opcode 31, extended opcode 40.  Suffix "o" passes
   OE = 1, suffix "d" passes Rc = 1.  Arguments are parenthesized so
   expression arguments are shifted as a whole. */
#define ppc_subfx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (40 << 1) | (Rc))
#define ppc_subf(c,D,A,B) ppc_subfx(c,D,A,B,0,0)
#define ppc_subfd(c,D,A,B) ppc_subfx(c,D,A,B,0,1)
#define ppc_subfo(c,D,A,B) ppc_subfx(c,D,A,B,1,0)
#define ppc_subfod(c,D,A,B) ppc_subfx(c,D,A,B,1,1)

/* sub: subf with the two source operands swapped. */
#define ppc_sub(c,D,A,B) ppc_subf(c,D,B,A)
|
||||
|
||||
/* subfc (extended opcode 8) and subfe (extended opcode 136), primary
   opcode 31.  Suffix "o" passes OE = 1, suffix "d" passes Rc = 1.
   Arguments are parenthesized so expression arguments are shifted as a
   whole. */
#define ppc_subfcx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (8 << 1) | (Rc))
#define ppc_subfc(c,D,A,B) ppc_subfcx(c,D,A,B,0,0)
#define ppc_subfcd(c,D,A,B) ppc_subfcx(c,D,A,B,0,1)
#define ppc_subfco(c,D,A,B) ppc_subfcx(c,D,A,B,1,0)
#define ppc_subfcod(c,D,A,B) ppc_subfcx(c,D,A,B,1,1)

#define ppc_subfex(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (136 << 1) | (Rc))
#define ppc_subfe(c,D,A,B) ppc_subfex(c,D,A,B,0,0)
#define ppc_subfed(c,D,A,B) ppc_subfex(c,D,A,B,0,1)
#define ppc_subfeo(c,D,A,B) ppc_subfex(c,D,A,B,1,0)
#define ppc_subfeod(c,D,A,B) ppc_subfex(c,D,A,B,1,1)
|
||||
|
||||
/* subfic: primary opcode 8 with a 16-bit immediate. */
#define ppc_subfic(c,D,A,SIMM) ppc_emit32(c, (8 << 26) | ((D) << 21) | ((A) << 16) | (guint16)(SIMM))

/* subfme (extended opcode 232) and subfze (extended opcode 200),
   primary opcode 31.  Suffix "o" passes OE = 1, suffix "d" passes
   Rc = 1.  Arguments are parenthesized so expression arguments are
   shifted as a whole. */
#define ppc_subfmex(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (232 << 1) | (Rc))
#define ppc_subfme(c,D,A) ppc_subfmex(c,D,A,0,0)
#define ppc_subfmed(c,D,A) ppc_subfmex(c,D,A,0,1)
#define ppc_subfmeo(c,D,A) ppc_subfmex(c,D,A,1,0)
#define ppc_subfmeod(c,D,A) ppc_subfmex(c,D,A,1,1)

#define ppc_subfzex(c,D,A,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | (0 << 11) | ((OE) << 10) | (200 << 1) | (Rc))
#define ppc_subfze(c,D,A) ppc_subfzex(c,D,A,0,0)
#define ppc_subfzed(c,D,A) ppc_subfzex(c,D,A,0,1)
#define ppc_subfzeo(c,D,A) ppc_subfzex(c,D,A,1,0)
#define ppc_subfzeod(c,D,A) ppc_subfzex(c,D,A,1,1)
|
||||
|
||||
/* sync (31/598), tlbia (31/370), tlbie (31/306) and tlbsync (31/566). */
#define ppc_sync(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (598 << 1) | 0)
#define ppc_tlbia(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (370 << 1) | 0)
#define ppc_tlbie(c,B) ppc_emit32(c, (31 << 26) | (0 << 16) | ((B) << 11) | (306 << 1) | 0)
#define ppc_tlbsync(c) ppc_emit32(c, (31 << 26) | (0 << 11) | (566 << 1) | 0)

/* tw (primary opcode 31, extended opcode 4) and twi (primary opcode 3
   with a 16-bit immediate).  TO is encoded at bit 21.  Arguments are
   parenthesized so expression arguments are shifted as a whole. */
#define ppc_tw(c,TO,A,B) ppc_emit32(c, (31 << 26) | ((TO) << 21) | ((A) << 16) | ((B) << 11) | (4 << 1) | 0)
#define ppc_twi(c,TO,A,SIMM) ppc_emit32(c, (3 << 26) | ((TO) << 21) | ((A) << 16) | (guint16)(SIMM))
|
||||
|
||||
/* xor / xor.: primary opcode 31, extended opcode 316; the first
   register argument A is encoded at bit 16 and S at bit 21.
   Arguments are parenthesized so expression arguments are shifted as a
   whole. */
#define ppc_xorx(c,A,S,B,RC) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (316 << 1) | (RC))
#define ppc_xor(c,A,S,B) ppc_xorx(c,A,S,B,0)
#define ppc_xord(c,A,S,B) ppc_xorx(c,A,S,B,1)

/* xori (primary opcode 26) and xoris (primary opcode 27) with a 16-bit
   immediate.  Note the argument order (S, A): the first register
   argument is encoded at bit 21. */
#define ppc_xori(c,S,A,UIMM) ppc_emit32(c, (26 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(UIMM))
#define ppc_xoris(c,S,A,UIMM) ppc_emit32(c, (27 << 26) | ((S) << 21) | ((A) << 16) | (guint16)(UIMM))
|
||||
|
||||
/* this marks the end of my work, ct */
|
||||
|
||||
/* PPC64 */
|
||||
|
||||
/* The following FP instructions are not available to 32-bit
   implementations (prior to PowerISA-V2.01) but are available to
   32-bit mode programs on 64-bit PowerPC implementations and all
   processors compliant with PowerISA-2.01 or later. */
|
||||
|
||||
/* fcfid (extended opcode 846), fctid (814) and fctidz (815); all use
   primary opcode 63 with D at bit 21 and B at bit 11.  The "d" suffix
   selects Rc = 1. */
#define ppc_fcfidx(c,D,B,Rc) \
    ppc_emit32 (c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (846 << 1) | (Rc))
#define ppc_fcfid(c,D,B)  ppc_fcfidx (c,D,B,0)
#define ppc_fcfidd(c,D,B) ppc_fcfidx (c,D,B,1)

#define ppc_fctidx(c,D,B,Rc) \
    ppc_emit32 (c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (814 << 1) | (Rc))
#define ppc_fctid(c,D,B)  ppc_fctidx (c,D,B,0)
#define ppc_fctidd(c,D,B) ppc_fctidx (c,D,B,1)

#define ppc_fctidzx(c,D,B,Rc) \
    ppc_emit32 (c, (63 << 26) | ((D) << 21) | (0 << 16) | ((B) << 11) | (815 << 1) | (Rc))
#define ppc_fctidz(c,D,B)  ppc_fctidzx (c,D,B,0)
#define ppc_fctidzd(c,D,B) ppc_fctidzx (c,D,B,1)
|
||||
|
||||
#ifdef __mono_ppc64__
|
||||
|
||||
/* Emit a fixed five-instruction sequence that builds the 64-bit value v
   in register D, 16 bits at a time.  v is expanded four times, so it
   should not have side effects. */
#define ppc_load_sequence(c,D,v) G_STMT_START { \
        /* bits 48..63, then bits 32..47 */ \
        ppc_lis  ((c), (D), ((guint64)(v) >> 48) & 0xffff); \
        ppc_ori  ((c), (D), (D), ((guint64)(v) >> 32) & 0xffff); \
        /* move what has been built so far up by 32 bits */ \
        ppc_sldi ((c), (D), (D), 32); \
        /* bits 16..31, then bits 0..15 */ \
        ppc_oris ((c), (D), (D), ((guint64)(v) >> 16) & 0xffff); \
        ppc_ori  ((c), (D), (D), (guint64)(v) & 0xffff); \
    } G_STMT_END

/* NOTE(review): presumably the byte length of the sequence above
   (five instructions) -- confirm against the users of this constant. */
#define PPC_LOAD_SEQUENCE_LENGTH 20
|
||||
|
||||
/* True when val, viewed as a signed 64-bit integer, fits in 32 (resp.
   48) bits: everything from the sign bit of that width upwards must be
   all zeros or all ones.  The argument is parenthesized before the cast
   so that an expression such as "a + b" is converted as a whole rather
   than only its first operand. */
#define ppc_is_imm32(val) (((((gint64)(val)) >> 31) == 0) || ((((gint64)(val)) >> 31) == -1))
#define ppc_is_imm48(val) (((((gint64)(val)) >> 47) == 0) || ((((gint64)(val)) >> 47) == -1))
|
||||
|
||||
/* Emit a four-instruction sequence that builds v in register D from
   three 16-bit pieces (bits 32..47, 16..31 and 0..15).  v is expanded
   three times, so it should not have side effects. */
#define ppc_load48(c,D,v) G_STMT_START { \
        /* bits 32..47 via ppc_li, then shift them into place */ \
        ppc_li   ((c), (D), ((gint64)(v) >> 32) & 0xffff); \
        ppc_sldi ((c), (D), (D), 32); \
        /* fill in bits 16..31 and 0..15 */ \
        ppc_oris ((c), (D), (D), ((guint64)(v) >> 16) & 0xffff); \
        ppc_ori  ((c), (D), (D), (guint64)(v) & 0xffff); \
    } G_STMT_END
|
||||
|
||||
/* Load the constant v into register D using the first form whose range
   check accepts it, tested in the order imm16, imm32, imm48; the full
   ppc_load_sequence is the fallback.  v is expanded several times, so
   it should not have side effects. */
#define ppc_load(c,D,v) G_STMT_START { \
        if (!ppc_is_imm16 ((guint64)(v))) { \
            if (!ppc_is_imm32 ((guint64)(v))) { \
                if (!ppc_is_imm48 ((guint64)(v))) { \
                    ppc_load_sequence ((c), (D), (guint64)(v)); \
                } else { \
                    ppc_load48 ((c), (D), (guint64)(v)); \
                } \
            } else { \
                ppc_load32 ((c), (D), (guint32)(guint64)(v)); \
            } \
        } else { \
            ppc_li ((c), (D), (guint16)(guint64)(v)); \
        } \
    } G_STMT_END
|
||||
|
||||
/* Materialize the address v in r11 with the full load sequence, then
   load r2 from offset sizeof (gpointer) and D from offset 0 of that
   address.  r11 and r2 are overwritten.
   NOTE(review): v presumably points at a function descriptor (entry
   point followed by TOC pointer) -- confirm against callers. */
#define ppc_load_func(c,D,v) G_STMT_START { \
        ppc_load_sequence ((c), ppc_r11, (guint64)(gsize)(v)); \
        /* second pointer-sized slot -> r2 */ \
        ppc_ldptr ((c), ppc_r2, sizeof (gpointer), ppc_r11); \
        /* first pointer-sized slot -> D */ \
        ppc_ldptr ((c), (D), 0, ppc_r11); \
    } G_STMT_END
|
||||
|
||||
/* Emit one ppc_ldr per register from D up to and including r31.  The
   first load uses displacement d from base register A and each
   following load advances the displacement by sizeof (guint64). */
#define ppc_load_multiple_regs(c,D,d,A) G_STMT_START { \
        int ppc_lm_off_ = (d); \
        int ppc_lm_reg_ = (D); \
        while (ppc_lm_reg_ <= 31) { \
            ppc_ldr ((c), ppc_lm_reg_, ppc_lm_off_, (A)); \
            ppc_lm_off_ += sizeof (guint64); \
            ++ppc_lm_reg_; \
        } \
    } G_STMT_END

/* Emit one ppc_str per register from S up to and including r31.  The
   first store uses displacement d from base register A and each
   following store advances the displacement by sizeof (guint64). */
#define ppc_store_multiple_regs(c,S,d,A) G_STMT_START { \
        int ppc_sm_off_ = (d); \
        int ppc_sm_reg_ = (S); \
        while (ppc_sm_reg_ <= 31) { \
            ppc_str ((c), ppc_sm_reg_, ppc_sm_off_, (A)); \
            ppc_sm_off_ += sizeof (guint64); \
            ++ppc_sm_reg_; \
        } \
    } G_STMT_END
|
||||
|
||||
/* Generic operation names mapped onto their 64-bit encodings. */

/* Compares: forwarded with the L argument fixed to 1. */
#define ppc_compare(c,cfrD,A,B) \
    ppc_cmp ((c), (cfrD), 1, (A), (B))
#define ppc_compare_reg_imm(c,cfrD,A,B) \
    ppc_cmpi ((c), (cfrD), 1, (A), (B))
#define ppc_compare_log(c,cfrD,A,B) \
    ppc_cmpl ((c), (cfrD), 1, (A), (B))

/* Shifts map onto sld / sldi / srdi / sradi. */
#define ppc_shift_left(c,A,S,B) \
    ppc_sld ((c), (A), (S), (B))
#define ppc_shift_left_imm(c,A,S,n) \
    ppc_sldi ((c), (A), (S), (n))

#define ppc_shift_right_imm(c,A,S,B) \
    ppc_srdi ((c), (A), (S), (B))
#define ppc_shift_right_arith_imm(c,A,S,B) \
    ppc_sradi ((c), (A), (S), (B))

/* Multiply maps onto mulld. */
#define ppc_multiply(c,D,A,B) \
    ppc_mulld ((c), (D), (A), (B))

/* Clear-right maps onto clrrdi. */
#define ppc_clear_right_imm(c,A,S,n) \
    ppc_clrrdi ((c), (A), (S), (n))
|
||||
|
||||
/* divd (extended opcode 489) and divdu (extended opcode 457), primary
   opcode 31.  Suffix "o" selects OE = 1 and suffix "d" selects
   Rc = 1. */
#define ppc_divdx(c,D,A,B,OE,Rc) \
    ppc_emit32 (c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (489 << 1) | (Rc))
#define ppc_divd(c,D,A,B)   ppc_divdx (c,D,A,B,0,0)
#define ppc_divdd(c,D,A,B)  ppc_divdx (c,D,A,B,0,1)
#define ppc_divdo(c,D,A,B)  ppc_divdx (c,D,A,B,1,0)
#define ppc_divdod(c,D,A,B) ppc_divdx (c,D,A,B,1,1)

#define ppc_divdux(c,D,A,B,OE,Rc) \
    ppc_emit32 (c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (457 << 1) | (Rc))
#define ppc_divdu(c,D,A,B)   ppc_divdux (c,D,A,B,0,0)
#define ppc_divdud(c,D,A,B)  ppc_divdux (c,D,A,B,0,1)
#define ppc_divduo(c,D,A,B)  ppc_divdux (c,D,A,B,1,0)
#define ppc_divduod(c,D,A,B) ppc_divdux (c,D,A,B,1,1)
|
||||
|
||||
/* extsw / extsw.: primary opcode 31, extended opcode 986.
   ppc_extswx takes (S, A) while the ppc_extsw and ppc_extswd wrappers
   take (A, S) and swap the two when forwarding; in all cases S is
   encoded at bit 21 and A at bit 16. */
#define ppc_extswx(c,S,A,Rc) \
    ppc_emit32 (c, (31 << 26) | ((S) << 21) | ((A) << 16) | (0 << 11) | (986 << 1) | (Rc))
#define ppc_extsw(c,A,S)  ppc_extswx (c,S,A,0)
#define ppc_extswd(c,A,S) ppc_extswx (c,S,A,1)
|
||||
|
||||
/* These move float to/from instructions are only available on POWER6 in
   native mode. These instructions are faster than the equivalent
   store/load because they avoid the store queue and associated delays.
   These instructions should only be used in 64-bit mode unless the
   kernel preserves the 64-bit GPR on signals and dispatch in 32-bit
   mode. The Linux kernel does not. */
|
||||
#define ppc_mftgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (735 << 1) | 0)
|
||||
#define ppc_mffgpr(c,T,B) ppc_emit32(c, (31 << 26) | ((T) << 21) | (0 << 16) | ((B) << 11) | (607 << 1) | 0)
|
||||
|
||||
#define ppc_ld(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
|
||||
/* Same word layout as ppc_ld (58 at bit 26, D at bit 21, A at bit 16, ds masked with 0xfffc) but with 2 in the low bits. The displacement is cast to guint32 before masking, exactly as ppc_ld, ppc_ldu, ppc_std and ppc_stdu do. */
#define ppc_lwa(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 2)
|
||||
#define ppc_ldarx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (84 << 1) | 0)
|
||||
#define ppc_ldu(c,D,ds,A) ppc_emit32(c, (58 << 26) | ((D) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 1)
|
||||
#define ppc_ldux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (53 << 1) | 0)
|
||||
#define ppc_lwaux(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (373 << 1) | 0)
|
||||
#define ppc_ldx(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (21 << 1) | 0)
|
||||
#define ppc_lwax(c,D,A,B) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (341 << 1) | 0)
|
||||
|
||||
#define ppc_mulhdx(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (73 << 1) | (Rc))
|
||||
#define ppc_mulhd(c,D,A,B) ppc_mulhdx(c,D,A,B,0)
|
||||
#define ppc_mulhdd(c,D,A,B) ppc_mulhdx(c,D,A,B,1)
|
||||
#define ppc_mulhdux(c,D,A,B,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | (0 << 10) | (9 << 1) | (Rc))
|
||||
#define ppc_mulhdu(c,D,A,B) ppc_mulhdux(c,D,A,B,0)
|
||||
#define ppc_mulhdud(c,D,A,B) ppc_mulhdux(c,D,A,B,1)
|
||||
|
||||
#define ppc_mulldx(c,D,A,B,OE,Rc) ppc_emit32(c, (31 << 26) | ((D) << 21) | ((A) << 16) | ((B) << 11) | ((OE) << 10) | (233 << 1) | (Rc))
|
||||
#define ppc_mulld(c,D,A,B) ppc_mulldx(c,D,A,B,0,0)
|
||||
#define ppc_mulldd(c,D,A,B) ppc_mulldx(c,D,A,B,0,1)
|
||||
#define ppc_mulldo(c,D,A,B) ppc_mulldx(c,D,A,B,1,0)
|
||||
#define ppc_mulldod(c,D,A,B) ppc_mulldx(c,D,A,B,1,1)
|
||||
|
||||
#define ppc_rldclx(c,A,S,B,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (ppc_split_5_1(MB) << 5) | (8 << 1) | (Rc))
|
||||
#define ppc_rldcl(c,A,S,B,MB) ppc_rldclx(c,A,S,B,MB,0)
|
||||
#define ppc_rldcld(c,A,S,B,MB) ppc_rldclx(c,A,S,B,MB,1)
|
||||
#define ppc_rotld(c,A,S,B) ppc_rldcl(c, A, S, B, 0)
|
||||
|
||||
#define ppc_rldcrx(c,A,S,B,ME,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (ppc_split_5_1(ME) << 5) | (9 << 1) | (Rc))
|
||||
#define ppc_rldcr(c,A,S,B,ME) ppc_rldcrx(c,A,S,B,ME,0)
|
||||
#define ppc_rldcrd(c,A,S,B,ME) ppc_rldcrx(c,A,S,B,ME,1)
|
||||
|
||||
#define ppc_rldicx(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (2 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
|
||||
#define ppc_rldic(c,A,S,SH,MB) ppc_rldicx(c,S,A,SH,MB,0)
|
||||
#define ppc_rldicd(c,A,S,SH,MB) ppc_rldicx(c,S,A,SH,MB,1)
|
||||
|
||||
#define ppc_rldiclx(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (0 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
|
||||
#define ppc_rldicl(c,A,S,SH,MB) ppc_rldiclx(c,S,A,SH,MB,0)
|
||||
#define ppc_rldicld(c,A,S,SH,MB) ppc_rldiclx(c,S,A,SH,MB,1)
|
||||
#define ppc_extrdi(c,A,S,n,b) ppc_rldicl(c,A,S, (b) + (n), 64 - (n))
|
||||
#define ppc_rotldi(c,A,S,n) ppc_rldicl(c,A,S, n, 0)
|
||||
#define ppc_rotrdi(c,A,S,n) ppc_rldicl(c,A,S, 64 - (n), 0)
|
||||
#define ppc_srdi(c,A,S,n) ppc_rldicl(c,A,S, 64 - (n), n)
|
||||
#define ppc_clrldi(c,A,S,n) ppc_rldicl(c,A,S, 0, n)
|
||||
|
||||
#define ppc_rldicrx(c,A,S,SH,ME,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(ME) << 5) | (1 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
|
||||
#define ppc_rldicr(c,A,S,SH,ME) ppc_rldicrx(c,A,S,SH,ME,0)
|
||||
#define ppc_rldicrd(c,A,S,SH,ME) ppc_rldicrx(c,A,S,SH,ME,1)
|
||||
#define ppc_extldi(c,A,S,n,b) ppc_rldicr(c, A, S, b, (n) - 1)
|
||||
#define ppc_sldi(c,A,S,n) ppc_rldicr(c, A, S, n, 63 - (n))
|
||||
#define ppc_clrrdi(c,A,S,n) ppc_rldicr(c, A, S, 0, 63 - (n))
|
||||
|
||||
#define ppc_rldimix(c,S,A,SH,MB,Rc) ppc_emit32(c, (30 << 26) | ((S) << 21) | ((A) << 16) | (ppc_split_5_1_5(SH) << 11) | (ppc_split_5_1(MB) << 5) | (3 << 2) | (ppc_split_5_1_1(SH) << 1) | (Rc))
|
||||
#define ppc_rldimi(c,A,S,SH,MB) ppc_rldimix(c,S,A,SH,MB,0)
|
||||
#define ppc_rldimid(c,A,S,SH,MB) ppc_rldimix(c,S,A,SH,MB,1)
|
||||
|
||||
#define ppc_slbia(c) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | (0 << 11) | (498 << 1) | 0)
|
||||
#define ppc_slbie(c,B) ppc_emit32(c, (31 << 26) | (0 << 21) | (0 << 16) | ((B) << 11) | (434 << 1) | 0)
|
||||
#define ppc_sldx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (27 << 1) | (Rc))
|
||||
#define ppc_sld(c,A,S,B) ppc_sldx(c,S,A,B,0)
|
||||
#define ppc_sldd(c,A,S,B) ppc_sldx(c,S,A,B,1)
|
||||
|
||||
#define ppc_sradx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (794 << 1) | (Rc))
|
||||
#define ppc_srad(c,A,S,B) ppc_sradx(c,S,A,B,0)
|
||||
#define ppc_sradd(c,A,S,B) ppc_sradx(c,S,A,B,1)
|
||||
/* Generic form behind ppc_sradi/ppc_sradid. The shift amount SH is split: its low five bits (SH & 31) are placed at bit 11 and the remaining high part (SH >> 5) at bit 1; the constant 413 is shifted left by 2 and Rc occupies bit 0. */
#define ppc_sradix(c,S,A,SH,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | (((SH) & 31) << 11) | (413 << 2) | (((SH) >> 5) << 1) | (Rc))
|
||||
#define ppc_sradi(c,A,S,SH) ppc_sradix(c,S,A,SH,0)
|
||||
#define ppc_sradid(c,A,S,SH) ppc_sradix(c,S,A,SH,1)
|
||||
|
||||
#define ppc_srdx(c,S,A,B,Rc) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (539 << 1) | (Rc))
|
||||
#define ppc_srd(c,A,S,B) ppc_srdx(c,S,A,B,0)
|
||||
#define ppc_srdd(c,A,S,B) ppc_srdx(c,S,A,B,1)
|
||||
|
||||
#define ppc_std(c,S,ds,A) ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 0)
|
||||
#define ppc_stdcxd(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (214 << 1) | 1)
|
||||
#define ppc_stdu(c,S,ds,A) ppc_emit32(c, (62 << 26) | ((S) << 21) | ((A) << 16) | ((guint32)(ds) & 0xfffc) | 1)
|
||||
#define ppc_stdux(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (181 << 1) | 0)
|
||||
#define ppc_stdx(c,S,A,B) ppc_emit32(c, (31 << 26) | ((S) << 21) | ((A) << 16) | ((B) << 11) | (149 << 1) | 0)
|
||||
|
||||
#else
|
||||
/* Always true for 32-bit */
|
||||
#define ppc_is_imm32(val) (1)
|
||||
#endif
|
||||
|
||||
#endif
|
@ -0,0 +1,6 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.libs
|
||||
/.deps
|
||||
/*.la
|
||||
/*.lo
|
@ -0,0 +1,35 @@
|
||||
2010-03-23 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Remove duplicate
|
||||
|
||||
2009-06-24 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Add some new instructions.
|
||||
|
||||
2007-04-12 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* tramp.c: Add MONO_TYPE_PTR case.
|
||||
|
||||
2007-01-23 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Add packed attribute to several instruction structures.
|
||||
|
||||
2006-03-13 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Fix immediate checks.
|
||||
|
||||
2006-01-06 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Add lpdbr instruction (OP_ABS).
|
||||
|
||||
2006-01-03 Neale Ferguson <neale@sinenomine.net>
|
||||
|
||||
* s390x-codegen.h: Add some new instructions.
|
||||
|
||||
2004-12-15 Neale Ferguson <Neale.Ferguson@SoftwareAG-usa.com>
|
||||
|
||||
* s390x-codegen.h: Add some new instructions (CS, CSG, CSY, CDS, CDSG, CDSY)
|
||||
|
||||
2004-08-03 Neale Ferguson <Neale.Ferguson@SoftwareAG-usa.com>
|
||||
|
||||
* s390x-codegen.h Makefile.am tramp.c: S/390 64-bit interpreter
|
@ -0,0 +1,7 @@
|
||||
|
||||
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
|
||||
|
||||
noinst_LTLIBRARIES = libmonoarch-s390x.la
|
||||
|
||||
libmonoarch_s390x_la_SOURCES = tramp.c s390x-codegen.h
|
||||
|
@ -0,0 +1,997 @@
|
||||
/*
|
||||
Copyright (C) 2001 Radek Doulik
|
||||
*/
|
||||
|
||||
#ifndef S390X_H
|
||||
#define S390X_H
|
||||
#include <glib.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define FLOAT_REGS 2 /* No. float registers for parms */
|
||||
#define GENERAL_REGS 5 /* No. general registers for parms */
|
||||
|
||||
#define ARG_BASE s390_r10 /* Register for addressing arguments*/
|
||||
#define STKARG \
|
||||
(i*(sizeof(stackval))) /* Displacement of ith argument */
|
||||
|
||||
#define MINV_POS 160 /* MonoInvocation stack offset */
|
||||
#define STACK_POS (MINV_POS - sizeof (stackval) * sig->param_count)
|
||||
#define OBJ_POS 8
|
||||
#define TYPE_OFFSET (G_STRUCT_OFFSET (stackval, type))
|
||||
|
||||
#define MIN_CACHE_LINE 256
|
||||
|
||||
/*------------------------------------------------------------------*/
|
||||
/* Sequence to add an int/long long to parameters to stack_from_data*/
|
||||
/*------------------------------------------------------------------*/
|
||||
#define ADD_ISTACK_PARM(r, i) \
|
||||
if (reg_param < GENERAL_REGS-(r)) { \
|
||||
s390_lay (p, s390_r4, 0, STK_BASE, \
|
||||
local_start + (reg_param - this_flag) * sizeof(long)); \
|
||||
reg_param += (i); \
|
||||
} else { \
|
||||
s390_lay (p, s390_r4, 0, STK_BASE, \
|
||||
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
|
||||
stack_param += (i); \
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------*/
|
||||
/* Sequence to add a float/double to parameters to stack_from_data */
|
||||
/*------------------------------------------------------------------*/
|
||||
#define ADD_RSTACK_PARM(i) \
|
||||
if (fpr_param < FLOAT_REGS) { \
|
||||
s390_lay (p, s390_r4, 0, STK_BASE, \
|
||||
float_pos + (fpr_param * sizeof(float) * (i))); \
|
||||
fpr_param++; \
|
||||
} else { \
|
||||
stack_param += (stack_param % (i)); \
|
||||
s390_lay (p, s390_r4, 0, STK_BASE, \
|
||||
sz.stack_size + MINV_POS + stack_param * sizeof(float) * (i)); \
|
||||
stack_param += (i); \
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------*/
|
||||
/* Sequence to add a structure ptr to parameters to stack_from_data */
|
||||
/*------------------------------------------------------------------*/
|
||||
#define ADD_TSTACK_PARM \
|
||||
if (reg_param < GENERAL_REGS) { \
|
||||
s390_ly (p, s390_r4, 0, STK_BASE, \
|
||||
local_start + (reg_param - this_flag) * sizeof(long)); \
|
||||
reg_param++; \
|
||||
} else { \
|
||||
s390_ly (p, s390_r4, 0, STK_BASE, \
|
||||
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
|
||||
stack_param++; \
|
||||
}
|
||||
|
||||
#define ADD_PSTACK_PARM(r, i) \
|
||||
if (reg_param < GENERAL_REGS-(r)) { \
|
||||
s390_lay (p, s390_r4, 0, STK_BASE, \
|
||||
local_start + (reg_param - this_flag) * sizeof(long)); \
|
||||
reg_param += (i); \
|
||||
} else { \
|
||||
s390_ly (p, s390_r4, 0, STK_BASE, \
|
||||
sz.stack_size + MINV_POS + stack_param * sizeof(long)); \
|
||||
stack_param++; \
|
||||
}
|
||||
|
||||
/*
 * Symbolic names s390_r0 .. s390_r15 for the values 0 .. 15.
 * ARG_BASE, STK_BASE, S390_SP and S390_FP in this header are aliases
 * for individual enumerators of this type.
 */
typedef enum {
	s390_r0  = 0,  s390_r1  = 1,  s390_r2  = 2,  s390_r3  = 3,
	s390_r4  = 4,  s390_r5  = 5,  s390_r6  = 6,  s390_r7  = 7,
	s390_r8  = 8,  s390_r9  = 9,  s390_r10 = 10, s390_r11 = 11,
	s390_r12 = 12, s390_r13 = 13, s390_r14 = 14, s390_r15 = 15,
} S390IntRegister;
|
||||
|
||||
/* Symbolic names s390_f0 .. s390_f15 for the values 0 .. 15. */
typedef enum {
	s390_f0  = 0,  s390_f1  = 1,  s390_f2  = 2,  s390_f3  = 3,
	s390_f4  = 4,  s390_f5  = 5,  s390_f6  = 6,  s390_f7  = 7,
	s390_f8  = 8,  s390_f9  = 9,  s390_f10 = 10, s390_f11 = 11,
	s390_f12 = 12, s390_f13 = 13, s390_f14 = 14, s390_f15 = 15,
} S390FloatRegister;
|
||||
|
||||
/* Symbolic names s390_a0 .. s390_a15 for the values 0 .. 15. */
typedef enum {
	s390_a0  = 0,  s390_a1  = 1,  s390_a2  = 2,  s390_a3  = 3,
	s390_a4  = 4,  s390_a5  = 5,  s390_a6  = 6,  s390_a7  = 7,
	s390_a8  = 8,  s390_a9  = 9,  s390_a10 = 10, s390_a11 = 11,
	s390_a12 = 12, s390_a13 = 13, s390_a14 = 14, s390_a15 = 15,
} S390AccRegister;
|
||||
|
||||
/* Single special-register identifier; its value (256) lies outside the 0..15 range used by the other register enums in this header. */
typedef enum { s390_fpc = 256 } S390SpecialRegister;
|
||||
|
||||
/*
 * Range predicates for immediate/displacement values.  Each macro casts
 * its argument to glong and compares it with the bounds shown.  The
 * argument is wrapped in parentheses before the cast so that an
 * expression such as "a + b" or "x ? y : z" is converted as a whole
 * rather than having the cast bind to its first operand only.  The
 * argument is still evaluated twice, so it must have no side effects.
 */
#define s390_is_imm16(val)	((glong)(val) >= (glong) SHRT_MIN && \
				 (glong)(val) <= (glong) SHRT_MAX)
#define s390_is_imm32(val)	((glong)(val) >= (glong) INT_MIN && \
				 (glong)(val) <= (glong) INT_MAX)
#define s390_is_uimm16(val)	((glong)(val) >= 0 && (glong)(val) <= (glong) USHRT_MAX)
#define s390_is_uimm32(val)	((glong)(val) >= 0 && (glong)(val) <= (glong) UINT_MAX)
#define s390_is_uimm20(val)	((glong)(val) >= 0 && (glong)(val) <= 1048575)
#define s390_is_imm20(val)	((glong)(val) >= -524288 && (glong)(val) <= 524287)
/* NOTE(review): -4096..4095 is a 13-bit signed range; bounds kept exactly as before -- confirm intent. */
#define s390_is_imm12(val)	((glong)(val) >= (glong)-4096 && \
				 (glong)(val) <= (glong)4095)
#define s390_is_uimm12(val)	((glong)(val) >= 0 && (glong)(val) <= 4095)
|
||||
|
||||
#define STK_BASE s390_r15
|
||||
#define S390_SP s390_r15
|
||||
#define S390_FP s390_r11
|
||||
#define S390_MINIMAL_STACK_SIZE 160
|
||||
#define S390_REG_SAVE_OFFSET 48
|
||||
#define S390_PARM_SAVE_OFFSET 16
|
||||
#define S390_RET_ADDR_OFFSET 112
|
||||
#define S390_FLOAT_SAVE_OFFSET 128
|
||||
|
||||
#define S390_CC_ZR 8
|
||||
#define S390_CC_NE 7
|
||||
#define S390_CC_NZ 7
|
||||
#define S390_CC_LT 4
|
||||
#define S390_CC_GT 2
|
||||
#define S390_CC_GE 11
|
||||
#define S390_CC_NM 11
|
||||
#define S390_CC_LE 13
|
||||
#define S390_CC_OV 1
|
||||
#define S390_CC_NO 14
|
||||
#define S390_CC_CY 3
|
||||
#define S390_CC_NC 12
|
||||
#define S390_CC_UN 15
|
||||
|
||||
#define s390_word(addr, value) do \
|
||||
{ \
|
||||
* (guint32 *) addr = (guint32) value; \
|
||||
addr += sizeof(guint32); \
|
||||
} while (0)
|
||||
|
||||
#define s390_float(addr, value) do \
|
||||
{ \
|
||||
* (gfloat *) addr = (gfloat) value; \
|
||||
addr += sizeof(gfloat); \
|
||||
} while (0)
|
||||
|
||||
#define s390_llong(addr, value) do \
|
||||
{ \
|
||||
* (guint64 *) addr = (guint64) value; \
|
||||
addr += sizeof(guint64); \
|
||||
} while (0)
|
||||
|
||||
#define s390_double(addr, value) do \
|
||||
{ \
|
||||
* (gdouble *) addr = (gdouble) value; \
|
||||
addr += sizeof(gdouble); \
|
||||
} while (0)
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
} E_Format;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
int im;
|
||||
} I_Format;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char r2 : 4;
|
||||
} RR_Format;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char xx;
|
||||
char r1 : 4;
|
||||
char r2 : 4;
|
||||
} RRE_Format;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char r1 : 4;
|
||||
char xx : 4;
|
||||
char r3 : 4;
|
||||
char r2 : 4;
|
||||
} RRF_Format_1;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char m3 : 4;
|
||||
char xx : 4;
|
||||
char r1 : 4;
|
||||
char r2 : 4;
|
||||
} RRF_Format_2;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char r3 : 4;
|
||||
char m4 : 4;
|
||||
char r1 : 4;
|
||||
char r2 : 4;
|
||||
} RRF_Format_3;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char x2 : 4;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
} RX_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char x2 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 12;
|
||||
char xx;
|
||||
char op2;
|
||||
} RXE_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r3 : 4;
|
||||
char x2 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 12;
|
||||
char r1 : 4;
|
||||
char xx : 4;
|
||||
char op2;
|
||||
} RXF_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char x2 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 20;
|
||||
char op2;
|
||||
} __attribute__ ((packed)) RXY_Format;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 12;
|
||||
} RS_Format_1;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char m3 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 12;
|
||||
} RS_Format_2;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char xx : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 12;
|
||||
} RS_Format_3;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 20;
|
||||
char op2;
|
||||
} __attribute__ ((packed)) RSY_Format_1;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char m3 : 4;
|
||||
char b2 : 4;
|
||||
int d2 : 20;
|
||||
char op2;
|
||||
} __attribute__ ((packed)) RSY_Format_2;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char l1 : 4;
|
||||
char xx : 4;
|
||||
char b1 : 4;
|
||||
int d1 : 12;
|
||||
char yy;
|
||||
char op2;
|
||||
} RSL_Format;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
short i2;
|
||||
} RSI_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char m1 : 4;
|
||||
char op2 : 4;
|
||||
short i2;
|
||||
} RI_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
short i2;
|
||||
char xx;
|
||||
char op2;
|
||||
} RIE_Format_1;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
short i2;
|
||||
char m2 : 4;
|
||||
char xx : 4;
|
||||
char op2;
|
||||
} RIE_Format_2;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
short d;
|
||||
char i;
|
||||
char op2;
|
||||
} RIE_Format_3;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char yy : 4;
|
||||
short i2;
|
||||
char m3 : 4;
|
||||
char xx : 4;
|
||||
char op2;
|
||||
} RIE_Format_4;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char r1 : 4;
|
||||
char op2 : 4;
|
||||
int i2;
|
||||
} __attribute__ ((packed)) RIL_Format_1;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char m1 : 4;
|
||||
char op2 : 4;
|
||||
int i2;
|
||||
} __attribute__ ((packed)) RIL_Format_2;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char i2;
|
||||
char b1 : 4;
|
||||
short d1 : 12;
|
||||
} SI_Format;
|
||||
|
||||
typedef struct {
|
||||
char op1;
|
||||
char i2;
|
||||
char b1 : 4;
|
||||
int d1 : 20;
|
||||
char op2;
|
||||
} __attribute__ ((packed)) SIY_Format;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
} S_Format;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char ll;
|
||||
char b1 : 4;
|
||||
short d1 : 12;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
} SS_Format_1;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char l1 : 4;
|
||||
char l2 : 4;
|
||||
char b1 : 4;
|
||||
short d1 : 12;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
} SS_Format_2;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
char b1 : 4;
|
||||
short d1 : 12;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
} SS_Format_3;
|
||||
|
||||
typedef struct {
|
||||
char op;
|
||||
char r1 : 4;
|
||||
char r3 : 4;
|
||||
char b2 : 4;
|
||||
short d2 : 12;
|
||||
char b4 : 4;
|
||||
short d4 : 12;
|
||||
} SS_Format_4;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
short tb1 : 4;
|
||||
short d1 : 12;
|
||||
short b2 : 4;
|
||||
short d2 : 12;
|
||||
} __attribute__ ((packed)) SSE_Format;
|
||||
|
||||
typedef struct {
|
||||
short op;
|
||||
char r3 : 4;
|
||||
char o2 : 4;
|
||||
short b1 : 4;
|
||||
short d1 : 12;
|
||||
short b2 : 4;
|
||||
short d2 : 12;
|
||||
} __attribute__ ((packed)) SSF_Format;
|
||||
|
||||
#define s390_emit16(c, x) do \
|
||||
{ \
|
||||
*((guint16 *) c) = (guint16) x; \
|
||||
c += sizeof(guint16); \
|
||||
} while(0)
|
||||
|
||||
#define s390_emit32(c, x) do \
|
||||
{ \
|
||||
*((guint32 *) c) = (guint32) x; \
|
||||
c += sizeof(guint32); \
|
||||
} while(0)
|
||||
|
||||
#define S390_E(c,opc) s390_emit16(c,opc)
|
||||
|
||||
#define S390_I(c,opc,imm) s390_emit16(c, (opc << 8 | imm))
|
||||
|
||||
#define S390_RR(c,opc,g1,g2) s390_emit16(c, (opc << 8 | (g1) << 4 | g2))
|
||||
|
||||
#define S390_RRE(c,opc,g1,g2) s390_emit32(c, (opc << 16 | (g1) << 4 | g2))
|
||||
|
||||
#define S390_RRF_1(c,opc,g1,g2,g3) s390_emit32(c, (opc << 16 | (g1) << 12 | (g3) << 4 | g2))
|
||||
|
||||
#define S390_RRF_2(c,opc,g1,k3,g2) s390_emit32(c, (opc << 16 | (k3) << 12 | (g1) << 4 | g2))
|
||||
|
||||
#define S390_RRF_3(c,opc,g1,g2,k4,g3) s390_emit32(c, (opc << 16 | (g3) << 12 | (k4) << 8 | (g1) << 4 | g2))
|
||||
|
||||
#define S390_RX(c,opc,g1,n2,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (n2) << 16 | (s2) << 12 | ((p2) & 0xfff)))
|
||||
|
||||
#define S390_RXE(c,opc,g1,n2,s2,p2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | n2)); \
|
||||
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
|
||||
(opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
#define S390_RXY(c,opc,g1,n2,s2,p2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | n2)); \
|
||||
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
|
||||
((((p2) & 0xff000) >> 12) << 8) | \
|
||||
(opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
#define S390_RS_1(c,opc,g1,g3,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 | (s2) << 12 | ((p2) & 0xfff)))
|
||||
|
||||
#define S390_RS_2(c,opc,g1,k3,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (k3) << 16 | (s2) << 12 | ((p2) & 0xfff)))
|
||||
|
||||
#define S390_RS_3(c,opc,g1,s2,p2) s390_emit32(c, (opc << 24 | (g1) << 20 | (s2) << 12 | ((p2) & 0xfff)))
|
||||
|
||||
#define S390_RSY_1(c,opc,g1,g3,s2,p2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g3)); \
|
||||
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
|
||||
((((p2) & 0xff000) >> 12) << 8) | \
|
||||
(opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
#define S390_RSY_2(c,opc,g1,k3,s2,p2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | k3)); \
|
||||
s390_emit32(c, ((s2) << 28 | (((p2) & 0xfff) << 16) | \
|
||||
((((p2) & 0xff000) >> 12) << 8) | \
|
||||
(opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte RSL-format instruction as one halfword plus one word:
 *   halfword: (opc & 0xff00) | ln << 4
 *   word:     s1 << 28 | (p1 & 0xfff) << 16 | (opc & 0xff)
 * The displacement field is taken from p1 (it was previously built
 * from s1, leaving p1 unused).  c is advanced by the emit helpers.
 */
#define S390_RSL(c,opc,ln,s1,p1) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | (ln) << 4));		\
	s390_emit32(c, ((s1) << 28 | (((p1) & 0xfff) << 16) |	\
			(opc & 0xff)));				\
} while (0)
|
||||
|
||||
#define S390_RSI(c,opc,g1,g3,m2) s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 | (m2 & 0xffff)))
|
||||
|
||||
#define S390_RI(c,opc,g1,m2) s390_emit32(c, ((opc >> 4) << 24 | (g1) << 20 | (opc & 0x0f) << 16 | (m2 & 0xffff)))
|
||||
|
||||
#define S390_RIE_1(c,opc,g1,g3,m2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | g3)); \
|
||||
s390_emit32(c, ((m2) << 16 | (opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte RIE-format instruction as three halfwords:
 *   1: (opc & 0xff00) | g1 << 4 | g2
 *   2: v
 *   3: m3 << 12 | (opc & 0xff)
 * The body now uses the macro's own parameters g2 and m3; it previously
 * named g3 and m2, which are not parameters of this macro.
 */
#define S390_RIE_2(c,opc,g1,g2,m3,v) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | (g2)));	\
	s390_emit16(c, (v));					\
	s390_emit16(c, ((m3) << 12 | (opc & 0xff)));		\
} while (0)
|
||||
|
||||
#define S390_RIE_3(c,opc,g1,i,m3,d) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc & 0xff00) | (g1) << 4 | m3)); \
|
||||
s390_emit16(c, (d)); \
|
||||
s390_emit16(c, ((i) << 8 | (opc & 0xff))); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte RIE-format instruction as three halfwords:
 *   1: (opc & 0xff00) | g1 << 4
 *   2: i2
 *   3: m3 << 12 | (opc & 0xff)
 * The first emit call was missing a closing parenthesis.
 */
#define S390_RIE_4(c,opc,g1,i2,m3) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | (g1) << 4));		\
	s390_emit16(c, (i2));					\
	s390_emit16(c, ((m3) << 12 | (opc & 0xff)));		\
} while (0)
|
||||
|
||||
#define S390_RIL_1(c,opc,g1,m2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc >> 4) << 8 | (g1) << 4 | (opc & 0xf))); \
|
||||
s390_emit32(c, m2); \
|
||||
} while (0)
|
||||
|
||||
#define S390_RIL_2(c,opc,k1,m2) do \
|
||||
{ \
|
||||
s390_emit16(c, ((opc >> 4) << 8 | (k1) << 4 | (opc & 0xf))); \
|
||||
s390_emit32(c, m2); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte RIS-format instruction as three halfwords:
 *   1: (opc & 0xff00) | r << 4 | m3
 *   2: b << 12 | (d & 0xfff)
 *   3: (i & 0xff) << 8 | (opc & 0xff)
 * Fixes relative to the previous text: stray comma in "opc, &",
 * references to r1/r2 (not parameters here), m3 never emitted, the
 * 8-bit immediate shifted by 4 instead of 8 (overlapping the low
 * opcode byte), and the missing "while (0)" terminator.
 */
#define S390_RIS(c,opc,r,i,m3,b,d) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | (r) << 4 | (m3)));	\
	s390_emit16(c, ((b) << 12 | ((d) & 0xfff)));		\
	s390_emit16(c, (((i) & 0xff) << 8 | (opc & 0xff)));	\
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte RRS-format instruction as three halfwords:
 *   1: (opc & 0xff00) | r1 << 4 | r2
 *   2: b << 12 | (d & 0xfff)
 *   3: m3 << 12 | (opc & 0xff)
 * Fixes the stray comma in "opc, &" and adds the "while (0)" that
 * closes the do-block so the macro behaves as a single statement.
 */
#define S390_RRS(c,opc,r1,r2,m3,b,d) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | (r1) << 4 | (r2)));	\
	s390_emit16(c, ((b) << 12 | ((d) & 0xfff)));		\
	s390_emit16(c, ((m3) << 12 | (opc & 0xff)));		\
} while (0)
|
||||
|
||||
/* Emit a four-byte SI-format word: opc << 24 | m2 << 16 | s1 << 12 | (p1 & 0xfff). No trailing ';' inside the macro, so "if (x) S390_SI(...); else ..." parses as intended. */
#define S390_SI(c,opc,s1,p1,m2) s390_emit32(c, (opc << 24 | (m2) << 16 | (s1) << 12 | ((p1) & 0xfff)))
|
||||
|
||||
/*
 * Emit a six-byte SIY-format instruction as one halfword plus one word:
 *   halfword: (opc & 0xff00) | (m2 & 0xff)
 *   word:     s1 << 28 | low 12 bits of p1 << 16 |
 *             bits 12..19 of p1 << 8 | (opc & 0xff)
 * The 20-bit displacement is split the same way S390_RXY and
 * S390_RSY_1/2 split theirs.  The body previously referenced p2 (not a
 * parameter) and placed s1 at bit 24; m2 is now masked to eight bits so
 * a negative immediate cannot overwrite the opcode byte.
 */
#define S390_SIY(c,opc,s1,p1,m2) do				\
{								\
	s390_emit16(c, ((opc & 0xff00) | ((m2) & 0xff)));	\
	s390_emit32(c, ((s1) << 28 | (((p1) & 0xfff) << 16) |	\
			((((p1) & 0xff000) >> 12) << 8) |	\
			(opc & 0xff)));				\
} while (0)
|
||||
|
||||
#define S390_S(c,opc,s2,p2) s390_emit32(c, (opc << 16 | (s2) << 12 | ((p2) & 0xfff)))
|
||||
|
||||
#define S390_SS_1(c,opc,ln,s1,p1,s2,p2) do \
|
||||
{ \
|
||||
s390_emit32(c, (opc << 24 | ((ln-1) & 0xff) << 16 | \
|
||||
(s1) << 12 | ((p1) & 0xfff))); \
|
||||
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte SS-format instruction as one word plus one halfword:
 *   word:     opc << 24 | n1 << 20 | n2 << 16 | s1 << 12 | (p1 & 0xfff)
 *   halfword: s2 << 12 | (p2 & 0xfff)
 * n1/n2 were previously shifted by 16/12, which made n2 collide with
 * s1 (also at bit 12) and left bits 20..23 empty.
 */
#define S390_SS_2(c,opc,n1,n2,s1,p1,s2,p2) do			\
{								\
	s390_emit32(c, (opc << 24 | (n1) << 20 | (n2) << 16 |	\
			(s1) << 12 | ((p1) & 0xfff)));		\
	s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff)));		\
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte SS-format instruction as one word plus one halfword:
 *   word:     opc << 24 | g1 << 20 | g3 << 16 | s1 << 12 | (p1 & 0xfff)
 *   halfword: s2 << 12 | (p2 & 0xfff)
 * g1/g3 were previously shifted by 16/12, which made g3 collide with
 * s1 (also at bit 12) and left bits 20..23 empty.
 */
#define S390_SS_3(c,opc,g1,g3,s1,p1,s2,p2) do			\
{								\
	s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 |	\
			(s1) << 12 | ((p1) & 0xfff)));		\
	s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff)));		\
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte SS-format instruction as one word plus one halfword:
 *   word:     opc << 24 | g1 << 20 | g3 << 16 | s2 << 12 | (p2 & 0xfff)
 *   halfword: s4 << 12 | (p4 & 0xfff)
 * g1/g3 were previously shifted by 16/12, which made g3 collide with
 * s2 (also at bit 12) and left bits 20..23 empty.
 */
#define S390_SS_4(c,opc,g1,g3,s2,p2,s4,p4) do			\
{								\
	s390_emit32(c, (opc << 24 | (g1) << 20 | (g3) << 16 |	\
			(s2) << 12 | ((p2) & 0xfff)));		\
	s390_emit16(c, ((s4) << 12 | ((p4) & 0xfff)));		\
} while (0)
|
||||
|
||||
#define S390_SSE(c,opc,s1,p1,s2,p2) do \
|
||||
{ \
|
||||
s390_emit16(c, opc); \
|
||||
s390_emit16(c, ((s1) << 12 | ((p1) & 0xfff))); \
|
||||
s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff))); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Emit a six-byte SSF-format instruction as three halfwords.  opc is a
 * 12-bit value (e.g. 0xc82): its upper eight bits form the first byte
 * and its low nibble follows r3, the same split S390_RIL_1 uses:
 *   1: (opc >> 4) << 8 | r3 << 4 | (opc & 0xf)
 *   2: s1 << 12 | (p1 & 0xfff)
 *   3: s2 << 12 | (p2 & 0xfff)
 * The previous "(opc & 0xff00) << 8" moved the opcode above bit 15, so
 * the guint16 store dropped it.
 */
#define S390_SSF(c,opc,r3,s1,p1,s2,p2) do			\
{								\
	s390_emit16(c, (((opc) >> 4) << 8 | (r3) << 4 |		\
			((opc) & 0xf)));			\
	s390_emit16(c, ((s1) << 12 | ((p1) & 0xfff)));		\
	s390_emit16(c, ((s2) << 12 | ((p2) & 0xfff)));		\
} while (0)
|
||||
|
||||
#define s390_a(c, r, x, b, d) S390_RX(c, 0x5a, r, x, b, d)
|
||||
#define s390_adb(c, r, x, b, d) S390_RXE(c, 0xed1a, r, x, b, d)
|
||||
#define s390_adbr(c, r1, r2) S390_RRE(c, 0xb31a, r1, r2)
|
||||
#define s390_aebr(c, r1, r2) S390_RRE(c, 0xb30a, r1, r2)
|
||||
/* RIL_1 encoding with opcode 0xc29. No trailing ';' in the macro, matching s390_agfi and the other RIL_1 wrappers. */
#define s390_afi(c, r, v) S390_RIL_1(c, 0xc29, r, v)
|
||||
#define s390_ag(c, r, x, b, d) S390_RXY(c, 0xe308, r, x, b, d)
|
||||
#define s390_agf(c, r, x, b, d) S390_RXY(c, 0xe318, r, x, b, d)
|
||||
#define s390_agfi(c, r, v) S390_RIL_1(c, 0xc28, r, v)
|
||||
#define s390_afgr(c, r1, r2) S390_RRE(c, 0xb918, r1, r2)
|
||||
#define s390_aghi(c, r, v) S390_RI(c, 0xa7b, r, v)
|
||||
/* RIE_1 encoding with opcode 0xecd9. S390_RIE_1 takes two register operands (g1, g3) plus the immediate, so both are accepted and forwarded; the former three-operand form could not expand. */
#define s390_aghik(c, r1, r3, v) S390_RIE_1(c, 0xecd9, r1, r3, v)
|
||||
#define s390_agr(c, r1, r2) S390_RRE(c, 0xb908, r1, r2)
|
||||
#define s390_agrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e8, r1, r2, r3)
|
||||
/* SIY encoding with opcode 0xeb7a; adds the comma that was missing between r and v. NOTE(review): S390_SIY is declared as (c,opc,s1,p1,m2) but only two operands are forwarded here, as in s390_algsi/s390_alsi/s390_asi -- confirm the intended operand list. */
#define s390_agsi(c, r, v) S390_SIY(c, 0xeb7a, r, v)
|
||||
#define s390_ahhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9c8, r1, r2, r3)
|
||||
#define s390_ahhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9d8, r1, r2, r3)
|
||||
#define s390_ahi(c, r, v) S390_RI(c, 0xa7a, r, v)
|
||||
/* RIE_1 encoding with opcode 0xecd8. S390_RIE_1 takes two register operands (g1, g3) plus the immediate, so both are accepted and forwarded; the former three-operand form could not expand. */
#define s390_ahik(c, r1, r3, v) S390_RIE_1(c, 0xecd8, r1, r3, v)
|
||||
/* RXY encoding with opcode 0xe37a. The index register x is now forwarded; it was previously dropped, leaving S390_RXY one argument short. */
#define s390_ahy(c, r, x, b, d) S390_RXY(c, 0xe37a, r, x, b, d)
|
||||
#define s390_aih(c, r, v) S390_RIL_1(c, 0xcc8, r, v)
|
||||
#define s390_al(c, r, x, b, d) S390_RX(c, 0x5e, r, x, b, d)
|
||||
#define s390_alc(c, r, x, b, d) S390_RXY(c, 0xe398, r, x, b, d)
|
||||
#define s390_alcg(c, r, x, b, d) S390_RXY(c, 0xe388, r, x, b, d)
|
||||
#define s390_alcgr(c, r1, r2) S390_RRE(c, 0xb988, r1, r2)
|
||||
#define s390_alcr(c, r1, r2) S390_RRE(c, 0xb998, r1, r2)
|
||||
#define s390_alfi(c, r, v) S390_RIL_1(c, 0xc2b, r, v)
|
||||
#define s390_alg(c, r, x, b, d) S390_RXY(c, 0xe30a, r, x, b, d)
|
||||
#define s390_algf(c, r, x, b, d) S390_RXY(c, 0xe31a, r, x, b, d)
|
||||
#define s390_algfi(c, r, v) S390_RIL_1(c, 0xc2a, r, v)
|
||||
#define s390_algfr(c, r1, r2) S390_RRE(c, 0xb91a, r1, r2)
|
||||
/* RIE_1 encoding with opcode 0xecdb (the previous 0xecd8 duplicated s390_ahik's opcode). Takes and forwards both register operands that S390_RIE_1 requires. */
#define s390_alghsik(c, r1, r3, v) S390_RIE_1(c, 0xecdb, r1, r3, v)
|
||||
#define s390_algr(c, r1, r2) S390_RRE(c, 0xb90a, r1, r2)
|
||||
#define s390_algsi(c, r, v) S390_SIY(c, 0xeb7e, r, v)
|
||||
#define s390_alhhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9ca, r1, r2, r3)
|
||||
#define s390_alhhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9da, r1, r2, r3)
|
||||
/* RIE_1 encoding with opcode 0xecda. S390_RIE_1 takes two register operands (g1, g3) plus the immediate, so both are accepted and forwarded; the former three-operand form could not expand. */
#define s390_alhsik(c, r1, r3, v) S390_RIE_1(c, 0xecda, r1, r3, v)
|
||||
#define s390_alr(c, r1, r2) S390_RR(c, 0x1e, r1, r2)
|
||||
/* RRF_1 encoding with opcode 0xb9fa, three register operands, mirroring s390_ark/s390_agrk. The previous text called S390_RRF, which this header does not define. */
#define s390_alrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9fa, r1, r2, r3)
|
||||
#define s390_alsi(c, r, v) S390_SIY(c, 0xeb6e, r, v)
|
||||
#define s390_alsih(c, r, v) S390_RIL_1(c, 0xcca, r, v)
|
||||
#define s390_alsihn(c, r, v) S390_RIL_1(c, 0xccb, r, v)
|
||||
#define s390_aly(c, r, x, b, d) S390_RXY(c, 0xe35e, r, x, b, d)
|
||||
#define s390_ar(c, r1, r2) S390_RR(c, 0x1a, r1, r2)
|
||||
#define s390_ark(c, r1, r2, r3) S390_RRF_1(c, 0xb9f8, r1, r2, r3)
|
||||
#define s390_asi(c, r, v) S390_SIY(c, 0xeb6a, r, v)
|
||||
#define s390_ay(c, r, x, b, d) S390_RXY(c, 0xe35a, r, x, b, d)
|
||||
#define s390_basr(c, r1, r2) S390_RR(c, 0x0d, r1, r2)
|
||||
#define s390_bctr(c, r1, r2) S390_RR(c, 0x06, r1, r2)
|
||||
#define s390_bctrg(c, r1, r2) S390_RRE(c, 0xb946, r1, r2)
|
||||
#define s390_bnzr(c, r) S390_RR(c, 0x07, 0x07, r)
|
||||
#define s390_bras(c, r, o) S390_RI(c, 0xa75, r, o)
|
||||
#define s390_brasl(c, r, o) S390_RIL_1(c, 0xc05, r, o)
|
||||
#define s390_brc(c, m, d) S390_RI(c, 0xa74, m, d)
|
||||
#define s390_brcl(c, m, d) S390_RIL_2(c, 0xc04, m, d)
|
||||
#define s390_br(c, r) S390_RR(c, 0x07, 0xf, r)
|
||||
#define s390_break(c) S390_RR(c, 0, 0, 0)
|
||||
#define s390_bzr(c, r) S390_RR(c, 0x07, 0x08, r)
|
||||
#define s390_c(c, r, x, b, d) S390_RX(c, 0x59, r, x, b, d)
|
||||
#define s390_cdb(c, r, x, b, d) S390_RXE(c, 0xed19, r, x, b, d)
|
||||
#define s390_cdbr(c, r1, r2) S390_RRE(c, 0xb319, r1, r2)
|
||||
#define s390_cdfbr(c, r1, r2) S390_RRE(c, 0xb395, r1, r2)
|
||||
#define s390_cdgbr(c, r1, r2) S390_RRE(c, 0xb3a5, r1, r2)
|
||||
#define s390_cds(c, r1, r2, b, d) S390_RX(c, 0xbb, r1, r2, b, d)
|
||||
#define s390_cdsg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb3e, r1, r2, b, d)
|
||||
#define s390_cdsy(c, r1, r2, b, d) S390_RSY_1(c, 0xeb31, r1, r2, b, d)
|
||||
#define s390_cebr(c, r1, r2) S390_RRE(c, 0xb309, r1, r2)
|
||||
#define s390_cegbr(c, r1, r2) S390_RRE(c, 0xb3a4, r1, r2)
|
||||
#define s390_cfdbr(c, r1, m, r2) S390_RRF_2(c, 0xb399, r1, m, r2)
|
||||
#define s390_cfi(c, r, v) S390_RIL_1(c, 0xc2d, r, v)
|
||||
#define s390_cgdbr(c, r1, m, r2) S390_RRF_2(c, 0xb3a9, r1, m, r2)
|
||||
#define s390_cg(c, r, x, b, d) S390_RXY(c, 0xe320, r, x, b, d)
|
||||
#define s390_cgfi(c, r, v) S390_RIL_1(c, 0xc2c, r, v)
|
||||
#define s390_cgfrl(c, r, v) S390_RIL_1(c, 0xc6c, r, v)
|
||||
#define s390_cghi(c, r, i) S390_RI(c, 0xa7f, r, i)
|
||||
#define s390_cgib(c, r, i, m, b, d) S390_RIS(c, 0xecfc, r, i, m, b, d)
|
||||
#define s390_cgij(c, r, i, m, d) S390_RIE_3(c, 0xec7c, r, i, m, d)
|
||||
/* RIE_4 encoding with opcode 0xec70. Adds the comma missing between i and m and drops the trailing ';' from the macro body. */
#define s390_cgit(c, r, i, m) S390_RIE_4(c, 0xec70, r, i, m)
|
||||
#define s390_cgr(c, r1, r2) S390_RRE(c, 0xb920, r1, r2)
|
||||
#define s390_cgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece4, r1, r2, m3, b, d)
|
||||
#define s390_cgrj(c, r1, r2, m3, v) S390_RIE_2(c, 0xec64, r1, r2, m3, v)
|
||||
#define s390_cgrl(c, r, v) S390_RIL_1(c, 0xc68, r, v)
|
||||
#define s390_chi(c, r, i) S390_RI(c, 0xa7e, r, i)
|
||||
#define s390_cib(c, r, i, m, b, d) S390_RIS(c, 0xecfe, r, i, m, b, d)
|
||||
#define s390_cij(c, r, i, m, d) S390_RIE_3(c, 0xec7e, r, i, m, d)
|
||||
/* RIE_4 encoding with opcode 0xec72. Adds the comma missing between i and m and drops the trailing ';' from the macro body. */
#define s390_cit(c, r, i, m) S390_RIE_4(c, 0xec72, r, i, m)
|
||||
#define s390_cl(c, r, x, b, d) S390_RX(c, 0x55, r, x, b, d)
|
||||
#define s390_clg(c, r, x, b, d) S390_RXY(c, 0xe321, r, x, b, d)
|
||||
#define s390_clgib(c, r, i, m, b, d) S390_RIS(c, 0xecfd, r, i, m, b, d)
|
||||
/* Emits through S390_RIE_3 with opcode 0xec7d. The parameter list names the
 * m and d operands that the expansion forwards, matching s390_cgij and s390_cij. */
#define s390_clgij(c, r, i, m, d) S390_RIE_3(c, 0xec7d, r, i, m, d)
|
||||
#define s390_clgr(c, r1, r2) S390_RRE(c, 0xb921, r1, r2)
|
||||
#define s390_clgrj(c, r1, r2, m, v) S390_RIE_2(c, 0xec65, r1, r2, m, v)
|
||||
#define s390_clgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece5, r1, r2, m3, b, d)
|
||||
#define s390_clib(c, r, i, m, b, d) S390_RIS(c, 0xecff, r, i, m, b, d)
|
||||
/* Emits through S390_RIE_3 with opcode 0xec7f. The parameter list names the
 * m and d operands that the expansion forwards, matching s390_cgij and s390_cij. */
#define s390_clij(c, r, i, m, d) S390_RIE_3(c, 0xec7f, r, i, m, d)
|
||||
#define s390_clr(c, r1, r2) S390_RR(c, 0x15, r1, r2)
|
||||
#define s390_clrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xecf7, r1, r2, m3, b, d)
|
||||
#define s390_clrj(c, r1, r2, m, v) S390_RIE_2(c, 0xec77, r1, r2, m, v)
|
||||
#define s390_cr(c, r1, r2) S390_RR(c, 0x19, r1, r2)
|
||||
#define s390_crb(c, r1, r2, m3, b, d) S390_RRS(c, 0xecf6, r1, r2, m3, b, d)
|
||||
#define s390_crj(c, r1, r2, m3, v) S390_RIE_2(c, 0xec76, r1, r2, m3, v)
|
||||
#define s390_crl(c, r, v) S390_RIL_1(c, 0xc6d, r, v)
|
||||
/* Emits through S390_RRF_2 with opcode 0xb972. Expands to a bare expression
 * (no trailing ';') so it can be used in if/else bodies like the other emitters.
 * NOTE(review): s390_cfdbr/s390_cgdbr hand S390_RRF_2 the mask as its second
 * operand; confirm that (r1, r2, m3) is the intended operand order here. */
#define s390_crt(c, r1, r2, m3) S390_RRF_2(c, 0xb972, r1, r2, m3)
|
||||
/* Emits through S390_RRF_2 with opcode 0xb960. Expands to a bare expression
 * (no trailing ';') so it can be used in if/else bodies like the other emitters.
 * NOTE(review): s390_cfdbr/s390_cgdbr hand S390_RRF_2 the mask as its second
 * operand; confirm that (r1, r2, m3) is the intended operand order here. */
#define s390_cgrt(c, r1, r2, m3) S390_RRF_2(c, 0xb960, r1, r2, m3)
|
||||
#define s390_cs(c, r1, r2, b, d) S390_RX(c, 0xba, r1, r2, b, d)
|
||||
#define s390_csg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb30, r1, r2, b, d)
|
||||
#define s390_csst(c, d1, b1, d2, b2, r) S390_SSF(c, 0xc82, b1, d1, b2, d2, r)
|
||||
#define s390_csy(c, r1, r2, b, d) S390_RSY_1(c, 0xeb14, r1, r2, b, d)
|
||||
#define s390_ddbr(c, r1, r2) S390_RRE(c, 0xb31d, r1, r2)
|
||||
#define s390_debr(c, r1, r2) S390_RRE(c, 0xb30d, r1, r2)
|
||||
#define s390_didbr(c, r1, r2, m, r3) S390_RRF_3(c, 0xb35b, r1, r2, m, r3)
|
||||
#define s390_dlgr(c, r1, r2) S390_RRE(c, 0xb987, r1, r2)
|
||||
#define s390_dlr(c, r1, r2) S390_RRE(c, 0xb997, r1, r2)
|
||||
#define s390_dr(c, r1, r2) S390_RR(c, 0x1d, r1, r2)
|
||||
#define s390_dsgfr(c, r1, r2) S390_RRE(c, 0xb91d, r1, r2)
|
||||
#define s390_dsgr(c, r1, r2) S390_RRE(c, 0xb90d, r1, r2)
|
||||
#define s390_ear(c, r1, r2) S390_RRE(c, 0xb24f, r1, r2)
|
||||
#define s390_ic(c, r, x, b, d) S390_RX(c, 0x43, r, x, b, d)
|
||||
#define s390_icm(c, r, m, b, d) S390_RX(c, 0xbf, r, m, b, d)
|
||||
#define s390_icmy(c, r, x, b, d) S390_RXY(c, 0xeb81, r, x, b, d)
|
||||
#define s390_icy(c, r, x, b, d) S390_RXY(c, 0xe373, r, x, b, d)
|
||||
#define s390_iihf(c, r, v) S390_RIL_1(c, 0xc08, r, v)
|
||||
#define s390_iihh(c, r, v) S390_RI(c, 0xa50, r, v)
|
||||
#define s390_iihl(c, r, v) S390_RI(c, 0xa51, r, v)
|
||||
#define s390_iilf(c, r, v) S390_RIL_1(c, 0xc09, r, v)
|
||||
#define s390_iilh(c, r, v) S390_RI(c, 0xa52, r, v)
|
||||
#define s390_iill(c, r, v) S390_RI(c, 0xa53, r, v)
|
||||
#define s390_j(c,d) s390_brc(c, S390_CC_UN, d)
|
||||
#define s390_jc(c, m, d) s390_brc(c, m, d)
|
||||
#define s390_jcl(c, m, d) s390_brcl(c, m, d)
|
||||
#define s390_jcy(c, d) s390_brc(c, S390_CC_CY, d)
|
||||
#define s390_je(c, d) s390_brc(c, S390_CC_EQ, d)
|
||||
#define s390_jeo(c, d) s390_brc(c, S390_CC_ZR|S390_CC_OV, d)
|
||||
#define s390_jh(c, d) s390_brc(c, S390_CC_GT, d)
|
||||
#define s390_jho(c, d) s390_brc(c, S390_CC_GT|S390_CC_OV, d)
|
||||
#define s390_jl(c, d) s390_brc(c, S390_CC_LT, d)
|
||||
#define s390_jlo(c, d) s390_brc(c, S390_CC_LT|S390_CC_OV, d)
|
||||
#define s390_jm(c, d) s390_brc(c, S390_CC_LT, d)
|
||||
#define s390_jnc(c, d) s390_brc(c, S390_CC_NC, d)
|
||||
#define s390_jne(c, d) s390_brc(c, S390_CC_NZ, d)
|
||||
#define s390_jnh(c, d) s390_brc(c, S390_CC_LE, d)
|
||||
#define s390_jnl(c, d) s390_brc(c, S390_CC_GE, d)
|
||||
#define s390_jnz(c, d) s390_brc(c, S390_CC_NZ, d)
|
||||
#define s390_jo(c, d) s390_brc(c, S390_CC_OV, d)
|
||||
#define s390_jno(c, d) s390_brc(c, S390_CC_NO, d)
|
||||
#define s390_jp(c, d) s390_brc(c, S390_CC_GT, d)
|
||||
#define s390_jz(c, d) s390_brc(c, S390_CC_ZR, d)
|
||||
#define s390_jg(c,d) s390_brcl(c, S390_CC_UN, d)
|
||||
#define s390_jgcy(c, d) s390_brcl(c, S390_CC_CY, d)
|
||||
#define s390_jge(c, d) s390_brcl(c, S390_CC_EQ, d)
|
||||
#define s390_jgeo(c, d) s390_brcl(c, S390_CC_ZR|S390_CC_OV, d)
|
||||
#define s390_jgh(c, d) s390_brcl(c, S390_CC_GT, d)
|
||||
#define s390_jgho(c, d) s390_brcl(c, S390_CC_GT|S390_CC_OV, d)
|
||||
#define s390_jgl(c, d) s390_brcl(c, S390_CC_LT, d)
|
||||
#define s390_jglo(c, d) s390_brcl(c, S390_CC_LT|S390_CC_OV, d)
|
||||
#define s390_jgm(c, d) s390_brcl(c, S390_CC_LT, d)
|
||||
#define s390_jgnc(c, d) s390_brcl(c, S390_CC_NC, d)
|
||||
#define s390_jgne(c, d) s390_brcl(c, S390_CC_NZ, d)
|
||||
#define s390_jgnh(c, d) s390_brcl(c, S390_CC_LE, d)
|
||||
#define s390_jgnl(c, d) s390_brcl(c, S390_CC_GE, d)
|
||||
#define s390_jgnz(c, d) s390_brcl(c, S390_CC_NZ, d)
|
||||
#define s390_jgo(c, d) s390_brcl(c, S390_CC_OV, d)
|
||||
#define s390_jgno(c, d) s390_brcl(c, S390_CC_NO, d)
|
||||
#define s390_jgp(c, d) s390_brcl(c, S390_CC_GT, d)
|
||||
#define s390_jgz(c, d) s390_brcl(c, S390_CC_ZR, d)
|
||||
#define s390_l(c, r, x, b, d) S390_RX(c, 0x58, r, x, b, d)
|
||||
#define s390_ly(c, r, x, b, d) S390_RXY(c, 0xe358, r, x, b, d)
|
||||
#define s390_la(c, r, x, b, d) S390_RX(c, 0x41, r, x, b, d)
|
||||
#define s390_lay(c, r, x, b, d) S390_RXY(c, 0xe371, r, x, b, d)
|
||||
#define s390_lam(c, r1, r2, b, d) S390_RS_1(c, 0x9a, r1, r2, b, d)
|
||||
#define s390_larl(c, r, o) S390_RIL_1(c, 0xc00, r, o)
|
||||
#define s390_lb(c, r, x, b, d) S390_RXY(c, 0xe376, r, x, b, d)
|
||||
#define s390_lbr(c, r1, r2) S390_RRE(c, 0xb926, r1, r2)
|
||||
#define s390_lcdbr(c, r1, r2) S390_RRE(c, 0xb313, r1, r2)
|
||||
#define s390_lcgr(c, r1, r2) S390_RRE(c, 0xb903, r1, r2)
|
||||
#define s390_lcr(c, r1, r2) S390_RR(c, 0x13, r1, r2)
|
||||
#define s390_ld(c, f, x, b, d) S390_RX(c, 0x68, f, x, b, d)
|
||||
#define s390_ldy(c, r, x, b, d) S390_RXY(c, 0xed65, r, x, b, d)
|
||||
#define s390_ldeb(c, r, x, b, d) S390_RXE(c, 0xed04, r, x, b, d)
|
||||
#define s390_ldebr(c, r1, r2) S390_RRE(c, 0xb304, r1, r2)
|
||||
#define s390_ldgr(c, r1, r2) S390_RRE(c, 0xb3c1, r1, r2)
|
||||
#define s390_ldr(c, r1, r2) S390_RR(c, 0x28, r1, r2)
|
||||
#define s390_le(c, f, x, b, d) S390_RX(c, 0x78, f, x, b, d)
|
||||
#define s390_ledbr(c, r1, r2) S390_RRE(c, 0xb344, r1, r2)
|
||||
#define s390_ler(c, r1, r2) S390_RR(c, 0x38, r1, r2)
|
||||
#define s390_ley(c, r, x, b, d) S390_RXY(c, 0xed64, r, x, b, d)
|
||||
#define s390_lg(c, r, x, b, d) S390_RXY(c, 0xe304, r, x, b, d)
|
||||
#define s390_lgb(c, r, x, b, d) S390_RXY(c, 0xe377, r, x, b, d)
|
||||
#define s390_lgbr(c, r1, r2) S390_RRE(c, 0xb906, r1, r2)
|
||||
#define s390_lgdr(c, r1, r2) S390_RRE(c, 0xb3cd, r1, r2)
|
||||
#define s390_lgf(c, r, x, b, d) S390_RXY(c, 0xe314, r, x, b, d)
|
||||
#define s390_lgfi(c, r, v) S390_RIL_1(c, 0xc01, r, v)
|
||||
#define s390_lgfrl(c, r1, d) S390_RIL_1(c, 0xc4c, r1, d)
|
||||
#define s390_lgfr(c, r1, r2) S390_RRE(c, 0xb914, r1, r2)
|
||||
#define s390_lgh(c, r, x, b, d) S390_RXY(c, 0xe315, r, x, b, d)
|
||||
#define s390_lghi(c, r, v) S390_RI(c, 0xa79, r, v)
|
||||
#define s390_lghr(c, r1, r2) S390_RRE(c, 0xb907, r1, r2)
|
||||
#define s390_lgr(c, r1, r2) S390_RRE(c, 0xb904, r1, r2)
|
||||
#define s390_lgrl(c, r1, d) S390_RIL_1(c, 0xc48, r1, d)
|
||||
#define s390_lh(c, r, x, b, d) S390_RX(c, 0x48, r, x, b, d)
|
||||
#define s390_lhr(c, r1, r2) S390_RRE(c, 0xb927, r1, r2)
|
||||
#define s390_lhg(c, r, x, b, d) S390_RXY(c, 0xe315, r, x, b, d)
|
||||
#define s390_lhi(c, r, v) S390_RI(c, 0xa78, r, v)
|
||||
#define s390_lhy(c, r, x, b, d) S390_RXY(c, 0xe378, r, x, b, d)
|
||||
#define s390_llcr(c, r1, r2) S390_RRE(c, 0xb994, r1, r2)
|
||||
#define s390_llgc(c, r, x, b, d) S390_RXY(c, 0xe390, r, x, b, d)
|
||||
#define s390_llgcr(c, r1, r2) S390_RRE(c, 0xb984, r1, r2)
|
||||
#define s390_llgf(c, r, x, b, d) S390_RXY(c, 0xe316, r, x, b, d)
|
||||
#define s390_llgfr(c, r1, r2) S390_RRE(c, 0xb916, r1, r2)
|
||||
#define s390_llgh(c, r, x, b, d) S390_RXY(c, 0xe391, r, x, b, d)
|
||||
#define s390_llghr(c, r1, r2) S390_RRE(c, 0xb985, r1, r2)
|
||||
#define s390_llhr(c, r1, r2) S390_RRE(c, 0xb995, r1, r2)
|
||||
#define s390_llihf(c, r, v) S390_RIL_1(c, 0xc0e, r, v)
|
||||
#define s390_llihh(c, r, v) S390_RI(c, 0xa5c, r, v)
|
||||
#define s390_llihl(c, r, v) S390_RI(c, 0xa5d, r, v)
|
||||
#define s390_llilf(c, r, v) S390_RIL_1(c, 0xc0f, r, v)
|
||||
#define s390_llilh(c, r, v) S390_RI(c, 0xa5e, r, v)
|
||||
#define s390_llill(c, r, v) S390_RI(c, 0xa5f, r, v)
|
||||
#define s390_lm(c, r1, r2, b, d) S390_RS_1(c, 0x98, r1, r2, b, d)
|
||||
#define s390_lmg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb04, r1, r2, b, d)
|
||||
#define s390_lndbr(c, r1, r2) S390_RRE(c, 0xb311, r1, r2)
|
||||
#define s390_lngr(c, r1, r2) S390_RRE(c, 0xb901, r1, r2)
|
||||
#define s390_lnr(c, r1, r2) S390_RR(c, 0x11, r1, r2)
|
||||
#define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2)
|
||||
#define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2)
|
||||
#define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2)
|
||||
#define s390_lr(c, r1, r2) S390_RR(c, 0x18, r1, r2)
|
||||
#define s390_lrl(c, r1, d) S390_RIL_1(c, 0xc4d, r1, d)
|
||||
#define s390_ltgfr(c, r1, r2) S390_RRE(c, 0xb912, r1, r2)
|
||||
#define s390_ltgr(c, r1, r2) S390_RRE(c, 0xb902, r1, r2)
|
||||
#define s390_ltr(c, r1, r2) S390_RR(c, 0x12, r1, r2)
|
||||
#define s390_lzdr(c, r) S390_RRE(c, 0xb375, r, 0)
|
||||
#define s390_lzer(c, r) S390_RRE(c, 0xb374, r, 0)
|
||||
#define s390_m(c, r, x, b, d) S390_RX(c, 0x5c, r, x, b, d)
|
||||
#define s390_mdbr(c, r1, r2) S390_RRE(c, 0xb31c, r1, r2)
|
||||
#define s390_meebr(c, r1, r2) S390_RRE(c, 0xb317, r1, r2)
|
||||
#define s390_mfy(c, r, x, b, d) S390_RXY(c, 0xe35c, r, x, b, d)
|
||||
#define s390_mlgr(c, r1, r2) S390_RRE(c, 0xb986, r1, r2)
|
||||
#define s390_mlr(c, r1, r2) S390_RRE(c, 0xb996, r1, r2)
|
||||
#define s390_mr(c, r1, r2) S390_RR(c, 0x1c, r1, r2)
|
||||
#define s390_ms(c, r, x, b, d) S390_RX(c, 0x71, r, x, b, d)
|
||||
#define s390_msi(c, r, v) S390_RIL_1(c, 0xc21, r, v)
|
||||
#define s390_msgfr(c, r1, r2) S390_RRE(c, 0xb91c, r1, r2)
|
||||
#define s390_msgi(c, r, v) S390_RIL_1(c, 0xc20, r, v)
|
||||
#define s390_msgr(c, r1, r2) S390_RRE(c, 0xb90c, r1, r2)
|
||||
#define s390_msr(c, r1, r2) S390_RRE(c, 0xb252, r1, r2)
|
||||
#define s390_mvc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd2, l, b1, d1, b2, d2)
|
||||
#define s390_mvcl(c, r1, r2) S390_RR(c, 0x0e, r1, r2)
|
||||
#define s390_mvcle(c, r1, r3, d2, b2) S390_RS_1(c, 0xa8, r1, r3, d2, b2)
|
||||
#define s390_n(c, r, x, b, d) S390_RX(c, 0x54, r, x, b, d)
|
||||
#define s390_nc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd4, l, b1, d1, b2, d2)
|
||||
#define s390_ng(c, r, x, b, d) S390_RXY(c, 0xe380, r, x, b, d)
|
||||
#define s390_ngr(c, r1, r2) S390_RRE(c, 0xb980, r1, r2)
|
||||
#define s390_ngrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e4, r1, r2, r3)
|
||||
#define s390_ni(c, b, d, v) S390_SI(c, 0x94, b, d, v)
|
||||
#define s390_nihf(c, r, v) S390_RIL_1(c, 0xc0a, r, v)
|
||||
#define s390_nihh(c, r, v) S390_RI(c, 0xa54, r, v)
|
||||
#define s390_nihl(c, r, v) S390_RI(c, 0xa55, r, v)
|
||||
#define s390_nilf(c, r, v) S390_RIL_1(c, 0xc0b, r, v)
|
||||
#define s390_nilh(c, r, v) S390_RI(c, 0xa56, r, v)
|
||||
#define s390_nill(c, r, v) S390_RI(c, 0xa57, r, v)
|
||||
#define s390_niy(c, b, d, v) S390_SIY(c, 0xeb54, b, d, v)
|
||||
#define s390_nop(c) S390_RR(c, 0x07, 0x0, 0)
|
||||
#define s390_nr(c, r1, r2) S390_RR(c, 0x14, r1, r2)
|
||||
/* Emits through S390_RRF_1 with opcode 0xb9f4. Takes three register operands,
 * the same shape as s390_ngrk, so that S390_RRF_1 receives all of its operands. */
#define s390_nrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9f4, r1, r2, r3)
|
||||
/* Emits through S390_RXY with opcode 0xe354, forwarding r, x, b and d in order
 * (same operand shape as s390_ng and s390_xy). */
#define s390_ny(c, r, x, b, d) S390_RXY(c, 0xe354, r, x, b, d)
|
||||
#define s390_o(c, r, x, b, d) S390_RX(c, 0x56, r, x, b, d)
|
||||
#define s390_oihf(c, r, v) S390_RIL_1(c, 0xc0c, r, v)
|
||||
#define s390_oihh(c, r, v) S390_RI(c, 0xa58, r, v)
|
||||
#define s390_oihl(c, r, v) S390_RI(c, 0xa59, r, v)
|
||||
#define s390_oilf(c, r, v) S390_RIL_1(c, 0xc0d, r, v)
|
||||
#define s390_oilh(c, r, v) S390_RI(c, 0xa5a, r, v)
|
||||
/* Emits through S390_RI with opcode 0xa5b, forwarding r and v
 * (same shape as s390_oilh and the other S390_RI emitters). */
#define s390_oill(c, r, v) S390_RI(c, 0xa5b, r, v)
|
||||
/* Emits through S390_SIY with opcode 0xeb56, forwarding b, d and v
 * (same shape as s390_niy). */
#define s390_oiy(c, b, d, v) S390_SIY(c, 0xeb56, b, d, v)
|
||||
#define s390_og(c, r, x, b, d) S390_RXY(c, 0xe381, r, x, b, d)
|
||||
#define s390_ogr(c, r1, r2) S390_RRE(c, 0xb981, r1, r2)
|
||||
#define s390_or(c, r1, r2) S390_RR(c, 0x16, r1, r2)
|
||||
#define s390_s(c, r, x, b, d) S390_RX(c, 0x5b, r, x, b, d)
|
||||
#define s390_sdb(c, r, x, b, d) S390_RXE(c, 0xed1b, r, x, b, d)
|
||||
#define s390_sdbr(c, r1, r2) S390_RRE(c, 0xb31b, r1, r2)
|
||||
#define s390_sebr(c, r1, r2) S390_RRE(c, 0xb30b, r1, r2)
|
||||
#define s390_sg(c, r, x, b, d) S390_RXY(c, 0xe309, r, x, b, d)
|
||||
#define s390_sgf(c, r, x, b, d) S390_RXY(c, 0xe319, r, x, b, d)
|
||||
#define s390_sgr(c, r1, r2) S390_RRE(c, 0xb909, r1, r2)
|
||||
#define s390_sl(c, r, x, b, d) S390_RX(c, 0x5f, r, x, b, d)
|
||||
#define s390_sla(c, r, b, d) S390_RS_3(c, 0x8b, r, b, d)
|
||||
#define s390_slag(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0b, r1, r2, b, d)
|
||||
#define s390_slbg(c, r, x, b, d) S390_RXY(c, 0xe389, r, x, b, d)
|
||||
#define s390_slbgr(c, r1, r2) S390_RRE(c, 0xb989, r1, r2)
|
||||
#define s390_slbr(c, r1, r2) S390_RRE(c, 0xb999, r1, r2)
|
||||
#define s390_slda(c, r, b, d) S390_RS_3(c, 0x8f, r, b, d)
|
||||
#define s390_sldl(c, r, b, d) S390_RS_3(c, 0x8d, r, b, d)
|
||||
#define s390_slfi(c, r, v) S390_RIL_1(c, 0xc25, r, v)
|
||||
#define s390_slg(c, r, x, b, d) S390_RXY(c, 0xe30b, r, x, b, d)
|
||||
#define s390_slgf(c, r, x, b, d) S390_RXY(c, 0xe31b, r, x, b, d)
|
||||
#define s390_slgfr(c, r1, r2) S390_RRE(c, 0xb91b, r1, r2)
|
||||
#define s390_slgfi(c, r, v) S390_RIL_1(c, 0xc24, r, v)
|
||||
#define s390_slgr(c, r1, r2) S390_RRE(c, 0xb90b, r1, r2)
|
||||
#define s390_sll(c, r, b, d) S390_RS_3(c, 0x89, r, b, d)
|
||||
#define s390_sllg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0d, r1, r2, b, d)
|
||||
#define s390_slr(c, r1, r2) S390_RR(c, 0x1f, r1, r2)
|
||||
#define s390_sqdbr(c, r1, r2) S390_RRE(c, 0xb315, r1, r2)
|
||||
#define s390_sqebr(c, r1, r2) S390_RRE(c, 0xb314, r1, r2)
|
||||
#define s390_sra(c, r, b, d) S390_RS_3(c, 0x8a, r, b, d)
|
||||
#define s390_srag(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0a, r1, r2, b, d)
|
||||
#define s390_sr(c, r1, r2) S390_RR(c, 0x1b, r1, r2)
|
||||
#define s390_srda(c, r, b, d) S390_RS_3(c, 0x8e, r, b, d)
|
||||
#define s390_srdl(c, r, b, d) S390_RS_3(c, 0x8c, r, b, d)
|
||||
#define s390_srl(c, r, b, d) S390_RS_3(c, 0x88, r, b, d)
|
||||
#define s390_srlg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb0c, r1, r2, b, d)
|
||||
#define s390_st(c, r, x, b, d) S390_RX(c, 0x50, r, x, b, d)
|
||||
#define s390_stam(c, r1, r2, b, d) S390_RS_1(c, 0x9b, r1, r2, b, d)
|
||||
#define s390_stc(c, r, x, b, d) S390_RX(c, 0x42, r, x, b, d)
|
||||
#define s390_stcm(c, r, m, b, d) S390_RX(c, 0xbe, r, m, b, d)
|
||||
#define s390_stcmy(c, r, x, b, d) S390_RXY(c, 0xeb2d, r, x, b, d)
|
||||
#define s390_stcy(c, r, x, b, d) S390_RXY(c, 0xe372, r, x, b, d)
|
||||
#define s390_std(c, f, x, b, d) S390_RX(c, 0x60, f, x, b, d)
|
||||
#define s390_stdy(c, r, x, b, d) S390_RXY(c, 0xed67, r, x, b, d)
|
||||
#define s390_ste(c, f, x, b, d) S390_RX(c, 0x70, f, x, b, d)
|
||||
#define s390_stey(c, r, x, b, d) S390_RXY(c, 0xed66, r, x, b, d)
|
||||
#define s390_stfpc(c, b, d) S390_S(c, 0xb29c, b, d)
|
||||
#define s390_stg(c, r, x, b, d) S390_RXY(c, 0xe324, r, x, b, d)
|
||||
#define s390_sth(c, r, x, b, d) S390_RX(c, 0x40, r, x, b, d)
|
||||
#define s390_sthy(c, r, x, b, d) S390_RXY(c, 0xe370, r, x, b, d)
|
||||
#define s390_stm(c, r1, r2, b, d) S390_RS_1(c, 0x90, r1, r2, b, d)
|
||||
#define s390_stmg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb24, r1, r2, b, d)
|
||||
#define s390_sty(c, r, x, b, d) S390_RXY(c, 0xe350, r, x, b, d)
|
||||
#define s390_tcdb(c, r, x, b, d) S390_RXE(c, 0xed11, r, x, b, d)
|
||||
#define s390_tceb(c, r, x, b, d) S390_RXE(c, 0xed10, r, x, b, d)
|
||||
#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d)
|
||||
#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v)
|
||||
#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v)
|
||||
#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d)
|
||||
#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2)
|
||||
#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2)
|
||||
#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d)
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,3 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.deps
|
@ -0,0 +1,7 @@
|
||||
|
||||
AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir)
|
||||
|
||||
noinst_LTLIBRARIES = libmonoarch-sparc.la
|
||||
|
||||
libmonoarch_sparc_la_SOURCES = tramp.c sparc-codegen.h
|
||||
|
@ -0,0 +1,955 @@
|
||||
#ifndef __SPARC_CODEGEN_H__
|
||||
#define __SPARC_CODEGEN_H__
|
||||
|
||||
/* Define SPARCV9 when SIZEOF_VOID_P is 8; otherwise leave it undefined. */
#if SIZEOF_VOID_P == 8
#  define SPARCV9 1
#endif
|
||||
|
||||
/* Register numbers. Integer registers are listed by raw number (r0-r31) and
 * again under their g/o/l/i alias names; f0-f31 reuse the same 0-31 range. */
typedef enum {
    /* integer registers by number */
    sparc_r0  = 0,  sparc_r1  = 1,  sparc_r2  = 2,  sparc_r3  = 3,
    sparc_r4  = 4,  sparc_r5  = 5,  sparc_r6  = 6,  sparc_r7  = 7,
    sparc_r8  = 8,  sparc_r9  = 9,  sparc_r10 = 10, sparc_r11 = 11,
    sparc_r12 = 12, sparc_r13 = 13, sparc_r14 = 14, sparc_r15 = 15,
    sparc_r16 = 16, sparc_r17 = 17, sparc_r18 = 18, sparc_r19 = 19,
    sparc_r20 = 20, sparc_r21 = 21, sparc_r22 = 22, sparc_r23 = 23,
    sparc_r24 = 24, sparc_r25 = 25, sparc_r26 = 26, sparc_r27 = 27,
    sparc_r28 = 28, sparc_r29 = 29, sparc_r30 = 30, sparc_r31 = 31,

    /* aliases */
    /* global registers */
    sparc_g0 = 0, sparc_zero = 0,
    sparc_g1 = 1, sparc_g2 = 2, sparc_g3 = 3, sparc_g4 = 4,
    sparc_g5 = 5, sparc_g6 = 6, sparc_g7 = 7,
    /* out registers */
    sparc_o0 = 8,  sparc_o1 = 9,  sparc_o2 = 10, sparc_o3 = 11,
    sparc_o4 = 12, sparc_o5 = 13,
    sparc_o6 = 14, sparc_sp = 14,
    sparc_o7 = 15, sparc_callsite = 15,
    /* local registers */
    sparc_l0 = 16, sparc_l1 = 17, sparc_l2 = 18, sparc_l3 = 19,
    sparc_l4 = 20, sparc_l5 = 21, sparc_l6 = 22, sparc_l7 = 23,
    /* in registers */
    sparc_i0 = 24, sparc_i1 = 25, sparc_i2 = 26, sparc_i3 = 27,
    sparc_i4 = 28, sparc_i5 = 29,
    sparc_i6 = 30, sparc_fp = 30,
    sparc_i7 = 31,

    /* count of integer registers */
    sparc_nreg = 32,

    /* floating point registers */
    sparc_f0  = 0,  sparc_f1  = 1,  sparc_f2  = 2,  sparc_f3  = 3,
    sparc_f4  = 4,  sparc_f5  = 5,  sparc_f6  = 6,  sparc_f7  = 7,
    sparc_f8  = 8,  sparc_f9  = 9,  sparc_f10 = 10, sparc_f11 = 11,
    sparc_f12 = 12, sparc_f13 = 13, sparc_f14 = 14, sparc_f15 = 15,
    sparc_f16 = 16, sparc_f17 = 17, sparc_f18 = 18, sparc_f19 = 19,
    sparc_f20 = 20, sparc_f21 = 21, sparc_f22 = 22, sparc_f23 = 23,
    sparc_f24 = 24, sparc_f25 = 25, sparc_f26 = 26, sparc_f27 = 27,
    sparc_f28 = 28, sparc_f29 = 29, sparc_f30 = 30, sparc_f31 = 31
} SparcRegister;
|
||||
|
||||
/* Integer branch condition codes (values 0-15), with alias names where the
 * original header provides them. */
typedef enum {
    sparc_bn   = 0,  sparc_bnever    = 0,
    sparc_be   = 1,
    sparc_ble  = 2,
    sparc_bl   = 3,
    sparc_bleu = 4,
    sparc_bcs  = 5,  sparc_blu       = 5,
    sparc_bneg = 6,
    sparc_bvs  = 7,  sparc_boverflow = 7,
    sparc_ba   = 8,  sparc_balways   = 8,
    sparc_bne  = 9,
    sparc_bg   = 10,
    sparc_bge  = 11,
    sparc_bgu  = 12,
    sparc_bcc  = 13, sparc_beu       = 13,
    sparc_bpos = 14,
    sparc_bvc  = 15
} SparcCond;
|
||||
|
||||
/* Floating-point condition values: the four fcmp results first, then the
 * branch-op codes listed in ascending numeric order. */
typedef enum {
    /* with fcmp */
    sparc_feq       = 0,
    sparc_fl        = 1,
    sparc_fg        = 2,
    sparc_unordered = 3,
    /* branch ops */
    sparc_fbn   = 0,
    sparc_fbne  = 1,
    sparc_fblg  = 2,
    sparc_fbul  = 3,
    sparc_fbl   = 4,
    sparc_fbug  = 5,
    sparc_fbg   = 6,
    sparc_fbu   = 7,
    sparc_fba   = 8,
    sparc_fbe   = 9,
    sparc_fbue  = 10,
    sparc_fbge  = 11,
    sparc_fbuge = 12,
    sparc_fble  = 13,
    sparc_fbule = 14,
    sparc_fbo   = 15
} SparcFCond;
|
||||
|
||||
/* Condition-code selectors: fcc0-fcc3 occupy 0-3, icc is 4 and xcc is 6. */
typedef enum {
    sparc_fcc0 = 0,
    sparc_fcc1 = 1,
    sparc_fcc2 = 2,
    sparc_fcc3 = 3,
    sparc_icc  = 4,
    sparc_xcc  = 6
} SparcCC;
|
||||
|
||||
/* Two-bit condition-code selectors: icc is 0, xcc is 2. */
typedef enum {
    sparc_icc_short = 0,
    sparc_xcc_short = 2
} SparcCCShort;
|
||||
|
||||
/* Floating-point operation selector values (the opf field of format 3c).
 * In every entry the low nibble follows the source precision: 1/5/9/D for
 * single, 2/6/A/E for double, 3/7/B/F for quad. sparc_fdmulq_val takes a
 * double source, so it is 110 (0x6e), not 111 (0x6f). */
typedef enum {
    /* fop1 format */
    /* integer -> float conversions */
    sparc_fitos_val = 196, sparc_fitod_val = 200, sparc_fitoq_val = 204,
    sparc_fxtos_val = 132, sparc_fxtod_val = 136, sparc_fxtoq_val = 140,
    /* float -> integer conversions */
    sparc_fstoi_val = 209, sparc_fdtoi_val = 210, sparc_fqtoi_val = 211,
    /* float -> float conversions */
    sparc_fstod_val = 201, sparc_fstoq_val = 205,
    sparc_fdtos_val = 198, sparc_fdtoq_val = 206,
    sparc_fqtos_val = 199, sparc_fqtod_val = 203,
    /* move / negate / absolute value */
    sparc_fmovs_val = 1,   sparc_fmovd_val = 2,
    sparc_fnegs_val = 5,   sparc_fnegd_val = 6,
    sparc_fabss_val = 9,   sparc_fabsd_val = 10,
    /* arithmetic */
    sparc_fsqrts_val = 41, sparc_fsqrtd_val = 42, sparc_fsqrtq_val = 43,
    sparc_fadds_val  = 65, sparc_faddd_val  = 66, sparc_faddq_val  = 67,
    sparc_fsubs_val  = 69, sparc_fsubd_val  = 70, sparc_fsubq_val  = 71,
    sparc_fmuls_val  = 73, sparc_fmuld_val  = 74, sparc_fmulq_val  = 75,
    sparc_fsmuld_val = 105,
    sparc_fdmulq_val = 110,
    sparc_fdivs_val  = 77, sparc_fdivd_val  = 78, sparc_fdivq_val  = 79,
    /* fop2 format */
    sparc_fcmps_val  = 81, sparc_fcmpd_val  = 82, sparc_fcmpq_val  = 83,
    sparc_fcmpes_val = 85, sparc_fcmped_val = 86, sparc_fcmpeq_val = 87
} SparcFOp;
|
||||
|
||||
/* Bit flags for membar. sparc_membar_all (0x4f) is the four ordering bits
 * (0x1|0x2|0x4|0x8) combined with sparc_membar_sync (0x40). */
typedef enum {
    /* ordering bits */
    sparc_membar_load_load   = 0x1,
    sparc_membar_store_load  = 0x2,
    sparc_membar_load_store  = 0x4,
    sparc_membar_store_store = 0x8,

    /* additional bits */
    sparc_membar_lookaside   = 0x10,
    sparc_membar_memissue    = 0x20,
    sparc_membar_sync        = 0x40,

    sparc_membar_all         = 0x4f
} SparcMembarFlags;
|
||||
|
||||
/* Bit-field overlay for one instruction word (2 + 30 bits), filled in by
 * sparc_encode_call. */
typedef struct {
    unsigned int op   : 2;   /* always 1 */
    unsigned int disp : 30;
} sparc_format1;
|
||||
|
||||
/* Bit-field overlay for one instruction word (2 + 5 + 3 + 22 bits), filled in
 * by sparc_encode_format2a. */
typedef struct {
    unsigned int op   : 2;   /* always 0 */
    unsigned int rd   : 5;
    unsigned int op2  : 3;
    unsigned int disp : 22;
} sparc_format2a;
|
||||
|
||||
/* Bit-field overlay for one instruction word (2 + 1 + 4 + 3 + 22 bits),
 * filled in by sparc_encode_format2b. */
typedef struct {
    unsigned int op   : 2;   /* always 0 */
    unsigned int a    : 1;
    unsigned int cond : 4;
    unsigned int op2  : 3;
    unsigned int disp : 22;
} sparc_format2b;
|
||||
|
||||
/* Bit-field overlay for one instruction word (2 + 1 + 4 + 3 + 2 + 1 + 19
 * bits), filled in by sparc_encode_format2c. */
typedef struct {
    unsigned int op   : 2;   /* always 0 */
    unsigned int a    : 1;
    unsigned int cond : 4;
    unsigned int op2  : 3;
    unsigned int cc01 : 2;
    unsigned int p    : 1;
    unsigned int d19  : 19;
} sparc_format2c;
|
||||
|
||||
/* Bit-field overlay for one instruction word, filled in by
 * sparc_encode_format2d. The 16-bit displacement is split into d16hi
 * (2 bits) and d16lo (14 bits). */
typedef struct {
    unsigned int op    : 2;  /* always 0 */
    unsigned int a     : 1;
    unsigned int res   : 1;
    unsigned int rcond : 3;
    unsigned int op2   : 3;
    unsigned int d16hi : 2;
    unsigned int p     : 1;
    unsigned int rs1   : 5;
    unsigned int d16lo : 14;
} sparc_format2d;
|
||||
|
||||
/* Bit-field overlay for one instruction word (2 + 5 + 6 + 5 + 1 + 8 + 5
 * bits), filled in by sparc_encode_format3a. */
typedef struct {
    unsigned int op  : 2;    /* 2 or 3 */
    unsigned int rd  : 5;
    unsigned int op3 : 6;
    unsigned int rs1 : 5;
    unsigned int i   : 1;
    unsigned int asi : 8;
    unsigned int rs2 : 5;
} sparc_format3a;
|
||||
|
||||
/* Variant of sparc_format3a with a 1-bit x field ahead of a 7-bit asi field;
 * filled in by sparc_encode_format3ax. */
typedef struct {
    unsigned int op  : 2;    /* 2 or 3 */
    unsigned int rd  : 5;
    unsigned int op3 : 6;
    unsigned int rs1 : 5;
    unsigned int i   : 1;
    unsigned int x   : 1;
    unsigned int asi : 7;
    unsigned int rs2 : 5;
} sparc_format3ax;
|
||||
|
||||
/* Bit-field overlay for one instruction word carrying a 13-bit immediate;
 * filled in by sparc_encode_format3b. */
typedef struct {
    unsigned int op  : 2;    /* 2 or 3 */
    unsigned int rd  : 5;
    unsigned int op3 : 6;
    unsigned int rs1 : 5;
    unsigned int i   : 1;
    unsigned int imm : 13;
} sparc_format3b;
|
||||
|
||||
/* Variant of sparc_format3b with a 1-bit x field ahead of a 12-bit
 * immediate; filled in by sparc_encode_format3bx. */
typedef struct {
    unsigned int op  : 2;    /* 2 or 3 */
    unsigned int rd  : 5;
    unsigned int op3 : 6;
    unsigned int rs1 : 5;
    unsigned int i   : 1;
    unsigned int x   : 1;
    unsigned int imm : 12;
} sparc_format3bx;
|
||||
|
||||
/* Bit-field overlay for one instruction word with a 9-bit opf field; filled
 * in by sparc_encode_format3c. */
typedef struct {
    unsigned int op  : 2;    /* 2 or 3 */
    unsigned int rd  : 5;
    unsigned int op3 : 6;
    unsigned int rs1 : 5;
    unsigned int opf : 9;
    unsigned int rs2 : 5;
} sparc_format3c;
|
||||
|
||||
/* Bit-field overlay for one instruction word (register form with a 2-bit
 * cc01 field); filled in by sparc_encode_format4a. */
typedef struct {
    unsigned int op   : 2;
    unsigned int rd   : 5;
    unsigned int op3  : 6;
    unsigned int rs1  : 5;
    unsigned int i    : 1;
    unsigned int cc01 : 2;
    unsigned int res  : 6;
    unsigned int rs2  : 5;
} sparc_format4a;
|
||||
|
||||
/* Bit-field overlay for one instruction word (immediate form with an 11-bit
 * simm field); filled in by sparc_encode_format4b. */
typedef struct {
    unsigned int op   : 2;
    unsigned int rd   : 5;
    unsigned int op3  : 6;
    unsigned int rs1  : 5;
    unsigned int i    : 1;
    unsigned int cc01 : 2;
    unsigned int simm : 11;
} sparc_format4b;
|
||||
|
||||
/* Bit-field overlay for one instruction word (register form carrying cc2,
 * cond and cc01 fields); filled in by sparc_encode_format4c. */
typedef struct {
    unsigned int op   : 2;
    unsigned int rd   : 5;
    unsigned int op3  : 6;
    unsigned int cc2  : 1;
    unsigned int cond : 4;
    unsigned int i    : 1;
    unsigned int cc01 : 2;
    unsigned int res  : 6;
    unsigned int rs2  : 5;
} sparc_format4c;
|
||||
|
||||
/* Bit-field overlay for one instruction word (immediate form carrying cc2,
 * cond, cc01 and an 11-bit simm); filled in by sparc_encode_format4d. */
typedef struct {
    unsigned int op   : 2;
    unsigned int rd   : 5;
    unsigned int op3  : 6;
    unsigned int cc2  : 1;
    unsigned int cond : 4;
    unsigned int i    : 1;
    unsigned int cc01 : 2;
    unsigned int simm : 11;
} sparc_format4d;
|
||||
|
||||
/* for use in logical ops, use 0 to not set flags */
|
||||
#define sparc_cc 16
|
||||
|
||||
/* Range checks: non-zero when val, converted to glong, lies within
 * [-(1 << (N-1)), (1 << (N-1)) - 1] for the N in the macro name.
 * val is parenthesised before the cast so that a compound argument
 * (e.g. `a ? b : c` or `x & mask`) is converted as a whole. */
#define sparc_is_imm13(val) ((glong)(val) >= (glong)-(1<<12) && (glong)(val) <= (glong)((1<<12)-1))
#define sparc_is_imm22(val) ((glong)(val) >= (glong)-(1<<21) && (glong)(val) <= (glong)((1<<21)-1))
#define sparc_is_imm16(val) ((glong)(val) >= (glong)-(1<<15) && (glong)(val) <= (glong)((1<<15)-1))
#define sparc_is_imm19(val) ((glong)(val) >= (glong)-(1<<18) && (glong)(val) <= (glong)((1<<18)-1))
#define sparc_is_imm30(val) ((glong)(val) >= (glong)-(1<<29) && (glong)(val) <= (glong)((1<<29)-1))
|
||||
|
||||
/* disassembly */
|
||||
/* Field extractors, listed from the highest shift to the lowest. */
#define sparc_inst_op(inst)    ((inst) >> 30)              /* everything above bit 29 */
#define sparc_inst_rd(inst)    (((inst) >> 25) & 0x1f)     /* 5 bits from bit 25 */
#define sparc_inst_op2(inst)   (((inst) >> 22) & 0x7)      /* 3 bits from bit 22 */
#define sparc_inst_op3(inst)   (((inst) >> 19) & 0x3f)     /* 6 bits from bit 19 */
#define sparc_inst_rs1(inst)   (((inst) >> 14) & 0x1f)     /* 5 bits from bit 14 */
#define sparc_inst_i(inst)     (((inst) >> 13) & 0x1)      /* bit 13 */
#define sparc_inst_imm(inst)   (((inst) >> 13) & 0x1)      /* bit 13 again: same result as sparc_inst_i */
#define sparc_inst_imm13(inst) ((inst) & 0x1fff)           /* low 13 bits */
#define sparc_inst_rs2(inst)   ((inst) & 0x1f)             /* low 5 bits */
|
||||
|
||||
/* Writes one sparc_format1 word at ins (op = 1, disp = addr >> 2 after addr
 * is converted to unsigned int) and advances ins by one unsigned int. */
#define sparc_encode_call(ins,addr)                         \
    do {                                                    \
        sparc_format1 *__f = (sparc_format1*)(ins);         \
        __f->op   = 1;                                      \
        __f->disp = ((unsigned int)(addr) >> 2);            \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format2a word at ins: op = 0, rd = dest, op2 = oper and
 * disp = low 22 bits of val; then advances ins by one unsigned int. */
#define sparc_encode_format2a(ins,val,oper,dest)            \
    do {                                                    \
        sparc_format2a *__f = (sparc_format2a*)(ins);       \
        __f->op   = 0;                                      \
        __f->rd   = (dest);                                 \
        __f->op2  = (oper);                                 \
        __f->disp = (val) & 0x3fffff;                       \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format2b word at ins: op = 0, a = aval, cond = bcond,
 * op2 = oper, disp = disp22; then advances ins by one unsigned int. */
#define sparc_encode_format2b(ins,aval,bcond,oper,disp22)   \
    do {                                                    \
        sparc_format2b *__f = (sparc_format2b*)(ins);       \
        __f->op   = 0;                                      \
        __f->a    = (aval);                                 \
        __f->cond = (bcond);                                \
        __f->op2  = (oper);                                 \
        __f->disp = (disp22);                               \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format2c word at ins: op = 0, a = aval, cond = bcond,
 * op2 = oper, cc01 = xcc, p = predict, d19 = disp19; then advances ins by
 * one unsigned int. */
#define sparc_encode_format2c(ins,aval,bcond,oper,xcc,predict,disp19)   \
    do {                                                    \
        sparc_format2c *__f = (sparc_format2c*)(ins);       \
        __f->op   = 0;                                      \
        __f->a    = (aval);                                 \
        __f->cond = (bcond);                                \
        __f->op2  = (oper);                                 \
        __f->cc01 = (xcc);                                  \
        __f->p    = (predict);                              \
        __f->d19  = (disp19);                               \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format2d word at ins and advances ins by one unsigned int.
 * disp16 is split: bits above 13 go to d16hi, the low 14 bits to d16lo.
 * The res field is always written as 0. */
#define sparc_encode_format2d(ins,aval,bcond,oper,predict,r1,disp16)    \
    do {                                                    \
        sparc_format2d *__f = (sparc_format2d*)(ins);       \
        __f->op    = 0;                                     \
        __f->a     = (aval);                                \
        __f->res   = 0;                                     \
        __f->rcond = (bcond);                               \
        __f->op2   = (oper);                                \
        __f->d16hi = ((disp16) >> 14);                      \
        __f->p     = (predict);                             \
        __f->rs1   = (r1);                                  \
        __f->d16lo = ((disp16) & 0x3fff);                   \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format3a word at ins (register form: i is always 0) and
 * advances ins by one unsigned int. Fields are assigned in declaration order. */
#define sparc_encode_format3a(ins,opval,asival,r1,r2,oper,dest) \
    do {                                                    \
        sparc_format3a *__f = (sparc_format3a*)(ins);       \
        __f->op  = (opval);                                 \
        __f->rd  = (dest);                                  \
        __f->op3 = (oper);                                  \
        __f->rs1 = (r1);                                    \
        __f->i   = 0;                                       \
        __f->asi = (asival);                                \
        __f->rs2 = (r2);                                    \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format3ax word at ins (i is always 0, x is always 1) and
 * advances ins by one unsigned int. Fields are assigned in declaration order. */
#define sparc_encode_format3ax(ins,opval,asival,r1,r2,oper,dest) \
    do {                                                    \
        sparc_format3ax *__f = (sparc_format3ax*)(ins);     \
        __f->op  = (opval);                                 \
        __f->rd  = (dest);                                  \
        __f->op3 = (oper);                                  \
        __f->rs1 = (r1);                                    \
        __f->i   = 0;                                       \
        __f->x   = 1;                                       \
        __f->asi = (asival);                                \
        __f->rs2 = (r2);                                    \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format3b word at ins (immediate form: i is always 1,
 * imm = val) and advances ins by one unsigned int. Fields are assigned in
 * declaration order. */
#define sparc_encode_format3b(ins,opval,r1,val,oper,dest)   \
    do {                                                    \
        sparc_format3b *__f = (sparc_format3b*)(ins);       \
        __f->op  = (opval);                                 \
        __f->rd  = (dest);                                  \
        __f->op3 = (oper);                                  \
        __f->rs1 = (r1);                                    \
        __f->i   = 1;                                       \
        __f->imm = (val);                                   \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format3bx word at ins (i and x are always 1, imm = val)
 * and advances ins by one unsigned int. Fields are assigned in declaration
 * order. */
#define sparc_encode_format3bx(ins,opval,r1,val,oper,dest)  \
    do {                                                    \
        sparc_format3bx *__f = (sparc_format3bx*)(ins);     \
        __f->op  = (opval);                                 \
        __f->rd  = (dest);                                  \
        __f->op3 = (oper);                                  \
        __f->rs1 = (r1);                                    \
        __f->i   = 1;                                       \
        __f->x   = 1;                                       \
        __f->imm = (val);                                   \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format3c word at ins (opf = opfval) and advances ins by
 * one unsigned int. Fields are assigned in declaration order. */
#define sparc_encode_format3c(ins,opval,opfval,r1,oper,r2,dest) \
    do {                                                    \
        sparc_format3c *__f = (sparc_format3c*)(ins);       \
        __f->op  = (opval);                                 \
        __f->rd  = (dest);                                  \
        __f->op3 = (oper);                                  \
        __f->rs1 = (r1);                                    \
        __f->opf = (opfval);                                \
        __f->rs2 = (r2);                                    \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format4a word at ins (register form: i = 0, res = 0,
 * cc01 = low two bits of cc) and advances ins by one unsigned int. */
#define sparc_encode_format4a(ins,opval,oper,cc,r1,r2,dest) \
    do {                                                    \
        sparc_format4a *__f = (sparc_format4a*)(ins);       \
        __f->op   = (opval);                                \
        __f->rd   = (dest);                                 \
        __f->op3  = (oper);                                 \
        __f->rs1  = (r1);                                   \
        __f->i    = 0;                                      \
        __f->cc01 = (cc) & 0x3;                             \
        __f->res  = 0;                                      \
        __f->rs2  = (r2);                                   \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format4b word at ins (immediate form: i = 1, simm = imm,
 * cc01 = low two bits of cc) and advances ins by one unsigned int. */
#define sparc_encode_format4b(ins,opval,oper,cc,r1,imm,dest) \
    do {                                                    \
        sparc_format4b *__f = (sparc_format4b*)(ins);       \
        __f->op   = (opval);                                \
        __f->rd   = (dest);                                 \
        __f->op3  = (oper);                                 \
        __f->rs1  = (r1);                                   \
        __f->i    = 1;                                      \
        __f->cc01 = (cc) & 0x3;                             \
        __f->simm = (imm);                                  \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/* Writes one sparc_format4c word at ins (register form: i = 0, res = 0) and
 * advances ins by one unsigned int.
 * The condition-code selector is split: bit 2 of xcc goes to cc2 and the low
 * two bits to cc01. The fourth parameter is named xcc so the body reads the
 * macro argument rather than whatever `xcc` means at the call site. */
#define sparc_encode_format4c(ins,opval,oper,xcc,bcond,r2,dest) \
    do {                                                    \
        sparc_format4c *__f = (sparc_format4c*)(ins);       \
        __f->op   = (opval);                                \
        __f->rd   = (dest);                                 \
        __f->op3  = (oper);                                 \
        __f->cc2  = ((xcc) >> 2) & 0x1;                     \
        __f->cond = (bcond);                                \
        __f->i    = 0;                                      \
        __f->cc01 = (xcc) & 0x3;                            \
        __f->res  = 0;                                      \
        __f->rs2  = (r2);                                   \
        (ins) = (unsigned int*)__f + 1;                     \
    } while (0)
|
||||
|
||||
/*
 * Write one instruction word through the sparc_format4d layout (conditional
 * form with an immediate operand, i = 1) and step `ins` forward by one word.
 *
 *   ins   - instruction pointer (lvalue); written through, then advanced
 *   opval - stored in the op field
 *   oper  - stored in the op3 field
 *   xcc   - 3-bit condition-code selector: bit 2 goes to cc2, bits 1..0 to cc01
 *   bcond - stored in the cond field
 *   imm   - stored in the simm field
 *   dest  - stored in the rd field
 */
#define sparc_encode_format4d(ins,opval,oper,xcc,bcond,imm,dest)	\
	do {	\
		sparc_format4d *__insn = (sparc_format4d*)(ins);	\
		__insn->op = (opval);	\
		__insn->rd = (dest);	\
		__insn->op3 = (oper);	\
		__insn->cc2 = ((xcc) >> 2) & 0x1;	\
		__insn->cond = (bcond);	\
		__insn->i = 1;	\
		__insn->cc01 = (xcc) & 0x3;	\
		__insn->simm = (imm);	\
		(ins) = (unsigned int*)__insn + 1;	\
	} while (0)
|
||||
|
||||
/* Value passed as the asi argument of the register-form memory macros. */
/* is it useful to provide a non-default value? */
#define sparc_asi 0x0

/*
 * load
 *
 * Each instruction has two spellings: NAME goes through
 * sparc_encode_format3a and NAME_imm through sparc_encode_format3b.
 * All of them use op = 3; only the op3 number differs.
 */
#define sparc_ldsb(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 9, (dest))
#define sparc_ldsb_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 9, (dest))

#define sparc_ldsh(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 10, (dest))
#define sparc_ldsh_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 10, (dest))

#define sparc_ldub(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 1, (dest))
#define sparc_ldub_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 1, (dest))

#define sparc_lduh(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 2, (dest))
#define sparc_lduh_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 2, (dest))

#define sparc_ld(ins,base,disp,dest)       sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 0, (dest))
#define sparc_ld_imm(ins,base,disp,dest)   sparc_encode_format3b ((ins), 3, (base), (disp), 0, (dest))

/* Sparc V9 */
#define sparc_ldx(ins,base,disp,dest)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 11, (dest))
#define sparc_ldx_imm(ins,base,disp,dest)  sparc_encode_format3b ((ins), 3, (base), (disp), 11, (dest))

#define sparc_ldsw(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 8, (dest))
#define sparc_ldsw_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 8, (dest))

#define sparc_ldd(ins,base,disp,dest)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 3, (dest))
#define sparc_ldd_imm(ins,base,disp,dest)  sparc_encode_format3b ((ins), 3, (base), (disp), 3, (dest))

/* The two floating-point loads pass a literal 0 as asi rather than sparc_asi. */
#define sparc_ldf(ins,base,disp,dest)      sparc_encode_format3a ((ins), 3, 0, (base), (disp), 32, (dest))
#define sparc_ldf_imm(ins,base,disp,dest)  sparc_encode_format3b ((ins), 3, (base), (disp), 32, (dest))

#define sparc_lddf(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, 0, (base), (disp), 35, (dest))
#define sparc_lddf_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 35, (dest))
|
||||
|
||||
/*
 * store
 *
 * Argument order is (ins, src, base, disp): the register being stored comes
 * first and is passed to the encoder in the slot the loads use for `dest`.
 * NAME uses sparc_encode_format3a, NAME_imm uses sparc_encode_format3b.
 */
#define sparc_stb(ins,src,base,disp)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 5, (src))
#define sparc_stb_imm(ins,src,base,disp)  sparc_encode_format3b ((ins), 3, (base), (disp), 5, (src))

#define sparc_sth(ins,src,base,disp)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 6, (src))
#define sparc_sth_imm(ins,src,base,disp)  sparc_encode_format3b ((ins), 3, (base), (disp), 6, (src))

#define sparc_st(ins,src,base,disp)       sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 4, (src))
#define sparc_st_imm(ins,src,base,disp)   sparc_encode_format3b ((ins), 3, (base), (disp), 4, (src))

/* Sparc V9 */
#define sparc_stx(ins,src,base,disp)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 14, (src))
#define sparc_stx_imm(ins,src,base,disp)  sparc_encode_format3b ((ins), 3, (base), (disp), 14, (src))

#define sparc_std(ins,src,base,disp)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 7, (src))
#define sparc_std_imm(ins,src,base,disp)  sparc_encode_format3b ((ins), 3, (base), (disp), 7, (src))

#define sparc_stf(ins,src,base,disp)      sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 36, (src))
#define sparc_stf_imm(ins,src,base,disp)  sparc_encode_format3b ((ins), 3, (base), (disp), 36, (src))

#define sparc_stdf(ins,src,base,disp)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 39, (src))
#define sparc_stdf_imm(ins,src,base,disp) sparc_encode_format3b ((ins), 3, (base), (disp), 39, (src))
|
||||
|
||||
/*
 * swap
 *
 * Same argument shape as the loads (ins, base, disp, dest), op = 3,
 * with op3 13 for ldstub and op3 15 for swap.
 */
#define sparc_ldstub(ins,base,disp,dest)     sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 13, (dest))
#define sparc_ldstub_imm(ins,base,disp,dest) sparc_encode_format3b ((ins), 3, (base), (disp), 13, (dest))

#define sparc_swap(ins,base,disp,dest)       sparc_encode_format3a ((ins), 3, sparc_asi, (base), (disp), 15, (dest))
#define sparc_swap_imm(ins,base,disp,dest)   sparc_encode_format3b ((ins), 3, (base), (disp), 15, (dest))
|
||||
|
||||
/* misc */

/*
 * note: sethi takes the full 32 bit value (think of it as %hi(val));
 * the macro itself shifts it right by 10 before encoding.
 */
#define sparc_sethi(ins,val,dest) sparc_encode_format2a ((ins), ((val) >> 10), 4, (dest))

/* nop is spelled as a sethi of 0 into sparc_zero */
#define sparc_nop(ins) sparc_sethi ((ins), 0, sparc_zero)

#define sparc_save(ins,src,disp,dest)        sparc_encode_format3a ((ins), 2, 0, (src), (disp), 60, (dest))
#define sparc_save_imm(ins,src,disp,dest)    sparc_encode_format3b ((ins), 2, (src), (disp), 60, (dest))

#define sparc_restore(ins,src,disp,dest)     sparc_encode_format3a ((ins), 2, 0, (src), (disp), 61, (dest))
#define sparc_restore_imm(ins,src,disp,dest) sparc_encode_format3b ((ins), 2, (src), (disp), 61, (dest))

#define sparc_rett(ins,src,disp)             sparc_encode_format3a ((ins), 2, 0, (src), (disp), 0x39, 0)
#define sparc_rett_imm(ins,src,disp)         sparc_encode_format3b ((ins), 2, (src), (disp), 0x39, 0)

#define sparc_jmpl(ins,base,disp,dest)       sparc_encode_format3a ((ins), 2, 0, (base), (disp), 56, (dest))
#define sparc_jmpl_imm(ins,base,disp,dest)   sparc_encode_format3b ((ins), 2, (base), (disp), 56, (dest))

/* the displacement is cast to unsigned int before it reaches the encoder */
#define sparc_call_simple(ins,disp) sparc_encode_call ((ins), ((unsigned int)(disp)))

#define sparc_rdy(ins,dest) sparc_encode_format3a ((ins), 2, 0, 0, 0, 40, (dest))

#define sparc_wry(ins,base,disp)     sparc_encode_format3a ((ins), 2, 0, (base), (disp), 48, 0)
#define sparc_wry_imm(ins,base,disp) sparc_encode_format3b ((ins), 2, (base), (disp), 48, 0)

/* stbar, unimp, flush */
#define sparc_stbar(ins)     sparc_encode_format3a ((ins), 2, 0, 15, 0, 40, 0)
#define sparc_unimp(ins,val) sparc_encode_format2b ((ins), 0, 0, 0, (val))

#define sparc_flush(ins,base,disp)     sparc_encode_format3a ((ins), 2, 0, (base), (disp), 59, 0)
#define sparc_flush_imm(ins,base,disp) sparc_encode_format3b ((ins), 2, (base), (disp), 59, 0)

#define sparc_flushw(ins) sparc_encode_format3a ((ins), 2, 0, 0, 0, 43, 0)

#define sparc_membar(ins,flags) sparc_encode_format3b ((ins), 2, 0xf, (flags), 0x28, 0)

/* trap */

#define sparc_ta(ins,tt) sparc_encode_format3b ((ins), 2, 0, (tt), 58, 0x8)
|
||||
|
||||
/* alu fop */
/* provide wrappers for: fitos, fitod, fstoi, fdtoi, fstod, fdtos, fmov, fneg, fabs */

/*
 * Generic floating-point encoders: both go through sparc_encode_format3c
 * with op = 2. sparc_fop uses op3 = 52 and writes `dest`; sparc_fcmp uses
 * op3 = 53 and passes 0 in the destination slot. `op` is the opf value.
 */
#define sparc_fop(ins,r1,op,r2,dest) sparc_encode_format3c ((ins), 2, (op), (r1), 52, (r2), (dest))
#define sparc_fcmp(ins,r1,op,r2)     sparc_encode_format3c ((ins), 2, (op), (r1), 53, (r2), 0)

/* format 1 fops */

/* two-source arithmetic */
#define sparc_fadds(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fadds_val, (r2), (dest))
#define sparc_faddd(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_faddd_val, (r2), (dest))
#define sparc_faddq(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_faddq_val, (r2), (dest))

#define sparc_fsubs(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fsubs_val, (r2), (dest))
#define sparc_fsubd(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fsubd_val, (r2), (dest))
#define sparc_fsubq(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fsubq_val, (r2), (dest))

#define sparc_fmuls(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fmuls_val, (r2), (dest))
#define sparc_fmuld(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fmuld_val, (r2), (dest))
#define sparc_fmulq(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fmulq_val, (r2), (dest))

#define sparc_fsmuld(ins,r1,r2,dest) sparc_fop ((ins), (r1), sparc_fsmuld_val, (r2), (dest))
#define sparc_fdmulq(ins,r1,r2,dest) sparc_fop ((ins), (r1), sparc_fdmulq_val, (r2), (dest))

#define sparc_fdivs(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fdivs_val, (r2), (dest))
#define sparc_fdivd(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fdivd_val, (r2), (dest))
#define sparc_fdivq(ins,r1,r2,dest)  sparc_fop ((ins), (r1), sparc_fdivq_val, (r2), (dest))

/* single-source operations: the rs1 slot is always passed as 0 */
#define sparc_fitos(ins,r2,dest) sparc_fop ((ins), 0, sparc_fitos_val, (r2), (dest))
#define sparc_fitod(ins,r2,dest) sparc_fop ((ins), 0, sparc_fitod_val, (r2), (dest))
#define sparc_fitoq(ins,r2,dest) sparc_fop ((ins), 0, sparc_fitoq_val, (r2), (dest))

#define sparc_fxtos(ins,r2,dest) sparc_fop ((ins), 0, sparc_fxtos_val, (r2), (dest))
#define sparc_fxtod(ins,r2,dest) sparc_fop ((ins), 0, sparc_fxtod_val, (r2), (dest))
#define sparc_fxtoq(ins,r2,dest) sparc_fop ((ins), 0, sparc_fxtoq_val, (r2), (dest))

#define sparc_fstoi(ins,r2,dest) sparc_fop ((ins), 0, sparc_fstoi_val, (r2), (dest))
#define sparc_fdtoi(ins,r2,dest) sparc_fop ((ins), 0, sparc_fdtoi_val, (r2), (dest))
#define sparc_fqtoi(ins,r2,dest) sparc_fop ((ins), 0, sparc_fqtoi_val, (r2), (dest))

#define sparc_fstod(ins,r2,dest) sparc_fop ((ins), 0, sparc_fstod_val, (r2), (dest))
#define sparc_fstoq(ins,r2,dest) sparc_fop ((ins), 0, sparc_fstoq_val, (r2), (dest))

#define sparc_fdtos(ins,r2,dest) sparc_fop ((ins), 0, sparc_fdtos_val, (r2), (dest))
#define sparc_fdtoq(ins,r2,dest) sparc_fop ((ins), 0, sparc_fdtoq_val, (r2), (dest))

#define sparc_fqtos(ins,r2,dest) sparc_fop ((ins), 0, sparc_fqtos_val, (r2), (dest))
#define sparc_fqtod(ins,r2,dest) sparc_fop ((ins), 0, sparc_fqtod_val, (r2), (dest))

#define sparc_fmovs(ins,r2,dest) sparc_fop ((ins), 0, sparc_fmovs_val, (r2), (dest))
#define sparc_fnegs(ins,r2,dest) sparc_fop ((ins), 0, sparc_fnegs_val, (r2), (dest))
#define sparc_fabss(ins,r2,dest) sparc_fop ((ins), 0, sparc_fabss_val, (r2), (dest))
|
||||
|
||||
/*
 * Double-precision move / negate / absolute value. Like the other
 * single-source fops, the rs1 slot is passed as 0.
 *
 * These expand to a bare sparc_fop(...) with no trailing semicolon, so
 * they behave as a single statement when the caller writes
 * `if (c) sparc_fmovd (...); else ...`.
 */
#define sparc_fmovd(ins,r2,dest) sparc_fop ((ins), 0, sparc_fmovd_val, (r2), (dest))
#define sparc_fnegd(ins,r2,dest) sparc_fop ((ins), 0, sparc_fnegd_val, (r2), (dest))
#define sparc_fabsd(ins,r2,dest) sparc_fop ((ins), 0, sparc_fabsd_val, (r2), (dest))

/* square root, single / double / quad */
#define sparc_fsqrts(ins,r2,dest) sparc_fop ((ins), 0, sparc_fsqrts_val, (r2), (dest))
#define sparc_fsqrtd(ins,r2,dest) sparc_fop ((ins), 0, sparc_fsqrtd_val, (r2), (dest))
#define sparc_fsqrtq(ins,r2,dest) sparc_fop ((ins), 0, sparc_fsqrtq_val, (r2), (dest))
|
||||
|
||||
/* format 2 fops */

/*
 * Floating-point compares. Every variant forwards to sparc_fcmp with its
 * own opf value. The fcmpe* variants used to name themselves in their
 * replacement list; a macro is not re-expanded inside its own expansion,
 * so that produced a call to an undeclared four-argument function.
 */
#define sparc_fcmps(ins,r1,r2)  sparc_fcmp ((ins), (r1), sparc_fcmps_val, (r2))
#define sparc_fcmpd(ins,r1,r2)  sparc_fcmp ((ins), (r1), sparc_fcmpd_val, (r2))
#define sparc_fcmpq(ins,r1,r2)  sparc_fcmp ((ins), (r1), sparc_fcmpq_val, (r2))
#define sparc_fcmpes(ins,r1,r2) sparc_fcmp ((ins), (r1), sparc_fcmpes_val, (r2))
#define sparc_fcmped(ins,r1,r2) sparc_fcmp ((ins), (r1), sparc_fcmped_val, (r2))
#define sparc_fcmpeq(ins,r1,r2) sparc_fcmp ((ins), (r1), sparc_fcmpeq_val, (r2))
|
||||
|
||||
/* logical */

/* FIXME: the setcc stuff is wrong in lots of places (outside this section too; not audited here) */

/*
 * Generic logical encoders (op = 2). `op` is the base op3 number and
 * `setcc` is treated as a boolean: any non-zero value ORs 0x10 into op3,
 * zero leaves the base opcode untouched.
 *
 * Every logical instruction below goes through these two macros. The
 * andn/or/orn/xor/xnor wrappers used to OR the raw `setcc` value into op3,
 * which only produced the intended opcode when setcc was exactly 0 or 0x10.
 * NOTE(review): this assumes sparc_cc is 0x10 or at least non-zero, as the
 * existing sparc_and already did - confirm against its definition.
 */
#define sparc_logic(ins,op,setcc,r1,r2,dest)      sparc_encode_format3a ((ins), 2, 0, (r1), (r2), ((setcc) ? 0x10 : 0) | (op), (dest))
#define sparc_logic_imm(ins,op,setcc,r1,imm,dest) sparc_encode_format3b ((ins), 2, (r1), (imm), ((setcc) ? 0x10 : 0) | (op), (dest))

#define sparc_and(ins,setcc,r1,r2,dest)       sparc_logic ((ins), 1, (setcc), (r1), (r2), (dest))
#define sparc_and_imm(ins,setcc,r1,imm,dest)  sparc_logic_imm ((ins), 1, (setcc), (r1), (imm), (dest))

#define sparc_andn(ins,setcc,r1,r2,dest)      sparc_logic ((ins), 5, (setcc), (r1), (r2), (dest))
#define sparc_andn_imm(ins,setcc,r1,imm,dest) sparc_logic_imm ((ins), 5, (setcc), (r1), (imm), (dest))

#define sparc_or(ins,setcc,r1,r2,dest)        sparc_logic ((ins), 2, (setcc), (r1), (r2), (dest))
#define sparc_or_imm(ins,setcc,r1,imm,dest)   sparc_logic_imm ((ins), 2, (setcc), (r1), (imm), (dest))

#define sparc_orn(ins,setcc,r1,r2,dest)       sparc_logic ((ins), 6, (setcc), (r1), (r2), (dest))
#define sparc_orn_imm(ins,setcc,r1,imm,dest)  sparc_logic_imm ((ins), 6, (setcc), (r1), (imm), (dest))

#define sparc_xor(ins,setcc,r1,r2,dest)       sparc_logic ((ins), 3, (setcc), (r1), (r2), (dest))
#define sparc_xor_imm(ins,setcc,r1,imm,dest)  sparc_logic_imm ((ins), 3, (setcc), (r1), (imm), (dest))

#define sparc_xnor(ins,setcc,r1,r2,dest)      sparc_logic ((ins), 7, (setcc), (r1), (r2), (dest))
#define sparc_xnor_imm(ins,setcc,r1,imm,dest) sparc_logic_imm ((ins), 7, (setcc), (r1), (imm), (dest))
|
||||
|
||||
/*
 * shift
 *
 * op = 2 throughout; op3 is 37 (sll), 38 (srl) or 39 (sra). The plain
 * forms use sparc_encode_format3a / 3b, the *x forms (Sparc V9) use
 * sparc_encode_format3ax / 3bx with the same op3 numbers.
 */
#define sparc_sll(ins,src,disp,dest)      sparc_encode_format3a ((ins), 2, 0, (src), (disp), 37, (dest))
#define sparc_sll_imm(ins,src,disp,dest)  sparc_encode_format3b ((ins), 2, (src), (disp), 37, (dest))

/* Sparc V9 */
#define sparc_sllx(ins,src,disp,dest)     sparc_encode_format3ax ((ins), 2, 0, (src), (disp), 37, (dest))
#define sparc_sllx_imm(ins,src,disp,dest) sparc_encode_format3bx ((ins), 2, (src), (disp), 37, (dest))

#define sparc_srl(ins,src,disp,dest)      sparc_encode_format3a ((ins), 2, 0, (src), (disp), 38, (dest))
#define sparc_srl_imm(ins,src,disp,dest)  sparc_encode_format3b ((ins), 2, (src), (disp), 38, (dest))

/* Sparc V9 */
#define sparc_srlx(ins,src,disp,dest)     sparc_encode_format3ax ((ins), 2, 0, (src), (disp), 38, (dest))
#define sparc_srlx_imm(ins,src,disp,dest) sparc_encode_format3bx ((ins), 2, (src), (disp), 38, (dest))

#define sparc_sra(ins,src,disp,dest)      sparc_encode_format3a ((ins), 2, 0, (src), (disp), 39, (dest))
#define sparc_sra_imm(ins,src,disp,dest)  sparc_encode_format3b ((ins), 2, (src), (disp), 39, (dest))

/* Sparc V9 */
#define sparc_srax(ins,src,disp,dest)     sparc_encode_format3ax ((ins), 2, 0, (src), (disp), 39, (dest))
#define sparc_srax_imm(ins,src,disp,dest) sparc_encode_format3bx ((ins), 2, (src), (disp), 39, (dest))
|
||||
|
||||
/* alu */

/*
 * Generic integer ALU encoders (op = 2). `op` is the base op3 number;
 * a non-zero `setcc` ORs 0x10 into it. `op` is parenthesized so that an
 * argument containing a lower-precedence operator (?:, &&, ||) still
 * combines with the setcc bit as intended.
 */
#define sparc_alu_reg(ins,op,setcc,r1,r2,dest)  sparc_encode_format3a ((ins), 2, 0, (r1), (r2), (op) | ((setcc) ? 0x10 : 0), (dest))
#define sparc_alu_imm(ins,op,setcc,r1,imm,dest) sparc_encode_format3b ((ins), 2, (r1), (imm), (op) | ((setcc) ? 0x10 : 0), (dest))

#define sparc_add(ins,setcc,r1,r2,dest)       sparc_alu_reg ((ins), 0, (setcc), (r1), (r2), (dest))
#define sparc_add_imm(ins,setcc,r1,imm,dest)  sparc_alu_imm ((ins), 0, (setcc), (r1), (imm), (dest))

#define sparc_addx(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0x8, (setcc), (r1), (r2), (dest))
#define sparc_addx_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0x8, (setcc), (r1), (imm), (dest))

#define sparc_sub(ins,setcc,r1,r2,dest)       sparc_alu_reg ((ins), 0x4, (setcc), (r1), (r2), (dest))
#define sparc_sub_imm(ins,setcc,r1,imm,dest)  sparc_alu_imm ((ins), 0x4, (setcc), (r1), (imm), (dest))

#define sparc_subx(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0xc, (setcc), (r1), (r2), (dest))
#define sparc_subx_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0xc, (setcc), (r1), (imm), (dest))

/* muls has no setcc argument and is encoded directly with op3 = 36 */
#define sparc_muls(ins,r1,r2,dest)      sparc_encode_format3a ((ins), 2, 0, (r1), (r2), 36, (dest))
#define sparc_muls_imm(ins,r1,imm,dest) sparc_encode_format3b ((ins), 2, (r1), (imm), 36, (dest))

#define sparc_umul(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0xa, (setcc), (r1), (r2), (dest))
#define sparc_umul_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0xa, (setcc), (r1), (imm), (dest))

#define sparc_smul(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0xb, (setcc), (r1), (r2), (dest))
#define sparc_smul_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0xb, (setcc), (r1), (imm), (dest))

#define sparc_udiv(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0xe, (setcc), (r1), (r2), (dest))
#define sparc_udiv_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0xe, (setcc), (r1), (imm), (dest))

#define sparc_sdiv(ins,setcc,r1,r2,dest)      sparc_alu_reg ((ins), 0xf, (setcc), (r1), (r2), (dest))
#define sparc_sdiv_imm(ins,setcc,r1,imm,dest) sparc_alu_imm ((ins), 0xf, (setcc), (r1), (imm), (dest))
|
||||
|
||||
|
||||
/*
 * branch
 *
 * sparc_branch and sparc_fbranch share sparc_encode_format2b and differ
 * only in the op2 constant (2 vs 6). sparc_branchp adds a condition-code
 * selector and a prediction argument via sparc_encode_format2c.
 */
#define sparc_branch(ins,aval,condval,displ)  sparc_encode_format2b ((ins), (aval), (condval), 2, (displ))
/* FIXME: float condition codes are different: unify. */
#define sparc_fbranch(ins,aval,condval,displ) sparc_encode_format2b ((ins), (aval), (condval), 6, (displ))
#define sparc_branchp(ins,aval,condval,xcc,predict,displ) sparc_encode_format2c ((ins), (aval), (condval), 0x1, (xcc), (predict), (displ))

/*
 * Branch-on-register forms: all use sparc_encode_format2d with the fourth
 * argument fixed at 0x3; the third argument selects the comparison.
 */
#define sparc_brz(ins,aval,predict,rs1,disp)   sparc_encode_format2d ((ins), (aval), 0x1, 0x3, (predict), (rs1), (disp))
#define sparc_brlez(ins,aval,predict,rs1,disp) sparc_encode_format2d ((ins), (aval), 0x2, 0x3, (predict), (rs1), (disp))
#define sparc_brlz(ins,aval,predict,rs1,disp)  sparc_encode_format2d ((ins), (aval), 0x3, 0x3, (predict), (rs1), (disp))
#define sparc_brnz(ins,aval,predict,rs1,disp)  sparc_encode_format2d ((ins), (aval), 0x5, 0x3, (predict), (rs1), (disp))
#define sparc_brgz(ins,aval,predict,rs1,disp)  sparc_encode_format2d ((ins), (aval), 0x6, 0x3, (predict), (rs1), (disp))
#define sparc_brgez(ins,aval,predict,rs1,disp) sparc_encode_format2d ((ins), (aval), 0x7, 0x3, (predict), (rs1), (disp))
|
||||
|
||||
/*
 * conditional moves
 *
 * Both forms use op = 0x2 and op3 = 0x2c; the register form goes through
 * sparc_encode_format4c and the immediate form through format4d.
 */
#define sparc_movcc(ins,cc,condval,r1,dest)      sparc_encode_format4c ((ins), 0x2, 0x2c, (cc), (condval), (r1), (dest))

#define sparc_movcc_imm(ins,cc,condval,imm,dest) sparc_encode_format4d ((ins), 0x2, 0x2c, (cc), (condval), (imm), (dest))
|
||||
|
||||
/*
 * synthetic instructions
 *
 * Convenience spellings built purely out of the real instructions above.
 */

/* compare = subtract with sparc_cc, result written to sparc_g0 */
#define sparc_cmp(ins,r1,r2)      sparc_sub ((ins), sparc_cc, (r1), (r2), sparc_g0)
#define sparc_cmp_imm(ins,r1,imm) sparc_sub_imm ((ins), sparc_cc, (r1), (imm), sparc_g0)

/* jmp = jmpl with sparc_g0 as the link register */
#define sparc_jmp(ins,base,disp)      sparc_jmpl ((ins), (base), (disp), sparc_g0)
#define sparc_jmp_imm(ins,base,disp)  sparc_jmpl_imm ((ins), (base), (disp), sparc_g0)

/* call = jmpl with sparc_o7 as the link register */
#define sparc_call(ins,base,disp)     sparc_jmpl ((ins), (base), (disp), sparc_o7)
#define sparc_call_imm(ins,base,disp) sparc_jmpl_imm ((ins), (base), (disp), sparc_o7)

/* test = or of sparc_g0 and reg with sparc_cc, result written to sparc_g0 */
#define sparc_test(ins,reg) sparc_or ((ins), sparc_cc, sparc_g0, (reg), sparc_g0)

/* returns: jump to i7+8 / o7+8 without linking */
#define sparc_ret(ins)  sparc_jmpl_imm ((ins), sparc_i7, 8, sparc_g0)
#define sparc_retl(ins) sparc_jmpl_imm ((ins), sparc_o7, 8, sparc_g0)

#define sparc_restore_simple(ins) sparc_restore ((ins), sparc_g0, sparc_g0, sparc_g0)
#define sparc_rett_simple(ins)    sparc_rett_imm ((ins), sparc_i7, 8)
|
||||
|
||||
/*
 * Load a 32-bit constant into `reg` using at most two instructions.
 * The cases are tried in this order:
 *   1. val == 0                  -> sparc_clr_reg
 *   2. low 10 bits are all zero  -> a single sethi
 *   3. -4096 <= val <= 4095      -> a single or-immediate against sparc_g0
 *   4. anything else             -> sethi, then or-immediate of the low 10 bits
 *
 * `val` is evaluated more than once, so it must not have side effects.
 */
#define sparc_set32(ins,val,reg)	\
	do {	\
		if ((val) == 0) {	\
			sparc_clr_reg((ins),(reg));	\
		} else if (((guint32)(val) & 0x3ff) == 0) {	\
			sparc_sethi((ins),(guint32)(val),(reg));	\
		} else if (((gint32)(val) >= -4096) && ((gint32)(val) <= 4095)) {	\
			sparc_or_imm((ins),FALSE,sparc_g0,(gint32)(val),(reg));	\
		} else {	\
			sparc_sethi((ins),(guint32)(val),(reg));	\
			sparc_or_imm((ins),FALSE,(reg),(guint32)(val)&0x3ff,(reg));	\
		}	\
	} while (0)
|
||||
|
||||
/*
 * Upper bound, in bytes, on the code sparc_set can emit: four bytes per
 * instruction, with the longest path being two instructions on 32-bit
 * targets and six when SPARCV9 is defined.
 */
#ifndef SPARCV9
#define SPARC_SET_MAX_SIZE (2 * 4)
#else
#define SPARC_SET_MAX_SIZE (6 * 4)
#endif
|
||||
|
||||
/*
 * sparc_set: load a constant (pointer-sized when SPARCV9 is defined) into
 * `reg`, choosing the shortest instruction sequence that can represent it.
 *
 * The guard is #ifdef, matching every other SPARCV9 test in this header
 * (a bare `#if SPARCV9` fails to preprocess when the macro is defined
 * with an empty value).
 */
#ifdef SPARCV9
/*
 * 64-bit variant.
 *   ins - instruction pointer (lvalue), advanced past the emitted code
 *   ptr - value to load; converted to gint64 exactly once
 *   reg - destination register; must not be sparc_g1, because the two
 *         longest sequences use sparc_g1 as scratch
 *
 * The temporaries carry a __sparc_set_ prefix so that an argument such as
 * a caller variable called `val` cannot be captured by them, and they are
 * declared before the first statement.
 */
#define sparc_set(ins,ptr,reg)	\
	do {	\
		gint64 __sparc_set_val = (gint64)(ptr);	\
		guint32 __sparc_set_hi = (guint32)(__sparc_set_val >> 32);	\
		guint32 __sparc_set_lo = (guint32)(__sparc_set_val & 0xffffffff);	\
		g_assert ((reg) != sparc_g1);	\
		if (__sparc_set_val == 0)	\
			sparc_clr_reg ((ins), (reg));	\
		else if ((__sparc_set_val >= -4096) && (__sparc_set_val <= 4095))	\
			/* fits the immediate range of a single or */	\
			sparc_or_imm ((ins), FALSE, sparc_g0, __sparc_set_lo, (reg));	\
		else if ((__sparc_set_val >= 0) && (__sparc_set_val <= (gint64)0xffffffffU)) {	\
			/* unsigned 32-bit value: sethi, plus or when the low 10 bits are non-zero */	\
			sparc_sethi ((ins), __sparc_set_lo, (reg));	\
			if (__sparc_set_lo & 0x3ff)	\
				sparc_or_imm ((ins), FALSE, (reg), __sparc_set_lo & 0x3ff, (reg));	\
		}	\
		else if ((__sparc_set_val >= 0) && (__sparc_set_val <= (((gint64)1 << 44) - 1))) {	\
			/* up to 44 bits: build bits 12..43, shift left by 12, or in the low 12 bits */	\
			sparc_sethi ((ins), (__sparc_set_val >> 12), (reg));	\
			sparc_or_imm ((ins), FALSE, (reg), (__sparc_set_val >> 12) & 0x3ff, (reg));	\
			sparc_sllx_imm ((ins), (reg), 12, (reg));	\
			sparc_or_imm ((ins), FALSE, (reg), (__sparc_set_val) & 0xfff, (reg));	\
		}	\
		else if (__sparc_set_hi == 0xffffffff) {	\
			/* upper word all ones: xnor of g0 with g0 into g1, shifted up by 32 */	\
			sparc_xnor ((ins), FALSE, sparc_g0, sparc_g0, sparc_g1);	\
			sparc_sethi ((ins), __sparc_set_lo, (reg));	\
			sparc_sllx_imm ((ins), sparc_g1, 32, sparc_g1);	\
			sparc_or_imm ((ins), FALSE, (reg), __sparc_set_lo & 0x3ff, (reg));	\
			sparc_or ((ins), FALSE, (reg), sparc_g1, (reg));	\
		}	\
		else {	\
			/* general case: upper word built in g1, lower word in reg, then merged */	\
			sparc_sethi ((ins), __sparc_set_hi, sparc_g1);	\
			sparc_sethi ((ins), __sparc_set_lo, (reg));	\
			sparc_or_imm ((ins), FALSE, sparc_g1, __sparc_set_hi & 0x3ff, sparc_g1);	\
			sparc_or_imm ((ins), FALSE, (reg), __sparc_set_lo & 0x3ff, (reg));	\
			sparc_sllx_imm ((ins), sparc_g1, 32, sparc_g1);	\
			sparc_or ((ins), FALSE, (reg), sparc_g1, (reg));	\
		}	\
	} while (0)
#else
/*
 * 32-bit variant: zero -> clr; low 10 bits clear -> sethi; value in
 * [-4096, 4095] -> or-immediate; otherwise sethi followed by or-immediate.
 * `val` is evaluated more than once, so it must not have side effects.
 */
#define sparc_set(ins,val,reg)	\
	do {	\
		if ((val) == 0)	\
			sparc_clr_reg((ins),(reg));	\
		else if (((guint32)(val) & 0x3ff) == 0)	\
			sparc_sethi((ins),(guint32)(val),(reg));	\
		else if (((gint32)(val) >= -4096) && ((gint32)(val) <= 4095))	\
			sparc_or_imm((ins),FALSE,sparc_g0,(gint32)(val),(reg));	\
		else {	\
			sparc_sethi((ins),(guint32)(val),(reg));	\
			sparc_or_imm((ins),FALSE,(reg),(guint32)(val)&0x3ff,(reg));	\
		}	\
	} while (0)
#endif
|
||||
|
||||
/* Pointer-valued spelling of sparc_set; forwards its arguments unchanged. */
#define sparc_set_ptr(ins,val,reg) sparc_set(ins,val,reg)

/*
 * sparc_set with a fixed constant: 0x7fffffff on 32-bit targets and
 * 0x7fffffff7fffffff when SPARCV9 is defined.
 * NOTE(review): presumably emitted as a placeholder to be patched later,
 * going by the name - confirm against callers.
 */
#ifndef SPARCV9
#define sparc_set_template(ins,reg) sparc_set (ins,0x7fffffff, reg)
#else
#define sparc_set_template(ins,reg) sparc_set (ins,0x7fffffff7fffffff, reg)
#endif

/* not: xnor of reg with sparc_g0, written back to reg */
#define sparc_not(ins,reg)     sparc_xnor ((ins), FALSE, (reg), sparc_g0, (reg))
/* neg: sparc_g0 minus reg, written back to reg */
#define sparc_neg(ins,reg)     sparc_sub ((ins), FALSE, sparc_g0, (reg), (reg))
/* clr: or of sparc_g0 with sparc_g0 into reg */
#define sparc_clr_reg(ins,reg) sparc_or ((ins), FALSE, sparc_g0, sparc_g0, (reg))

/* register move: or of sparc_g0 with src into dest */
#define sparc_mov_reg_reg(ins,src,dest) sparc_or ((ins), FALSE, sparc_g0, (src), (dest))
|
||||
|
||||
/*
 * sparc_ldi / sparc_sti (and their _imm forms) are aliases for the
 * st/ld macros on 32-bit targets and for the stx/ldx macros when
 * SPARCV9 is defined.
 */
#ifndef SPARCV9
#define sparc_sti_imm sparc_st_imm
#define sparc_ldi_imm sparc_ld_imm
#define sparc_sti sparc_st
#define sparc_ldi sparc_ld
#else
#define sparc_sti_imm sparc_stx_imm
#define sparc_ldi_imm sparc_ldx_imm
#define sparc_sti sparc_stx
#define sparc_ldi sparc_ldx
#endif
|
||||
|
||||
#endif /* __SPARC_CODEGEN_H__ */
|
||||
|
@ -0,0 +1,123 @@
|
||||
#include <stdio.h>

#include <glib.h>
#include "sparc-codegen.h"
|
||||
|
||||
/* don't run the resulting program, it will destroy your computer,
|
||||
* just objdump -d it to inspect we generated the correct assembler.
|
||||
*/
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
guint32 *p;
|
||||
guint32 code_buffer [500];
|
||||
guint32 local_size = 0, stack_size = 0, code_size = 6;
|
||||
guint32 arg_pos, simpletype;
|
||||
unsigned char *ins;
|
||||
int i, stringp, cur_out_reg, size;
|
||||
|
||||
p = code_buffer;
|
||||
|
||||
printf (".text\n.align 4\n.globl main\n.type main,@function\nmain:\n");
|
||||
|
||||
/*
|
||||
* Standard function prolog.
|
||||
*/
|
||||
sparc_save_imm (p, sparc_sp, -112-stack_size, sparc_sp);
|
||||
cur_out_reg = sparc_o0;
|
||||
arg_pos = 0;
|
||||
|
||||
if (1) {
|
||||
sparc_mov_reg_reg (p, sparc_i2, cur_out_reg);
|
||||
++cur_out_reg;
|
||||
}
|
||||
|
||||
sparc_ld_imm (p, sparc_i3, arg_pos, cur_out_reg);
|
||||
++cur_out_reg;
|
||||
sparc_ld_imm (p, sparc_i3, arg_pos+4, cur_out_reg);
|
||||
++cur_out_reg;
|
||||
/*
|
||||
* Insert call to function
|
||||
*/
|
||||
sparc_jmpl (p, sparc_i0, 0, sparc_callsite);
|
||||
sparc_nop (p);
|
||||
|
||||
sparc_jmpl_imm (p, sparc_i7, 8, sparc_zero);
|
||||
sparc_restore (p, sparc_zero, sparc_zero, sparc_zero);
|
||||
|
||||
sparc_ldsb (p, sparc_i3, sparc_l0, sparc_o5);
|
||||
sparc_ldsb_imm (p, sparc_i3, 2, sparc_o5);
|
||||
|
||||
sparc_ldsh (p, sparc_i3, sparc_l0, sparc_o5);
|
||||
sparc_ldsh_imm (p, sparc_i3, 2, sparc_o5);
|
||||
|
||||
sparc_ldub (p, sparc_i3, sparc_l0, sparc_o5);
|
||||
sparc_ldub_imm (p, sparc_i3, 2, sparc_o5);
|
||||
|
||||
sparc_lduh (p, sparc_i3, sparc_l0, sparc_o5);
|
||||
sparc_lduh_imm (p, sparc_i3, 2, sparc_o5);
|
||||
|
||||
sparc_ldf (p, sparc_i3, sparc_l0, sparc_o5);
|
||||
sparc_ldf_imm (p, sparc_i3, 2, sparc_o5);
|
||||
|
||||
sparc_stb (p, sparc_i3, sparc_l0, sparc_l2);
|
||||
sparc_stb_imm (p, sparc_i3, sparc_o5, 2);
|
||||
|
||||
sparc_sethi (p, 0xff000000, sparc_o2);
|
||||
sparc_rdy (p, sparc_l0);
|
||||
sparc_wry (p, sparc_l0, sparc_l1);
|
||||
sparc_wry_imm (p, sparc_l0, 16);
|
||||
sparc_stbar (p);
|
||||
sparc_unimp (p, 24);
|
||||
sparc_flush (p, sparc_l4, 0);
|
||||
|
||||
sparc_and (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_and_imm (p, FALSE, sparc_l0, 0xff, sparc_o1);
|
||||
sparc_andn (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_or (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_orn (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_xor (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_xnor (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
|
||||
sparc_sll (p, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_sll_imm (p, sparc_l0, 2, sparc_o1);
|
||||
sparc_srl (p, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_srl_imm (p, sparc_l0, 2, sparc_o1);
|
||||
sparc_sra (p, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_sra_imm (p, sparc_l0, 2, sparc_o1);
|
||||
|
||||
sparc_add (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_add_imm (p, FALSE, sparc_l0, 0xff, sparc_o1);
|
||||
sparc_addx (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_sub (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_subx (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
|
||||
sparc_muls (p, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_umul (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_smul (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_udiv (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
sparc_sdiv (p, sparc_cc, sparc_l0, sparc_l1, sparc_o1);
|
||||
|
||||
sparc_branch (p, FALSE, sparc_bne, -12);
|
||||
sparc_ret (p);
|
||||
sparc_retl (p);
|
||||
sparc_test (p, sparc_l4);
|
||||
sparc_cmp (p, sparc_l4, sparc_l6);
|
||||
sparc_cmp_imm (p, sparc_l4, 4);
|
||||
sparc_restore_simple (p);
|
||||
|
||||
sparc_set (p, 0xff000000, sparc_l7);
|
||||
sparc_set (p, 1, sparc_l7);
|
||||
sparc_set (p, 0xff0000ff, sparc_l7);
|
||||
|
||||
sparc_not (p, sparc_g2);
|
||||
sparc_neg (p, sparc_g3);
|
||||
sparc_clr_reg (p, sparc_g4);
|
||||
|
||||
|
||||
size = (p-code_buffer)*4;
|
||||
ins = (gchar*)code_buffer;
|
||||
for (i = 0; i < size; ++i)
|
||||
printf (".byte %d\n", (unsigned int) ins [i]);
|
||||
return 0;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,4 @@
|
||||
/Makefile.in
|
||||
/Makefile
|
||||
/.deps
|
||||
/.libs
|
@ -0,0 +1,2 @@
|
||||
EXTRA_DIST = x64-codegen.h
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,6 @@
|
||||
/Makefile
|
||||
/Makefile.in
|
||||
/.libs
|
||||
/.deps
|
||||
/*.la
|
||||
/*.lo
|
@ -0,0 +1 @@
|
||||
EXTRA_DIST = x86-codegen.h
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,398 +1,539 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts.h"
|
||||
|
||||
#include "ffts_internal.h"
|
||||
#include "ffts_static.h"
|
||||
#include "ffts_trig.h"
|
||||
#include "macros.h"
|
||||
//#include "mini_macros.h"
|
||||
#include "patterns.h"
|
||||
#include "ffts_small.h"
|
||||
|
||||
#ifdef DYNAMIC_DISABLED
|
||||
#include "ffts_static.h"
|
||||
#ifndef DYNAMIC_DISABLED
|
||||
#include "codegen.h"
|
||||
#endif
|
||||
|
||||
#if _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#if __APPLE__
|
||||
#include <libkern/OSCacheControl.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_SYS_MMAN_H
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NEON
/*
 * Constant table used by the NEON build, aligned to 64 bytes.
 * Plan setup elsewhere in this file points oe_ws/ee_ws/eo_ws at element
 * offsets 0, 4, 8 and 12 of this table depending on the transform sign.
 */
static const FFTS_ALIGN(64) float w_data[16] = {
    /* [0..3] */
    0.70710678118654757273731092936941f, 0.70710678118654746171500846685376f,
    -0.70710678118654757273731092936941f, -0.70710678118654746171500846685376f,
    /* [4..7] */
    1.0f, 0.70710678118654757273731092936941f,
    -0.0f, -0.70710678118654746171500846685376f,
    /* [8..11] */
    0.70710678118654757273731092936941f, 0.70710678118654746171500846685376f,
    0.70710678118654757273731092936941f, 0.70710678118654746171500846685376f,
    /* [12..15] */
    1.0f, 0.70710678118654757273731092936941f,
    0.0f, 0.70710678118654746171500846685376f
};
#endif
|
||||
|
||||
static FFTS_INLINE int ffts_allow_execute(void *start, size_t len)
|
||||
{
|
||||
int result;
|
||||
|
||||
#ifdef _WIN32
|
||||
DWORD old_protect;
|
||||
result = !VirtualProtect(start, len, PAGE_EXECUTE_READ, &old_protect);
|
||||
#else
|
||||
#include "codegen.h"
|
||||
result = mprotect(start, len, PROT_READ | PROT_EXEC);
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <sys/mman.h>
|
||||
#include <string.h>
|
||||
#include <limits.h> /* for PAGESIZE */
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Remove execute permission from a memory region by switching its pages
 * back to read/write.
 *
 * start  first byte of the region; passed unchanged to the OS call
 * len    size of the region in bytes
 *
 * Returns 0 on success and non-zero on failure on every platform, the same
 * convention ffts_allow_execute() uses.
 */
static FFTS_INLINE int ffts_deny_execute(void *start, size_t len)
{
    int result;

#ifdef _WIN32
    DWORD old_protect;

    /* VirtualProtect() returns non-zero on success; invert so 0 == success */
    result = !VirtualProtect(start, len, PAGE_READWRITE, &old_protect);
#else
    /* mprotect() already returns 0 on success and -1 on error */
    result = mprotect(start, len, PROT_READ | PROT_WRITE);
#endif

    return result;
}
|
||||
|
||||
/*
 * Flush the instruction cache for a region that holds freshly written code.
 *
 * start   first byte of the region
 * length  size of the region in bytes
 *
 * Returns 0 on success and non-zero on failure. Only the Windows branch can
 * report a failure; every other branch returns 0 unconditionally, including
 * platforms for which no flush call is compiled in.
 */
static FFTS_INLINE int ffts_flush_instruction_cache(void *start, size_t length)
{
#ifdef _WIN32
    /* FlushInstructionCache() returns non-zero on success */
    return !FlushInstructionCache(GetCurrentProcess(), start, length);
#else
    /* keep the parameters "used" on platforms with no flush call below */
    (void) start;
    (void) length;

#ifdef __APPLE__
    sys_icache_invalidate(start, length);
#elif __ANDROID__
    /* must be tested before __linux__ so Android takes this branch */
    cacheflush((long) start, (long) start + length, 0);
#elif __linux__
#if GCC_VERSION_AT_LEAST(4,3)
    __builtin___clear_cache(start, (char*) start + length);
#elif __GNUC__
    /* __clear_cache() takes pointers, not integers */
    __clear_cache((char*) start, (char*) start + length);
#endif
#endif
    return 0;
#endif
}
|
||||
|
||||
static FFTS_INLINE void *ffts_vmem_alloc(size_t length)
|
||||
{
|
||||
#if __APPLE__
|
||||
#include <libkern/OSCacheControl.h>
|
||||
return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
|
||||
#elif _WIN32
|
||||
return VirtualAlloc(NULL, length, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
|
||||
#else
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS 0x20
|
||||
#endif
|
||||
|
||||
void ffts_execute(ffts_plan_t *p, const void * in, void * out) {
|
||||
p->transform(p, (const float *)in, (float *)out);
|
||||
return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ffts_free(ffts_plan_t *p) {
|
||||
p->destroy(p);
|
||||
/*
 * Give a region back to the operating system.
 *
 * addr    start of the region
 * length  size of the region in bytes; only used by the munmap() branch
 */
static FFTS_INLINE void ffts_vmem_free(void *addr, size_t length)
{
#ifndef _WIN32
    munmap(addr, length);
#else
    /* VirtualFree() with MEM_RELEASE is called with size 0; length is unused */
    (void) length;
    VirtualFree(addr, 0, MEM_RELEASE);
#endif
}
|
||||
|
||||
void ffts_free_1d(ffts_plan_t *p) {
|
||||
|
||||
size_t i;
|
||||
|
||||
if(p->ws) {
|
||||
FFTS_FREE(p->ws);
|
||||
}
|
||||
if(p->is) free(p->is);
|
||||
if(p->ws_is) free(p->ws_is);
|
||||
if(p->offsets) free(p->offsets);
|
||||
//free(p->transforms);
|
||||
if(p->transforms) free(p->transforms);
|
||||
|
||||
if(p->transform_base) {
|
||||
if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) {
|
||||
perror("Couldn't mprotect");
|
||||
exit(errno);
|
||||
}
|
||||
munmap(p->transform_base, p->transform_size);
|
||||
//free(p->transform_base);
|
||||
}
|
||||
free(p);
|
||||
FFTS_API void
|
||||
ffts_execute(ffts_plan_t *p, const void *in, void *out)
|
||||
{
|
||||
/* TODO: Define NEEDS_ALIGNED properly instead */
|
||||
#if defined(HAVE_SSE) || defined(HAVE_NEON)
|
||||
if (((uintptr_t) in % 16) != 0) {
|
||||
LOG("ffts_execute: input buffer needs to be aligned to a 128bit boundary\n");
|
||||
}
|
||||
|
||||
if (((uintptr_t) out % 16) != 0) {
|
||||
LOG("ffts_execute: output buffer needs to be aligned to a 128bit boundary\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
p->transform(p, (const float*) in, (float*) out);
|
||||
}
|
||||
|
||||
ffts_plan_t *ffts_init_1d(size_t N, int sign) {
|
||||
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||
size_t leafN = 8;
|
||||
size_t i;
|
||||
|
||||
#ifdef __arm__
|
||||
//#ifdef HAVE_NEON
|
||||
V MULI_SIGN;
|
||||
|
||||
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
|
||||
//#endif
|
||||
FFTS_API void
|
||||
ffts_free(ffts_plan_t *p)
|
||||
{
|
||||
if (p) {
|
||||
p->destroy(p);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Destroy callback for plans built by ffts_init_1d(): releases the generated
 * code buffer (dynamic builds only), the lookup tables and index arrays, and
 * finally the plan structure itself. Members that are NULL are skipped.
 * p itself must not be NULL.
 */
void ffts_free_1d(ffts_plan_t *p)
{
#ifndef DYNAMIC_DISABLED
    /* make the code pages writable again, then release them */
    if (p->transform_base != NULL) {
        ffts_deny_execute(p->transform_base, p->transform_size);
        ffts_vmem_free(p->transform_base, p->transform_size);
    }
#endif

    if (p->ws_is != NULL) {
        free(p->ws_is);
    }

    /* ws was obtained with FFTS_MALLOC, so it is released with FFTS_FREE */
    if (p->ws != NULL) {
        FFTS_FREE(p->ws);
    }

    if (p->is != NULL) {
        free(p->is);
    }

    if (p->offsets != NULL) {
        free(p->offsets);
    }

    free(p);
}
|
||||
|
||||
static int
|
||||
ffts_generate_luts(ffts_plan_t *p, size_t N, size_t leaf_N, int sign)
|
||||
{
|
||||
V4SF MULI_SIGN;
|
||||
size_t n_luts;
|
||||
ffts_cpx_32f *w;
|
||||
ffts_cpx_32f *tmp;
|
||||
size_t i, j, m, n;
|
||||
int stride;
|
||||
|
||||
if (sign < 0) {
|
||||
MULI_SIGN = V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||
} else {
|
||||
MULI_SIGN = V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f);
|
||||
}
|
||||
|
||||
/* LUTS */
|
||||
n_luts = ffts_ctzl(N / leaf_N);
|
||||
if (n_luts >= 32) {
|
||||
n_luts = 0;
|
||||
}
|
||||
|
||||
if (n_luts) {
|
||||
size_t lut_size;
|
||||
|
||||
#if defined(__arm__) && !defined(HAVE_NEON)
|
||||
lut_size = leaf_N * (((1 << n_luts) - 2) * 3 + 1) * sizeof(ffts_cpx_32f) / 2;
|
||||
#else
|
||||
V MULI_SIGN;
|
||||
|
||||
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
|
||||
lut_size = leaf_N * (((1 << n_luts) - 2) * 3 + 1) * sizeof(ffts_cpx_32f);
|
||||
#endif
|
||||
|
||||
p->transform = NULL;
|
||||
p->transform_base = NULL;
|
||||
p->transforms = NULL;
|
||||
p->is = NULL;
|
||||
p->ws_is = NULL;
|
||||
p->ws = NULL;
|
||||
p->offsets = NULL;
|
||||
p->destroy = ffts_free_1d;
|
||||
|
||||
if(N >= 32) {
|
||||
ffts_init_offsets(p, N, leafN);
|
||||
#ifdef __arm__
|
||||
p->ws = FFTS_MALLOC(lut_size, 32);
|
||||
if (!p->ws) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
p->ws_is = (size_t*) malloc(n_luts * sizeof(*p->ws_is));
|
||||
if (!p->ws_is) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
w = p->ws;
|
||||
n = leaf_N * 2;
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
ffts_init_is(p, N, leafN, 1);
|
||||
V4SF neg = (sign < 0) ? V4SF_LIT4(0.0f, 0.0f, 0.0f, 0.0f) : V4SF_LIT4(-0.0f, -0.0f, -0.0f, -0.0f);
|
||||
#endif
|
||||
|
||||
/* calculate factors */
|
||||
m = leaf_N << (n_luts - 2);
|
||||
tmp = FFTS_MALLOC(m * sizeof(ffts_cpx_32f), 32);
|
||||
|
||||
ffts_generate_cosine_sine_pow2_32f(tmp, m);
|
||||
|
||||
/* generate lookup tables */
|
||||
stride = 1 << (n_luts - 1);
|
||||
for (i = 0; i < n_luts; i++) {
|
||||
p->ws_is[i] = w - (ffts_cpx_32f*) p->ws;
|
||||
|
||||
if (!i) {
|
||||
ffts_cpx_32f *w0 = FFTS_MALLOC(n/4 * sizeof(ffts_cpx_32f), 32);
|
||||
float *fw0 = (float*) w0;
|
||||
float *fw = (float*) w;
|
||||
|
||||
for (j = 0; j < n/4; j++) {
|
||||
w0[j][0] = tmp[j * stride][0];
|
||||
w0[j][1] = tmp[j * stride][1];
|
||||
}
|
||||
|
||||
#if defined(__arm__)
|
||||
#ifdef HAVE_NEON
|
||||
for (j = 0; j < n/4; j += 4) {
|
||||
V4SF2 temp0 = V4SF2_LD(fw0 + j*2);
|
||||
temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
|
||||
V4SF2_STORE_SPR(fw + j*2, temp0);
|
||||
}
|
||||
#else
|
||||
ffts_init_is(p, N, leafN, 1);
|
||||
for (j = 0; j < n/4; j++) {
|
||||
fw[j*2+0] = fw0[j*2+0];
|
||||
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||
}
|
||||
#endif
|
||||
w += n/4;
|
||||
#else
|
||||
ffts_init_is(p, N, leafN, 1);
|
||||
for (j = 0; j < n/4; j += 2) {
|
||||
V4SF re, im, temp0;
|
||||
temp0 = V4SF_LD(fw0 + j*2);
|
||||
re = V4SF_DUPLICATE_RE(temp0);
|
||||
im = V4SF_DUPLICATE_IM(temp0);
|
||||
im = V4SF_XOR(im, MULI_SIGN);
|
||||
V4SF_ST(fw + j*4 + 0, re);
|
||||
V4SF_ST(fw + j*4 + 4, im);
|
||||
}
|
||||
|
||||
w += n/4 * 2;
|
||||
#endif
|
||||
|
||||
p->i0 = N/leafN/3+1;
|
||||
p->i1 = N/leafN/3;
|
||||
if((N/leafN) % 3 > 1) p->i1++;
|
||||
p->i2 = N/leafN/3;
|
||||
|
||||
#ifdef __arm__
|
||||
#ifdef HAVE_NEON
|
||||
p->i0/=2;
|
||||
p->i1/=2;
|
||||
#endif
|
||||
#else
|
||||
p->i0/=2;
|
||||
p->i1/=2;
|
||||
#endif
|
||||
|
||||
}else{
|
||||
p->transforms = malloc(2 * sizeof(transform_index_t));
|
||||
p->transforms[0] = 0;
|
||||
p->transforms[1] = 1;
|
||||
if(N == 2) p->transform = &firstpass_2;
|
||||
else if(N == 4 && sign == -1) p->transform = &firstpass_4_f;
|
||||
else if(N == 4 && sign == 1) p->transform = &firstpass_4_b;
|
||||
else if(N == 8 && sign == -1) p->transform = &firstpass_8_f;
|
||||
else if(N == 8 && sign == 1) p->transform = &firstpass_8_b;
|
||||
else if(N == 16 && sign == -1) p->transform = &firstpass_16_f;
|
||||
else if(N == 16 && sign == 1) p->transform = &firstpass_16_b;
|
||||
|
||||
p->is = NULL;
|
||||
p->offsets = NULL;
|
||||
}
|
||||
|
||||
int hardcoded = 0;
|
||||
|
||||
/* LUTS */
|
||||
size_t n_luts = __builtin_ctzl(N/leafN);
|
||||
if(N < 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
|
||||
|
||||
if(n_luts >= 32) n_luts = 0;
|
||||
|
||||
// fprintf(stderr, "n_luts = %zu\n", n_luts);
|
||||
|
||||
cdata_t *w;
|
||||
|
||||
int n = leafN*2;
|
||||
if(hardcoded) n = 8;
|
||||
|
||||
size_t lut_size = 0;
|
||||
|
||||
for(i=0;i<n_luts;i++) {
|
||||
if(!i || hardcoded) {
|
||||
#ifdef __arm__
|
||||
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||
else lut_size += n/4 * sizeof(cdata_t);
|
||||
#else
|
||||
lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||
#endif
|
||||
n *= 2;
|
||||
} else {
|
||||
#ifdef __arm__
|
||||
lut_size += n/8 * 3 * sizeof(cdata_t);
|
||||
#else
|
||||
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
|
||||
#endif
|
||||
}
|
||||
n *= 2;
|
||||
}
|
||||
|
||||
// lut_size *= 16;
|
||||
|
||||
// fprintf(stderr, "lut size = %zu\n", lut_size);
|
||||
if(n_luts) {
|
||||
p->ws = FFTS_MALLOC(lut_size,32);
|
||||
p->ws_is = malloc(n_luts * sizeof(size_t));
|
||||
}else{
|
||||
p->ws = NULL;
|
||||
p->ws_is = NULL;
|
||||
}
|
||||
w = p->ws;
|
||||
|
||||
n = leafN*2;
|
||||
if(hardcoded) n = 8;
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f);
|
||||
#endif
|
||||
|
||||
for(i=0;i<n_luts;i++) {
|
||||
p->ws_is[i] = w - (cdata_t *)p->ws;
|
||||
//fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
|
||||
|
||||
if(!i || hardcoded) {
|
||||
cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
|
||||
|
||||
size_t j;
|
||||
for(j=0;j<n/4;j++) {
|
||||
w0[j][0] = W_re(n,j);
|
||||
w0[j][1] = W_im(n,j);
|
||||
}
|
||||
|
||||
|
||||
float *fw0 = (float *)w0;
|
||||
#ifdef __arm__
|
||||
if(N < 32) {
|
||||
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
|
||||
float *fw = (float *)w;
|
||||
V temp0, temp1, temp2;
|
||||
for(j=0;j<n/4;j+=2) {
|
||||
// #ifdef HAVE_NEON
|
||||
temp0 = VLD(fw0 + j*2);
|
||||
V re, im;
|
||||
re = VDUPRE(temp0);
|
||||
im = VDUPIM(temp0);
|
||||
#ifdef HAVE_NEON
|
||||
im = VXOR(im, MULI_SIGN);
|
||||
//im = IMULI(sign>0, im);
|
||||
#else
|
||||
im = MULI(sign>0, im);
|
||||
#endif
|
||||
VST(fw + j*4 , re);
|
||||
VST(fw + j*4+4, im);
|
||||
// #endif
|
||||
}
|
||||
w += n/4 * 2;
|
||||
}else{
|
||||
//w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
|
||||
float *fw = (float *)w;
|
||||
#ifdef HAVE_NEON
|
||||
VS temp0, temp1, temp2;
|
||||
for(j=0;j<n/4;j+=4) {
|
||||
temp0 = VLD2(fw0 + j*2);
|
||||
temp0.val[1] = VXOR(temp0.val[1], neg);
|
||||
STORESPR(fw + j*2, temp0);
|
||||
}
|
||||
#else
|
||||
for(j=0;j<n/4;j+=1) {
|
||||
fw[j*2] = fw0[j*2];
|
||||
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||
}
|
||||
#endif
|
||||
w += n/4;
|
||||
}
|
||||
#else
|
||||
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
|
||||
float *fw = (float *)w;
|
||||
V temp0, temp1, temp2;
|
||||
for(j=0;j<n/4;j+=2) {
|
||||
temp0 = VLD(fw0 + j*2);
|
||||
V re, im;
|
||||
re = VDUPRE(temp0);
|
||||
im = VDUPIM(temp0);
|
||||
im = VXOR(im, MULI_SIGN);
|
||||
VST(fw + j*4 , re);
|
||||
VST(fw + j*4+4, im);
|
||||
}
|
||||
w += n/4 * 2;
|
||||
#endif
|
||||
|
||||
FFTS_FREE(w0);
|
||||
}else{
|
||||
|
||||
cdata_t *w0 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||
cdata_t *w1 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||
cdata_t *w2 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||
|
||||
size_t j;
|
||||
for(j=0;j<n/8;j++) {
|
||||
w0[j][0] = W_re(n,j*2);
|
||||
w0[j][1] = W_im(n,j*2);
|
||||
w1[j][0] = W_re(n,j);
|
||||
w1[j][1] = W_im(n,j);
|
||||
w2[j][0] = W_re(n,j + (n/8));
|
||||
w2[j][1] = W_im(n,j + (n/8));
|
||||
|
||||
}
|
||||
|
||||
float *fw0 = (float *)w0;
|
||||
float *fw1 = (float *)w1;
|
||||
float *fw2 = (float *)w2;
|
||||
#ifdef __arm__
|
||||
//w = FFTS_MALLOC(n/8 * 3 * sizeof(cdata_t), 32);
|
||||
float *fw = (float *)w;
|
||||
#ifdef HAVE_NEON
|
||||
VS temp0, temp1, temp2;
|
||||
for(j=0;j<n/8;j+=4) {
|
||||
temp0 = VLD2(fw0 + j*2);
|
||||
temp0.val[1] = VXOR(temp0.val[1], neg);
|
||||
STORESPR(fw + j*2*3, temp0);
|
||||
temp1 = VLD2(fw1 + j*2);
|
||||
temp1.val[1] = VXOR(temp1.val[1], neg);
|
||||
STORESPR(fw + j*2*3 + 8, temp1);
|
||||
temp2 = VLD2(fw2 + j*2);
|
||||
temp2.val[1] = VXOR(temp2.val[1], neg);
|
||||
STORESPR(fw + j*2*3 + 16, temp2);
|
||||
}
|
||||
#else
|
||||
for(j=0;j<n/8;j+=1) {
|
||||
fw[j*6] = fw0[j*2];
|
||||
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||
fw[j*6+2] = fw1[j*2+0];
|
||||
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
|
||||
fw[j*6+4] = fw2[j*2+0];
|
||||
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
|
||||
}
|
||||
#endif
|
||||
w += n/8 * 3;
|
||||
#else
|
||||
//w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32);
|
||||
float *fw = (float *)w;
|
||||
V temp0, temp1, temp2, re, im;
|
||||
for(j=0;j<n/8;j+=2) {
|
||||
temp0 = VLD(fw0 + j*2);
|
||||
re = VDUPRE(temp0);
|
||||
im = VDUPIM(temp0);
|
||||
im = VXOR(im, MULI_SIGN);
|
||||
VST(fw + j*2*6 , re);
|
||||
VST(fw + j*2*6+4, im);
|
||||
|
||||
temp1 = VLD(fw1 + j*2);
|
||||
re = VDUPRE(temp1);
|
||||
im = VDUPIM(temp1);
|
||||
im = VXOR(im, MULI_SIGN);
|
||||
VST(fw + j*2*6+8 , re);
|
||||
VST(fw + j*2*6+12, im);
|
||||
|
||||
temp2 = VLD(fw2 + j*2);
|
||||
re = VDUPRE(temp2);
|
||||
im = VDUPIM(temp2);
|
||||
im = VXOR(im, MULI_SIGN);
|
||||
VST(fw + j*2*6+16, re);
|
||||
VST(fw + j*2*6+20, im);
|
||||
}
|
||||
w += n/8 * 3 * 2;
|
||||
#endif
|
||||
|
||||
FFTS_FREE(w0);
|
||||
FFTS_FREE(w1);
|
||||
FFTS_FREE(w2);
|
||||
}
|
||||
///p->ws[i] = w;
|
||||
|
||||
n *= 2;
|
||||
}
|
||||
|
||||
float *tmp = (float *)p->ws;
|
||||
|
||||
if(sign < 0) {
|
||||
p->oe_ws = (void *)(&w_data[4]);
|
||||
p->ee_ws = (void *)(w_data);
|
||||
p->eo_ws = (void *)(&w_data[4]);
|
||||
}else{
|
||||
p->oe_ws = (void *)(w_data + 12);
|
||||
p->ee_ws = (void *)(w_data + 8);
|
||||
p->eo_ws = (void *)(w_data + 12);
|
||||
}
|
||||
|
||||
p->N = N;
|
||||
p->lastlut = w;
|
||||
p->n_luts = n_luts;
|
||||
#ifdef DYNAMIC_DISABLED
|
||||
if(sign < 0) {
|
||||
if(N >= 32) p->transform = ffts_static_transform_f;
|
||||
}else{
|
||||
if(N >= 32) p->transform = ffts_static_transform_i;
|
||||
}
|
||||
|
||||
FFTS_FREE(w0);
|
||||
} else {
|
||||
ffts_cpx_32f *w0 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
|
||||
ffts_cpx_32f *w1 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
|
||||
ffts_cpx_32f *w2 = (ffts_cpx_32f*) FFTS_MALLOC(n/8 * sizeof(ffts_cpx_32f), 32);
|
||||
|
||||
float *fw0 = (float*) w0;
|
||||
float *fw1 = (float*) w1;
|
||||
float *fw2 = (float*) w2;
|
||||
|
||||
float *fw = (float *)w;
|
||||
|
||||
for (j = 0; j < n/8; j++) {
|
||||
w0[j][0] = tmp[2 * j * stride][0];
|
||||
w0[j][1] = tmp[2 * j * stride][1];
|
||||
|
||||
w1[j][0] = tmp[j * stride][0];
|
||||
w1[j][1] = tmp[j * stride][1];
|
||||
|
||||
w2[j][0] = tmp[(j + (n/8)) * stride][0];
|
||||
w2[j][1] = tmp[(j + (n/8)) * stride][1];
|
||||
}
|
||||
|
||||
#if defined(__arm__)
|
||||
#ifdef HAVE_NEON
|
||||
for (j = 0; j < n/8; j += 4) {
|
||||
V4SF2 temp0, temp1, temp2;
|
||||
|
||||
temp0 = V4SF2_LD(fw0 + j*2);
|
||||
temp0.val[1] = V4SF_XOR(temp0.val[1], neg);
|
||||
V4SF2_STORE_SPR(fw + j*2*3, temp0);
|
||||
|
||||
temp1 = V4SF2_LD(fw1 + j*2);
|
||||
temp1.val[1] = V4SF_XOR(temp1.val[1], neg);
|
||||
V4SF2_STORE_SPR(fw + j*2*3 + 8, temp1);
|
||||
|
||||
temp2 = V4SF2_LD(fw2 + j*2);
|
||||
temp2.val[1] = V4SF_XOR(temp2.val[1], neg);
|
||||
V4SF2_STORE_SPR(fw + j*2*3 + 16, temp2);
|
||||
}
|
||||
#else
|
||||
if(N>=32) ffts_generate_func_code(p, N, leafN, sign);
|
||||
for (j = 0; j < n/8; j++) {
|
||||
fw[j*6+0] = fw0[j*2+0];
|
||||
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||
fw[j*6+2] = fw1[j*2+0];
|
||||
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
|
||||
fw[j*6+4] = fw2[j*2+0];
|
||||
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
|
||||
}
|
||||
#endif
|
||||
w += n/8 * 3;
|
||||
#else
|
||||
for (j = 0; j < n/8; j += 2) {
|
||||
V4SF temp0, temp1, temp2, re, im;
|
||||
|
||||
temp0 = V4SF_LD(fw0 + j*2);
|
||||
re = V4SF_DUPLICATE_RE(temp0);
|
||||
im = V4SF_DUPLICATE_IM(temp0);
|
||||
im = V4SF_XOR(im, MULI_SIGN);
|
||||
V4SF_ST(fw + j*2*6+0, re);
|
||||
V4SF_ST(fw + j*2*6+4, im);
|
||||
|
||||
temp1 = V4SF_LD(fw1 + j*2);
|
||||
re = V4SF_DUPLICATE_RE(temp1);
|
||||
im = V4SF_DUPLICATE_IM(temp1);
|
||||
im = V4SF_XOR(im, MULI_SIGN);
|
||||
V4SF_ST(fw + j*2*6+8 , re);
|
||||
V4SF_ST(fw + j*2*6+12, im);
|
||||
|
||||
temp2 = V4SF_LD(fw2 + j*2);
|
||||
re = V4SF_DUPLICATE_RE(temp2);
|
||||
im = V4SF_DUPLICATE_IM(temp2);
|
||||
im = V4SF_XOR(im, MULI_SIGN);
|
||||
V4SF_ST(fw + j*2*6+16, re);
|
||||
V4SF_ST(fw + j*2*6+20, im);
|
||||
}
|
||||
|
||||
w += n/8 * 3 * 2;
|
||||
#endif
|
||||
|
||||
FFTS_FREE(w0);
|
||||
FFTS_FREE(w1);
|
||||
FFTS_FREE(w2);
|
||||
}
|
||||
|
||||
n *= 2;
|
||||
stride >>= 1;
|
||||
}
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
if (sign < 0) {
|
||||
p->oe_ws = (void*)(w_data + 4);
|
||||
p->ee_ws = (void*)(w_data);
|
||||
p->eo_ws = (void*)(w_data + 4);
|
||||
} else {
|
||||
p->oe_ws = (void*)(w_data + 12);
|
||||
p->ee_ws = (void*)(w_data + 8);
|
||||
p->eo_ws = (void*)(w_data + 12);
|
||||
}
|
||||
#endif
|
||||
|
||||
FFTS_FREE(tmp);
|
||||
|
||||
return p;
|
||||
p->lastlut = w;
|
||||
p->n_luts = n_luts;
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Create a plan for a one-dimensional transform of length N.
 *
 * N     transform length; must be a power of two and at least 2
 * sign  transform direction. For N < 32 the values -1 and 1 select the
 *       "forward" and "backward" small kernels; for N >= 32 the value is
 *       passed on to the lookup table and code generators.
 *
 * Returns the new plan, or NULL when N is not a valid size, when an
 * allocation or code generation step fails, or when no kernel could be
 * selected for the given sign. Release the plan with ffts_free().
 */
FFTS_API ffts_plan_t*
ffts_init_1d(size_t N, int sign)
{
    const size_t leaf_N = 8;
    ffts_plan_t *p;

    if (N < 2 || (N & (N - 1)) != 0) {
        LOG("FFT size must be a power of two\n");
        return NULL;
    }

    /* zero-initialised so that cleanup can free a partially built plan */
    p = calloc(1, sizeof(*p));
    if (!p) {
        return NULL;
    }

    p->destroy = ffts_free_1d;
    p->N = N;

    if (N >= 32) {
        /* generate lookup tables */
        if (ffts_generate_luts(p, N, leaf_N, sign)) {
            goto cleanup;
        }

        p->offsets = ffts_init_offsets(N, leaf_N);
        if (!p->offsets) {
            goto cleanup;
        }

        p->is = ffts_init_is(N, leaf_N, 1);
        if (!p->is) {
            goto cleanup;
        }

        /* split the N/leaf_N leaves into three loop counts */
        p->i0 = N/leaf_N/3 + 1;
        p->i1 = p->i2 = N/leaf_N/3;
        if ((N/leaf_N) % 3 > 1) {
            p->i1++;
        }

#if !defined(HAVE_VFP) || defined(DYNAMIC_DISABLED)
        p->i0 /= 2;
        p->i1 /= 2;
#endif

#ifdef DYNAMIC_DISABLED
        if (sign < 0) {
            p->transform = ffts_static_transform_f_32f;
        } else {
            p->transform = ffts_static_transform_i_32f;
        }
#else
        /* determine transform size */
#if defined(__arm__)
        if (N < 8192) {
            p->transform_size = 8192;
        } else {
            p->transform_size = N;
        }
#else
        if (N < 2048) {
            p->transform_size = 16384;
        } else {
            p->transform_size = 16384 + 2*N/8 * ffts_ctzl(N);
        }
#endif

        /* allocate code/function buffer */
        p->transform_base = ffts_vmem_alloc(p->transform_size);

#if !defined(_WIN32) && defined(MAP_FAILED)
        /* mmap() signals failure with MAP_FAILED, not NULL; normalise it so
           the check below catches it and cleanup does not try to unmap it */
        if (p->transform_base == MAP_FAILED) {
            p->transform_base = NULL;
        }
#endif

        if (!p->transform_base) {
            goto cleanup;
        }

        /* generate code */
        p->transform = ffts_generate_func_code(p, N, leaf_N, sign);
        if (!p->transform) {
            goto cleanup;
        }

        /* enable execution with read access for the block */
        if (ffts_allow_execute(p->transform_base, p->transform_size)) {
            goto cleanup;
        }

        /* flush from the instruction cache */
        if (ffts_flush_instruction_cache(p->transform_base, p->transform_size)) {
            goto cleanup;
        }
#endif
    } else {
        switch (N) {
        case 2:
            p->transform = &ffts_small_2_32f;
            break;
        case 4:
            if (sign == -1) {
                p->transform = &ffts_small_forward4_32f;
            } else if (sign == 1) {
                p->transform = &ffts_small_backward4_32f;
            }
            break;
        case 8:
            if (sign == -1) {
                p->transform = &ffts_small_forward8_32f;
            } else if (sign == 1) {
                p->transform = &ffts_small_backward8_32f;
            }
            break;
        case 16:
        default:
            if (sign == -1) {
                p->transform = &ffts_small_forward16_32f;
            } else {
                p->transform = &ffts_small_backward16_32f;
            }
            break;
        }

        /* an unsupported sign leaves no kernel selected; fail here instead of
           returning a plan that would call through a NULL pointer */
        if (!p->transform) {
            goto cleanup;
        }
    }

    return p;

cleanup:
    ffts_free_1d(p);
    return NULL;
}
|
@ -1,177 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
#ifndef __CP_SSE_H__
|
||||
#define __CP_SSE_H__
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
//#include <stdalign.h>
|
||||
|
||||
//#include "codegen.h"
|
||||
#include "types.h"
|
||||
|
||||
#define PI 3.1415926535897932384626433832795028841971693993751058209
|
||||
|
||||
static const __attribute__ ((aligned(64))) float w_data[16] = {
|
||||
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
|
||||
-0.70710678118654757273731092936941, -0.70710678118654746171500846685376,
|
||||
1.0f, 0.70710678118654757273731092936941f,
|
||||
-0.0f, -0.70710678118654746171500846685376,
|
||||
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
|
||||
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
|
||||
1.0f, 0.70710678118654757273731092936941f,
|
||||
0.0f, 0.70710678118654746171500846685376
|
||||
};
|
||||
|
||||
/* Returns cos(-2*PI*k/N), narrowed to float. */
__INLINE float W_re(float N, float k)
{
    const double angle = -2.0f * PI * k / N;

    return cos(angle);
}
|
||||
/* Returns sin(-2*PI*k/N), narrowed to float. */
__INLINE float W_im(float N, float k)
{
    const double angle = -2.0f * PI * k / N;

    return sin(angle);
}
|
||||
|
||||
typedef size_t transform_index_t;
|
||||
|
||||
//typedef void (*transform_func_t)(float *data, size_t N, float *LUT);
|
||||
typedef void (*transform_func_t)(float *data, size_t N, float *LUT);
|
||||
|
||||
typedef struct _ffts_plan_t ffts_plan_t;
|
||||
|
||||
/**
|
||||
* Contains all the information needed to perform an FFT
|
||||
*
|
||||
*
|
||||
* DO NOT CHANGE THE ORDER OF MEMBERS
|
||||
* ASSEMBLY CODE USES HARD CODED OFFSETS TO REFERENCE
|
||||
* SOME OF THESE VARIABLES!!
|
||||
*/
|
||||
struct _ffts_plan_t {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
ptrdiff_t *offsets;
|
||||
#ifdef DYNAMIC_DISABLED
|
||||
/**
|
||||
* Twiddle factors
|
||||
*/
|
||||
void *ws;
|
||||
/**
|
||||
* ee - 2 size x size8
|
||||
* oo - 2 x size4 in parallel
|
||||
* oe -
|
||||
*/
|
||||
void *oe_ws, *eo_ws, *ee_ws;
|
||||
#else
|
||||
void __attribute__((aligned(32))) *ws;
|
||||
void __attribute__((aligned(32))) *oe_ws, *eo_ws, *ee_ws;
|
||||
#endif
|
||||
/**
|
||||
* Pointer into an array of precomputed indexes for the input data array
|
||||
*/
|
||||
ptrdiff_t *is;
|
||||
|
||||
/**
|
||||
* Twiddle Factor Indexes
|
||||
*/
|
||||
size_t *ws_is;
|
||||
|
||||
/**
|
||||
* Size of the loops for the base cases
|
||||
*/
|
||||
size_t i0, i1, n_luts;
|
||||
|
||||
/**
|
||||
* Size of the transform
|
||||
*/
|
||||
size_t N;
|
||||
void *lastlut;
|
||||
/**
|
||||
* Used in multidimensional Code ??
|
||||
*/
|
||||
transform_index_t *transforms;
|
||||
//transform_func_t transform;
|
||||
|
||||
/**
|
||||
* Pointer to the dynamically generated function
|
||||
* that will execute the FFT
|
||||
*/
|
||||
void (*transform)(ffts_plan_t * , const void * , void * );
|
||||
|
||||
/**
|
||||
* Pointer to the base memory address of
|
||||
* the transform function
|
||||
*/
|
||||
void *transform_base;
|
||||
|
||||
/**
|
||||
* Size of the memory block containing the
|
||||
* generated code
|
||||
*/
|
||||
size_t transform_size;
|
||||
|
||||
/**
|
||||
* Points to the constant variables used by
|
||||
* the Assembly Code
|
||||
*/
|
||||
void *constants;
|
||||
|
||||
// multi-dimensional stuff:
|
||||
struct _ffts_plan_t **plans;
|
||||
int rank;
|
||||
size_t *Ns, *Ms;
|
||||
void *buf;
|
||||
|
||||
void *transpose_buf;
|
||||
|
||||
/**
|
||||
* Pointer to the destroy function
|
||||
* to clean up the plan after use
|
||||
* (differs for real and multi-dimensional transforms)
|
||||
*/
|
||||
void (*destroy)(ffts_plan_t *);
|
||||
|
||||
/**
|
||||
* Coefficients for the real valued transforms
|
||||
*/
|
||||
float *A, *B;
|
||||
|
||||
size_t i2;
|
||||
};
|
||||
|
||||
|
||||
void ffts_free(ffts_plan_t *);
|
||||
ffts_plan_t *ffts_init_1d(size_t N, int sign);
|
||||
void ffts_execute(ffts_plan_t *, const void *, void *);
|
||||
#endif
|
@ -0,0 +1,111 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_ATTRIBUTES_H
|
||||
#define FFTS_ATTRIBUTES_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
/* Macro definitions for various function/variable attributes */
|
||||
/*
 * GCC_VERSION_AT_LEAST(x,y) evaluates to non-zero when compiling with a
 * GNU-compatible compiler whose version is at least x.y, and to 0 otherwise.
 * Arguments and sub-expressions are parenthesized so that any integer
 * expression can be passed and the result can be embedded in a larger one.
 */
#ifdef __GNUC__
#define GCC_VERSION_AT_LEAST(x,y) \
    (__GNUC__ > (x) || (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
#else
#define GCC_VERSION_AT_LEAST(x,y) 0
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define FFTS_ALIGN(x) __attribute__((aligned(x)))
|
||||
#elif defined(_MSC_VER)
|
||||
#define FFTS_ALIGN(x) __declspec(align(x))
|
||||
#else
|
||||
#define FFTS_ALIGN(x)
|
||||
#endif
|
||||
|
||||
#if GCC_VERSION_AT_LEAST(3,1)
|
||||
#define FFTS_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
#elif defined(_MSC_VER)
|
||||
#define FFTS_ALWAYS_INLINE __forceinline
|
||||
#else
|
||||
#define FFTS_ALWAYS_INLINE inline
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FFTS_INLINE __inline
|
||||
#else
|
||||
#define FFTS_INLINE inline
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define FFTS_RESTRICT __restrict
|
||||
#elif defined(_MSC_VER)
|
||||
#define FFTS_RESTRICT __restrict
|
||||
#else
|
||||
#define FFTS_RESTRICT
|
||||
#endif
|
||||
|
||||
#if GCC_VERSION_AT_LEAST(4,5)
|
||||
#define FFTS_ASSUME(cond) do { if (!(cond)) __builtin_unreachable(); } while (0)
|
||||
#elif defined(_MSC_VER)
|
||||
#define FFTS_ASSUME(cond) __assume(cond)
|
||||
#else
|
||||
#define FFTS_ASSUME(cond)
|
||||
#endif
|
||||
|
||||
/*
 * FFTS_ASSUME_ALIGNED_16(x) / FFTS_ASSUME_ALIGNED_32(x) yield the pointer x.
 * With GCC 4.7 or newer the compiler is additionally told that the pointer
 * is aligned to 16 / 32 bytes. The fallback parenthesizes x so the expansion
 * behaves like a single expression in both variants.
 */
#if GCC_VERSION_AT_LEAST(4,7)
#define FFTS_ASSUME_ALIGNED_16(x) __builtin_assume_aligned(x, 16)
#else
#define FFTS_ASSUME_ALIGNED_16(x) (x)
#endif

#if GCC_VERSION_AT_LEAST(4,7)
#define FFTS_ASSUME_ALIGNED_32(x) __builtin_assume_aligned(x, 32)
#else
#define FFTS_ASSUME_ALIGNED_32(x) (x)
#endif
|
||||
|
||||
/*
 * Branch prediction hints. FFTS_LIKELY(cond) / FFTS_UNLIKELY(cond) evaluate
 * to the truth value of cond; with a GNU-compatible compiler they also tell
 * the optimizer which outcome is expected. The fallback parenthesizes cond
 * so the expansion is a single expression in both variants.
 */
#if defined(__GNUC__)
#define FFTS_LIKELY(cond) __builtin_expect(!!(cond), 1)
#else
#define FFTS_LIKELY(cond) (cond)
#endif

#if defined(__GNUC__)
#define FFTS_UNLIKELY(cond) __builtin_expect(!!(cond), 0)
#else
#define FFTS_UNLIKELY(cond) (cond)
#endif
|
||||
|
||||
#endif /* FFTS_ATTRIBUTES_H */
|
@ -0,0 +1,230 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_DD_H
|
||||
#define FFTS_DD_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ffts_attributes.h"
|
||||
|
||||
#if HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/* double-double number: a value carried as a high part and a low part */
struct ffts_dd_t {
    double hi; /* high-order part */
    double lo; /* low-order part */
};

#if HAVE_SSE2
/* double-double vector: two double-double numbers held in SSE2 registers */
struct ffts_dd2_t
{
    __m128d hi; /* high-order parts */
    __m128d lo; /* low-order parts */
};
#endif
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_add_dd_unnormalized(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b);
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_mul_dd_unnormalized(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b);
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_split(double a);
|
||||
|
||||
/* aka quick-two-sum */
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_add(double a, double b)
|
||||
{
|
||||
struct ffts_dd_t dd;
|
||||
dd.hi = a + b;
|
||||
dd.lo = b - (dd.hi - a);
|
||||
return dd;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_add_dd(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b)
|
||||
{
|
||||
struct ffts_dd_t t1 = ffts_dd_add_dd_unnormalized(a, b);
|
||||
return ffts_dd_add(t1.hi, t1.lo);
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_add_dd_unnormalized(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b)
|
||||
{
|
||||
struct ffts_dd_t dd;
|
||||
double e1;
|
||||
dd.hi = a.hi + b.hi;
|
||||
e1 = dd.hi - a.hi;
|
||||
dd.lo = ((a.hi - (dd.hi - e1)) + (b.hi - e1)) + (a.lo + b.lo);
|
||||
return dd;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_mul(const double a, const double b)
|
||||
{
|
||||
struct ffts_dd_t dd;
|
||||
struct ffts_dd_t t1 = ffts_dd_split(a);
|
||||
struct ffts_dd_t t2 = ffts_dd_split(b);
|
||||
dd.hi = a * b;
|
||||
dd.lo = (t1.hi * t2.hi - dd.hi);
|
||||
dd.lo += (t1.hi * t2.lo + t1.lo * t2.hi);
|
||||
dd.lo += t1.lo * t2.lo;
|
||||
return dd;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_mul_dd(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b)
|
||||
{
|
||||
struct ffts_dd_t dd = ffts_dd_mul_dd_unnormalized(a, b);
|
||||
return ffts_dd_add(dd.hi, dd.lo);
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_mul_dd_unnormalized(const struct ffts_dd_t a,
|
||||
const struct ffts_dd_t b)
|
||||
{
|
||||
struct ffts_dd_t dd = ffts_dd_mul(a.hi, b.hi);
|
||||
dd.lo += (a.hi * b.lo + a.lo * b.hi);
|
||||
return dd;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd_t
|
||||
ffts_dd_split(double a)
|
||||
{
|
||||
/* 2^27+1 = 134217729 */
|
||||
struct ffts_dd_t dd;
|
||||
double t = 134217729.0 * a;
|
||||
dd.hi = t - (t - a);
|
||||
dd.lo = a - dd.hi;
|
||||
return dd;
|
||||
}
|
||||
|
||||
#if HAVE_SSE2
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_add_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b);
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_mul_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b);
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_split(__m128d a);
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_add(__m128d a, __m128d b)
|
||||
{
|
||||
struct ffts_dd2_t dd2;
|
||||
dd2.hi = _mm_add_pd(a, b);
|
||||
dd2.lo = _mm_sub_pd(b, _mm_sub_pd(dd2.hi, a));
|
||||
return dd2;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_add_dd2(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b)
|
||||
{
|
||||
struct ffts_dd2_t t1 = ffts_dd2_add_dd2_unnormalized(a, b);
|
||||
return ffts_dd2_add(t1.hi, t1.lo);
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_add_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b)
|
||||
{
|
||||
struct ffts_dd2_t dd2;
|
||||
__m128d e1;
|
||||
dd2.hi = _mm_add_pd(a->hi, b->hi);
|
||||
e1 = _mm_sub_pd(dd2.hi, a->hi);
|
||||
dd2.lo = _mm_add_pd(_mm_add_pd(_mm_sub_pd(a->hi, _mm_sub_pd(dd2.hi, e1)),
|
||||
_mm_sub_pd(b->hi, e1)), _mm_add_pd(a->lo, b->lo));
|
||||
return dd2;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_mul(const __m128d a, const __m128d b)
|
||||
{
|
||||
struct ffts_dd2_t dd2;
|
||||
struct ffts_dd2_t t1 = ffts_dd2_split(a);
|
||||
struct ffts_dd2_t t2 = ffts_dd2_split(b);
|
||||
dd2.hi = _mm_mul_pd(a, b);
|
||||
dd2.lo = _mm_add_pd(_mm_add_pd(_mm_sub_pd(
|
||||
_mm_mul_pd(t1.hi, t2.hi), dd2.hi),
|
||||
_mm_add_pd(_mm_mul_pd(t1.hi, t2.lo),
|
||||
_mm_mul_pd(t1.lo, t2.hi))),
|
||||
_mm_mul_pd(t1.lo, t2.lo));
|
||||
return dd2;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_mul_dd2(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b)
|
||||
{
|
||||
struct ffts_dd2_t dd2 = ffts_dd2_mul_dd2_unnormalized(a, b);
|
||||
return ffts_dd2_add(dd2.hi, dd2.lo);
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_mul_dd2_unnormalized(const struct ffts_dd2_t *const FFTS_RESTRICT a,
|
||||
const struct ffts_dd2_t *const FFTS_RESTRICT b)
|
||||
{
|
||||
struct ffts_dd2_t dd2 = ffts_dd2_mul(a->hi, b->hi);
|
||||
dd2.lo = _mm_add_pd(dd2.lo, _mm_add_pd(
|
||||
_mm_mul_pd(a->hi, b->lo), _mm_mul_pd(a->lo, b->hi)));
|
||||
return dd2;
|
||||
}
|
||||
|
||||
static FFTS_INLINE struct ffts_dd2_t
|
||||
ffts_dd2_split(__m128d a)
|
||||
{
|
||||
/* 2^27+1 = 134217729 */
|
||||
struct ffts_dd2_t dd2;
|
||||
__m128d t = _mm_mul_pd(a, _mm_set1_pd(134217729.0));
|
||||
dd2.hi = _mm_sub_pd(t, _mm_sub_pd(t, a));
|
||||
dd2.lo = _mm_sub_pd(a, dd2.hi);
|
||||
return dd2;
|
||||
}
|
||||
#endif /* HAVE_SSE2 */
|
||||
|
||||
#endif /* FFTS_DD_H */
|
@ -0,0 +1,215 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_INTERNAL_H
|
||||
#define FFTS_INTERNAL_H
|
||||
|
||||
//#include "config.h"
|
||||
#include "ffts_attributes.h"
|
||||
#include "types.h"
|
||||
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef HAVE_STDINT_H
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * LOG(s): emit the diagnostic string s when ENABLE_LOG is defined
 * (Android log on Android, stderr elsewhere); expands to nothing otherwise.
 * The message is passed as the argument of a "%s" format and never as the
 * format string itself, so a '%' inside s is printed literally instead of
 * being interpreted as a conversion specifier.
 */
#ifdef ENABLE_LOG
#ifdef __ANDROID__
#include <android/log.h>
#define LOG(s) __android_log_print(ANDROID_LOG_ERROR, "FFTS", "%s", (s))
#else
#define LOG(s) fprintf(stderr, "%s", (s))
#endif
#else
#define LOG(s)
#endif
|
||||
|
||||
struct _ffts_plan_t;
|
||||
typedef void (*transform_func_t)(struct _ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
/**
|
||||
* Contains all the information needed to perform an FFT
|
||||
*
|
||||
*
|
||||
* DO NOT CHANGE THE ORDER OF MEMBERS
|
||||
* ASSEMBLY CODE USES HARD CODED OFFSETS TO REFERENCE
|
||||
* SOME OF THESE VARIABLES!!
|
||||
*/
|
||||
struct _ffts_plan_t {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
ptrdiff_t *offsets;
|
||||
#ifdef DYNAMIC_DISABLED
|
||||
/**
|
||||
* Twiddle factors
|
||||
*/
|
||||
void *ws;
|
||||
|
||||
/**
|
||||
* ee - 2 size x size8
|
||||
* oo - 2 x size4 in parallel
|
||||
* oe -
|
||||
*/
|
||||
void *oe_ws, *eo_ws, *ee_ws;
|
||||
#else
|
||||
void FFTS_ALIGN(32) *ws;
|
||||
void FFTS_ALIGN(32) *oe_ws, *eo_ws, *ee_ws;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Pointer into an array of precomputed indexes for the input data array
|
||||
*/
|
||||
ptrdiff_t *is;
|
||||
|
||||
/**
|
||||
* Twiddle Factor Indexes
|
||||
*/
|
||||
size_t *ws_is;
|
||||
|
||||
/**
|
||||
* Size of the loops for the base cases
|
||||
*/
|
||||
size_t i0, i1, n_luts;
|
||||
|
||||
/**
|
||||
* Size of the transform
|
||||
*/
|
||||
size_t N;
|
||||
void *lastlut;
|
||||
|
||||
#ifdef __arm__
|
||||
size_t *temporary_fix_as_dynamic_code_assumes_fixed_offset;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Pointer to the dynamically generated function
|
||||
* that will execute the FFT
|
||||
*/
|
||||
transform_func_t transform;
|
||||
|
||||
/**
|
||||
* Pointer to the base memory address of
|
||||
* the transform function
|
||||
*/
|
||||
void *transform_base;
|
||||
|
||||
/**
|
||||
* Size of the memory block containing the
|
||||
* generated code
|
||||
*/
|
||||
size_t transform_size;
|
||||
|
||||
/**
|
||||
* Points to the constant variables used by
|
||||
* the Assembly Code
|
||||
*/
|
||||
void *constants;
|
||||
|
||||
// multi-dimensional stuff:
|
||||
struct _ffts_plan_t **plans;
|
||||
int rank;
|
||||
size_t *Ns, *Ms;
|
||||
void *buf;
|
||||
|
||||
void *transpose_buf;
|
||||
|
||||
/**
|
||||
* Pointer to the destroy function
|
||||
* to clean up the plan after use
|
||||
* (differs for real and multi-dimensional transforms)
|
||||
*/
|
||||
void (*destroy)(struct _ffts_plan_t *);
|
||||
|
||||
/**
|
||||
* Coefficients for the real-valued transforms
|
||||
*/
|
||||
float *A, *B;
|
||||
|
||||
size_t i2;
|
||||
};
|
||||
|
||||
/*
 * Allocate size bytes of aligned memory.
 * Windows: _aligned_malloc() with 32-byte alignment; elsewhere: valloc().
 * Release the result with ffts_aligned_free().
 */
static FFTS_INLINE void *ffts_aligned_malloc(size_t size)
{
    void *block;

#if defined(_WIN32)
    block = _aligned_malloc(size, 32);
#else
    block = valloc(size);
#endif

    return block;
}
|
||||
|
||||
/*
 * Release memory obtained from ffts_aligned_malloc().
 * Windows needs _aligned_free(); elsewhere plain free() is used.
 */
static FFTS_INLINE void ffts_aligned_free(void *p)
{
#if !defined(_WIN32)
    free(p);
#else
    _aligned_free(p);
#endif
}
|
||||
|
||||
#if GCC_VERSION_AT_LEAST(3,3)
|
||||
#define ffts_ctzl __builtin_ctzl
|
||||
#elif defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#ifdef _M_X64
|
||||
#pragma intrinsic(_BitScanForward64)
|
||||
static __inline unsigned long ffts_ctzl(size_t N)
|
||||
{
|
||||
unsigned long count;
|
||||
_BitScanForward64((unsigned long*) &count, N);
|
||||
return count;
|
||||
}
|
||||
#else
|
||||
#pragma intrinsic(_BitScanForward)
|
||||
static __inline unsigned long ffts_ctzl(size_t N)
|
||||
{
|
||||
unsigned long count;
|
||||
_BitScanForward((unsigned long*) &count, N);
|
||||
return count;
|
||||
}
|
||||
#endif /* _WIN64 */
|
||||
#endif /* _MSC_VER */
|
||||
|
||||
#endif /* FFTS_INTERNAL_H */
|
@ -1,282 +1,193 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts_nd.h"
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include "neon.h"
|
||||
#endif
|
||||
|
||||
void ffts_free_nd(ffts_plan_t *p) {
|
||||
|
||||
int i;
|
||||
for(i=0;i<p->rank;i++) {
|
||||
|
||||
ffts_plan_t *x = p->plans[i];
|
||||
int k;
|
||||
for(k=0;k<i;k++) {
|
||||
if(p->Ms[i] == p->Ms[k]) x = NULL;
|
||||
}
|
||||
|
||||
if(x) ffts_free(x);
|
||||
}
|
||||
|
||||
free(p->Ns);
|
||||
free(p->Ms);
|
||||
free(p->plans);
|
||||
free(p->buf);
|
||||
free(p->transpose_buf);
|
||||
free(p);
|
||||
}
|
||||
#define TSIZE 8
|
||||
#include <string.h>
|
||||
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
size_t i,j,k;
|
||||
int linebytes = w*8;
|
||||
|
||||
for(j=0;j<h;j+=8) {
|
||||
for(i=0;i<w;i+=8) {
|
||||
neon_transpose_to_buf(in + j*w + i, buf, w);
|
||||
|
||||
uint64_t *p = out + i*h + j;
|
||||
uint64_t *pbuf = buf;
|
||||
uint64_t *ptemp;
|
||||
|
||||
__asm__ __volatile__(
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
|
||||
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
|
||||
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
|
||||
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"add %[p], %[p], %[w], lsl #3\n\t"
|
||||
"vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
|
||||
"vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
|
||||
"vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
|
||||
"mov %[ptemp], %[p]\n\t"
|
||||
"vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
|
||||
"vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
|
||||
|
||||
: [p] "+r" (p), [pbuf] "+r" (pbuf), [ptemp] "+r" (ptemp)
|
||||
: [w] "r" (w)
|
||||
: "memory", "q8", "q9", "q10", "q11"
|
||||
);
|
||||
// out[i*h + j] = in[j*w + i];
|
||||
}
|
||||
}
|
||||
#else
|
||||
#ifdef HAVE_SSE
|
||||
uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64)));
|
||||
int tx, ty;
|
||||
int x, y;
|
||||
int tw = w / TSIZE;
|
||||
int th = h / TSIZE;
|
||||
for (ty=0;ty<th;ty++) {
|
||||
for (tx=0;tx<tw;tx++) {
|
||||
uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
|
||||
uint64_t *op0 = tmp;//out + h*TSIZE*tx + ty*TSIZE;
|
||||
|
||||
// Copy/transpose to tmp
|
||||
for (y=0;y<TSIZE;y+=2) {
|
||||
//for (x=0;x<TSIZE;x+=2) {
|
||||
//op[x*TSIZE] = ip[x];
|
||||
__m128d q0 = _mm_load_pd((double *)(ip0 + 0*w));
|
||||
__m128d q1 = _mm_load_pd((double *)(ip0 + 1*w));
|
||||
__m128d q2 = _mm_load_pd((double *)(ip0 + 2*w));
|
||||
__m128d q3 = _mm_load_pd((double *)(ip0 + 3*w));
|
||||
__m128d q4 = _mm_load_pd((double *)(ip0 + 4*w));
|
||||
__m128d q5 = _mm_load_pd((double *)(ip0 + 5*w));
|
||||
__m128d q6 = _mm_load_pd((double *)(ip0 + 6*w));
|
||||
__m128d q7 = _mm_load_pd((double *)(ip0 + 7*w));
|
||||
ip0 += 2;
|
||||
|
||||
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
|
||||
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
|
||||
__m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
|
||||
__m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
|
||||
__m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
|
||||
__m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
|
||||
__m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
|
||||
__m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));
|
||||
//_mm_store_pd((double *)(op0 + y*h + x), t0);
|
||||
//_mm_store_pd((double *)(op0 + y*h + x + h), t1);
|
||||
_mm_store_pd((double *)(op0 + 0), t0);
|
||||
_mm_store_pd((double *)(op0 + 0 + TSIZE), t1);
|
||||
_mm_store_pd((double *)(op0 + 2 ), t2);
|
||||
_mm_store_pd((double *)(op0 + 2 + TSIZE), t3);
|
||||
_mm_store_pd((double *)(op0 + 4 ), t4);
|
||||
_mm_store_pd((double *)(op0 + 4 + TSIZE), t5);
|
||||
_mm_store_pd((double *)(op0 + 6 ), t6);
|
||||
_mm_store_pd((double *)(op0 + 6 + TSIZE), t7);
|
||||
//}
|
||||
op0 += 2*TSIZE;
|
||||
}
|
||||
|
||||
op0 = out + h*tx*TSIZE + ty*TSIZE;
|
||||
ip0 = tmp;
|
||||
for (y=0;y<TSIZE;y+=1) {
|
||||
// memcpy(op0, ip0, TSIZE * sizeof(*ip0));
|
||||
|
||||
__m128d q0 = _mm_load_pd((double *)(ip0 + 0));
|
||||
__m128d q1 = _mm_load_pd((double *)(ip0 + 2));
|
||||
__m128d q2 = _mm_load_pd((double *)(ip0 + 4));
|
||||
__m128d q3 = _mm_load_pd((double *)(ip0 + 6));
|
||||
_mm_store_pd((double *)(op0 + 0), q0);
|
||||
_mm_store_pd((double *)(op0 + 2), q1);
|
||||
_mm_store_pd((double *)(op0 + 4), q2);
|
||||
_mm_store_pd((double *)(op0 + 6), q3);
|
||||
|
||||
op0 += h;
|
||||
ip0 += TSIZE;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
/*
|
||||
size_t i,j;
|
||||
for(i=0;i<w;i+=2) {
|
||||
for(j=0;j<h;j+=2) {
|
||||
// out[i*h + j] = in[j*w + i];
|
||||
__m128d q0 = _mm_load_pd((double *)(in + j*w + i));
|
||||
__m128d q1 = _mm_load_pd((double *)(in + j*w + i + w));
|
||||
__m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
|
||||
__m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
|
||||
_mm_store_pd((double *)(out + i*h + j), t0);
|
||||
_mm_store_pd((double *)(out + i*h + j + h), t1);
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) {
|
||||
|
||||
uint64_t *din = (uint64_t *)in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *dout = (uint64_t *)out;
|
||||
|
||||
size_t i,j;
|
||||
for(i=0;i<p->Ns[0];i++) {
|
||||
p->plans[0]->transform(p->plans[0], din + (i * p->Ms[0]), buf + (i * p->Ms[0]));
|
||||
}
|
||||
ffts_transpose(buf, dout, p->Ms[0], p->Ns[0], p->transpose_buf);
|
||||
|
||||
for(i=1;i<p->rank;i++) {
|
||||
for(j=0;j<p->Ns[i];j++) {
|
||||
p->plans[i]->transform(p->plans[i], dout + (j * p->Ms[i]), buf + (j * p->Ms[i]));
|
||||
}
|
||||
ffts_transpose(buf, dout, p->Ms[i], p->Ns[i], p->transpose_buf);
|
||||
}
|
||||
#include "ffts_nd.h"
|
||||
#include "ffts_internal.h"
|
||||
#include "ffts_transpose.h"
|
||||
|
||||
static void
|
||||
ffts_free_nd(ffts_plan_t *p)
|
||||
{
|
||||
if (p->plans) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < p->rank; i++) {
|
||||
ffts_plan_t *plan = p->plans[i];
|
||||
|
||||
if (plan) {
|
||||
for (j = 0; j < i; j++) {
|
||||
if (p->Ns[i] == p->Ns[j]) {
|
||||
plan = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (plan) {
|
||||
ffts_free(plan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(p->plans);
|
||||
}
|
||||
|
||||
if (p->Ns) {
|
||||
free(p->Ns);
|
||||
}
|
||||
|
||||
if (p->Ms) {
|
||||
free(p->Ms);
|
||||
}
|
||||
|
||||
if (p->buf) {
|
||||
ffts_aligned_free(p->buf);
|
||||
}
|
||||
|
||||
free(p);
|
||||
}
|
||||
|
||||
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign) {
|
||||
size_t vol = 1;
|
||||
static void
|
||||
ffts_execute_nd(ffts_plan_t *p, const void *in, void *out)
|
||||
{
|
||||
uint64_t *din = (uint64_t*) in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *dout = (uint64_t*) out;
|
||||
|
||||
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||
ffts_plan_t *plan;
|
||||
int i;
|
||||
size_t j;
|
||||
|
||||
p->transform = &ffts_execute_nd;
|
||||
p->destroy = &ffts_free_nd;
|
||||
plan = p->plans[0];
|
||||
for (j = 0; j < p->Ms[0]; j++) {
|
||||
plan->transform(plan, din + (j * p->Ns[0]), buf + (j * p->Ns[0]));
|
||||
}
|
||||
|
||||
p->rank = rank;
|
||||
p->Ns = malloc(sizeof(size_t) * rank);
|
||||
p->Ms = malloc(sizeof(size_t) * rank);
|
||||
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
|
||||
int i;
|
||||
for(i=0;i<rank;i++) {
|
||||
p->Ns[i] = Ns[i];
|
||||
vol *= Ns[i];
|
||||
}
|
||||
p->buf = valloc(sizeof(float) * 2 * vol);
|
||||
ffts_transpose(buf, dout, p->Ns[0], p->Ms[0]);
|
||||
|
||||
for(i=0;i<rank;i++) {
|
||||
p->Ms[i] = vol / p->Ns[i];
|
||||
for (i = 1; i < p->rank; i++) {
|
||||
plan = p->plans[i];
|
||||
|
||||
p->plans[i] = NULL;
|
||||
int k;
|
||||
for(k=0;k<i;k++) {
|
||||
if(p->Ms[k] == p->Ms[i])
|
||||
p->plans[i] = p->plans[k];
|
||||
}
|
||||
for (j = 0; j < p->Ms[i]; j++) {
|
||||
plan->transform(plan, dout + (j * p->Ns[i]), buf + (j * p->Ns[i]));
|
||||
}
|
||||
|
||||
if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
|
||||
}
|
||||
ffts_transpose(buf, dout, p->Ns[i], p->Ms[i]);
|
||||
}
|
||||
}
|
||||
|
||||
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
|
||||
return p;
|
||||
/*
 * Create a plan for a multi-dimensional transform.
 *
 * rank - number of dimensions; must be at least 1
 * Ns   - array of `rank` dimension sizes (stored internally in reverse
 *        order); every size must be non-zero
 * sign - forwarded unchanged to ffts_init_1d()
 *
 * A rank of 1 is delegated directly to ffts_init_1d(). Dimensions of equal
 * size share a single 1-D sub-plan.
 *
 * Returns the new plan, or NULL on invalid arguments, allocation failure or
 * failure to create any 1-D sub-plan.
 */
FFTS_API ffts_plan_t*
ffts_init_nd(int rank, size_t *Ns, int sign)
{
    ffts_plan_t *p;
    size_t vol = 1;
    int i, j;

    if (!Ns || rank < 1) {
        return NULL;
    }

    if (rank == 1) {
        return ffts_init_1d(Ns[0], sign);
    }

    /* zero-initialized so that cleanup can tell which members were set */
    p = calloc(1, sizeof(*p));
    if (!p) {
        return NULL;
    }

    p->transform = &ffts_execute_nd;
    p->destroy = &ffts_free_nd;
    p->rank = rank;

    p->Ms = malloc(rank * sizeof(*p->Ms));
    if (!p->Ms) {
        goto cleanup;
    }

    p->Ns = malloc(rank * sizeof(*p->Ns));
    if (!p->Ns) {
        goto cleanup;
    }

    /* reverse the order */
    for (i = 0; i < rank; i++) {
        size_t N = Ns[rank - i - 1];

        /* a zero size would make vol zero and divide by zero below */
        if (N == 0) {
            goto cleanup;
        }

        p->Ns[i] = N;
        vol *= N;
    }

    p->buf = ffts_aligned_malloc(2 * vol * sizeof(float));
    if (!p->buf) {
        goto cleanup;
    }

    p->plans = calloc(rank, sizeof(*p->plans));
    if (!p->plans) {
        goto cleanup;
    }

    for (i = 0; i < rank; i++) {
        p->Ms[i] = vol / p->Ns[i];

        /* reuse the sub-plan of an earlier dimension with the same size */
        for (j = 0; j < i; j++) {
            if (p->Ns[i] == p->Ns[j]) {
                p->plans[i] = p->plans[j];
                break;
            }
        }

        if (!p->plans[i]) {
            p->plans[i] = ffts_init_1d(p->Ns[i], sign);

            /* check the sub-plan just created, not the array pointer */
            if (!p->plans[i]) {
                goto cleanup;
            }
        }
    }

    return p;

cleanup:
    ffts_free_nd(p);
    return NULL;
}
|
||||
|
||||
FFTS_API ffts_plan_t*
|
||||
ffts_init_2d(size_t N1, size_t N2, int sign)
|
||||
{
|
||||
size_t Ns[2];
|
||||
|
||||
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign) {
|
||||
size_t Ns[2];
|
||||
Ns[0] = N1;
|
||||
Ns[1] = N2;
|
||||
return ffts_init_nd(2, Ns, sign);
|
||||
Ns[0] = N1; /* x */
|
||||
Ns[1] = N2; /* y */
|
||||
return ffts_init_nd(2, Ns, sign);
|
||||
}
|
||||
|
@ -1,58 +1,50 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __FFTS_ND_H__
|
||||
#define __FFTS_ND_H__
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
*/
|
||||
|
||||
#include "ffts.h"
|
||||
#ifndef FFTS_ND_H
|
||||
#define FFTS_ND_H
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
void ffts_free_nd(ffts_plan_t *p);
|
||||
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf);
|
||||
#include "ffts.h"
|
||||
#include <stddef.h>
|
||||
|
||||
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out);
|
||||
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);
|
||||
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
|
||||
ffts_plan_t*
|
||||
ffts_init_nd(int rank, size_t *Ns, int sign);
|
||||
|
||||
#endif
|
||||
ffts_plan_t*
|
||||
ffts_init_2d(size_t N1, size_t N2, int sign);
|
||||
|
||||
#endif /* FFTS_ND_H */
|
||||
|
@ -1,226 +1,654 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts_real.h"
|
||||
#include "ffts_internal.h"
|
||||
#include "ffts_trig.h"
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#elif HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/* check if have SSE3 intrinsics */
|
||||
#ifdef HAVE_PMMINTRIN_H
|
||||
#include <pmmintrin.h>
|
||||
#elif HAVE_INTRIN_H
|
||||
#include <intrin.h>
|
||||
#else
|
||||
/* avoid using negative zero as some configurations have problems with those */
|
||||
static const FFTS_ALIGN(16) unsigned int sign_mask_even[4] = {
|
||||
0x80000000, 0, 0x80000000, 0
|
||||
};
|
||||
static const FFTS_ALIGN(16) unsigned int sign_mask_odd[4] = {
|
||||
0, 0x80000000, 0, 0x80000000
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static void
|
||||
ffts_free_1d_real(ffts_plan_t *p)
|
||||
{
|
||||
if (p->B) {
|
||||
ffts_aligned_free(p->B);
|
||||
}
|
||||
|
||||
if (p->A) {
|
||||
ffts_aligned_free(p->A);
|
||||
}
|
||||
|
||||
void ffts_free_1d_real(ffts_plan_t *p) {
|
||||
ffts_free(p->plans[0]);
|
||||
free(p->A);
|
||||
free(p->B);
|
||||
free(p->plans);
|
||||
free(p->buf);
|
||||
free(p);
|
||||
if (p->buf) {
|
||||
ffts_aligned_free(p->buf);
|
||||
}
|
||||
|
||||
if (p->plans[0]) {
|
||||
ffts_free(p->plans[0]);
|
||||
}
|
||||
|
||||
free(p);
|
||||
}
|
||||
|
||||
void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) {
|
||||
float *out = (float *)vout;
|
||||
float *buf = (float *)p->buf;
|
||||
float *A = p->A;
|
||||
float *B = p->B;
|
||||
static void
|
||||
ffts_execute_1d_real(ffts_plan_t *p, const void *input, void *output)
|
||||
{
|
||||
float *const FFTS_RESTRICT out =
|
||||
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(output);
|
||||
float *const FFTS_RESTRICT buf =
|
||||
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
|
||||
const float *const FFTS_RESTRICT A =
|
||||
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
|
||||
const float *const FFTS_RESTRICT B =
|
||||
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
|
||||
const int N = (const int) p->N;
|
||||
int i;
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
float *p_buf0 = buf;
|
||||
float *p_buf1 = buf + N - 2;
|
||||
float *p_out = out;
|
||||
#endif
|
||||
|
||||
p->plans[0]->transform(p->plans[0], vin, buf);
|
||||
/* we know this */
|
||||
FFTS_ASSUME(N/2 > 0);
|
||||
|
||||
size_t N = p->N;
|
||||
buf[N] = buf[0];
|
||||
buf[N+1] = buf[1];
|
||||
p->plans[0]->transform(p->plans[0], input, buf);
|
||||
|
||||
float *p_buf0 = buf;
|
||||
float *p_buf1 = buf + N - 2;
|
||||
float *p_out = out;
|
||||
#ifndef HAVE_SSE
|
||||
buf[N + 0] = buf[0];
|
||||
buf[N + 1] = buf[1];
|
||||
#endif
|
||||
|
||||
size_t i;
|
||||
#ifdef __ARM_NEON__
|
||||
for(i=0;i<N/2;i+=2) {
|
||||
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
|
||||
"vld1.32 {q9}, [%[pb], :128]!\n\t"
|
||||
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
|
||||
"vld1.32 {q11}, [%[buf1], :64]\n\t"
|
||||
"sub %[buf1], %[buf1], #16\n\t"
|
||||
|
||||
"vdup.32 d26, d16[1]\n\t"
|
||||
"vdup.32 d27, d17[1]\n\t"
|
||||
"vdup.32 d24, d16[0]\n\t"
|
||||
"vdup.32 d25, d17[0]\n\t"
|
||||
|
||||
"vdup.32 d30, d23[1]\n\t"
|
||||
"vdup.32 d31, d22[1]\n\t"
|
||||
"vdup.32 d28, d23[0]\n\t"
|
||||
"vdup.32 d29, d22[0]\n\t"
|
||||
|
||||
"vmul.f32 q13, q13, q10\n\t"
|
||||
"vmul.f32 q15, q15, q9\n\t"
|
||||
"vmul.f32 q12, q12, q10\n\t"
|
||||
"vmul.f32 q14, q14, q9\n\t"
|
||||
"vrev64.f32 q13, q13\n\t"
|
||||
"vrev64.f32 q15, q15\n\t"
|
||||
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d30, d31\n\t"
|
||||
"vneg.f32 d26, d26\n\t"
|
||||
"vneg.f32 d31, d31\n\t"
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d30, d31\n\t"
|
||||
|
||||
"vadd.f32 q12, q12, q14\n\t"
|
||||
"vadd.f32 q13, q13, q15\n\t"
|
||||
"vadd.f32 q12, q12, q13\n\t"
|
||||
"vst1.32 {q12}, [%[pout], :128]!\n\t"
|
||||
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
|
||||
[pout] "+r" (p_out)
|
||||
:
|
||||
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
for (i = 0; i < N; i += 4) {
|
||||
__asm__ __volatile__ (
|
||||
"vld1.32 {q8}, [%[pa]]!\n\t"
|
||||
"vld1.32 {q9}, [%[pb]]!\n\t"
|
||||
"vld1.32 {q10}, [%[buf0]]!\n\t"
|
||||
"vld1.32 {q11}, [%[buf1]]\n\t"
|
||||
"sub %[buf1], %[buf1], #16\n\t"
|
||||
|
||||
"vdup.32 d26, d16[1]\n\t"
|
||||
"vdup.32 d27, d17[1]\n\t"
|
||||
"vdup.32 d24, d16[0]\n\t"
|
||||
"vdup.32 d25, d17[0]\n\t"
|
||||
|
||||
"vdup.32 d30, d23[1]\n\t"
|
||||
"vdup.32 d31, d22[1]\n\t"
|
||||
"vdup.32 d28, d23[0]\n\t"
|
||||
"vdup.32 d29, d22[0]\n\t"
|
||||
|
||||
"vmul.f32 q13, q13, q10\n\t"
|
||||
"vmul.f32 q15, q15, q9\n\t"
|
||||
"vmul.f32 q12, q12, q10\n\t"
|
||||
"vmul.f32 q14, q14, q9\n\t"
|
||||
"vrev64.f32 q13, q13\n\t"
|
||||
"vrev64.f32 q15, q15\n\t"
|
||||
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d30, d31\n\t"
|
||||
"vneg.f32 d26, d26\n\t"
|
||||
"vneg.f32 d31, d31\n\t"
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d30, d31\n\t"
|
||||
|
||||
"vadd.f32 q12, q12, q14\n\t"
|
||||
"vadd.f32 q13, q13, q15\n\t"
|
||||
"vadd.f32 q12, q12, q13\n\t"
|
||||
"vst1.32 {q12}, [%[pout]]!\n\t"
|
||||
: [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1), [pout] "+r" (p_out)
|
||||
: [pa] "r" (A), [pb] "r" (B)
|
||||
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#elif HAVE_SSE3
|
||||
if (FFTS_UNLIKELY(N <= 8)) {
|
||||
__m128 t0 = _mm_load_ps(buf);
|
||||
__m128 t1 = _mm_load_ps(buf + N - 4);
|
||||
__m128 t2 = _mm_load_ps(A);
|
||||
__m128 t3 = _mm_load_ps(B);
|
||||
|
||||
_mm_store_ps(out, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t0, _mm_moveldup_ps(t2)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t2))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,2,0,0)), t3))));
|
||||
|
||||
if (N == 8) {
|
||||
t2 = _mm_load_ps(A + 4);
|
||||
t3 = _mm_load_ps(B + 4);
|
||||
|
||||
_mm_store_ps(out + 4, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t2)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t2))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(2,2,0,0)), t3))));
|
||||
}
|
||||
} else {
|
||||
__m128 t0 = _mm_load_ps(buf);
|
||||
|
||||
for (i = 0; i < N; i += 16) {
|
||||
__m128 t1 = _mm_load_ps(buf + i);
|
||||
__m128 t2 = _mm_load_ps(buf + N - i - 4);
|
||||
__m128 t3 = _mm_load_ps(A + i);
|
||||
__m128 t4 = _mm_load_ps(B + i);
|
||||
|
||||
_mm_store_ps(out + i, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t0 = _mm_load_ps(buf + N - i - 8);
|
||||
t1 = _mm_load_ps(buf + i + 4);
|
||||
t3 = _mm_load_ps(A + i + 4);
|
||||
t4 = _mm_load_ps(B + i + 4);
|
||||
|
||||
_mm_store_ps(out + i + 4, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t1 = _mm_load_ps(buf + i + 8);
|
||||
t2 = _mm_load_ps(buf + N - i - 12);
|
||||
t3 = _mm_load_ps(A + i + 8);
|
||||
t4 = _mm_load_ps(B + i + 8);
|
||||
|
||||
_mm_store_ps(out + i + 8, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t0 = _mm_load_ps(buf + N - i - 16);
|
||||
t1 = _mm_load_ps(buf + i + 12);
|
||||
t3 = _mm_load_ps(A + i + 12);
|
||||
t4 = _mm_load_ps(B + i + 12);
|
||||
|
||||
_mm_store_ps(out + i + 12, _mm_add_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
}
|
||||
}
|
||||
#elif HAVE_SSE
|
||||
if (FFTS_UNLIKELY(N <= 8)) {
|
||||
__m128 c0 = _mm_load_ps((const float*) sign_mask_even);
|
||||
__m128 t0 = _mm_load_ps(buf);
|
||||
__m128 t1 = _mm_load_ps(buf + N - 4);
|
||||
__m128 t2 = _mm_load_ps(A);
|
||||
__m128 t3 = _mm_load_ps(B);
|
||||
|
||||
_mm_store_ps(out, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t0, _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t0, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,2,0,0)), t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t3, c0), _mm_xor_ps(t3, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
|
||||
if (N == 8) {
|
||||
t2 = _mm_load_ps(A + 4);
|
||||
t3 = _mm_load_ps(B + 4);
|
||||
|
||||
_mm_store_ps(out + 4, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(2,2,0,0)), t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t3, c0), _mm_xor_ps(t3, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
}
|
||||
} else {
|
||||
__m128 c0 = _mm_load_ps((const float*) sign_mask_even);
|
||||
__m128 t0 = _mm_load_ps(buf);
|
||||
|
||||
for (i = 0; i < N; i += 16) {
|
||||
__m128 t1 = _mm_load_ps(buf + i);
|
||||
__m128 t2 = _mm_load_ps(buf + N - i - 4);
|
||||
__m128 t3 = _mm_load_ps(A + i);
|
||||
__m128 t4 = _mm_load_ps(B + i);
|
||||
|
||||
_mm_store_ps(out + i, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
|
||||
t0 = _mm_load_ps(buf + N - i - 8);
|
||||
t1 = _mm_load_ps(buf + i + 4);
|
||||
t3 = _mm_load_ps(A + i + 4);
|
||||
t4 = _mm_load_ps(B + i + 4);
|
||||
|
||||
_mm_store_ps(out + i + 4, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
|
||||
t1 = _mm_load_ps(buf + i + 8);
|
||||
t2 = _mm_load_ps(buf + N - i - 12);
|
||||
t3 = _mm_load_ps(A + i + 8);
|
||||
t4 = _mm_load_ps(B + i + 8);
|
||||
|
||||
_mm_store_ps(out + i + 8, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
|
||||
t0 = _mm_load_ps(buf + N - i - 16);
|
||||
t1 = _mm_load_ps(buf + i + 12);
|
||||
t3 = _mm_load_ps(A + i + 12);
|
||||
t4 = _mm_load_ps(B + i + 12);
|
||||
|
||||
_mm_store_ps(out + i + 12, _mm_add_ps(_mm_add_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(_mm_xor_ps(t4, c0), _mm_xor_ps(t4, c0),
|
||||
_MM_SHUFFLE(2,3,0,1)))));
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(i=0;i<N/2;i++) {
|
||||
out[2*i] = buf[2*i]*A[2*i] - buf[2*i+1]*A[2*i+1] + buf[N-2*i]*B[2*i] + buf[N-2*i+1]*B[2*i+1];
|
||||
out[2*i+1] = buf[2*i+1]*A[2*i] + buf[2*i]*A[2*i+1] + buf[N-2*i]*B[2*i+1] - buf[N-2*i+1]*B[2*i];
|
||||
|
||||
// out[2*N-2*i] = out[2*i];
|
||||
// out[2*N-2*i+1] = -out[2*i+1];
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
out[N] = buf[0] - buf[1];
|
||||
out[N+1] = 0.0f;
|
||||
|
||||
for (i = 0; i < N/2; i++) {
|
||||
out[2*i + 0] =
|
||||
buf[ 2*i + 0] * A[2*i + 0] - buf[ 2*i + 1] * A[2*i + 1] +
|
||||
buf[N - 2*i + 0] * B[2*i + 0] + buf[N - 2*i + 1] * B[2*i + 1];
|
||||
out[2*i + 1] =
|
||||
buf[ 2*i + 1] * A[2*i + 0] + buf[ 2*i + 0] * A[2*i + 1] +
|
||||
buf[N - 2*i + 0] * B[2*i + 1] - buf[N - 2*i + 1] * B[2*i + 0];
|
||||
}
|
||||
#endif
|
||||
|
||||
out[N + 0] = buf[0] - buf[1];
|
||||
out[N + 1] = 0.0f;
|
||||
}
|
||||
|
||||
void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout) {
|
||||
float *out = (float *)vout;
|
||||
float *in = (float *)vin;
|
||||
float *buf = (float *)p->buf;
|
||||
float *A = p->A;
|
||||
float *B = p->B;
|
||||
size_t N = p->N;
|
||||
|
||||
float *p_buf0 = in;
|
||||
float *p_buf1 = in + N - 2;
|
||||
|
||||
float *p_out = buf;
|
||||
|
||||
size_t i;
|
||||
static void
|
||||
ffts_execute_1d_real_inv(ffts_plan_t *p, const void *input, void *output)
|
||||
{
|
||||
float *const FFTS_RESTRICT in =
|
||||
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_16(input);
|
||||
float *const FFTS_RESTRICT buf =
|
||||
(float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->buf);
|
||||
const float *const FFTS_RESTRICT A =
|
||||
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->A);
|
||||
const float *const FFTS_RESTRICT B =
|
||||
(const float *const FFTS_RESTRICT) FFTS_ASSUME_ALIGNED_32(p->B);
|
||||
const int N = (const int) p->N;
|
||||
int i;
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
float *p_buf0 = in;
|
||||
float *p_buf1 = in + N - 2;
|
||||
float *p_out = buf;
|
||||
#endif
|
||||
|
||||
/* we know this */
|
||||
FFTS_ASSUME(N/2 > 0);
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
for(i=0;i<N/2;i+=2) {
|
||||
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
|
||||
"vld1.32 {q9}, [%[pb], :128]!\n\t"
|
||||
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
|
||||
"vld1.32 {q11}, [%[buf1], :64]\n\t"
|
||||
"sub %[buf1], %[buf1], #16\n\t"
|
||||
|
||||
"vdup.32 d26, d16[1]\n\t"
|
||||
"vdup.32 d27, d17[1]\n\t"
|
||||
"vdup.32 d24, d16[0]\n\t"
|
||||
"vdup.32 d25, d17[0]\n\t"
|
||||
|
||||
"vdup.32 d30, d23[1]\n\t"
|
||||
"vdup.32 d31, d22[1]\n\t"
|
||||
"vdup.32 d28, d23[0]\n\t"
|
||||
"vdup.32 d29, d22[0]\n\t"
|
||||
|
||||
"vmul.f32 q13, q13, q10\n\t"
|
||||
"vmul.f32 q15, q15, q9\n\t"
|
||||
"vmul.f32 q12, q12, q10\n\t"
|
||||
"vmul.f32 q14, q14, q9\n\t"
|
||||
"vrev64.f32 q13, q13\n\t"
|
||||
"vrev64.f32 q15, q15\n\t"
|
||||
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d28, d29\n\t"
|
||||
"vneg.f32 d27, d27\n\t"
|
||||
"vneg.f32 d29, d29\n\t"
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d28, d29\n\t"
|
||||
|
||||
"vadd.f32 q12, q12, q14\n\t"
|
||||
"vsub.f32 q13, q13, q15\n\t"
|
||||
"vadd.f32 q12, q12, q13\n\t"
|
||||
"vst1.32 {q12}, [%[pout], :128]!\n\t"
|
||||
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
|
||||
[pout] "+r" (p_out)
|
||||
:
|
||||
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
for (i = 0; i < N/2; i += 2) {
|
||||
__asm__ __volatile__ (
|
||||
"vld1.32 {q8}, [%[pa]]!\n\t"
|
||||
"vld1.32 {q9}, [%[pb]]!\n\t"
|
||||
"vld1.32 {q10}, [%[buf0]]!\n\t"
|
||||
"vld1.32 {q11}, [%[buf1]]\n\t"
|
||||
"sub %[buf1], %[buf1], #16\n\t"
|
||||
|
||||
"vdup.32 d26, d16[1]\n\t"
|
||||
"vdup.32 d27, d17[1]\n\t"
|
||||
"vdup.32 d24, d16[0]\n\t"
|
||||
"vdup.32 d25, d17[0]\n\t"
|
||||
|
||||
"vdup.32 d30, d23[1]\n\t"
|
||||
"vdup.32 d31, d22[1]\n\t"
|
||||
"vdup.32 d28, d23[0]\n\t"
|
||||
"vdup.32 d29, d22[0]\n\t"
|
||||
|
||||
"vmul.f32 q13, q13, q10\n\t"
|
||||
"vmul.f32 q15, q15, q9\n\t"
|
||||
"vmul.f32 q12, q12, q10\n\t"
|
||||
"vmul.f32 q14, q14, q9\n\t"
|
||||
"vrev64.f32 q13, q13\n\t"
|
||||
"vrev64.f32 q15, q15\n\t"
|
||||
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d28, d29\n\t"
|
||||
"vneg.f32 d27, d27\n\t"
|
||||
"vneg.f32 d29, d29\n\t"
|
||||
"vtrn.32 d26, d27\n\t"
|
||||
"vtrn.32 d28, d29\n\t"
|
||||
|
||||
"vadd.f32 q12, q12, q14\n\t"
|
||||
"vsub.f32 q13, q13, q15\n\t"
|
||||
"vadd.f32 q12, q12, q13\n\t"
|
||||
"vst1.32 {q12}, [%[pout]]!\n\t"
|
||||
: [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1), [pout] "+r" (p_out)
|
||||
: [pa] "r" (A), [pb] "r" (B)
|
||||
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
|
||||
);
|
||||
}
|
||||
#elif HAVE_SSE3
|
||||
if (FFTS_UNLIKELY(N <= 8)) {
|
||||
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
|
||||
__m128 t1 = _mm_load_ps(in);
|
||||
__m128 t2 = _mm_load_ps(in + N - 4);
|
||||
__m128 t3 = _mm_load_ps(A);
|
||||
__m128 t4 = _mm_load_ps(B);
|
||||
|
||||
_mm_store_ps(buf, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
if (N == 8) {
|
||||
t3 = _mm_load_ps(A + 4);
|
||||
t4 = _mm_load_ps(B + 4);
|
||||
|
||||
_mm_store_ps(buf + 4, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t2, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
}
|
||||
} else {
|
||||
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
|
||||
|
||||
for (i = 0; i < N; i += 16) {
|
||||
__m128 t1 = _mm_load_ps(in + i);
|
||||
__m128 t2 = _mm_load_ps(in + N - i - 4);
|
||||
__m128 t3 = _mm_load_ps(A + i);
|
||||
__m128 t4 = _mm_load_ps(B + i);
|
||||
|
||||
_mm_store_ps(buf + i, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t0 = _mm_load_ps(in + N - i - 8);
|
||||
t1 = _mm_load_ps(in + i + 4);
|
||||
t3 = _mm_load_ps(A + i + 4);
|
||||
t4 = _mm_load_ps(B + i + 4);
|
||||
|
||||
_mm_store_ps(buf + i + 4, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t1 = _mm_load_ps(in + i + 8);
|
||||
t2 = _mm_load_ps(in + N - i - 12);
|
||||
t3 = _mm_load_ps(A + i + 8);
|
||||
t4 = _mm_load_ps(B + i + 8);
|
||||
|
||||
_mm_store_ps(buf + i + 8, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
|
||||
t0 = _mm_load_ps(in + N - i - 16);
|
||||
t1 = _mm_load_ps(in + i + 12);
|
||||
t3 = _mm_load_ps(A + i + 12);
|
||||
t4 = _mm_load_ps(B + i + 12);
|
||||
|
||||
_mm_store_ps(buf + i + 12, _mm_sub_ps(_mm_addsub_ps(
|
||||
_mm_mul_ps(t1, _mm_moveldup_ps(t3)),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_movehdup_ps(t3))), _mm_addsub_ps(
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)), t4))));
|
||||
}
|
||||
}
|
||||
#elif HAVE_SSE
|
||||
if (FFTS_UNLIKELY(N <= 8)) {
|
||||
__m128 c0 = _mm_load_ps((const float*) sign_mask_odd);
|
||||
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
|
||||
__m128 t1 = _mm_load_ps(in);
|
||||
__m128 t2 = _mm_load_ps(in + N - 4);
|
||||
__m128 t3 = _mm_load_ps(A);
|
||||
__m128 t4 = _mm_load_ps(B);
|
||||
|
||||
_mm_store_ps(buf, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
|
||||
if (N == 8) {
|
||||
t3 = _mm_load_ps(A + 4);
|
||||
t4 = _mm_load_ps(B + 4);
|
||||
|
||||
_mm_store_ps(buf + 4, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t2, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t2, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
}
|
||||
} else {
|
||||
__m128 c0 = _mm_load_ps((const float*) sign_mask_odd);
|
||||
__m128 t0 = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*) &in[N]);
|
||||
|
||||
for (i = 0; i < N; i += 16) {
|
||||
__m128 t1 = _mm_load_ps(in + i);
|
||||
__m128 t2 = _mm_load_ps(in + N - i - 4);
|
||||
__m128 t3 = _mm_load_ps(A + i);
|
||||
__m128 t4 = _mm_load_ps(B + i);
|
||||
|
||||
_mm_store_ps(buf + i, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
|
||||
t0 = _mm_load_ps(in + N - i - 8);
|
||||
t1 = _mm_load_ps(in + i + 4);
|
||||
t3 = _mm_load_ps(A + i + 4);
|
||||
t4 = _mm_load_ps(B + i + 4);
|
||||
|
||||
_mm_store_ps(buf + i + 4, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
|
||||
t1 = _mm_load_ps(in + i + 8);
|
||||
t2 = _mm_load_ps(in + N - i - 12);
|
||||
t3 = _mm_load_ps(A + i + 8);
|
||||
t4 = _mm_load_ps(B + i + 8);
|
||||
|
||||
_mm_store_ps(buf + i + 8, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
|
||||
t0 = _mm_load_ps(in + N - i - 16);
|
||||
t1 = _mm_load_ps(in + i + 12);
|
||||
t3 = _mm_load_ps(A + i + 12);
|
||||
t4 = _mm_load_ps(B + i + 12);
|
||||
|
||||
_mm_store_ps(buf + i + 12, _mm_add_ps(_mm_sub_ps(_mm_add_ps(
|
||||
_mm_mul_ps(t1, _mm_shuffle_ps(t3, t3, _MM_SHUFFLE(2,2,0,0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1)),
|
||||
_mm_xor_ps(_mm_shuffle_ps(t3, t3, _MM_SHUFFLE(3,3,1,1)), c0))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(3,3,1,1)),
|
||||
_mm_shuffle_ps(t4, t4, _MM_SHUFFLE(2,3,0,1)))),
|
||||
_mm_mul_ps(_mm_shuffle_ps(t2, t0, _MM_SHUFFLE(2,2,0,0)),
|
||||
_mm_xor_ps(t4, c0))));
|
||||
}
|
||||
}
|
||||
#else
|
||||
for(i=0;i<N/2;i++) {
|
||||
buf[2*i] = in[2*i]*A[2*i] + in[2*i+1]*A[2*i+1] + in[N-2*i]*B[2*i] - in[N-2*i+1]*B[2*i+1];
|
||||
buf[2*i+1] = in[2*i+1]*A[2*i] - in[2*i]*A[2*i+1] - in[N-2*i]*B[2*i+1] - in[N-2*i+1]*B[2*i];
|
||||
for (i = 0; i < N/2; i++) {
|
||||
buf[2*i + 0] =
|
||||
in[ 2*i + 0] * A[2*i + 0] + in[ 2*i + 1] * A[2*i + 1] +
|
||||
in[N - 2*i + 0] * B[2*i + 0] - in[N - 2*i + 1] * B[2*i + 1];
|
||||
buf[2*i + 1] =
|
||||
in[ 2*i + 1] * A[2*i + 0] - in[ 2*i + 0] * A[2*i + 1] -
|
||||
in[N - 2*i + 0] * B[2*i + 1] - in[N - 2*i + 1] * B[2*i + 0];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
p->plans[0]->transform(p->plans[0], buf, out);
|
||||
|
||||
}
|
||||
|
||||
ffts_plan_t *ffts_init_1d_real(size_t N, int sign) {
|
||||
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||
|
||||
if(sign < 0) p->transform = &ffts_execute_1d_real;
|
||||
else p->transform = &ffts_execute_1d_real_inv;
|
||||
|
||||
p->destroy = &ffts_free_1d_real;
|
||||
p->N = N;
|
||||
p->rank = 1;
|
||||
p->plans = malloc(sizeof(ffts_plan_t **) * 1);
|
||||
|
||||
p->plans[0] = ffts_init_1d(N/2, sign);
|
||||
|
||||
p->buf = valloc(sizeof(float) * 2 * ((N/2) + 1));
|
||||
|
||||
p->A = valloc(sizeof(float) * N);
|
||||
p->B = valloc(sizeof(float) * N);
|
||||
|
||||
if(sign < 0) {
|
||||
int i;
|
||||
for (i = 0; i < N/2; i++) {
|
||||
p->A[2 * i] = 0.5 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
|
||||
p->A[2 * i + 1] = 0.5 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||
p->B[2 * i] = 0.5 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
|
||||
p->B[2 * i + 1] = 0.5 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||
}
|
||||
}else{
|
||||
int i;
|
||||
for (i = 0; i < N/2; i++) {
|
||||
p->A[2 * i] = 1.0 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
|
||||
p->A[2 * i + 1] = 1.0 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||
p->B[2 * i] = 1.0 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
|
||||
p->B[2 * i + 1] = 1.0 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
p->plans[0]->transform(p->plans[0], buf, output);
|
||||
}
|
||||
|
||||
/*
 * Build a plan for a 1-D real transform of length N.
 *
 * N    - transform length; a complex sub-plan of length N/2 is created
 * sign - sign < 0 installs ffts_execute_1d_real as the transform,
 *        any other value installs ffts_execute_1d_real_inv
 *
 * The plan structure and its one-entry sub-plan pointer array come from a
 * single calloc() block: the array lives directly behind the structure.
 *
 * Returns the new plan, or NULL when the plan itself could not be
 * allocated, the sub-plan could not be created, or one of the aligned
 * buffers (buf, A, B) could not be allocated.  On those later failures the
 * partially built plan is handed to ffts_free_1d_real() before returning.
 */
FFTS_API ffts_plan_t*
ffts_init_1d_real(size_t N, int sign)
{
    /* last argument of ffts_generate_table_1d_real_32f(): 1 with SSE3, else 0 */
#ifdef HAVE_SSE3
    const int have_sse3 = 1;
#else
    const int have_sse3 = 0;
#endif
    const size_t half = N / 2;
    const size_t table_bytes = N * sizeof(float);
    ffts_plan_t *plan;

    plan = (ffts_plan_t*) calloc(1, sizeof(*plan) + sizeof(*plan->plans));
    if (plan == NULL) {
        return NULL;
    }

    plan->transform = (sign < 0) ? &ffts_execute_1d_real
                                 : &ffts_execute_1d_real_inv;
    plan->destroy = &ffts_free_1d_real;
    plan->N = N;
    plan->rank = 1;

    /* the sub-plan pointer array is the tail of the calloc() block */
    plan->plans = (ffts_plan_t**) &plan[1];

    plan->plans[0] = ffts_init_1d(half, sign);
    if (plan->plans[0] == NULL) {
        goto fail;
    }

    plan->buf = ffts_aligned_malloc(2 * (half + 1) * sizeof(float));
    if (plan->buf == NULL) {
        goto fail;
    }

    plan->A = (float*) ffts_aligned_malloc(table_bytes);
    if (plan->A == NULL) {
        goto fail;
    }

    plan->B = (float*) ffts_aligned_malloc(table_bytes);
    if (plan->B == NULL) {
        goto fail;
    }

    ffts_generate_table_1d_real_32f(plan, sign, have_sse3);
    return plan;

fail:
    /* fields not reached above are still zero from calloc() */
    ffts_free_1d_real(plan);
    return NULL;
}
|
@ -1,53 +1,47 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __FFTS_REAL_H__
|
||||
#define __FFTS_REAL_H__
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
*/
|
||||
|
||||
#include "ffts.h"
|
||||
#ifndef FFTS_REAL_H
|
||||
#define FFTS_REAL_H
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
|
||||
#include "ffts.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#endif
|
||||
ffts_plan_t*
|
||||
ffts_init_1d_real(size_t N, int sign);
|
||||
|
||||
#endif /* FFTS_REAL_H */
|
||||
|
@ -1,177 +1,269 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts_real_nd.h"
|
||||
#include "ffts_real.h"
|
||||
#include "ffts_internal.h"
|
||||
#include "ffts_transpose.h"
|
||||
|
||||
#ifdef __ARM_NEON__
|
||||
#include "neon.h"
|
||||
#endif
|
||||
|
||||
/*
 * Release an N-dimensional real plan.
 *
 * Each distinct sub-plan in p->plans[0 .. rank-1] is passed to ffts_free()
 * once: before a sub-plan is freed, every later slot holding the same
 * pointer is cleared.  Afterwards the Ns, Ms, plans, buf and transpose_buf
 * members and the plan structure itself are released with free().
 */
void ffts_free_nd_real(ffts_plan_t *p) {
    int dim = 0;

    while (dim < p->rank) {
        ffts_plan_t *sub = p->plans[dim];
        int later;

        /* drop later aliases of this sub-plan so it is not freed twice */
        for (later = dim + 1; later < p->rank; ++later) {
            if (p->plans[later] == sub) {
                p->plans[later] = NULL;
            }
        }

        if (sub != NULL) {
            ffts_free(sub);
        }

        ++dim;
    }

    free(p->Ns);
    free(p->Ms);
    free(p->plans);
    free(p->buf);
    free(p->transpose_buf);
    free(p);
}
|
||||
static void
|
||||
ffts_free_nd_real(ffts_plan_t *p)
|
||||
{
|
||||
if (p->plans) {
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < p->rank; i++) {
|
||||
ffts_plan_t *plan = p->plans[i];
|
||||
|
||||
if (plan) {
|
||||
for (j = 0; j < i; j++) {
|
||||
if (p->Ns[i] == p->Ns[j]) {
|
||||
plan = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (plan) {
|
||||
ffts_free(plan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ffts_scalar_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
|
||||
free(p->plans);
|
||||
}
|
||||
|
||||
size_t i,j;
|
||||
for(i=0;i<w;i+=1) {
|
||||
for(j=0;j<h;j+=1) {
|
||||
out[i*h + j] = in[j*w + i];
|
||||
}
|
||||
}
|
||||
if (p->buf) {
|
||||
ffts_aligned_free(p->buf);
|
||||
}
|
||||
|
||||
if (p->Ns) {
|
||||
free(p->Ns);
|
||||
}
|
||||
|
||||
if (p->Ms) {
|
||||
free(p->Ms);
|
||||
}
|
||||
|
||||
free(p);
|
||||
}
|
||||
|
||||
void ffts_execute_nd_real(ffts_plan_t *p, const void * in, void * out) {
|
||||
static void
|
||||
ffts_execute_nd_real(ffts_plan_t *p, const void *in, void *out)
|
||||
{
|
||||
const size_t Ms0 = p->Ms[0];
|
||||
const size_t Ns0 = p->Ns[0];
|
||||
|
||||
uint32_t *din = (uint32_t*) in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *dout = (uint64_t*) out;
|
||||
|
||||
ffts_plan_t *plan;
|
||||
int i;
|
||||
size_t j;
|
||||
|
||||
plan = p->plans[0];
|
||||
for (j = 0; j < Ns0; j++) {
|
||||
plan->transform(plan, din + (j * Ms0), buf + (j * (Ms0 / 2 + 1)));
|
||||
}
|
||||
|
||||
uint32_t *din = (uint32_t *)in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *dout = (uint64_t *)out;
|
||||
ffts_transpose(buf, dout, Ms0 / 2 + 1, Ns0);
|
||||
|
||||
size_t i,j;
|
||||
for(i=0;i<p->Ns[0];i++) {
|
||||
p->plans[0]->transform(p->plans[0], din + (i * p->Ms[0]), buf + (i * (p->Ms[0] / 2 + 1)));
|
||||
}
|
||||
ffts_scalar_transpose(buf, dout, p->Ms[0] / 2 + 1, p->Ns[0], p->transpose_buf);
|
||||
for (i = 1; i < p->rank; i++) {
|
||||
const size_t Ms = p->Ms[i];
|
||||
const size_t Ns = p->Ns[i];
|
||||
|
||||
for(i=1;i<p->rank;i++) {
|
||||
for(j=0;j<p->Ns[i];j++) {
|
||||
p->plans[i]->transform(p->plans[i], dout + (j * p->Ms[i]), buf + (j * p->Ms[i]));
|
||||
}
|
||||
ffts_scalar_transpose(buf, dout, p->Ms[i], p->Ns[i], p->transpose_buf);
|
||||
}
|
||||
plan = p->plans[i];
|
||||
|
||||
for (j = 0; j < Ns; j++) {
|
||||
plan->transform(plan, dout + (j * Ms), buf + (j * Ms));
|
||||
}
|
||||
|
||||
ffts_transpose(buf, dout, Ms, Ns);
|
||||
}
|
||||
}
|
||||
|
||||
void ffts_execute_nd_real_inv(ffts_plan_t *p, const void * in, void * out) {
|
||||
|
||||
uint64_t *din = (uint64_t *)in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *dout = (uint64_t *)out;
|
||||
|
||||
float *bufr = (float *)(p->buf);
|
||||
float *doutr = (float *)out;
|
||||
|
||||
size_t i,j;
|
||||
ffts_scalar_transpose(din, buf, p->Ms[0], p->Ns[0], p->transpose_buf);
|
||||
|
||||
for(i=0;i<p->Ms[0];i++) {
|
||||
p->plans[0]->transform(p->plans[0], buf + (i * p->Ns[0]), dout + (i * p->Ns[0]));
|
||||
}
|
||||
|
||||
ffts_scalar_transpose(dout, buf, p->Ns[0], p->Ms[0], p->transpose_buf);
|
||||
for(j=0;j<p->Ms[1];j++) {
|
||||
p->plans[1]->transform(p->plans[1], buf + (j * (p->Ms[0])), &doutr[j * p->Ns[1]]);
|
||||
}
|
||||
static void
|
||||
ffts_execute_nd_real_inv(ffts_plan_t *p, const void *in, void *out)
|
||||
{
|
||||
const size_t Ms0 = p->Ms[0];
|
||||
const size_t Ms1 = p->Ms[1];
|
||||
const size_t Ns0 = p->Ns[0];
|
||||
const size_t Ns1 = p->Ns[1];
|
||||
|
||||
uint64_t *din = (uint64_t*) in;
|
||||
uint64_t *buf = p->buf;
|
||||
uint64_t *buf2;
|
||||
float *doutr = (float*) out;
|
||||
|
||||
ffts_plan_t *plan;
|
||||
size_t vol;
|
||||
|
||||
int i;
|
||||
size_t j;
|
||||
|
||||
vol = p->Ns[0];
|
||||
for (i = 1; i < p->rank; i++) {
|
||||
vol *= p->Ns[i];
|
||||
}
|
||||
|
||||
buf2 = buf + vol;
|
||||
|
||||
ffts_transpose(din, buf, Ms0, Ns0);
|
||||
|
||||
plan = p->plans[0];
|
||||
for (j = 0; j < Ms0; j++) {
|
||||
plan->transform(plan, buf + (j * Ns0), buf2 + (j * Ns0));
|
||||
}
|
||||
|
||||
ffts_transpose(buf2, buf, Ns0, Ms0);
|
||||
|
||||
plan = p->plans[1];
|
||||
for (j = 0; j < Ms1; j++) {
|
||||
plan->transform(plan, buf + (j * Ms0), &doutr[j * Ns1]);
|
||||
}
|
||||
}
|
||||
|
||||
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign) {
|
||||
size_t vol = 1;
|
||||
FFTS_API ffts_plan_t*
|
||||
ffts_init_nd_real(int rank, size_t *Ns, int sign)
|
||||
{
|
||||
int i;
|
||||
size_t vol = 1;
|
||||
size_t bufsize;
|
||||
ffts_plan_t *p;
|
||||
|
||||
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||
p = (ffts_plan_t*) calloc(1, sizeof(*p));
|
||||
if (!p) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(sign < 0) p->transform = &ffts_execute_nd_real;
|
||||
else p->transform = &ffts_execute_nd_real_inv;
|
||||
if (sign < 0) {
|
||||
p->transform = &ffts_execute_nd_real;
|
||||
} else {
|
||||
p->transform = &ffts_execute_nd_real_inv;
|
||||
}
|
||||
|
||||
p->destroy = &ffts_free_nd_real;
|
||||
p->destroy = &ffts_free_nd_real;
|
||||
p->rank = rank;
|
||||
|
||||
p->rank = rank;
|
||||
p->Ns = malloc(sizeof(size_t) * rank);
|
||||
p->Ms = malloc(sizeof(size_t) * rank);
|
||||
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
|
||||
int i;
|
||||
for(i=0;i<rank;i++) {
|
||||
p->Ns[i] = Ns[i];
|
||||
vol *= Ns[i];
|
||||
}
|
||||
p->buf = valloc(sizeof(float) * 2 * vol);
|
||||
p->Ms = (size_t*) malloc(rank * sizeof(*p->Ms));
|
||||
if (!p->Ms) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for(i=0;i<rank;i++) {
|
||||
p->Ms[i] = vol / p->Ns[i];
|
||||
|
||||
p->plans[i] = NULL;
|
||||
int k;
|
||||
p->Ns = (size_t*) malloc(rank * sizeof(*p->Ns));
|
||||
if (!p->Ns) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if(sign < 0) {
|
||||
for(k=1;k<i;k++) {
|
||||
if(p->Ms[k] == p->Ms[i]) p->plans[i] = p->plans[k];
|
||||
}
|
||||
if(!i) p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
|
||||
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
|
||||
}else{
|
||||
for(k=0;k<i;k++) {
|
||||
if(p->Ns[k] == p->Ns[i]) p->plans[i] = p->plans[k];
|
||||
}
|
||||
if(i==rank-1) p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
|
||||
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ns[i], sign);
|
||||
}
|
||||
}
|
||||
if(sign < 0) {
|
||||
for(i=1;i<rank;i++) {
|
||||
p->Ns[i] = p->Ns[i] / 2 + 1;
|
||||
}
|
||||
}else{
|
||||
for(i=0;i<rank-1;i++) {
|
||||
p->Ms[i] = p->Ms[i] / 2 + 1;
|
||||
}
|
||||
}
|
||||
|
||||
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
|
||||
return p;
|
||||
for (i = 0; i < rank; i++) {
|
||||
p->Ns[i] = Ns[i];
|
||||
vol *= Ns[i];
|
||||
}
|
||||
|
||||
/* there is probably a prettier way of doing this, but it works.. */
|
||||
if (sign < 0) {
|
||||
bufsize = 2 * vol;
|
||||
} else {
|
||||
bufsize = 2 * (Ns[0] * ((vol / Ns[0]) / 2 + 1) + vol);
|
||||
}
|
||||
|
||||
p->buf = ffts_aligned_malloc(bufsize * sizeof(float));
|
||||
if (!p->buf) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
p->plans = (ffts_plan_t**) calloc(rank, sizeof(*p->plans));
|
||||
if (!p->plans) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
for (i = 0; i < rank; i++) {
|
||||
int k;
|
||||
|
||||
p->Ms[i] = vol / p->Ns[i];
|
||||
|
||||
if (sign < 0) {
|
||||
if (!i) {
|
||||
p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
|
||||
} else {
|
||||
for (k = 1; k < i; k++) {
|
||||
if (p->Ms[k] == p->Ms[i]) {
|
||||
p->plans[i] = p->plans[k];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!p->plans[i]) {
|
||||
p->plans[i] = ffts_init_1d(p->Ms[i], sign);
|
||||
p->Ns[i] = p->Ns[i] / 2 + 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (i == rank - 1) {
|
||||
p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
|
||||
} else {
|
||||
for (k = 0; k < i; k++) {
|
||||
if (p->Ns[k] == p->Ns[i]) {
|
||||
p->plans[i] = p->plans[k];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!p->plans[i]) {
|
||||
p->plans[i] = ffts_init_1d(p->Ns[i], sign);
|
||||
p->Ms[i] = p->Ms[i] / 2 + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!p->plans[i]) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
|
||||
cleanup:
|
||||
ffts_free_nd_real(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
FFTS_API ffts_plan_t*
|
||||
ffts_init_2d_real(size_t N1, size_t N2, int sign)
|
||||
{
|
||||
size_t Ns[2];
|
||||
|
||||
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign) {
|
||||
size_t Ns[2];
|
||||
Ns[0] = N1;
|
||||
Ns[1] = N2;
|
||||
return ffts_init_nd_real(2, Ns, sign);
|
||||
Ns[0] = N1;
|
||||
Ns[1] = N2;
|
||||
return ffts_init_nd_real(2, Ns, sign);
|
||||
}
|
||||
|
@ -1,53 +1,50 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __FFTS_REAL_ND_H__
|
||||
#define __FFTS_REAL_ND_H__
|
||||
#ifndef FFTS_REAL_ND_H
|
||||
#define FFTS_REAL_ND_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ffts_nd.h"
|
||||
#include "ffts_real.h"
|
||||
#include "ffts.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
ffts_plan_t*
|
||||
ffts_init_nd_real(int rank, size_t *Ns, int sign);
|
||||
|
||||
#endif
|
||||
ffts_plan_t*
|
||||
ffts_init_2d_real(size_t N1, size_t N2, int sign);
|
||||
|
||||
#endif /* FFTS_REAL_ND_H */
|
@ -1,156 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts.h"
|
||||
#include "macros.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define DEBUG(x)
|
||||
|
||||
#include "ffts_small.h"
|
||||
|
||||
/*
 * 16-point first-pass kernel, "_f" variant: every L_* / K_N helper below
 * is called with 0 as its leading flag (presumably 0 selects the forward
 * direction -- confirm against the helper definitions).
 *
 * p   - plan; p->ws is used as the base of the coefficient table
 * in  - input, addressed as data_t through in+0, +4, ..., +28
 * out - output, addressed as data_t through out+0, +4, ..., +28
 */
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;
    float *lut = p->ws;
    V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;

    /* load stage */
    L_4_4(0, src + 0, src + 16, src + 8, src + 24,
          &r0_1, &r2_3, &r8_9, &r10_11);
    L_2_4(0, src + 4, src + 20, src + 28, src + 12,
          &r4_5, &r6_7, &r14_15, &r12_13);

    /* combine stages; table entries are consumed in steps of 4 floats */
    K_N(0, VLD(lut), VLD(lut + 4), &r0_1, &r2_3, &r4_5, &r6_7);
    K_N(0, VLD(lut + 8), VLD(lut + 12), &r0_1, &r4_5, &r8_9, &r12_13);
    S_4(r0_1, r4_5, r8_9, r12_13, dst + 0, dst + 8, dst + 16, dst + 24);

    K_N(0, VLD(lut + 16), VLD(lut + 20), &r2_3, &r6_7, &r10_11, &r14_15);
    S_4(r2_3, r6_7, r10_11, r14_15, dst + 4, dst + 12, dst + 20, dst + 28);
}
|
||||
|
||||
/*
 * 16-point first-pass kernel, "_b" variant: every L_* / K_N helper below
 * is called with 1 as its leading flag (presumably 1 selects the backward
 * direction -- confirm against the helper definitions).
 *
 * p   - plan; p->ws is used as the base of the coefficient table
 * in  - input, addressed as data_t through in+0, +4, ..., +28
 * out - output, addressed as data_t through out+0, +4, ..., +28
 */
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;
    float *lut = p->ws;
    V r0_1, r2_3, r4_5, r6_7, r8_9, r10_11, r12_13, r14_15;

    /* load stage */
    L_4_4(1, src + 0, src + 16, src + 8, src + 24,
          &r0_1, &r2_3, &r8_9, &r10_11);
    L_2_4(1, src + 4, src + 20, src + 28, src + 12,
          &r4_5, &r6_7, &r14_15, &r12_13);

    /* combine stages; table entries are consumed in steps of 4 floats */
    K_N(1, VLD(lut), VLD(lut + 4), &r0_1, &r2_3, &r4_5, &r6_7);
    K_N(1, VLD(lut + 8), VLD(lut + 12), &r0_1, &r4_5, &r8_9, &r12_13);
    S_4(r0_1, r4_5, r8_9, r12_13, dst + 0, dst + 8, dst + 16, dst + 24);

    K_N(1, VLD(lut + 16), VLD(lut + 20), &r2_3, &r6_7, &r10_11, &r14_15);
    S_4(r2_3, r6_7, r10_11, r14_15, dst + 4, dst + 12, dst + 20, dst + 28);
}
|
||||
|
||||
|
||||
/*
 * 8-point first-pass kernel, "_f" variant: L_4_2 and K_N are called with 0
 * as their leading flag (presumably the forward direction -- confirm
 * against the helper definitions).
 *
 * p   - plan; the coefficient table starts at p->ws + p->ws_is[0]
 * in  - input, addressed as data_t through in+0, +4, +8, +12
 * out - output, addressed as data_t through out+0, +4, +8, +12
 */
void firstpass_8_f(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;
    float *lut = p->ws + p->ws_is[0];
    V r0_1, r2_3, r4_5, r6_7;

    L_4_2(0, src, src + 8, src + 4, src + 12, &r0_1, &r2_3, &r4_5, &r6_7);
    K_N(0, VLD(lut), VLD(lut + 4), &r0_1, &r2_3, &r4_5, &r6_7);
    S_4(r0_1, r2_3, r4_5, r6_7, dst + 0, dst + 4, dst + 8, dst + 12);
}
|
||||
|
||||
/*
 * 8-point first-pass kernel, "_b" variant: L_4_2 and K_N are called with 1
 * as their leading flag (presumably the backward direction -- confirm
 * against the helper definitions).
 *
 * p   - plan; the coefficient table starts at p->ws + p->ws_is[0]
 * in  - input, addressed as data_t through in+0, +4, +8, +12
 * out - output, addressed as data_t through out+0, +4, +8, +12
 */
void firstpass_8_b(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;
    float *lut = p->ws + p->ws_is[0];
    V r0_1, r2_3, r4_5, r6_7;

    L_4_2(1, src, src + 8, src + 4, src + 12, &r0_1, &r2_3, &r4_5, &r6_7);
    K_N(1, VLD(lut), VLD(lut + 4), &r0_1, &r2_3, &r4_5, &r6_7);
    S_4(r0_1, r2_3, r4_5, r6_7, dst + 0, dst + 4, dst + 8, dst + 12);
}
|
||||
|
||||
|
||||
/*
 * 4-point kernel, "_f" variant, on interleaved (re, im) data_t pairs.
 *
 * p   - plan; not used by this function
 * in  - 8 data_t values: four (re, im) pairs
 * out - 8 data_t values: four (re, im) pairs
 *
 * All eight inputs are read before the first output is written.
 */
void firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;

    /* inputs: pairs 0 and 2 feed one butterfly, pairs 1 and 3 the other */
    const data_t a_re = src[0], a_im = src[1];
    const data_t b_re = src[4], b_im = src[5];
    const data_t c_re = src[2], c_im = src[3];
    const data_t d_re = src[6], d_im = src[7];

    /* first stage: sums and differences */
    const data_t sum_ab_re = a_re + b_re, sum_ab_im = a_im + b_im;
    const data_t dif_ab_re = a_re - b_re, dif_ab_im = a_im - b_im;
    const data_t sum_cd_re = c_re + d_re, sum_cd_im = c_im + d_im;
    const data_t dif_cd_re = c_re - d_re, dif_cd_im = c_im - d_im;

    /* second stage */
    dst[0] = sum_ab_re + sum_cd_re;
    dst[1] = sum_ab_im + sum_cd_im;
    dst[4] = sum_ab_re - sum_cd_re;
    dst[5] = sum_ab_im - sum_cd_im;

    /* the (c - d) term enters with re/im swapped; signs differ from _b */
    dst[2] = dif_ab_re + dif_cd_im;
    dst[3] = dif_ab_im - dif_cd_re;
    dst[6] = dif_ab_re - dif_cd_im;
    dst[7] = dif_ab_im + dif_cd_re;
}
|
||||
|
||||
/*
 * 4-point kernel, "_b" variant, on interleaved (re, im) data_t pairs.
 *
 * p   - plan; not used by this function
 * in  - 8 data_t values: four (re, im) pairs
 * out - 8 data_t values: four (re, im) pairs
 *
 * All eight inputs are read before the first output is written.
 */
void firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;

    /* inputs: pairs 0 and 2 feed one butterfly, pairs 1 and 3 the other */
    const data_t a_re = src[0], a_im = src[1];
    const data_t b_re = src[4], b_im = src[5];
    const data_t c_re = src[2], c_im = src[3];
    const data_t d_re = src[6], d_im = src[7];

    /* first stage: sums and differences */
    const data_t sum_ab_re = a_re + b_re, sum_ab_im = a_im + b_im;
    const data_t dif_ab_re = a_re - b_re, dif_ab_im = a_im - b_im;
    const data_t sum_cd_re = c_re + d_re, sum_cd_im = c_im + d_im;
    const data_t dif_cd_re = c_re - d_re, dif_cd_im = c_im - d_im;

    /* second stage */
    dst[0] = sum_ab_re + sum_cd_re;
    dst[1] = sum_ab_im + sum_cd_im;
    dst[4] = sum_ab_re - sum_cd_re;
    dst[5] = sum_ab_im - sum_cd_im;

    /* the (c - d) term enters with re/im swapped; signs differ from _f */
    dst[2] = dif_ab_re - dif_cd_im;
    dst[3] = dif_ab_im + dif_cd_re;
    dst[6] = dif_ab_re + dif_cd_im;
    dst[7] = dif_ab_im - dif_cd_re;
}
|
||||
|
||||
/*
 * 2-point kernel on interleaved (re, im) data_t pairs: the first output
 * pair is the sum of the two input pairs, the second is their difference.
 *
 * p   - plan; not used by this function
 * in  - 4 data_t values: two (re, im) pairs
 * out - 4 data_t values: two (re, im) pairs
 *
 * All four inputs are read before the first output is written.
 */
void firstpass_2(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *) in;
    data_t *dst = (data_t *) out;

    const data_t a_re = src[0], a_im = src[1];
    const data_t b_re = src[2], b_im = src[3];

    const data_t sum_re = a_re + b_re;
    const data_t sum_im = a_im + b_im;
    const data_t dif_re = a_re - b_re;
    const data_t dif_im = a_im - b_im;

    dst[0] = sum_re;
    dst[1] = sum_im;
    dst[2] = dif_re;
    dst[3] = dif_im;
}
|
@ -1,13 +0,0 @@
|
||||
#ifndef __FFTS_SMALL_H__
|
||||
#define __FFTS_SMALL_H__
|
||||
|
||||
|
||||
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_8_f(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_8_b(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_4_f(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_4_b(ffts_plan_t * p, const void * in, void * out);
|
||||
void firstpass_2(ffts_plan_t * p, const void * in, void * out);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
@ -1,46 +1,91 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __FFTS_STATIC_H__
|
||||
#define __FFTS_STATIC_H__
|
||||
#ifndef FFTS_STATIC_H
|
||||
#define FFTS_STATIC_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ffts.h"
|
||||
#include "neon.h"
|
||||
|
||||
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) ;
|
||||
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out);
|
||||
void
|
||||
ffts_small_2_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) ;
|
||||
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out);
|
||||
void
|
||||
ffts_small_2_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
#endif
|
||||
void
|
||||
ffts_small_forward4_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_forward4_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward4_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward4_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_forward8_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_forward8_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward8_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward8_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_forward16_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_forward16_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward16_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_small_backward16_64f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_static_transform_f_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
void
|
||||
ffts_static_transform_i_32f(ffts_plan_t *p, const void *in, void *out);
|
||||
|
||||
#endif /* FFTS_STATIC_H */
|
||||
|
@ -0,0 +1,194 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2016, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "ffts_transpose.h"
|
||||
#include "ffts_internal.h"
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include "neon.h"
|
||||
#include <arm_neon.h>
|
||||
#elif HAVE_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#define TSIZE 8
|
||||
|
||||
/*
 * Transpose a matrix of 64-bit elements.
 *
 * in  : source matrix, read as h rows of w elements (row-major)
 * out : destination, written as w rows of h elements, so that
 *       out[c*h + r] == in[r*w + c]
 * w   : number of columns of the source
 * h   : number of rows of the source
 *
 * Three implementations are selected at compile time:
 *   - HAVE_NEON : delegates to neon_transpose8().
 *   - HAVE_SSE2 : works on TSIZE x TSIZE tiles through an aligned scratch
 *                 buffer. Only complete tiles are processed (w / TSIZE by
 *                 h / TSIZE of them); elements beyond the last full tile are
 *                 not written. The aligned load/store intrinsics are applied
 *                 directly to in and out.
 *                 NOTE(review): this presumably relies on callers passing
 *                 16-byte aligned buffers with w and h multiples of TSIZE --
 *                 confirm against the call sites.
 *   - otherwise : portable scalar code that accepts any w and h.
 */
void
ffts_transpose(uint64_t *in, uint64_t *out, int w, int h)
{
#ifdef HAVE_NEON
    /* neon_transpose4() is the alternative kernel; the 8-wide one is used. */
    neon_transpose8(in, out, w, h);
#elif HAVE_SSE2
    uint64_t FFTS_ALIGN(64) tmp[TSIZE*TSIZE];
    int tx, ty;
    int y;
    int tw = w / TSIZE;
    int th = h / TSIZE;

    for (ty = 0; ty < th; ty++) {
        for (tx = 0; tx < tw; tx++) {
            uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
            uint64_t *op0 = tmp;

            /* Transpose the source tile into tmp. Each pass reads two
               adjacent columns from all eight rows of the tile and writes
               them out as two rows of tmp. */
            for (y = 0; y < TSIZE; y += 2) {
                __m128d q0 = _mm_load_pd((double*)(ip0 + 0*w));
                __m128d q1 = _mm_load_pd((double*)(ip0 + 1*w));
                __m128d q2 = _mm_load_pd((double*)(ip0 + 2*w));
                __m128d q3 = _mm_load_pd((double*)(ip0 + 3*w));
                __m128d q4 = _mm_load_pd((double*)(ip0 + 4*w));
                __m128d q5 = _mm_load_pd((double*)(ip0 + 5*w));
                __m128d q6 = _mm_load_pd((double*)(ip0 + 6*w));
                __m128d q7 = _mm_load_pd((double*)(ip0 + 7*w));

                /* even t: first column of a row pair, odd t: second column */
                __m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
                __m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
                __m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
                __m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
                __m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
                __m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
                __m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
                __m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));

                ip0 += 2;

                _mm_store_pd((double*)(op0 + 0        ), t0);
                _mm_store_pd((double*)(op0 + 0 + TSIZE), t1);
                _mm_store_pd((double*)(op0 + 2        ), t2);
                _mm_store_pd((double*)(op0 + 2 + TSIZE), t3);
                _mm_store_pd((double*)(op0 + 4        ), t4);
                _mm_store_pd((double*)(op0 + 4 + TSIZE), t5);
                _mm_store_pd((double*)(op0 + 6        ), t6);
                _mm_store_pd((double*)(op0 + 6 + TSIZE), t7);

                op0 += 2*TSIZE;
            }

            /* Copy the transposed tile from tmp to its place in out,
               one row of TSIZE elements at a time. */
            op0 = out + h*tx*TSIZE + ty*TSIZE;
            ip0 = tmp;
            for (y = 0; y < TSIZE; y += 1) {
                __m128d q0 = _mm_load_pd((double*)(ip0 + 0));
                __m128d q1 = _mm_load_pd((double*)(ip0 + 2));
                __m128d q2 = _mm_load_pd((double*)(ip0 + 4));
                __m128d q3 = _mm_load_pd((double*)(ip0 + 6));

                _mm_store_pd((double*)(op0 + 0), q0);
                _mm_store_pd((double*)(op0 + 2), q1);
                _mm_store_pd((double*)(op0 + 4), q2);
                _mm_store_pd((double*)(op0 + 6), q3);

                op0 += h;
                ip0 += TSIZE;
            }
        }
    }
#else
    const int bh = 8; /* source rows handled per block */
    int i, j, r;

    /* Blocked part: for each group of bh source rows and each column, read
       the bh values of that column and store them contiguously in the
       matching output row. Columns are taken one at a time, so there is
       never a column remainder to deal with. */
    for (i = 0; i <= h - bh; i += bh) {
        for (j = 0; j < w; j++) {
            uint64_t const *ib = &in[w*i + j];
            uint64_t *ob = &out[h*j + i];

            uint64_t s0 = ib[0*w];
            uint64_t s1 = ib[1*w];
            uint64_t s2 = ib[2*w];
            uint64_t s3 = ib[3*w];
            uint64_t s4 = ib[4*w];
            uint64_t s5 = ib[5*w];
            uint64_t s6 = ib[6*w];
            uint64_t s7 = ib[7*w];

            ob[0] = s0;
            ob[1] = s1;
            ob[2] = s2;
            ob[3] = s3;
            ob[4] = s4;
            ob[5] = s5;
            ob[6] = s6;
            ob[7] = s7;
        }
    }

    /* Row remainder: the last h % bh source rows (all rows when h < bh).
       A dedicated index is used so the block counters are left intact. */
    for (j = 0; j < w; j++) {
        for (r = i; r < h; r++) {
            out[j*h + r] = in[r*w + j];
        }
    }
#endif
}
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_TRANSPOSE_H
|
||||
#define FFTS_TRANSPOSE_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ffts_internal.h"
|
||||
|
||||
/*
 * Transpose a matrix of 64-bit elements.
 *
 * in  : source, read as h rows of w elements (row-major)
 * out : destination, written as w rows of h elements, so that
 *       out[c*h + r] == in[r*w + c]
 *
 * The SSE2 build of this routine only processes complete 8x8 tiles and uses
 * aligned vector loads/stores on both buffers.
 * NOTE(review): callers presumably pass 16-byte aligned buffers with w and h
 * multiples of 8 -- confirm at the call sites.
 */
void
|
||||
ffts_transpose(uint64_t *in, uint64_t *out, int w, int h);
|
||||
|
||||
#endif /* FFTS_TRANSPOSE_H */
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_TRIG_H
|
||||
#define FFTS_TRIG_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include "ffts_internal.h"
|
||||
|
||||
int
|
||||
ffts_generate_cosine_sine_32f(ffts_cpx_32f *const table, int table_size);
|
||||
|
||||
int
|
||||
ffts_generate_cosine_sine_pow2_32f(ffts_cpx_32f *const table, int table_size);
|
||||
|
||||
int
|
||||
ffts_generate_cosine_sine_pow2_64f(ffts_cpx_64f *const table, int table_size);
|
||||
|
||||
int
|
||||
ffts_generate_table_1d_real_32f(struct _ffts_plan_t *const p,
|
||||
int sign,
|
||||
int invert);
|
||||
|
||||
#endif /* FFTS_TRIG_H */
|
@ -1,206 +1,264 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifndef __MACROS_ALPHA_H__
|
||||
#define __MACROS_ALPHA_H__
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#ifndef FFTS_MACROS_ALPHA_H
|
||||
#define FFTS_MACROS_ALPHA_H
|
||||
|
||||
#ifdef __alpha__
|
||||
#define restrict
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
typedef struct {float r1, i1, r2, i2;} V;
|
||||
#include "ffts_attributes.h"
|
||||
|
||||
#define FFTS_MALLOC(d,a) malloc(d)
|
||||
#define FFTS_FREE(d) free(d)
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3})
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
static inline V VADD(V x, V y)
|
||||
typedef union {
|
||||
struct {
|
||||
float r1;
|
||||
float i1;
|
||||
float r2;
|
||||
float i2;
|
||||
} r;
|
||||
uint32_t u[4];
|
||||
} V4SF;
|
||||
|
||||
#define FFTS_MALLOC(d,a) (malloc(d))
|
||||
#define FFTS_FREE(d) (free(d))
|
||||
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_LIT4(float f3, float f2, float f1, float f0)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1 + y.r1;
|
||||
z.i1 = x.i1 + y.i1;
|
||||
z.r2 = x.r2 + y.r2;
|
||||
z.i2 = x.i2 + y.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = f0;
|
||||
z.r.i1 = f1;
|
||||
z.r.r2 = f2;
|
||||
z.r.i2 = f3;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
static inline V VSUB(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_ADD(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1 - y.r1;
|
||||
z.i1 = x.i1 - y.i1;
|
||||
z.r2 = x.r2 - y.r2;
|
||||
z.i2 = x.i2 - y.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1 + y.r.r1;
|
||||
z.r.i1 = x.r.i1 + y.r.i1;
|
||||
z.r.r2 = x.r.r2 + y.r.r2;
|
||||
z.r.i2 = x.r.i2 + y.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
static inline V VMUL(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_SUB(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1 * y.r1;
|
||||
z.i1 = x.i1 * y.i1;
|
||||
z.r2 = x.r2 * y.r2;
|
||||
z.i2 = x.i2 * y.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1 - y.r.r1;
|
||||
z.r.i1 = x.r.i1 - y.r.i1;
|
||||
z.r.r2 = x.r.r2 - y.r.r2;
|
||||
z.r.i2 = x.r.i2 - y.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VXOR(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_MUL(V4SF x, V4SF y)
|
||||
{
|
||||
V r;
|
||||
r.r1 = (uint32_t)x.r1 ^ (uint32_t)y.r1;
|
||||
r.i1 = (uint32_t)x.i1 ^ (uint32_t)y.i1;
|
||||
r.r2 = (uint32_t)x.r2 ^ (uint32_t)y.r2;
|
||||
r.i2 = (uint32_t)x.i2 ^ (uint32_t)y.i2;
|
||||
return r;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1 * y.r.r1;
|
||||
z.r.i1 = x.r.i1 * y.r.i1;
|
||||
z.r.r2 = x.r.r2 * y.r.r2;
|
||||
z.r.i2 = x.r.i2 * y.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VSWAPPAIRS(V x)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_XOR(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.i1;
|
||||
z.i1 = x.r1;
|
||||
z.r2 = x.i2;
|
||||
z.i2 = x.r2;
|
||||
V4SF z;
|
||||
|
||||
z.u[0] = x.u[0] ^ y.u[0];
|
||||
z.u[1] = x.u[1] ^ y.u[1];
|
||||
z.u[2] = x.u[2] ^ y.u[2];
|
||||
z.u[3] = x.u[3] ^ y.u[3];
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
static inline V VBLEND(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_SWAP_PAIRS(V4SF x)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1;
|
||||
z.i1 = x.i1;
|
||||
z.r2 = y.r2;
|
||||
z.i2 = y.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.i1;
|
||||
z.r.i1 = x.r.r1;
|
||||
z.r.r2 = x.r.i2;
|
||||
z.r.i2 = x.r.r2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VUNPACKHI(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_BLEND(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r2;
|
||||
z.i1 = x.i2;
|
||||
z.r2 = y.r2;
|
||||
z.i2 = y.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1;
|
||||
z.r.i1 = x.r.i1;
|
||||
z.r.r2 = y.r.r2;
|
||||
z.r.i2 = y.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VUNPACKLO(V x, V y)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_UNPACK_HI(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1;
|
||||
z.i1 = x.i1;
|
||||
z.r2 = y.r1;
|
||||
z.i2 = y.i1;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r2;
|
||||
z.r.i1 = x.r.i2;
|
||||
z.r.r2 = y.r.r2;
|
||||
z.r.i2 = y.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VDUPRE(V x)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_UNPACK_LO(V4SF x, V4SF y)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.r1;
|
||||
z.i1 = x.r1;
|
||||
z.r2 = x.r2;
|
||||
z.i2 = x.r2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1;
|
||||
z.r.i1 = x.r.i1;
|
||||
z.r.r2 = y.r.r1;
|
||||
z.r.i2 = y.r.i1;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V VDUPIM(V x)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_DUPLICATE_RE(V4SF x)
|
||||
{
|
||||
V z;
|
||||
z.r1 = x.i1;
|
||||
z.i1 = x.i1;
|
||||
z.r2 = x.i2;
|
||||
z.i2 = x.i2;
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.r1;
|
||||
z.r.i1 = x.r.r1;
|
||||
z.r.r2 = x.r.r2;
|
||||
z.r.i2 = x.r.r2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static inline V IMUL(V d, V re, V im)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_DUPLICATE_IM(V4SF x)
|
||||
{
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VSUB(re, im);
|
||||
V4SF z;
|
||||
|
||||
z.r.r1 = x.r.i1;
|
||||
z.r.i1 = x.r.i1;
|
||||
z.r.r2 = x.r.i2;
|
||||
z.r.i2 = x.r.i2;
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_SUB(re, im);
|
||||
}
|
||||
|
||||
static inline V IMULJ(V d, V re, V im)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VADD(re, im);
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_ADD(re, im);
|
||||
}
|
||||
|
||||
static inline V MULI(int inv, V x)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_MULI(int inv, V4SF x)
|
||||
{
|
||||
V z;
|
||||
V4SF z;
|
||||
|
||||
if (inv) {
|
||||
z.r1 = -x.r1;
|
||||
z.i1 = x.i1;
|
||||
z.r2 = -x.r2;
|
||||
z.i2 = x.i2;
|
||||
}else{
|
||||
z.r1 = x.r1;
|
||||
z.i1 = -x.i1;
|
||||
z.r2 = x.r2;
|
||||
z.i2 = -x.i2;
|
||||
z.r.r1 = -x.r.r1;
|
||||
z.r.i1 = x.r.i1;
|
||||
z.r.r2 = -x.r.r2;
|
||||
z.r.i2 = x.r.i2;
|
||||
} else {
|
||||
z.r.r1 = x.r.r1;
|
||||
z.r.i1 = -x.r.i1;
|
||||
z.r.r2 = x.r.r2;
|
||||
z.r.i2 = -x.r.i2;
|
||||
}
|
||||
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
static inline V IMULI(int inv, V x)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULI(int inv, V4SF x)
|
||||
{
|
||||
return VSWAPPAIRS(MULI(inv, x));
|
||||
return V4SF_SWAP_PAIRS(V4SF_MULI(inv, x));
|
||||
}
|
||||
|
||||
|
||||
static inline V VLD(const void *s)
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_LD(const void *s)
|
||||
{
|
||||
V *d = (V *)s;
|
||||
return *d;
|
||||
V4SF z;
|
||||
memcpy(&z, s, sizeof(z));
|
||||
return z;
|
||||
}
|
||||
|
||||
|
||||
static inline void VST(void *d, V s)
|
||||
static FFTS_ALWAYS_INLINE void
|
||||
V4SF_ST(void *d, V4SF s)
|
||||
{
|
||||
V *r = (V *)d;
|
||||
V4SF *r = (V4SF*) d;
|
||||
*r = s;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* FFTS_MACROS_ALPHA_H */
|
@ -1,96 +1,119 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
#ifndef __MACROS_NEON_H__
|
||||
#define __MACROS_NEON_H__
|
||||
|
||||
#include "neon.h"
|
||||
#ifndef FFTS_MACROS_NEON_H
|
||||
#define FFTS_MACROS_NEON_H
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
typedef float32x4_t V;
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
typedef float32x4x2_t VS;
|
||||
#define FFTS_MALLOC(d,a) (valloc(d))
|
||||
#define FFTS_FREE(d) (free(d))
|
||||
|
||||
#define ADD vaddq_f32
|
||||
#define SUB vsubq_f32
|
||||
#define MUL vmulq_f32
|
||||
#define VADD vaddq_f32
|
||||
#define VSUB vsubq_f32
|
||||
#define VMUL vmulq_f32
|
||||
#define VXOR(x,y) (vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))
|
||||
#define VST vst1q_f32
|
||||
#define VLD vld1q_f32
|
||||
#define VST2 vst2q_f32
|
||||
#define VLD2 vld2q_f32
|
||||
typedef float32x4_t V4SF;
|
||||
typedef float32x4x2_t V4SF2;
|
||||
|
||||
#define VSWAPPAIRS(x) (vrev64q_f32(x))
|
||||
#define V4SF_ADD vaddq_f32
|
||||
#define V4SF_SUB vsubq_f32
|
||||
#define V4SF_MUL vmulq_f32
|
||||
|
||||
#define VUNPACKHI(a,b) (vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
|
||||
#define VUNPACKLO(a,b) (vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
|
||||
#define V4SF_XOR(x,y) \
|
||||
(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))
|
||||
|
||||
#define VBLEND(x,y) (vcombine_f32(vget_low_f32(x), vget_high_f32(y)))
|
||||
#define V4SF_ST vst1q_f32
|
||||
#define V4SF_LD vld1q_f32
|
||||
|
||||
__INLINE V VLIT4(data_t f3, data_t f2, data_t f1, data_t f0) {
|
||||
data_t __attribute__ ((aligned(16))) d[4] = {f0, f1, f2, f3};
|
||||
return VLD(d);
|
||||
}
|
||||
#define V4SF_SWAP_PAIRS(x) \
|
||||
(vrev64q_f32(x))
|
||||
|
||||
#define VDUPRE(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
|
||||
#define VDUPIM(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
|
||||
#define V4SF_UNPACK_HI(a,b) \
|
||||
(vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
|
||||
|
||||
#define FFTS_MALLOC(d,a) (valloc(d))
|
||||
#define FFTS_FREE(d) (free(d))
|
||||
#define V4SF_UNPACK_LO(a,b) \
|
||||
(vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
|
||||
|
||||
__INLINE void STORESPR(data_t * addr, VS p) {
|
||||
#define V4SF_BLEND(x,y) \
|
||||
(vcombine_f32(vget_low_f32(x), vget_high_f32(y)))
|
||||
|
||||
vst1q_f32(addr, p.val[0]);
|
||||
vst1q_f32(addr + 4, p.val[1]);
|
||||
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_LIT4(float f3, float f2, float f1, float f0)
|
||||
{
|
||||
float FFTS_ALIGN(16) d[4] = {f0, f1, f2, f3};
|
||||
return V4SF_LD(d);
|
||||
}
|
||||
|
||||
__INLINE V IMULI(int inv, V a) {
|
||||
if(inv) return VSWAPPAIRS(VXOR(a, VLIT4(0.0f, -0.0f, 0.0f, -0.0f)));
|
||||
else return VSWAPPAIRS(VXOR(a, VLIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
|
||||
#define V4SF_DUPLICATE_RE(r) \
|
||||
vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
|
||||
|
||||
#define V4SF_DUPLICATE_IM(r) \
|
||||
vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
|
||||
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULI(int inv, V4SF a)
|
||||
{
|
||||
if (inv) {
|
||||
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f)));
|
||||
} else {
|
||||
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
|
||||
}
|
||||
}
|
||||
|
||||
__INLINE V IMUL(V d, V re, V im) {
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VSUB(re, im);
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_SUB(re, im);
|
||||
}
|
||||
|
||||
__INLINE V IMULJ(V d, V re, V im) {
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VADD(re, im);
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_ADD(re, im);
|
||||
}
|
||||
|
||||
#endif
|
||||
#define V4SF2_ST vst2q_f32
|
||||
#define V4SF2_LD vld2q_f32
|
||||
|
||||
static FFTS_ALWAYS_INLINE void
|
||||
V4SF2_STORE_SPR(float *addr, V4SF2 p)
|
||||
{
|
||||
vst1q_f32(addr, p.val[0]);
|
||||
vst1q_f32(addr + 4, p.val[1]);
|
||||
}
|
||||
|
||||
#endif /* FFTS_MACROS_NEON_H */
|
||||
|
@ -1,84 +1,100 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __SSE_FLOAT_H__
|
||||
#define __SSE_FLOAT_H__
|
||||
#ifndef FFTS_MACROS_SSE_H
|
||||
#define FFTS_MACROS_SSE_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
//#define VL 4
|
||||
#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
|
||||
#define FFTS_FREE(d) (_mm_free(d))
|
||||
|
||||
typedef __m128 V;
|
||||
typedef __m128 V4SF;
|
||||
|
||||
#define VADD _mm_add_ps
|
||||
#define VSUB _mm_sub_ps
|
||||
#define VMUL _mm_mul_ps
|
||||
//#define VLIT4 _mm_set_ps
|
||||
#define VXOR _mm_xor_ps
|
||||
#define VST _mm_store_ps
|
||||
#define VLD _mm_load_ps
|
||||
#define V4SF_ADD _mm_add_ps
|
||||
#define V4SF_SUB _mm_sub_ps
|
||||
#define V4SF_MUL _mm_mul_ps
|
||||
#define V4SF_LIT4 _mm_set_ps
|
||||
#define V4SF_XOR _mm_xor_ps
|
||||
#define V4SF_ST _mm_store_ps
|
||||
#define V4SF_LD _mm_load_ps
|
||||
|
||||
#define VSWAPPAIRS(x) (_mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1)))
|
||||
#define V4SF_SWAP_PAIRS(x) \
|
||||
(_mm_shuffle_ps(x, x, _MM_SHUFFLE(2,3,0,1)))
|
||||
|
||||
#define VUNPACKHI(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,3,2)))
|
||||
#define VUNPACKLO(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(1,0,1,0)))
|
||||
#define V4SF_UNPACK_HI(x,y) \
|
||||
(_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,3,2)))
|
||||
|
||||
#define VBLEND(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,1,0)))
|
||||
#define V4SF_UNPACK_LO(x,y) \
|
||||
(_mm_movelh_ps(x, y))
|
||||
|
||||
#define VLIT4 _mm_set_ps
|
||||
#define V4SF_BLEND(x, y) \
|
||||
(_mm_shuffle_ps(x, y, _MM_SHUFFLE(3,2,1,0)))
|
||||
|
||||
#define VDUPRE(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(2,2,0,0)))
|
||||
#define VDUPIM(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(3,3,1,1)))
|
||||
#define V4SF_DUPLICATE_RE(r) \
|
||||
(_mm_shuffle_ps(r, r, _MM_SHUFFLE(2,2,0,0)))
|
||||
|
||||
#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
|
||||
#define FFTS_FREE(d) (_mm_free(d))
|
||||
#define V4SF_DUPLICATE_IM(r) \
|
||||
(_mm_shuffle_ps(r, r, _MM_SHUFFLE(3,3,1,1)))
|
||||
|
||||
__INLINE V IMULI(int inv, V a) {
|
||||
if(inv) return VSWAPPAIRS(VXOR(a, VLIT4(0.0f, -0.0f, 0.0f, -0.0f)));
|
||||
else return VSWAPPAIRS(VXOR(a, VLIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULI(int inv, V4SF a)
|
||||
{
|
||||
if (inv) {
|
||||
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(0.0f, -0.0f, 0.0f, -0.0f)));
|
||||
} else {
|
||||
return V4SF_SWAP_PAIRS(V4SF_XOR(a, V4SF_LIT4(-0.0f, 0.0f, -0.0f, 0.0f)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__INLINE V IMUL(V d, V re, V im) {
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VSUB(re, im);
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMUL(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_SUB(re, im);
|
||||
}
|
||||
|
||||
__INLINE V IMULJ(V d, V re, V im) {
|
||||
re = VMUL(re, d);
|
||||
im = VMUL(im, VSWAPPAIRS(d));
|
||||
return VADD(re, im);
|
||||
static FFTS_ALWAYS_INLINE V4SF
|
||||
V4SF_IMULJ(V4SF d, V4SF re, V4SF im)
|
||||
{
|
||||
re = V4SF_MUL(re, d);
|
||||
im = V4SF_MUL(im, V4SF_SWAP_PAIRS(d));
|
||||
return V4SF_ADD(re, im);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* FFTS_MACROS_SSE_H */
|
||||
|
@ -1,161 +1,204 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __MACROS_H__
|
||||
#define __MACROS_H__
|
||||
#ifndef FFTS_MACROS_H
|
||||
#define FFTS_MACROS_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#include "macros-neon.h"
|
||||
#elif HAVE_SSE
|
||||
#include "macros-sse.h"
|
||||
#elif __powerpc__
|
||||
#include "macros-altivec.h"
|
||||
#else
|
||||
#ifdef __alpha__
|
||||
#include "macros-alpha.h"
|
||||
#else
|
||||
#ifdef __powerpc__
|
||||
#include "macros-altivec.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
static FFTS_INLINE void
|
||||
V4SF_TX2(V4SF *a, V4SF *b)
|
||||
{
|
||||
V4SF t0 = V4SF_UNPACK_LO(*a, *b);
|
||||
V4SF t1 = V4SF_UNPACK_HI(*a, *b);
|
||||
*a = t0;
|
||||
*b = t1;
|
||||
}
|
||||
|
||||
static FFTS_INLINE void
|
||||
V4SF_K_N(int inv,
|
||||
V4SF re,
|
||||
V4SF im,
|
||||
V4SF *r0,
|
||||
V4SF *r1,
|
||||
V4SF *r2,
|
||||
V4SF *r3)
|
||||
{
|
||||
V4SF uk, uk2, zk_p, zk_n, zk, zk_d;
|
||||
|
||||
#ifdef HAVE_VFP
|
||||
#include "macros-alpha.h"
|
||||
#endif
|
||||
#ifdef HAVE_SSE
|
||||
#include "macros-sse.h"
|
||||
#endif
|
||||
uk = *r0;
|
||||
uk2 = *r1;
|
||||
|
||||
static inline void TX2(V *a, V *b)
|
||||
{
|
||||
V TX2_t0 = VUNPACKLO(*a, *b);
|
||||
V TX2_t1 = VUNPACKHI(*a, *b);
|
||||
*a = TX2_t0; *b = TX2_t1;
|
||||
zk_p = V4SF_IMUL(*r2, re, im);
|
||||
zk_n = V4SF_IMULJ(*r3, re, im);
|
||||
|
||||
zk = V4SF_ADD(zk_p, zk_n);
|
||||
zk_d = V4SF_IMULI(inv, V4SF_SUB(zk_p, zk_n));
|
||||
|
||||
*r2 = V4SF_SUB(uk, zk);
|
||||
*r0 = V4SF_ADD(uk, zk);
|
||||
*r3 = V4SF_ADD(uk2, zk_d);
|
||||
*r1 = V4SF_SUB(uk2, zk_d);
|
||||
}
|
||||
|
||||
static inline void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
|
||||
static FFTS_INLINE void
|
||||
V4SF_L_2_4(int inv,
|
||||
const float *FFTS_RESTRICT i0,
|
||||
const float *FFTS_RESTRICT i1,
|
||||
const float *FFTS_RESTRICT i2,
|
||||
const float *FFTS_RESTRICT i3,
|
||||
V4SF *r0,
|
||||
V4SF *r1,
|
||||
V4SF *r2,
|
||||
V4SF *r3)
|
||||
{
|
||||
V uk, uk2, zk_p, zk_n, zk, zk_d;
|
||||
uk = *r0; uk2 = *r1;
|
||||
zk_p = IMUL(*r2, re, im);
|
||||
zk_n = IMULJ(*r3, re, im);
|
||||
|
||||
zk = VADD(zk_p, zk_n);
|
||||
zk_d = IMULI(inv, VSUB(zk_p, zk_n));
|
||||
|
||||
*r2 = VSUB(uk, zk);
|
||||
*r0 = VADD(uk, zk);
|
||||
*r3 = VADD(uk2, zk_d);
|
||||
*r1 = VSUB(uk2, zk_d);
|
||||
}
|
||||
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = V4SF_LD(i0);
|
||||
t1 = V4SF_LD(i1);
|
||||
t2 = V4SF_LD(i2);
|
||||
t3 = V4SF_LD(i3);
|
||||
|
||||
static inline void S_4(V r0, V r1, V r2, V r3,
|
||||
data_t * restrict o0, data_t * restrict o1,
|
||||
data_t * restrict o2, data_t * restrict o3)
|
||||
{
|
||||
VST(o0, r0); VST(o1, r1); VST(o2, r2); VST(o3, r3);
|
||||
}
|
||||
t4 = V4SF_ADD(t0, t1);
|
||||
t5 = V4SF_SUB(t0, t1);
|
||||
t6 = V4SF_ADD(t2, t3);
|
||||
t7 = V4SF_SUB(t2, t3);
|
||||
|
||||
*r0 = V4SF_UNPACK_LO(t4, t5);
|
||||
*r1 = V4SF_UNPACK_LO(t6, t7);
|
||||
|
||||
static inline void L_2_4(int inv,
|
||||
const data_t * restrict i0, const data_t * restrict i1,
|
||||
const data_t * restrict i2, const data_t * restrict i3,
|
||||
V *r0, V *r1, V *r2, V *r3)
|
||||
{
|
||||
V t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
|
||||
t4 = VADD(t0, t1);
|
||||
t5 = VSUB(t0, t1);
|
||||
t6 = VADD(t2, t3);
|
||||
t7 = VSUB(t2, t3);
|
||||
*r0 = VUNPACKLO(t4, t5);
|
||||
*r1 = VUNPACKLO(t6, t7);
|
||||
t5 = IMULI(inv, t5);
|
||||
t0 = VADD(t6, t4);
|
||||
t2 = VSUB(t6, t4);
|
||||
t1 = VSUB(t7, t5);
|
||||
t3 = VADD(t7, t5);
|
||||
*r3 = VUNPACKHI(t0, t1);
|
||||
*r2 = VUNPACKHI(t2, t3);
|
||||
}
|
||||
t5 = V4SF_IMULI(inv, t5);
|
||||
|
||||
t0 = V4SF_ADD(t6, t4);
|
||||
t2 = V4SF_SUB(t6, t4);
|
||||
t1 = V4SF_SUB(t7, t5);
|
||||
t3 = V4SF_ADD(t7, t5);
|
||||
|
||||
static inline void L_4_4(int inv,
|
||||
const data_t * restrict i0, const data_t * restrict i1,
|
||||
const data_t * restrict i2, const data_t * restrict i3,
|
||||
V *r0, V *r1, V *r2, V *r3)
|
||||
{
|
||||
V t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = VLD(i0); t1 = VLD(i1); t2 = VLD(i2); t3 = VLD(i3);
|
||||
t4 = VADD(t0, t1);
|
||||
t5 = VSUB(t0, t1);
|
||||
t6 = VADD(t2, t3);
|
||||
t7 = IMULI(inv, VSUB(t2, t3));
|
||||
t0 = VADD(t4, t6);
|
||||
t2 = VSUB(t4, t6);
|
||||
t1 = VSUB(t5, t7);
|
||||
t3 = VADD(t5, t7);
|
||||
TX2(&t0, &t1);
|
||||
TX2(&t2, &t3);
|
||||
*r0 = t0; *r2 = t1; *r1 = t2; *r3 = t3;
|
||||
*r3 = V4SF_UNPACK_HI(t0, t1);
|
||||
*r2 = V4SF_UNPACK_HI(t2, t3);
|
||||
}
|
||||
|
||||
static FFTS_INLINE void
|
||||
V4SF_L_4_4(int inv,
|
||||
const float *FFTS_RESTRICT i0,
|
||||
const float *FFTS_RESTRICT i1,
|
||||
const float *FFTS_RESTRICT i2,
|
||||
const float *FFTS_RESTRICT i3,
|
||||
V4SF *r0,
|
||||
V4SF *r1,
|
||||
V4SF *r2,
|
||||
V4SF *r3)
|
||||
{
|
||||
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = V4SF_LD(i0);
|
||||
t1 = V4SF_LD(i1);
|
||||
t2 = V4SF_LD(i2);
|
||||
t3 = V4SF_LD(i3);
|
||||
|
||||
t4 = V4SF_ADD(t0, t1);
|
||||
t5 = V4SF_SUB(t0, t1);
|
||||
t6 = V4SF_ADD(t2, t3);
|
||||
|
||||
t7 = V4SF_IMULI(inv, V4SF_SUB(t2, t3));
|
||||
|
||||
t0 = V4SF_ADD(t4, t6);
|
||||
t2 = V4SF_SUB(t4, t6);
|
||||
t1 = V4SF_SUB(t5, t7);
|
||||
t3 = V4SF_ADD(t5, t7);
|
||||
|
||||
V4SF_TX2(&t0, &t1);
|
||||
V4SF_TX2(&t2, &t3);
|
||||
|
||||
*r0 = t0;
|
||||
*r2 = t1;
|
||||
*r1 = t2;
|
||||
*r3 = t3;
|
||||
}
|
||||
|
||||
static inline void L_4_2(int inv,
|
||||
const data_t * restrict i0, const data_t * restrict i1,
|
||||
const data_t * restrict i2, const data_t * restrict i3,
|
||||
V *r0, V *r1, V *r2, V *r3)
|
||||
static FFTS_INLINE void
|
||||
V4SF_L_4_2(int inv,
|
||||
const float *FFTS_RESTRICT i0,
|
||||
const float *FFTS_RESTRICT i1,
|
||||
const float *FFTS_RESTRICT i2,
|
||||
const float *FFTS_RESTRICT i3,
|
||||
V4SF *r0,
|
||||
V4SF *r1,
|
||||
V4SF *r2,
|
||||
V4SF *r3)
|
||||
{
|
||||
V t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = VLD(i0); t1 = VLD(i1); t6 = VLD(i2); t7 = VLD(i3);
|
||||
t2 = VBLEND(t6, t7);
|
||||
t3 = VBLEND(t7, t6);
|
||||
t4 = VADD(t0, t1);
|
||||
t5 = VSUB(t0, t1);
|
||||
t6 = VADD(t2, t3);
|
||||
t7 = VSUB(t2, t3);
|
||||
*r2 = VUNPACKHI(t4, t5);
|
||||
*r3 = VUNPACKHI(t6, t7);
|
||||
t7 = IMULI(inv, t7);
|
||||
t0 = VADD(t4, t6);
|
||||
t2 = VSUB(t4, t6);
|
||||
t1 = VSUB(t5, t7);
|
||||
t3 = VADD(t5, t7);
|
||||
*r0 = VUNPACKLO(t0, t1);
|
||||
*r1 = VUNPACKLO(t2, t3);
|
||||
V4SF t0, t1, t2, t3, t4, t5, t6, t7;
|
||||
|
||||
t0 = V4SF_LD(i0);
|
||||
t1 = V4SF_LD(i1);
|
||||
t6 = V4SF_LD(i2);
|
||||
t7 = V4SF_LD(i3);
|
||||
|
||||
t2 = V4SF_BLEND(t6, t7);
|
||||
t3 = V4SF_BLEND(t7, t6);
|
||||
|
||||
t4 = V4SF_ADD(t0, t1);
|
||||
t5 = V4SF_SUB(t0, t1);
|
||||
t6 = V4SF_ADD(t2, t3);
|
||||
t7 = V4SF_SUB(t2, t3);
|
||||
|
||||
*r2 = V4SF_UNPACK_HI(t4, t5);
|
||||
*r3 = V4SF_UNPACK_HI(t6, t7);
|
||||
|
||||
t7 = V4SF_IMULI(inv, t7);
|
||||
|
||||
t0 = V4SF_ADD(t4, t6);
|
||||
t2 = V4SF_SUB(t4, t6);
|
||||
t1 = V4SF_SUB(t5, t7);
|
||||
t3 = V4SF_ADD(t5, t7);
|
||||
|
||||
*r0 = V4SF_UNPACK_LO(t0, t1);
|
||||
*r1 = V4SF_UNPACK_LO(t2, t3);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define V4SF_S_4(r0, r1, r2, r3, o0, o1, o2, o3) \
|
||||
V4SF_ST(o0, r0); V4SF_ST(o1, r1); V4SF_ST(o2, r2); V4SF_ST(o3, r3);
|
||||
|
||||
#endif /* FFTS_MACROS_H */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,956 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_e_f
|
||||
_neon_static_e_f:
|
||||
#else
|
||||
.globl neon_static_e_f
|
||||
neon_static_e_f:
|
||||
#endif
|
||||
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
ldr lr, [r0, #40] @ this is p->N
|
||||
add r3, r1, #0
|
||||
add r7, r1, lr
|
||||
add r5, r7, lr
|
||||
add r10, r5, lr
|
||||
add r4, r10, lr
|
||||
add r8, r4, lr
|
||||
add r6, r8, lr
|
||||
add r9, r6, lr
|
||||
ldr r12, [r0]
|
||||
add r1, r0, #0
|
||||
add r0, r2, #0
|
||||
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||
ldr r11, [r1, #28] @ this is p->i0
|
||||
|
||||
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_loop:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vsub.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 d31, d5, d2 @
|
||||
vadd.f32 d28, d4, d3 @
|
||||
vsub.f32 d30, d4, d3 @
|
||||
vsub.f32 d5, d19, d14 @
|
||||
vsub.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vadd.f32 d6, d30, d27 @
|
||||
vadd.f32 d4, d18, d15 @
|
||||
vadd.f32 d13, d19, d14 @
|
||||
vsub.f32 d12, d18, d15 @
|
||||
vadd.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vsub.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_loop
|
||||
|
||||
ldr r11, [r1, #12]
|
||||
vld2.32 {q9}, [r5, :128]! @tag2
|
||||
vld2.32 {q13}, [r3, :128]! @tag0
|
||||
vld2.32 {q12}, [r4, :128]! @tag1
|
||||
vld2.32 {q0}, [r7, :128]! @tag4
|
||||
vsub.f32 q11, q13, q12
|
||||
vld2.32 {q8}, [r6, :128]! @tag3
|
||||
vadd.f32 q12, q13, q12
|
||||
vsub.f32 q10, q9, q8
|
||||
vadd.f32 q8, q9, q8
|
||||
vadd.f32 q9, q12, q8
|
||||
vsub.f32 d9, d23, d20 @
|
||||
vadd.f32 d11, d23, d20 @
|
||||
vsub.f32 q8, q12, q8
|
||||
vadd.f32 d8, d22, d21 @
|
||||
vsub.f32 d10, d22, d21 @
|
||||
ldr r2, [r12], #4
|
||||
vld1.32 {d20, d21}, [r11, :128]
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q9, q4
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q8, q5
|
||||
add lr, r0, lr, lsl #2
|
||||
vswp d9,d10
|
||||
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
|
||||
vld2.32 {q13}, [r10, :128]! @tag7
|
||||
vld2.32 {q15}, [r9, :128]! @tag6
|
||||
vld2.32 {q11}, [r8, :128]! @tag5
|
||||
vsub.f32 q14, q15, q13
|
||||
vsub.f32 q12, q0, q11
|
||||
vadd.f32 q11, q0, q11
|
||||
vadd.f32 q13, q15, q13
|
||||
vsub.f32 d13, d29, d24 @
|
||||
vadd.f32 q15, q13, q11
|
||||
vadd.f32 d12, d28, d25 @
|
||||
vadd.f32 d15, d29, d24 @
|
||||
vsub.f32 d14, d28, d25 @
|
||||
vtrn.32 q15, q6
|
||||
vsub.f32 q15, q13, q11
|
||||
vtrn.32 q15, q7
|
||||
vswp d13, d14
|
||||
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
|
||||
vtrn.32 q13, q14
|
||||
vtrn.32 q11, q12
|
||||
vmul.f32 d24, d26, d21
|
||||
vmul.f32 d28, d27, d20
|
||||
vmul.f32 d25, d26, d20
|
||||
vmul.f32 d26, d27, d21
|
||||
vmul.f32 d27, d22, d21
|
||||
vmul.f32 d30, d23, d20
|
||||
vmul.f32 d29, d23, d21
|
||||
vmul.f32 d22, d22, d20
|
||||
vsub.f32 d21, d28, d24
|
||||
vadd.f32 d20, d26, d25
|
||||
vadd.f32 d25, d30, d27
|
||||
vsub.f32 d24, d22, d29
|
||||
vadd.f32 q11, q12, q10
|
||||
vsub.f32 q10, q12, q10
|
||||
vadd.f32 q0, q9, q11
|
||||
vsub.f32 q2, q9, q11
|
||||
vsub.f32 d3, d17, d20 @
|
||||
vadd.f32 d7, d17, d20 @
|
||||
vadd.f32 d2, d16, d21 @
|
||||
vsub.f32 d6, d16, d21 @
|
||||
vswp d1, d2
|
||||
vswp d5, d6
|
||||
vstmia r2!, {q0-q3}
|
||||
|
||||
add r2, r7, #0
|
||||
add r7, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r8, #0
|
||||
add r8, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_oo_loop_exit
|
||||
_neon_oo_loop:
|
||||
vld2.32 {q8}, [r6, :128]!
|
||||
vld2.32 {q9}, [r5, :128]!
|
||||
vld2.32 {q10}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vadd.f32 q11, q9, q8
|
||||
vsub.f32 q8, q9, q8
|
||||
vsub.f32 q9, q13, q10
|
||||
vadd.f32 q12, q13, q10
|
||||
subs r11, r11, #1
|
||||
vld2.32 {q10}, [r7, :128]!
|
||||
vld2.32 {q13}, [r9, :128]!
|
||||
vsub.f32 q2, q12, q11
|
||||
vadd.f32 d7, d19, d16 @
|
||||
vsub.f32 d3, d19, d16 @
|
||||
vsub.f32 d6, d18, d17 @
|
||||
vadd.f32 d2, d18, d17 @
|
||||
vld2.32 {q9}, [r8, :128]!
|
||||
vld2.32 {q8}, [r10, :128]!
|
||||
vadd.f32 q0, q12, q11
|
||||
vadd.f32 q11, q13, q8
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 q8, q13, q8
|
||||
vsub.f32 q9, q10, q9
|
||||
vsub.f32 q6, q12, q11
|
||||
vadd.f32 q4, q12, q11
|
||||
vtrn.32 q0, q2
|
||||
ldr r2, [r12], #4
|
||||
vadd.f32 d15, d19, d16 @
|
||||
ldr lr, [r12], #4
|
||||
vsub.f32 d11, d19, d16 @
|
||||
vsub.f32 d14, d18, d17 @
|
||||
vadd.f32 d10, d18, d17 @
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q1, q3
|
||||
add lr, r0, lr, lsl #2
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_oo_loop
|
||||
_neon_oo_loop_exit:
|
||||
|
||||
|
||||
add r2, r3, #0
|
||||
add r3, r7, #0
|
||||
add r7, r2, #0
|
||||
add r2, r4, #0
|
||||
add r4, r8, #0
|
||||
add r8, r2, #0
|
||||
add r2, r5, #0
|
||||
add r5, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r6, #0
|
||||
add r6, r10, #0
|
||||
add r10, r2, #0
|
||||
add r2, r9, #0
|
||||
add r9, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r2, [r1, #16]
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_ee_loop2_exit
|
||||
|
||||
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_loop2:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vsub.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 d31, d5, d2 @
|
||||
vadd.f32 d28, d4, d3 @
|
||||
vsub.f32 d30, d4, d3 @
|
||||
vsub.f32 d5, d19, d14 @
|
||||
vsub.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vadd.f32 d6, d30, d27 @
|
||||
vadd.f32 d4, d18, d15 @
|
||||
vadd.f32 d13, d19, d14 @
|
||||
vsub.f32 d12, d18, d15 @
|
||||
vadd.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vsub.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_loop2
|
||||
_neon_ee_loop2_exit:
|
||||
|
||||
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
|
||||
|
||||
|
||||
@ neon_static_o_f
@
@ Entry point and register setup. After this setup the routine runs, in order:
@   _neon_ee_o_loop   (iteration count loaded from [plan, #28], p->i0)
@   _neon_oo_o_loop   (iteration count loaded from [plan, #32], p->i1)
@   one straight-line pass after _neon_oo_o_loop_exit
@   _neon_ee_o_loop2  (iteration count loaded from [plan, #32], p->i1)
@
@ On entry (presumably the first three AAPCS arguments - TODO confirm
@ against the C prototype):
@   r0 = plan pointer (p), r1 = input base, r2 = output base
@ After setup:
@   r0  = output base (stores go to r0 + offset * 4)
@   r1  = plan pointer
@   r3, r7, r5, r10, r4, r8, r6, r9 = eight read pointers, each one
@         [p, #40] bytes beyond the previous one, in that order
@   r12 = pointer loaded from [p, #0]; the loops read 32-bit output
@         offsets from it with post-increment
@   r11 = loop counter, q8 (d16/d17) = constants loaded from p->ee_ws
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_o_f
|
||||
_neon_static_o_f:
|
||||
#else
|
||||
.globl neon_static_o_f
|
||||
neon_static_o_f:
|
||||
#endif
|
||||
@ Save the core registers used below plus the return address, then d8-d15
@ (q4-q7), which the loops overwrite.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
@ lr is free after the push; it is used here as the byte distance between
@ consecutive read pointers.
ldr lr, [r0, #40] @ this is p->N
|
||||
@ Build the eight read pointers: r3 is the input base, each following
@ pointer is the previous one plus lr.
add r3, r1, #0
|
||||
add r7, r1, lr
|
||||
add r5, r7, lr
|
||||
add r10, r5, lr
|
||||
add r4, r10, lr
|
||||
add r8, r4, lr
|
||||
add r6, r8, lr
|
||||
add r9, r6, lr
|
||||
@ First word of the plan: pointer to the table of output offsets.
ldr r12, [r0]
|
||||
@ Move the plan pointer to r1 and the third argument to r0 so that r2 can
@ be reused as a scratch/output pointer inside the loops.
add r1, r0, #0
|
||||
add r0, r2, #0
|
||||
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||
ldr r11, [r1, #28] @ this is p->i0
|
||||
|
||||
@ Load four floats of constants into q8; the :128 qualifier asserts that
@ r2 is 16-byte aligned.
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_o_loop:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vsub.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 d31, d5, d2 @
|
||||
vadd.f32 d28, d4, d3 @
|
||||
vsub.f32 d30, d4, d3 @
|
||||
vsub.f32 d5, d19, d14 @
|
||||
vsub.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vadd.f32 d6, d30, d27 @
|
||||
vadd.f32 d4, d18, d15 @
|
||||
vadd.f32 d13, d19, d14 @
|
||||
vsub.f32 d12, d18, d15 @
|
||||
vadd.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vsub.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_o_loop
|
||||
|
||||
add r2, r7, #0
|
||||
add r7, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r8, #0
|
||||
add r8, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_oo_o_loop_exit
|
||||
_neon_oo_o_loop:
|
||||
vld2.32 {q8}, [r6, :128]!
|
||||
vld2.32 {q9}, [r5, :128]!
|
||||
vld2.32 {q10}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vadd.f32 q11, q9, q8
|
||||
vsub.f32 q8, q9, q8
|
||||
vsub.f32 q9, q13, q10
|
||||
vadd.f32 q12, q13, q10
|
||||
subs r11, r11, #1
|
||||
vld2.32 {q10}, [r7, :128]!
|
||||
vld2.32 {q13}, [r9, :128]!
|
||||
vsub.f32 q2, q12, q11
|
||||
vadd.f32 d7, d19, d16 @
|
||||
vsub.f32 d3, d19, d16 @
|
||||
vsub.f32 d6, d18, d17 @
|
||||
vadd.f32 d2, d18, d17 @
|
||||
vld2.32 {q9}, [r8, :128]!
|
||||
vld2.32 {q8}, [r10, :128]!
|
||||
vadd.f32 q0, q12, q11
|
||||
vadd.f32 q11, q13, q8
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 q8, q13, q8
|
||||
vsub.f32 q9, q10, q9
|
||||
vsub.f32 q6, q12, q11
|
||||
vadd.f32 q4, q12, q11
|
||||
vtrn.32 q0, q2
|
||||
ldr r2, [r12], #4
|
||||
vadd.f32 d15, d19, d16 @
|
||||
ldr lr, [r12], #4
|
||||
vsub.f32 d11, d19, d16 @
|
||||
vsub.f32 d14, d18, d17 @
|
||||
vadd.f32 d10, d18, d17 @
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q1, q3
|
||||
add lr, r0, lr, lsl #2
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_oo_o_loop
|
||||
_neon_oo_o_loop_exit:
|
||||
|
||||
ldr r11, [r1, #8]
|
||||
vld1.32 {q8}, [r5, :128]!
|
||||
vld1.32 {q10}, [r6, :128]!
|
||||
vld2.32 {q11}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vorr d25, d17, d17
|
||||
vorr d24, d20, d20
|
||||
vorr d20, d16, d16
|
||||
vsub.f32 q9, q13, q11
|
||||
vadd.f32 q11, q13, q11
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 d24, d25
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 d20, d21
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q8, q10, q12
|
||||
add lr, r0, lr, lsl #2
|
||||
vadd.f32 q10, q10, q12
|
||||
vadd.f32 q0, q11, q10
|
||||
vsub.f32 d25, d19, d16 @
|
||||
vadd.f32 d27, d19, d16 @
|
||||
vsub.f32 q1, q11, q10
|
||||
vadd.f32 d24, d18, d17 @
|
||||
vsub.f32 d26, d18, d17 @
|
||||
vtrn.32 q0, q12
|
||||
vtrn.32 q1, q13
|
||||
vld1.32 {d24, d25}, [r11, :128]
|
||||
vswp d1, d2
|
||||
vst1.32 {q0, q1}, [r2, :128]!
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
vadd.f32 q1, q0, q15
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vsub.f32 q15, q0, q15
|
||||
vsub.f32 q0, q14, q13
|
||||
vadd.f32 q3, q14, q13
|
||||
vadd.f32 q2, q3, q1
|
||||
vsub.f32 d29, d1, d30 @
|
||||
vadd.f32 d27, d1, d30 @
|
||||
vsub.f32 q3, q3, q1
|
||||
vadd.f32 d28, d0, d31 @
|
||||
vsub.f32 d26, d0, d31 @
|
||||
vtrn.32 q2, q14
|
||||
vtrn.32 q3, q13
|
||||
vswp d5, d6
|
||||
vst1.32 {q2, q3}, [r2, :128]!
|
||||
vtrn.32 q11, q9
|
||||
vtrn.32 q10, q8
|
||||
vmul.f32 d20, d18, d25
|
||||
vmul.f32 d22, d19, d24
|
||||
vmul.f32 d21, d19, d25
|
||||
vmul.f32 d18, d18, d24
|
||||
vmul.f32 d19, d16, d25
|
||||
vmul.f32 d30, d17, d24
|
||||
vmul.f32 d23, d16, d24
|
||||
vmul.f32 d24, d17, d25
|
||||
vadd.f32 d17, d22, d20
|
||||
vsub.f32 d16, d18, d21
|
||||
vsub.f32 d21, d30, d19
|
||||
vadd.f32 d20, d24, d23
|
||||
vadd.f32 q9, q8, q10
|
||||
vsub.f32 q8, q8, q10
|
||||
vadd.f32 q4, q14, q9
|
||||
vsub.f32 q6, q14, q9
|
||||
vsub.f32 d11, d27, d16 @
|
||||
vadd.f32 d15, d27, d16 @
|
||||
vadd.f32 d10, d26, d17 @
|
||||
vsub.f32 d14, d26, d17 @
|
||||
vswp d9, d10
|
||||
vswp d13, d14
|
||||
vstmia lr!, {q4-q7}
|
||||
|
||||
|
||||
add r2, r3, #0
|
||||
add r3, r7, #0
|
||||
add r7, r2, #0
|
||||
add r2, r4, #0
|
||||
add r4, r8, #0
|
||||
add r8, r2, #0
|
||||
add r2, r5, #0
|
||||
add r5, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r6, #0
|
||||
add r6, r10, #0
|
||||
add r10, r2, #0
|
||||
add r2, r9, #0
|
||||
add r9, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r2, [r1, #16]
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_ee_o_loop2_exit
|
||||
|
||||
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_o_loop2:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vsub.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 d31, d5, d2 @
|
||||
vadd.f32 d28, d4, d3 @
|
||||
vsub.f32 d30, d4, d3 @
|
||||
vsub.f32 d5, d19, d14 @
|
||||
vsub.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vadd.f32 d6, d30, d27 @
|
||||
vadd.f32 d4, d18, d15 @
|
||||
vadd.f32 d13, d19, d14 @
|
||||
vsub.f32 d12, d18, d15 @
|
||||
vadd.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vsub.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_o_loop2
|
||||
_neon_ee_o_loop2_exit:
|
||||
|
||||
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
@ neon_static_x4_f
@
@ Straight-line (no loop) pass over four 32-byte groups of floats, updated
@ in place.
@
@ On entry (presumably the first three AAPCS arguments - TODO confirm
@ against the C prototype):
@   r0 = data base, r1 = byte stride unit, r2 = pointer to 8 floats of
@        constants (loaded into q2/q3)
@ Groups touched: r0, r0 + 2*r1 (r4), r0 + 4*r1 (r5), r0 + 6*r1 (r6).
@ All loads and stores carry the :128 qualifier, so every one of these
@ addresses and r2 must be 16-byte aligned.
@ Lines ending in a bare @ were marked that way by the original author;
@ NOTE(review): these look like the sign-sensitive operations that would
@ differ in an inverse variant - confirm against the _i counterpart.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x4_f
|
||||
_neon_static_x4_f:
|
||||
#else
|
||||
.globl neon_static_x4_f
|
||||
neon_static_x4_f:
|
||||
#endif
|
||||
@ add r3, r0, #0
|
||||
@ Save scratch core registers and d8-d15 (q4-q7 are written below).
push {r4, r5, r6, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
|
||||
@ Load the four data groups and the constants; address arithmetic is
@ interleaved with the loads.
vld1.32 {q8,q9}, [r0, :128]
|
||||
add r4, r0, r1, lsl #1
|
||||
vld1.32 {q10,q11}, [r4, :128]
|
||||
add r5, r0, r1, lsl #2
|
||||
vld1.32 {q12,q13}, [r5, :128]
|
||||
add r6, r4, r1, lsl #2
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
vld1.32 {q2,q3}, [r2, :128]
|
||||
|
||||
@ Eight element-wise products of (q12,q13) and (q14,q15) with (q2,q3).
@ Sources are overwritten as soon as they are no longer needed, so the
@ order of these multiplies matters.
vmul.f32 q0, q13, q3
|
||||
vmul.f32 q5, q12, q2
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q4, q14, q3
|
||||
vmul.f32 q14, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vmul.f32 q12, q15, q3
|
||||
vmul.f32 q2, q15, q2
|
||||
@ Combine the products. In terms of the originally loaded values:
@   q0  = q12*q2 - q13*q3      q13 = q13*q2 + q12*q3
@   q12 = q15*q3 + q14*q2      q1  = q15*q2 - q14*q3
@ This has the shape of a complex multiply of (q12,q13) by (q2,q3) and of
@ (q14,q15) by the conjugate of (q2,q3), assuming each register pair
@ holds real parts then imaginary parts - TODO confirm data layout.
vsub.f32 q0, q5, q0
|
||||
vadd.f32 q13, q13, q14
|
||||
vadd.f32 q12, q12, q1
|
||||
vsub.f32 q1, q2, q4
|
||||
@ Sums and differences of the two product pairs.
vadd.f32 q15, q0, q12
|
||||
vsub.f32 q12, q0, q12
|
||||
vadd.f32 q14, q13, q1
|
||||
vsub.f32 q13, q13, q1
|
||||
@ Final add/subtract against the untouched groups (q8-q11); results are
@ written back to the same four addresses they were loaded from.
vadd.f32 q0, q8, q15
|
||||
vadd.f32 q1, q9, q14
|
||||
vadd.f32 q2, q10, q13 @
|
||||
vsub.f32 q4, q8, q15
|
||||
vsub.f32 q3, q11, q12 @
|
||||
vst1.32 {q0,q1}, [r0, :128]
|
||||
vsub.f32 q5, q9, q14
|
||||
vsub.f32 q6, q10, q13 @
|
||||
vadd.f32 q7, q11, q12 @
|
||||
vst1.32 {q2,q3}, [r4, :128]
|
||||
vst1.32 {q4,q5}, [r5, :128]
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
@ Restore d8-d15 and return by popping the saved lr into pc.
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, pc}
|
||||
|
||||
|
||||
|
||||
@ neon_static_x8_f
@
@ Entry point and register setup for the neon_x8_loop that follows.
@
@ On entry (presumably the first three AAPCS arguments - TODO confirm
@ against the C prototype):
@   r0 = data base, r1 = byte stride between the eight data streams,
@   r2 = lookup table (LUT) of constants
@ After setup:
@   r3..r10 = data0..data7, where dataK = r0 + K * r1
@   r12     = LUT read pointer (advanced by the loop)
@   r11     = -(r1 >> 5); the loop adds 1 per iteration and exits when
@             r11 reaches zero, so it runs r1 / 32 times
@ NOTE(review): if r1 < 32 then r11 starts at zero and the count-up loop
@ would not terminate after one pass - callers presumably guarantee
@ r1 >= 32; confirm.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x8_f
|
||||
_neon_static_x8_f:
|
||||
#else
|
||||
.globl neon_static_x8_f
|
||||
neon_static_x8_f:
|
||||
#endif
|
||||
@ Save the core registers used below plus the return address, then d8-d15
@ (q4-q7), which the loop overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
@ r11 starts at zero so the later subtract leaves a negative trip count.
mov r11, #0
|
||||
@ Even-numbered pointers are derived first with a doubled stride, then
@ the odd-numbered ones by adding a single stride.
add r3, r0, #0 @ data0
|
||||
add r5, r0, r1, lsl #1 @ data2
|
||||
add r4, r0, r1 @ data1
|
||||
add r7, r5, r1, lsl #1 @ data4
|
||||
add r6, r5, r1 @ data3
|
||||
add r9, r7, r1, lsl #1 @ data6
|
||||
add r8, r7, r1 @ data5
|
||||
add r10, r9, r1 @ data7
|
||||
add r12, r2, #0 @ LUT
|
||||
|
||||
@ r11 = 0 - (r1 >> 5): negative iteration count, counted up to zero.
sub r11, r11, r1, lsr #5
|
||||
neon_x8_loop:
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
vld1.32 {q10,q11}, [r5, :128]
|
||||
adds r11, r11, #1
|
||||
vmul.f32 q12, q15, q2
|
||||
vmul.f32 q8, q14, q3
|
||||
vmul.f32 q13, q14, q2
|
||||
vmul.f32 q9, q10, q3
|
||||
vmul.f32 q1, q10, q2
|
||||
vmul.f32 q0, q11, q2
|
||||
vmul.f32 q14, q11, q3
|
||||
vmul.f32 q15, q15, q3
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q10, q12, q8
|
||||
vadd.f32 q11, q0, q9
|
||||
vadd.f32 q8, q15, q13
|
||||
vld1.32 {q12,q13}, [r4, :128]
|
||||
vsub.f32 q9, q1, q14
|
||||
vsub.f32 q15, q11, q10
|
||||
vsub.f32 q14, q9, q8
|
||||
vadd.f32 q4, q12, q15 @
|
||||
vsub.f32 q6, q12, q15 @
|
||||
vsub.f32 q5, q13, q14 @
|
||||
vadd.f32 q7, q13, q14 @
|
||||
vld1.32 {q14,q15}, [r9, :128]
|
||||
vld1.32 {q12,q13}, [r7, :128]
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q0, q14, q3
|
||||
vst1.32 {q4,q5}, [r4, :128]
|
||||
vmul.f32 q14, q15, q3
|
||||
vmul.f32 q4, q15, q2
|
||||
vadd.f32 q15, q9, q8
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
vmul.f32 q8, q12, q3
|
||||
vmul.f32 q5, q13, q3
|
||||
vmul.f32 q12, q12, q2
|
||||
vmul.f32 q9, q13, q2
|
||||
vadd.f32 q14, q14, q1
|
||||
vsub.f32 q13, q4, q0
|
||||
vadd.f32 q0, q9, q8
|
||||
vld1.32 {q8,q9}, [r3, :128]
|
||||
vadd.f32 q1, q11, q10
|
||||
vsub.f32 q12, q12, q5
|
||||
vadd.f32 q11, q8, q15
|
||||
vsub.f32 q8, q8, q15
|
||||
vadd.f32 q2, q12, q14
|
||||
vsub.f32 q10, q0, q13
|
||||
vadd.f32 q15, q0, q13
|
||||
vadd.f32 q13, q9, q1
|
||||
vsub.f32 q9, q9, q1
|
||||
vsub.f32 q12, q12, q14
|
||||
vadd.f32 q0, q11, q2
|
||||
vadd.f32 q1, q13, q15
|
||||
vsub.f32 q4, q11, q2
|
||||
vadd.f32 q2, q8, q10 @
|
||||
vsub.f32 q3, q9, q12 @
|
||||
vst1.32 {q0,q1}, [r3, :128]!
|
||||
vsub.f32 q5, q13, q15
|
||||
vld1.32 {q14,q15}, [r10, :128]
|
||||
vadd.f32 q7, q9, q12 @
|
||||
vld1.32 {q12,q13}, [r8, :128]
|
||||
vst1.32 {q2,q3}, [r5, :128]!
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q6, q8, q10 @
|
||||
vmul.f32 q8, q14, q2
|
||||
vst1.32 {q4,q5}, [r7, :128]!
|
||||
vmul.f32 q10, q15, q3
|
||||
vmul.f32 q9, q13, q3
|
||||
vmul.f32 q11, q12, q2
|
||||
vmul.f32 q14, q14, q3
|
||||
vst1.32 {q6,q7}, [r9, :128]!
|
||||
vmul.f32 q15, q15, q2
|
||||
vmul.f32 q12, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vadd.f32 q10, q10, q8
|
||||
vsub.f32 q11, q11, q9
|
||||
vld1.32 {q8,q9}, [r4, :128]
|
||||
vsub.f32 q14, q15, q14
|
||||
vadd.f32 q15, q13, q12
|
||||
vadd.f32 q13, q11, q10
|
||||
vadd.f32 q12, q15, q14
|
||||
vsub.f32 q15, q15, q14
|
||||
vsub.f32 q14, q11, q10
|
||||
vld1.32 {q10,q11}, [r6, :128]
|
||||
vadd.f32 q0, q8, q13
|
||||
vadd.f32 q1, q9, q12
|
||||
vadd.f32 q2, q10, q15 @
|
||||
vsub.f32 q3, q11, q14 @
|
||||
vsub.f32 q4, q8, q13
|
||||
vst1.32 {q0,q1}, [r4, :128]!
|
||||
vsub.f32 q5, q9, q12
|
||||
vsub.f32 q6, q10, q15 @
|
||||
vst1.32 {q2,q3}, [r6, :128]!
|
||||
vadd.f32 q7, q11, q14 @
|
||||
vst1.32 {q4,q5}, [r8, :128]!
|
||||
vst1.32 {q6,q7}, [r10, :128]!
|
||||
bne neon_x8_loop
|
||||
|
||||
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
@ neon_static_x8_t_f
@
@ Entry point and register setup for the neon_x8_t_loop that follows.
@ The setup is instruction-for-instruction the same as neon_static_x8_f;
@ the visible difference is in the loop, which writes its results with
@ vst2.32 (two-way interleaving stores) where neon_x8_loop uses vst1.32.
@
@ On entry (presumably the first three AAPCS arguments - TODO confirm
@ against the C prototype):
@   r0 = data base, r1 = byte stride between the eight data streams,
@   r2 = lookup table (LUT) of constants
@ After setup:
@   r3..r10 = data0..data7, where dataK = r0 + K * r1
@   r12     = LUT read pointer (advanced by the loop)
@   r11     = -(r1 >> 5); the loop adds 1 per iteration and exits when
@             r11 reaches zero, so it runs r1 / 32 times
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x8_t_f
|
||||
_neon_static_x8_t_f:
|
||||
#else
|
||||
.globl neon_static_x8_t_f
|
||||
neon_static_x8_t_f:
|
||||
#endif
|
||||
@ Save the core registers used below plus the return address, then d8-d15
@ (q4-q7), which the loop overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
@ r11 starts at zero so the later subtract leaves a negative trip count.
mov r11, #0
|
||||
@ Even-numbered pointers are derived first with a doubled stride, then
@ the odd-numbered ones by adding a single stride.
add r3, r0, #0 @ data0
|
||||
add r5, r0, r1, lsl #1 @ data2
|
||||
add r4, r0, r1 @ data1
|
||||
add r7, r5, r1, lsl #1 @ data4
|
||||
add r6, r5, r1 @ data3
|
||||
add r9, r7, r1, lsl #1 @ data6
|
||||
add r8, r7, r1 @ data5
|
||||
add r10, r9, r1 @ data7
|
||||
add r12, r2, #0 @ LUT
|
||||
|
||||
@ r11 = 0 - (r1 >> 5): negative iteration count, counted up to zero.
sub r11, r11, r1, lsr #5
|
||||
neon_x8_t_loop:
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
vld1.32 {q10,q11}, [r5, :128]
|
||||
adds r11, r11, #1
|
||||
vmul.f32 q12, q15, q2
|
||||
vmul.f32 q8, q14, q3
|
||||
vmul.f32 q13, q14, q2
|
||||
vmul.f32 q9, q10, q3
|
||||
vmul.f32 q1, q10, q2
|
||||
vmul.f32 q0, q11, q2
|
||||
vmul.f32 q14, q11, q3
|
||||
vmul.f32 q15, q15, q3
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q10, q12, q8
|
||||
vadd.f32 q11, q0, q9
|
||||
vadd.f32 q8, q15, q13
|
||||
vld1.32 {q12,q13}, [r4, :128]
|
||||
vsub.f32 q9, q1, q14
|
||||
vsub.f32 q15, q11, q10
|
||||
vsub.f32 q14, q9, q8
|
||||
vadd.f32 q4, q12, q15 @
|
||||
vsub.f32 q6, q12, q15 @
|
||||
vsub.f32 q5, q13, q14 @
|
||||
vadd.f32 q7, q13, q14 @
|
||||
vld1.32 {q14,q15}, [r9, :128]
|
||||
vld1.32 {q12,q13}, [r7, :128]
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q0, q14, q3
|
||||
vst1.32 {q4,q5}, [r4, :128]
|
||||
vmul.f32 q14, q15, q3
|
||||
vmul.f32 q4, q15, q2
|
||||
vadd.f32 q15, q9, q8
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
vmul.f32 q8, q12, q3
|
||||
vmul.f32 q5, q13, q3
|
||||
vmul.f32 q12, q12, q2
|
||||
vmul.f32 q9, q13, q2
|
||||
vadd.f32 q14, q14, q1
|
||||
vsub.f32 q13, q4, q0
|
||||
vadd.f32 q0, q9, q8
|
||||
vld1.32 {q8,q9}, [r3, :128]
|
||||
vadd.f32 q1, q11, q10
|
||||
vsub.f32 q12, q12, q5
|
||||
vadd.f32 q11, q8, q15
|
||||
vsub.f32 q8, q8, q15
|
||||
vadd.f32 q2, q12, q14
|
||||
vsub.f32 q10, q0, q13
|
||||
vadd.f32 q15, q0, q13
|
||||
vadd.f32 q13, q9, q1
|
||||
vsub.f32 q9, q9, q1
|
||||
vsub.f32 q12, q12, q14
|
||||
vadd.f32 q0, q11, q2
|
||||
vadd.f32 q1, q13, q15
|
||||
vsub.f32 q4, q11, q2
|
||||
vadd.f32 q2, q8, q10 @
|
||||
vsub.f32 q3, q9, q12 @
|
||||
vst2.32 {q0,q1}, [r3, :128]!
|
||||
vsub.f32 q5, q13, q15
|
||||
vld1.32 {q14,q15}, [r10, :128]
|
||||
vadd.f32 q7, q9, q12 @
|
||||
vld1.32 {q12,q13}, [r8, :128]
|
||||
vst2.32 {q2,q3}, [r5, :128]!
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q6, q8, q10 @
|
||||
vmul.f32 q8, q14, q2
|
||||
vst2.32 {q4,q5}, [r7, :128]!
|
||||
vmul.f32 q10, q15, q3
|
||||
vmul.f32 q9, q13, q3
|
||||
vmul.f32 q11, q12, q2
|
||||
vmul.f32 q14, q14, q3
|
||||
vst2.32 {q6,q7}, [r9, :128]!
|
||||
vmul.f32 q15, q15, q2
|
||||
vmul.f32 q12, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vadd.f32 q10, q10, q8
|
||||
vsub.f32 q11, q11, q9
|
||||
vld1.32 {q8,q9}, [r4, :128]
|
||||
vsub.f32 q14, q15, q14
|
||||
vadd.f32 q15, q13, q12
|
||||
vadd.f32 q13, q11, q10
|
||||
vadd.f32 q12, q15, q14
|
||||
vsub.f32 q15, q15, q14
|
||||
vsub.f32 q14, q11, q10
|
||||
vld1.32 {q10,q11}, [r6, :128]
|
||||
vadd.f32 q0, q8, q13
|
||||
vadd.f32 q1, q9, q12
|
||||
vadd.f32 q2, q10, q15 @
|
||||
vsub.f32 q3, q11, q14 @
|
||||
vsub.f32 q4, q8, q13
|
||||
vst2.32 {q0,q1}, [r4, :128]!
|
||||
vsub.f32 q5, q9, q12
|
||||
vsub.f32 q6, q10, q15 @
|
||||
vst2.32 {q2,q3}, [r6, :128]!
|
||||
vadd.f32 q7, q11, q14 @
|
||||
vst2.32 {q4,q5}, [r8, :128]!
|
||||
vst2.32 {q6,q7}, [r10, :128]!
|
||||
bne neon_x8_t_loop
|
||||
|
||||
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
|
@ -1,955 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
@ neon_static_e_i
@
@ Entry point and register setup. After this setup the routine runs, in order:
@   _neon_ee_loop   (iteration count loaded from [plan, #28], p->i0)
@   one straight-line pass using constants loaded via [plan, #12]
@   _neon_oo_loop   (iteration count loaded from [plan, #32], p->i1)
@   _neon_ee_loop2  (iteration count loaded from [plan, #32], p->i1)
@ NOTE(review): the _i suffix presumably marks the inverse-transform
@ variant - confirm. In the loops, the lines ending in a bare @ are the
@ ones whose vadd/vsub choice is opposite to the corresponding _f code.
@
@ On entry (presumably the first three AAPCS arguments - TODO confirm
@ against the C prototype):
@   r0 = plan pointer (p), r1 = input base, r2 = output base
@ After setup:
@   r0  = output base (stores go to r0 + offset * 4)
@   r1  = plan pointer
@   r3, r7, r5, r10, r4, r8, r6, r9 = eight read pointers, each one
@         [p, #40] bytes beyond the previous one, in that order
@   r12 = pointer loaded from [p, #0]; the loops read 32-bit output
@         offsets from it with post-increment
@   r11 = loop counter, q8 (d16/d17) = constants loaded from p->ee_ws
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_e_i
|
||||
_neon_static_e_i:
|
||||
#else
|
||||
.globl neon_static_e_i
|
||||
neon_static_e_i:
|
||||
#endif
|
||||
@ Save the core registers used below plus the return address, then d8-d15
@ (q4-q7), which the loops overwrite.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
@ lr is free after the push; it is used here as the byte distance between
@ consecutive read pointers.
ldr lr, [r0, #40] @ this is p->N
|
||||
@ Build the eight read pointers: r3 is the input base, each following
@ pointer is the previous one plus lr.
add r3, r1, #0
|
||||
add r7, r1, lr
|
||||
add r5, r7, lr
|
||||
add r10, r5, lr
|
||||
add r4, r10, lr
|
||||
add r8, r4, lr
|
||||
add r6, r8, lr
|
||||
add r9, r6, lr
|
||||
@ First word of the plan: pointer to the table of output offsets.
ldr r12, [r0]
|
||||
@ Move the plan pointer to r1 and the third argument to r0 so that r2 can
@ be reused as a scratch/output pointer inside the loops.
add r1, r0, #0
|
||||
add r0, r2, #0
|
||||
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||
ldr r11, [r1, #28] @ this is p->i0
|
||||
|
||||
@ Load four floats of constants into q8; the :128 qualifier asserts that
@ r2 is 16-byte aligned.
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_loop:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vadd.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vsub.f32 d31, d5, d2 @
|
||||
vsub.f32 d28, d4, d3 @
|
||||
vadd.f32 d30, d4, d3 @
|
||||
vadd.f32 d5, d19, d14 @
|
||||
vadd.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vsub.f32 d6, d30, d27 @
|
||||
vsub.f32 d4, d18, d15 @
|
||||
vsub.f32 d13, d19, d14 @
|
||||
vadd.f32 d12, d18, d15 @
|
||||
vsub.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vadd.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_loop
|
||||
|
||||
ldr r11, [r1, #12]
|
||||
vld2.32 {q9}, [r5, :128]! @tag2
|
||||
vld2.32 {q13}, [r3, :128]! @tag0
|
||||
vld2.32 {q12}, [r4, :128]! @tag1
|
||||
vld2.32 {q0}, [r7, :128]! @tag4
|
||||
vsub.f32 q11, q13, q12
|
||||
vld2.32 {q8}, [r6, :128]! @tag3
|
||||
vadd.f32 q12, q13, q12
|
||||
vsub.f32 q10, q9, q8
|
||||
vadd.f32 q8, q9, q8
|
||||
vadd.f32 q9, q12, q8
|
||||
vadd.f32 d9, d23, d20 @
|
||||
vsub.f32 d11, d23, d20 @
|
||||
vsub.f32 q8, q12, q8
|
||||
vsub.f32 d8, d22, d21 @
|
||||
vadd.f32 d10, d22, d21 @
|
||||
ldr r2, [r12], #4
|
||||
vld1.32 {d20, d21}, [r11, :128]
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q9, q4
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q8, q5
|
||||
add lr, r0, lr, lsl #2
|
||||
vswp d9,d10
|
||||
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
|
||||
vld2.32 {q13}, [r10, :128]! @tag7
|
||||
vld2.32 {q15}, [r9, :128]! @tag6
|
||||
vld2.32 {q11}, [r8, :128]! @tag5
|
||||
vsub.f32 q14, q15, q13
|
||||
vsub.f32 q12, q0, q11
|
||||
vadd.f32 q11, q0, q11
|
||||
vadd.f32 q13, q15, q13
|
||||
vadd.f32 d13, d29, d24 @
|
||||
vadd.f32 q15, q13, q11
|
||||
vsub.f32 d12, d28, d25 @
|
||||
vsub.f32 d15, d29, d24 @
|
||||
vadd.f32 d14, d28, d25 @
|
||||
vtrn.32 q15, q6
|
||||
vsub.f32 q15, q13, q11
|
||||
vtrn.32 q15, q7
|
||||
vswp d13, d14
|
||||
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
|
||||
vtrn.32 q13, q14
|
||||
vtrn.32 q11, q12
|
||||
vmul.f32 d24, d26, d21
|
||||
vmul.f32 d28, d27, d20
|
||||
vmul.f32 d25, d26, d20
|
||||
vmul.f32 d26, d27, d21
|
||||
vmul.f32 d27, d22, d21
|
||||
vmul.f32 d30, d23, d20
|
||||
vmul.f32 d29, d23, d21
|
||||
vmul.f32 d22, d22, d20
|
||||
vsub.f32 d21, d28, d24
|
||||
vadd.f32 d20, d26, d25
|
||||
vadd.f32 d25, d30, d27
|
||||
vsub.f32 d24, d22, d29
|
||||
vadd.f32 q11, q12, q10
|
||||
vsub.f32 q10, q12, q10
|
||||
vadd.f32 q0, q9, q11
|
||||
vsub.f32 q2, q9, q11
|
||||
vadd.f32 d3, d17, d20 @
|
||||
vsub.f32 d7, d17, d20 @
|
||||
vsub.f32 d2, d16, d21 @
|
||||
vadd.f32 d6, d16, d21 @
|
||||
vswp d1, d2
|
||||
vswp d5, d6
|
||||
vstmia r2!, {q0-q3}
|
||||
|
||||
add r2, r7, #0
|
||||
add r7, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r8, #0
|
||||
add r8, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_oo_loop_exit
|
||||
_neon_oo_loop:
|
||||
vld2.32 {q8}, [r6, :128]!
|
||||
vld2.32 {q9}, [r5, :128]!
|
||||
vld2.32 {q10}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vadd.f32 q11, q9, q8
|
||||
vsub.f32 q8, q9, q8
|
||||
vsub.f32 q9, q13, q10
|
||||
vadd.f32 q12, q13, q10
|
||||
subs r11, r11, #1
|
||||
vld2.32 {q10}, [r7, :128]!
|
||||
vld2.32 {q13}, [r9, :128]!
|
||||
vsub.f32 q2, q12, q11
|
||||
vsub.f32 d7, d19, d16 @
|
||||
vadd.f32 d3, d19, d16 @
|
||||
vadd.f32 d6, d18, d17 @
|
||||
vsub.f32 d2, d18, d17 @
|
||||
vld2.32 {q9}, [r8, :128]!
|
||||
vld2.32 {q8}, [r10, :128]!
|
||||
vadd.f32 q0, q12, q11
|
||||
vadd.f32 q11, q13, q8
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 q8, q13, q8
|
||||
vsub.f32 q9, q10, q9
|
||||
vsub.f32 q6, q12, q11
|
||||
vadd.f32 q4, q12, q11
|
||||
vtrn.32 q0, q2
|
||||
ldr r2, [r12], #4
|
||||
vsub.f32 d15, d19, d16 @
|
||||
ldr lr, [r12], #4
|
||||
vadd.f32 d11, d19, d16 @
|
||||
vadd.f32 d14, d18, d17 @
|
||||
vsub.f32 d10, d18, d17 @
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q1, q3
|
||||
add lr, r0, lr, lsl #2
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_oo_loop
|
||||
_neon_oo_loop_exit:
|
||||
|
||||
add r2, r3, #0
|
||||
add r3, r7, #0
|
||||
add r7, r2, #0
|
||||
add r2, r4, #0
|
||||
add r4, r8, #0
|
||||
add r8, r2, #0
|
||||
add r2, r5, #0
|
||||
add r5, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r6, #0
|
||||
add r6, r10, #0
|
||||
add r10, r2, #0
|
||||
add r2, r9, #0
|
||||
add r9, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r2, [r1, #16]
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
cmp r11, #0
|
||||
beq _neon_ee_loop2_exit
|
||||
|
||||
vld1.32 {d16, d17}, [r2, :128]
|
||||
_neon_ee_loop2:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vadd.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vsub.f32 d31, d5, d2 @
|
||||
vsub.f32 d28, d4, d3 @
|
||||
vadd.f32 d30, d4, d3 @
|
||||
vadd.f32 d5, d19, d14 @
|
||||
vadd.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vsub.f32 d6, d30, d27 @
|
||||
vsub.f32 d4, d18, d15 @
|
||||
vsub.f32 d13, d19, d14 @
|
||||
vadd.f32 d12, d18, d15 @
|
||||
vsub.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vadd.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_loop2
|
||||
_neon_ee_loop2_exit:
|
||||
|
||||
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
|
||||
|
||||
|
||||
@ neon_static_o_i (exported as _neon_static_o_i on Apple targets).
@ Register use on entry, as read from the code below:
@   r0 - pointer to a plan structure; words at offsets 0, 8, 16, 28, 32 and 40
@        are loaded from it
@   r1 - base address of the data that is loaded
@   r2 - base address that the computed store addresses are added to
@ NOTE(review): the (plan, input, output) meaning of r0/r1/r2 is inferred from
@ register usage only -- confirm against the C prototype.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_o_i
|
||||
_neon_static_o_i:
|
||||
#else
|
||||
.globl neon_static_o_i
|
||||
neon_static_o_i:
|
||||
#endif
|
||||
@ save the core registers overwritten below and d8-d15; both sets are restored before returning
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
ldr lr, [r0, #40] @ this is p->N
|
||||
@ eight load pointers, each lr bytes after the previous one, in the order r3, r7, r5, r10, r4, r8, r6, r9
add r3, r1, #0
|
||||
add r7, r1, lr
|
||||
add r5, r7, lr
|
||||
add r10, r5, lr
|
||||
add r4, r10, lr
|
||||
add r8, r4, lr
|
||||
add r6, r8, lr
|
||||
add r9, r6, lr
|
||||
@ r12 walks a table of 32-bit words; every section below reads two entries and forms store addresses as r0 + (entry << 2)
ldr r12, [r0]
|
||||
@ from here on r1 holds the plan pointer and r0 holds the store base that arrived in r2
add r1, r0, #0
|
||||
add r0, r2, #0
|
||||
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||
ldr r11, [r1, #28] @ this is p->i0
|
||||
|
||||
@ d16/d17 are loaded once here and used as the multiplicands of the vmul.f32 group in the loop
vld1.32 {d16, d17}, [r2, :128]
|
||||
@ first loop: r11 (p->i0) counts down to zero; the flags set by subs are consumed by the bne at the bottom
_neon_ee_o_loop:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vadd.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vsub.f32 d31, d5, d2 @
|
||||
vsub.f32 d28, d4, d3 @
|
||||
vadd.f32 d30, d4, d3 @
|
||||
vadd.f32 d5, d19, d14 @
|
||||
vadd.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vsub.f32 d6, d30, d27 @
|
||||
vsub.f32 d4, d18, d15 @
|
||||
vsub.f32 d13, d19, d14 @
|
||||
vadd.f32 d12, d18, d15 @
|
||||
vsub.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vadd.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_o_loop
|
||||
|
||||
@ exchange r7 <-> r9 and r8 <-> r10, using r2 as scratch
add r2, r7, #0
|
||||
add r7, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r8, #0
|
||||
add r8, r10, #0
|
||||
add r10, r2, #0
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
@ the second loop is skipped entirely when p->i1 is zero
cmp r11, #0
|
||||
beq _neon_oo_o_loop_exit
|
||||
@ second loop: r11 (p->i1) counts down to zero; no multiplies, only add/sub, transpose and store
_neon_oo_o_loop:
|
||||
vld2.32 {q8}, [r6, :128]!
|
||||
vld2.32 {q9}, [r5, :128]!
|
||||
vld2.32 {q10}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vadd.f32 q11, q9, q8
|
||||
vsub.f32 q8, q9, q8
|
||||
vsub.f32 q9, q13, q10
|
||||
vadd.f32 q12, q13, q10
|
||||
subs r11, r11, #1
|
||||
vld2.32 {q10}, [r7, :128]!
|
||||
vld2.32 {q13}, [r9, :128]!
|
||||
vsub.f32 q2, q12, q11
|
||||
vsub.f32 d7, d19, d16 @
|
||||
vadd.f32 d3, d19, d16 @
|
||||
vadd.f32 d6, d18, d17 @
|
||||
vsub.f32 d2, d18, d17 @
|
||||
vld2.32 {q9}, [r8, :128]!
|
||||
vld2.32 {q8}, [r10, :128]!
|
||||
vadd.f32 q0, q12, q11
|
||||
vadd.f32 q11, q13, q8
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 q8, q13, q8
|
||||
vsub.f32 q9, q10, q9
|
||||
vsub.f32 q6, q12, q11
|
||||
vadd.f32 q4, q12, q11
|
||||
vtrn.32 q0, q2
|
||||
ldr r2, [r12], #4
|
||||
vsub.f32 d15, d19, d16 @
|
||||
ldr lr, [r12], #4
|
||||
vadd.f32 d11, d19, d16 @
|
||||
vadd.f32 d14, d18, d17 @
|
||||
vsub.f32 d10, d18, d17 @
|
||||
add r2, r0, r2, lsl #2
|
||||
vtrn.32 q1, q3
|
||||
add lr, r0, lr, lsl #2
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_oo_o_loop
|
||||
_neon_oo_o_loop_exit:
|
||||
|
||||
@ straight-line section, executed once: r11 receives the word at plan offset 8 and is used
@ further down as the address from which the d24/d25 multiplicands are loaded
ldr r11, [r1, #8]
|
||||
vld1.32 {q8}, [r5, :128]!
|
||||
vld1.32 {q10}, [r6, :128]!
|
||||
vld2.32 {q11}, [r4, :128]!
|
||||
vld2.32 {q13}, [r3, :128]!
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vorr d25, d17, d17
|
||||
vorr d24, d20, d20
|
||||
vorr d20, d16, d16
|
||||
vsub.f32 q9, q13, q11
|
||||
vadd.f32 q11, q13, q11
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 d24, d25
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 d20, d21
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q8, q10, q12
|
||||
add lr, r0, lr, lsl #2
|
||||
vadd.f32 q10, q10, q12
|
||||
vadd.f32 q0, q11, q10
|
||||
vadd.f32 d25, d19, d16 @
|
||||
vsub.f32 d27, d19, d16 @
|
||||
vsub.f32 q1, q11, q10
|
||||
vsub.f32 d24, d18, d17 @
|
||||
vadd.f32 d26, d18, d17 @
|
||||
vtrn.32 q0, q12
|
||||
vtrn.32 q1, q13
|
||||
vld1.32 {d24, d25}, [r11, :128]
|
||||
vswp d1, d2
|
||||
vst1.32 {q0, q1}, [r2, :128]!
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
vadd.f32 q1, q0, q15
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vsub.f32 q15, q0, q15
|
||||
vsub.f32 q0, q14, q13
|
||||
vadd.f32 q3, q14, q13
|
||||
vadd.f32 q2, q3, q1
|
||||
vadd.f32 d29, d1, d30 @
|
||||
vsub.f32 d27, d1, d30 @
|
||||
vsub.f32 q3, q3, q1
|
||||
vsub.f32 d28, d0, d31 @
|
||||
vadd.f32 d26, d0, d31 @
|
||||
vtrn.32 q2, q14
|
||||
vtrn.32 q3, q13
|
||||
vswp d5, d6
|
||||
vst1.32 {q2, q3}, [r2, :128]!
|
||||
vtrn.32 q11, q9
|
||||
vtrn.32 q10, q8
|
||||
vmul.f32 d20, d18, d25
|
||||
vmul.f32 d22, d19, d24
|
||||
vmul.f32 d21, d19, d25
|
||||
vmul.f32 d18, d18, d24
|
||||
vmul.f32 d19, d16, d25
|
||||
vmul.f32 d30, d17, d24
|
||||
vmul.f32 d23, d16, d24
|
||||
vmul.f32 d24, d17, d25
|
||||
vadd.f32 d17, d22, d20
|
||||
vsub.f32 d16, d18, d21
|
||||
vsub.f32 d21, d30, d19
|
||||
vadd.f32 d20, d24, d23
|
||||
vadd.f32 q9, q8, q10
|
||||
vsub.f32 q8, q8, q10
|
||||
vadd.f32 q4, q14, q9
|
||||
vsub.f32 q6, q14, q9
|
||||
vadd.f32 d11, d27, d16 @
|
||||
vsub.f32 d15, d27, d16 @
|
||||
vsub.f32 d10, d26, d17 @
|
||||
vadd.f32 d14, d26, d17 @
|
||||
vswp d9, d10
|
||||
vswp d13, d14
|
||||
vstmia lr!, {q4-q7}
|
||||
|
||||
|
||||
@ exchange r3 <-> r7, r4 <-> r8, r5 <-> r9, r6 <-> r10 and then r9 <-> r10, using r2 as scratch
add r2, r3, #0
|
||||
add r3, r7, #0
|
||||
add r7, r2, #0
|
||||
add r2, r4, #0
|
||||
add r4, r8, #0
|
||||
add r8, r2, #0
|
||||
add r2, r5, #0
|
||||
add r5, r9, #0
|
||||
add r9, r2, #0
|
||||
add r2, r6, #0
|
||||
add r6, r10, #0
|
||||
add r10, r2, #0
|
||||
add r2, r9, #0
|
||||
add r9, r10, #0
|
||||
add r10, r2, #0
|
||||
@ r2 is reloaded from plan offset 16 (same offset as the p->ee_ws load at the top)
ldr r2, [r1, #16]
|
||||
ldr r11, [r1, #32] @ this is p->i1
|
||||
@ the final loop is skipped entirely when p->i1 is zero
cmp r11, #0
|
||||
beq _neon_ee_o_loop2_exit
|
||||
|
||||
vld1.32 {d16, d17}, [r2, :128]
|
||||
@ final loop: same instruction sequence as _neon_ee_o_loop, with r11 (p->i1) counting down to zero
_neon_ee_o_loop2:
|
||||
vld2.32 {q15}, [r10, :128]!
|
||||
vld2.32 {q13}, [r8, :128]!
|
||||
vld2.32 {q14}, [r7, :128]!
|
||||
vld2.32 {q9}, [r4, :128]!
|
||||
vld2.32 {q10}, [r3, :128]!
|
||||
vld2.32 {q11}, [r6, :128]!
|
||||
vld2.32 {q12}, [r5, :128]!
|
||||
vsub.f32 q1, q14, q13
|
||||
vld2.32 {q0}, [r9, :128]!
|
||||
subs r11, r11, #1
|
||||
vsub.f32 q2, q0, q15
|
||||
vadd.f32 q0, q0, q15
|
||||
vmul.f32 d10, d2, d17
|
||||
vmul.f32 d11, d3, d16
|
||||
vmul.f32 d12, d3, d17
|
||||
vmul.f32 d6, d4, d17
|
||||
vmul.f32 d7, d5, d16
|
||||
vmul.f32 d8, d4, d16
|
||||
vmul.f32 d9, d5, d17
|
||||
vmul.f32 d13, d2, d16
|
||||
vsub.f32 d7, d7, d6
|
||||
vadd.f32 d11, d11, d10
|
||||
vsub.f32 q1, q12, q11
|
||||
vsub.f32 q2, q10, q9
|
||||
vadd.f32 d6, d9, d8
|
||||
vadd.f32 q4, q14, q13
|
||||
vadd.f32 q11, q12, q11
|
||||
vadd.f32 q12, q10, q9
|
||||
vsub.f32 d10, d13, d12
|
||||
vsub.f32 q7, q4, q0
|
||||
vsub.f32 q9, q12, q11
|
||||
vsub.f32 q13, q5, q3
|
||||
vadd.f32 d29, d5, d2 @
|
||||
vadd.f32 q5, q5, q3
|
||||
vadd.f32 q10, q4, q0
|
||||
vadd.f32 q11, q12, q11
|
||||
vsub.f32 d31, d5, d2 @
|
||||
vsub.f32 d28, d4, d3 @
|
||||
vadd.f32 d30, d4, d3 @
|
||||
vadd.f32 d5, d19, d14 @
|
||||
vadd.f32 d7, d31, d26 @
|
||||
vadd.f32 q1, q14, q5
|
||||
vadd.f32 q0, q11, q10
|
||||
vsub.f32 d6, d30, d27 @
|
||||
vsub.f32 d4, d18, d15 @
|
||||
vsub.f32 d13, d19, d14 @
|
||||
vadd.f32 d12, d18, d15 @
|
||||
vsub.f32 d15, d31, d26 @
|
||||
ldr r2, [r12], #4
|
||||
vtrn.32 q1, q3
|
||||
ldr lr, [r12], #4
|
||||
vtrn.32 q0, q2
|
||||
add r2, r0, r2, lsl #2
|
||||
vsub.f32 q4, q11, q10
|
||||
add lr, r0, lr, lsl #2
|
||||
vsub.f32 q5, q14, q5
|
||||
vadd.f32 d14, d30, d27 @
|
||||
vst2.32 {q0,q1}, [r2, :128]!
|
||||
vst2.32 {q2,q3}, [lr, :128]!
|
||||
vtrn.32 q4, q6
|
||||
vtrn.32 q5, q7
|
||||
vst2.32 {q4,q5}, [r2, :128]!
|
||||
vst2.32 {q6,q7}, [lr, :128]!
|
||||
bne _neon_ee_o_loop2
|
||||
_neon_ee_o_loop2_exit:
|
||||
|
||||
@ restore d8-d15 and the saved core registers; loading the saved lr into pc returns to the caller
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
@ neon_static_x4_i (exported as _neon_static_x4_i on Apple targets).
@ Loads four 32-byte groups from r0, r0 + 2*r1, r0 + 4*r1 and r0 + 6*r1, combines
@ them with the 32 bytes loaded from r2, and stores the results back to the same
@ four addresses. No loop: exactly one pass per call.
@ NOTE(review): r0 = data, r1 = byte size/stride, r2 = twiddle table is inferred
@ from register usage only -- confirm against the C prototype.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x4_i
|
||||
_neon_static_x4_i:
|
||||
#else
|
||||
.globl neon_static_x4_i
|
||||
neon_static_x4_i:
|
||||
#endif
|
||||
@ add r3, r0, #0
|
||||
@ save the core registers overwritten below and d8-d15 (q4-q7 are used as result registers)
push {r4, r5, r6, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
|
||||
@ r4 = r0 + 2*r1, r5 = r0 + 4*r1, r6 = r4 + 4*r1 = r0 + 6*r1; loads are interleaved with the address arithmetic
vld1.32 {q8,q9}, [r0, :128]
|
||||
add r4, r0, r1, lsl #1
|
||||
vld1.32 {q10,q11}, [r4, :128]
|
||||
add r5, r0, r1, lsl #2
|
||||
vld1.32 {q12,q13}, [r5, :128]
|
||||
add r6, r4, r1, lsl #2
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
@ q2/q3: the multiplicands applied to the r5 and r6 groups below
vld1.32 {q2,q3}, [r2, :128]
|
||||
|
||||
vmul.f32 q0, q13, q3
|
||||
vmul.f32 q5, q12, q2
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q4, q14, q3
|
||||
vmul.f32 q14, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vmul.f32 q12, q15, q3
|
||||
vmul.f32 q2, q15, q2
|
||||
vsub.f32 q0, q5, q0
|
||||
vadd.f32 q13, q13, q14
|
||||
vadd.f32 q12, q12, q1
|
||||
vsub.f32 q1, q2, q4
|
||||
vadd.f32 q15, q0, q12
|
||||
vsub.f32 q12, q0, q12
|
||||
vadd.f32 q14, q13, q1
|
||||
vsub.f32 q13, q13, q1
|
||||
vadd.f32 q0, q8, q15
|
||||
vadd.f32 q1, q9, q14
|
||||
vsub.f32 q2, q10, q13 @
|
||||
vsub.f32 q4, q8, q15
|
||||
vadd.f32 q3, q11, q12 @
|
||||
vst1.32 {q0,q1}, [r0, :128]
|
||||
vsub.f32 q5, q9, q14
|
||||
vadd.f32 q6, q10, q13 @
|
||||
vsub.f32 q7, q11, q12 @
|
||||
vst1.32 {q2,q3}, [r4, :128]
|
||||
vst1.32 {q4,q5}, [r5, :128]
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
@ restore d8-d15 and the saved core registers; loading the saved lr into pc returns to the caller
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, pc}
|
||||
|
||||
|
||||
|
||||
@ neon_static_x8_i (exported as _neon_static_x8_i on Apple targets).
@ Works in place on eight data pointers data0..data7 = r0 + k*r1 (k = 0..7) and
@ reads its multiplicands from the table at r2, 96 bytes per loop iteration.
@ NOTE(review): r0 = data, r1 = byte size/stride, r2 = twiddle LUT is inferred
@ from register usage and the existing inline comments -- confirm against the C prototype.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x8_i
|
||||
_neon_static_x8_i:
|
||||
#else
|
||||
.globl neon_static_x8_i
|
||||
neon_static_x8_i:
|
||||
#endif
|
||||
@ save the core registers overwritten below and d8-d15; both sets are restored before returning
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
mov r11, #0
|
||||
add r3, r0, #0 @ data0
|
||||
add r5, r0, r1, lsl #1 @ data2
|
||||
add r4, r0, r1 @ data1
|
||||
add r7, r5, r1, lsl #1 @ data4
|
||||
add r6, r5, r1 @ data3
|
||||
add r9, r7, r1, lsl #1 @ data6
|
||||
add r8, r7, r1 @ data5
|
||||
add r10, r9, r1 @ data7
|
||||
add r12, r2, #0 @ LUT
|
||||
|
||||
@ r11 = -(r1 >> 5): a negative counter that the loop increments until it reaches zero
@ NOTE(review): if r1 >> 5 is zero the counter starts at 0 and the first adds makes it 1,
@ so the bne below would not fall through -- callers presumably never pass r1 < 32; confirm
sub r11, r11, r1, lsr #5
|
||||
neon_x8_loop:
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
vld1.32 {q10,q11}, [r5, :128]
|
||||
@ flags set here are consumed by the bne at the bottom; no instruction in between writes the flags
adds r11, r11, #1
|
||||
vmul.f32 q12, q15, q2
|
||||
vmul.f32 q8, q14, q3
|
||||
vmul.f32 q13, q14, q2
|
||||
vmul.f32 q9, q10, q3
|
||||
vmul.f32 q1, q10, q2
|
||||
vmul.f32 q0, q11, q2
|
||||
vmul.f32 q14, q11, q3
|
||||
vmul.f32 q15, q15, q3
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q10, q12, q8
|
||||
vadd.f32 q11, q0, q9
|
||||
vadd.f32 q8, q15, q13
|
||||
vld1.32 {q12,q13}, [r4, :128]
|
||||
vsub.f32 q9, q1, q14
|
||||
vsub.f32 q15, q11, q10
|
||||
vsub.f32 q14, q9, q8
|
||||
vsub.f32 q4, q12, q15 @
|
||||
vadd.f32 q6, q12, q15 @
|
||||
vadd.f32 q5, q13, q14 @
|
||||
vsub.f32 q7, q13, q14 @
|
||||
vld1.32 {q14,q15}, [r9, :128]
|
||||
vld1.32 {q12,q13}, [r7, :128]
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q0, q14, q3
|
||||
@ intermediate results are written to data1/data3 without advancing the pointers; they are reloaded later in this iteration
vst1.32 {q4,q5}, [r4, :128]
|
||||
vmul.f32 q14, q15, q3
|
||||
vmul.f32 q4, q15, q2
|
||||
vadd.f32 q15, q9, q8
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
vmul.f32 q8, q12, q3
|
||||
vmul.f32 q5, q13, q3
|
||||
vmul.f32 q12, q12, q2
|
||||
vmul.f32 q9, q13, q2
|
||||
vadd.f32 q14, q14, q1
|
||||
vsub.f32 q13, q4, q0
|
||||
vadd.f32 q0, q9, q8
|
||||
vld1.32 {q8,q9}, [r3, :128]
|
||||
vadd.f32 q1, q11, q10
|
||||
vsub.f32 q12, q12, q5
|
||||
vadd.f32 q11, q8, q15
|
||||
vsub.f32 q8, q8, q15
|
||||
vadd.f32 q2, q12, q14
|
||||
vsub.f32 q10, q0, q13
|
||||
vadd.f32 q15, q0, q13
|
||||
vadd.f32 q13, q9, q1
|
||||
vsub.f32 q9, q9, q1
|
||||
vsub.f32 q12, q12, q14
|
||||
vadd.f32 q0, q11, q2
|
||||
vadd.f32 q1, q13, q15
|
||||
vsub.f32 q4, q11, q2
|
||||
vsub.f32 q2, q8, q10 @
|
||||
vadd.f32 q3, q9, q12 @
|
||||
@ the stores written with "!" advance their data pointer by 32 bytes for the next iteration
vst1.32 {q0,q1}, [r3, :128]!
|
||||
vsub.f32 q5, q13, q15
|
||||
vld1.32 {q14,q15}, [r10, :128]
|
||||
vsub.f32 q7, q9, q12 @
|
||||
vld1.32 {q12,q13}, [r8, :128]
|
||||
vst1.32 {q2,q3}, [r5, :128]!
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vadd.f32 q6, q8, q10 @
|
||||
vmul.f32 q8, q14, q2
|
||||
vst1.32 {q4,q5}, [r7, :128]!
|
||||
vmul.f32 q10, q15, q3
|
||||
vmul.f32 q9, q13, q3
|
||||
vmul.f32 q11, q12, q2
|
||||
vmul.f32 q14, q14, q3
|
||||
vst1.32 {q6,q7}, [r9, :128]!
|
||||
vmul.f32 q15, q15, q2
|
||||
vmul.f32 q12, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vadd.f32 q10, q10, q8
|
||||
vsub.f32 q11, q11, q9
|
||||
vld1.32 {q8,q9}, [r4, :128]
|
||||
vsub.f32 q14, q15, q14
|
||||
vadd.f32 q15, q13, q12
|
||||
vadd.f32 q13, q11, q10
|
||||
vadd.f32 q12, q15, q14
|
||||
vsub.f32 q15, q15, q14
|
||||
vsub.f32 q14, q11, q10
|
||||
vld1.32 {q10,q11}, [r6, :128]
|
||||
vadd.f32 q0, q8, q13
|
||||
vadd.f32 q1, q9, q12
|
||||
vsub.f32 q2, q10, q15 @
|
||||
vadd.f32 q3, q11, q14 @
|
||||
vsub.f32 q4, q8, q13
|
||||
vst1.32 {q0,q1}, [r4, :128]!
|
||||
vsub.f32 q5, q9, q12
|
||||
vadd.f32 q6, q10, q15 @
|
||||
vst1.32 {q2,q3}, [r6, :128]!
|
||||
vsub.f32 q7, q11, q14 @
|
||||
vst1.32 {q4,q5}, [r8, :128]!
|
||||
vst1.32 {q6,q7}, [r10, :128]!
|
||||
bne neon_x8_loop
|
||||
|
||||
@ restore d8-d15 and the saved core registers; loading the saved lr into pc returns to the caller
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
@ neon_static_x8_t_i (exported as _neon_static_x8_t_i on Apple targets).
@ Works in place on eight data pointers data0..data7 = r0 + k*r1 (k = 0..7) and
@ reads its multiplicands from the table at r2, 96 bytes per loop iteration.
@ The eight pointer-advancing write-backs use vst2.32 (interleaving store); the two
@ intermediate stores to data1/data3 use vst1.32.
@ NOTE(review): r0 = data, r1 = byte size/stride, r2 = twiddle LUT is inferred
@ from register usage and the existing inline comments -- confirm against the C prototype.
.align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _neon_static_x8_t_i
|
||||
_neon_static_x8_t_i:
|
||||
#else
|
||||
.globl neon_static_x8_t_i
|
||||
neon_static_x8_t_i:
|
||||
#endif
|
||||
@ save the core registers overwritten below and d8-d15; both sets are restored before returning
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||
vstmdb sp!, {d8-d15}
|
||||
mov r11, #0
|
||||
add r3, r0, #0 @ data0
|
||||
add r5, r0, r1, lsl #1 @ data2
|
||||
add r4, r0, r1 @ data1
|
||||
add r7, r5, r1, lsl #1 @ data4
|
||||
add r6, r5, r1 @ data3
|
||||
add r9, r7, r1, lsl #1 @ data6
|
||||
add r8, r7, r1 @ data5
|
||||
add r10, r9, r1 @ data7
|
||||
add r12, r2, #0 @ LUT
|
||||
|
||||
@ r11 = -(r1 >> 5): a negative counter that the loop increments until it reaches zero
@ NOTE(review): if r1 >> 5 is zero the counter starts at 0 and the first adds makes it 1,
@ so the bne below would not fall through -- callers presumably never pass r1 < 32; confirm
sub r11, r11, r1, lsr #5
|
||||
neon_x8_t_loop:
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vld1.32 {q14,q15}, [r6, :128]
|
||||
vld1.32 {q10,q11}, [r5, :128]
|
||||
@ flags set here are consumed by the bne at the bottom; no instruction in between writes the flags
adds r11, r11, #1
|
||||
vmul.f32 q12, q15, q2
|
||||
vmul.f32 q8, q14, q3
|
||||
vmul.f32 q13, q14, q2
|
||||
vmul.f32 q9, q10, q3
|
||||
vmul.f32 q1, q10, q2
|
||||
vmul.f32 q0, q11, q2
|
||||
vmul.f32 q14, q11, q3
|
||||
vmul.f32 q15, q15, q3
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vsub.f32 q10, q12, q8
|
||||
vadd.f32 q11, q0, q9
|
||||
vadd.f32 q8, q15, q13
|
||||
vld1.32 {q12,q13}, [r4, :128]
|
||||
vsub.f32 q9, q1, q14
|
||||
vsub.f32 q15, q11, q10
|
||||
vsub.f32 q14, q9, q8
|
||||
vsub.f32 q4, q12, q15 @
|
||||
vadd.f32 q6, q12, q15 @
|
||||
vadd.f32 q5, q13, q14 @
|
||||
vsub.f32 q7, q13, q14 @
|
||||
vld1.32 {q14,q15}, [r9, :128]
|
||||
vld1.32 {q12,q13}, [r7, :128]
|
||||
vmul.f32 q1, q14, q2
|
||||
vmul.f32 q0, q14, q3
|
||||
@ intermediate results are written to data1/data3 without advancing the pointers; they are reloaded later in this iteration
vst1.32 {q4,q5}, [r4, :128]
|
||||
vmul.f32 q14, q15, q3
|
||||
vmul.f32 q4, q15, q2
|
||||
vadd.f32 q15, q9, q8
|
||||
vst1.32 {q6,q7}, [r6, :128]
|
||||
vmul.f32 q8, q12, q3
|
||||
vmul.f32 q5, q13, q3
|
||||
vmul.f32 q12, q12, q2
|
||||
vmul.f32 q9, q13, q2
|
||||
vadd.f32 q14, q14, q1
|
||||
vsub.f32 q13, q4, q0
|
||||
vadd.f32 q0, q9, q8
|
||||
vld1.32 {q8,q9}, [r3, :128]
|
||||
vadd.f32 q1, q11, q10
|
||||
vsub.f32 q12, q12, q5
|
||||
vadd.f32 q11, q8, q15
|
||||
vsub.f32 q8, q8, q15
|
||||
vadd.f32 q2, q12, q14
|
||||
vsub.f32 q10, q0, q13
|
||||
vadd.f32 q15, q0, q13
|
||||
vadd.f32 q13, q9, q1
|
||||
vsub.f32 q9, q9, q1
|
||||
vsub.f32 q12, q12, q14
|
||||
vadd.f32 q0, q11, q2
|
||||
vadd.f32 q1, q13, q15
|
||||
vsub.f32 q4, q11, q2
|
||||
vsub.f32 q2, q8, q10 @
|
||||
vadd.f32 q3, q9, q12 @
|
||||
@ the stores written with "!" advance their data pointer by 32 bytes for the next iteration
vst2.32 {q0,q1}, [r3, :128]!
|
||||
vsub.f32 q5, q13, q15
|
||||
vld1.32 {q14,q15}, [r10, :128]
|
||||
vsub.f32 q7, q9, q12 @
|
||||
vld1.32 {q12,q13}, [r8, :128]
|
||||
vst2.32 {q2,q3}, [r5, :128]!
|
||||
vld1.32 {q2,q3}, [r12, :128]!
|
||||
vadd.f32 q6, q8, q10 @
|
||||
vmul.f32 q8, q14, q2
|
||||
vst2.32 {q4,q5}, [r7, :128]!
|
||||
vmul.f32 q10, q15, q3
|
||||
vmul.f32 q9, q13, q3
|
||||
vmul.f32 q11, q12, q2
|
||||
vmul.f32 q14, q14, q3
|
||||
vst2.32 {q6,q7}, [r9, :128]!
|
||||
vmul.f32 q15, q15, q2
|
||||
vmul.f32 q12, q12, q3
|
||||
vmul.f32 q13, q13, q2
|
||||
vadd.f32 q10, q10, q8
|
||||
vsub.f32 q11, q11, q9
|
||||
vld1.32 {q8,q9}, [r4, :128]
|
||||
vsub.f32 q14, q15, q14
|
||||
vadd.f32 q15, q13, q12
|
||||
vadd.f32 q13, q11, q10
|
||||
vadd.f32 q12, q15, q14
|
||||
vsub.f32 q15, q15, q14
|
||||
vsub.f32 q14, q11, q10
|
||||
vld1.32 {q10,q11}, [r6, :128]
|
||||
vadd.f32 q0, q8, q13
|
||||
vadd.f32 q1, q9, q12
|
||||
vsub.f32 q2, q10, q15 @
|
||||
vadd.f32 q3, q11, q14 @
|
||||
vsub.f32 q4, q8, q13
|
||||
vst2.32 {q0,q1}, [r4, :128]!
|
||||
vsub.f32 q5, q9, q12
|
||||
vadd.f32 q6, q10, q15 @
|
||||
vst2.32 {q2,q3}, [r6, :128]!
|
||||
vsub.f32 q7, q11, q14 @
|
||||
vst2.32 {q4,q5}, [r8, :128]!
|
||||
vst2.32 {q6,q7}, [r10, :128]!
|
||||
bne neon_x8_t_loop
|
||||
|
||||
@ restore d8-d15 and the saved core registers; loading the saved lr into pc returns to the caller
vldmia sp!, {d8-d15}
|
||||
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||
|
||||
|
@ -1,208 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "patterns.h"
|
||||
|
||||
/*
 * Fill d[0..3] with offset + (k << stride) for k = 0, 2, 1, 3, in that order.
 * An address that comes out negative is wrapped once by adding N.
 */
void permute_addr(int N, int offset, int stride, int *d) {
    static const int order[4] = {0, 2, 1, 3};
    int k;

    for (k = 0; k < 4; ++k) {
        const int addr = offset + (order[k] << stride);

        d[k] = (addr < 0) ? addr + N : addr;
    }
}
|
||||
|
||||
/*
 * Recursively append groups of four input indices to the array that *is
 * points into, advancing *is by four for every group written.
 *
 *   N == 4 : the group is permute_addr(bigN, poffset, stride) with every
 *            entry doubled; it is written only when the doubled first entry
 *            is a multiple of 2*VL.
 *   N  > 4 : recurse on the N/2 half, then either recurse on both N/4
 *            quarters (when N/4 >= 4) or emit one group built directly from
 *            poffset +/- (1 << stride), wrapped by bigN when negative and
 *            doubled, again subject to the 2*VL alignment test.
 *   N  < 4 : nothing is written.
 *
 * `offset` and `even` are only forwarded to the recursive calls; they do not
 * influence the values written.
 */
void ffts_hardcodedleaf_is_rec(ptrdiff_t **is, int bigN, int N, int poffset, int offset, int stride, int even, int VL) {
    if (N > 4) {
        const int quarter = N / 4;
        const int step = 1 << stride;

        ffts_hardcodedleaf_is_rec(is, bigN, N / 2, poffset, offset, stride + 1, even, VL);

        if (quarter >= 4) {
            /* Both quarters are large enough to be expanded recursively. */
            ffts_hardcodedleaf_is_rec(is, bigN, quarter, poffset + step, offset + (N / 2), stride + 2, 0, VL);
            ffts_hardcodedleaf_is_rec(is, bigN, quarter, poffset - step, offset + (3 * N / 4), stride + 2, 0, VL);
        } else {
            /* Alignment probe uses the wrapped, doubled first index. */
            int probe = poffset + step;

            if (probe < 0) {
                probe += bigN;
            }

            if ((2 * probe) % (VL * 2) == 0) {
                ptrdiff_t *out = *is;
                int k;

                out[0] = poffset + step;
                out[1] = poffset + step + (1 << (stride + 2));
                out[2] = poffset - step;
                out[3] = poffset - step + (1 << (stride + 2));

                for (k = 0; k < 4; ++k) {
                    if (out[k] < 0) {
                        out[k] += bigN;
                    }
                    out[k] *= 2;
                }

                *is = out + 4;
            }
        }
    } else if (N == 4) {
        int perm[4];

        permute_addr(bigN, poffset, stride, perm);

        if ((perm[0] * 2) % (VL * 2) == 0) {
            ptrdiff_t *out = *is;
            int k;

            for (k = 0; k < 4; ++k) {
                out[k] = perm[k] * 2;
            }

            *is = out + 4;
        }
    }
}
|
||||
|
||||
/*
 * Build the input-index table p->is for a transform of size N with leaves of
 * size leafN and vector length VL, and record the loop counts p->i0 / p->i1.
 *
 * The table holds N/VL entries and is filled by three runs of
 * ffts_hardcodedleaf_is_rec: i0 full leaves, i1 pairs of half leaves, and
 * finally N/leafN/3 full leaves addressed with negative offsets.
 *
 * p->i0 and p->i1 are always set. If the allocation fails, p->is is left
 * NULL and the table is not built, so callers can detect the failure instead
 * of the routine writing through a null pointer.
 */
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL) {
    const int leaves = N / leafN;
    const int i0 = leaves / 3 + 1;
    const int i2 = leaves / 3;
    int i1 = leaves / 3;
    int stride = 0;
    int ratio;
    int i;
    ptrdiff_t *is;

    if (leaves % 3 > 1) {
        i1++;
    }

    /*
     * Integer floor(log2(N / leafN)). The floating-point log(x)/log(2) it
     * replaces can land just below the exact integer and truncate to one
     * less than intended.
     */
    for (ratio = leaves; ratio > 1; ratio >>= 1) {
        stride++;
    }

    p->i0 = i0;
    p->i1 = i1;

    p->is = malloc(N / VL * sizeof(ptrdiff_t));
    if (!p->is) {
        return;
    }

    is = p->is;

    for (i = 0; i < i0; i++) {
        ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
    }

    for (i = i0; i < i0 + i1; i++) {
        ffts_hardcodedleaf_is_rec(&is, N, leafN / 2, i, 0, stride + 1, 1, VL);
        ffts_hardcodedleaf_is_rec(&is, N, leafN / 2, i - (1 << stride), 0, stride + 1, 1, VL);
    }

    for (i = 0 - i2; i < 0; i++) {
        ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
    }
}
|
||||
/**
 * Recursively record (input offset, output offset) pairs, one per leaf.
 *
 * A node is a leaf when it is on the "even" path and N equals leafN, or on
 * the "odd" path (even == 0) and N is at most leafN. For a leaf the pair is
 * stored at offsets[2*(ooffset/leafN)] = 2*ioffset and
 * offsets[2*(ooffset/leafN) + 1] = ooffset.
 *
 * A non-leaf node with N > 4 recurses into its N/2 half (same parity) and its
 * first N/4 quarter (odd); the second N/4 quarter is visited only when
 * N/4 >= leafN. Non-leaf nodes with N <= 4 write nothing.
 */
void ffts_elaborate_offsets(ptrdiff_t *offsets, int leafN, int N, int ioffset, int ooffset, int stride, int even) {
    const int at_leaf = even ? (N == leafN) : (N <= leafN);

    if (at_leaf) {
        ptrdiff_t *slot = offsets + 2 * (ooffset / leafN);

        slot[0] = ioffset * 2;
        slot[1] = ooffset;
        return;
    }

    if (N <= 4) {
        return;
    }

    {
        const int step = 1 << stride;
        const int quarter = N / 4;

        ffts_elaborate_offsets(offsets, leafN, N / 2, ioffset, ooffset, stride + 1, even);
        ffts_elaborate_offsets(offsets, leafN, quarter, ioffset + step, ooffset + N / 2, stride + 2, 0);

        if (quarter >= leafN) {
            ffts_elaborate_offsets(offsets, leafN, quarter, ioffset - step, ooffset + 3 * N / 4, stride + 2, 0);
        }
    }
}
|
||||
|
||||
/*
 * qsort() comparator ordering records by their leading ptrdiff_t key.
 *
 * Returns -1, 0 or 1. The keys are compared rather than subtracted: where
 * ptrdiff_t is wider than int, converting the difference to int can flip its
 * sign or collapse it to zero, and the subtraction itself can overflow.
 */
int compare_offsets(const void *a, const void *b) {
    const ptrdiff_t lhs = *(const ptrdiff_t *)a;
    const ptrdiff_t rhs = *(const ptrdiff_t *)b;

    return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
/*
 * Reverse the order of the low n bits of a: bit i of the input becomes bit
 * (n - i - 1) of the result. Bits of a at position n and above are ignored.
 *
 * n is expected to be in 0..32. The masks are built from an unsigned 32-bit
 * one so that n == 32 does not shift a signed int into its sign bit.
 */
uint32_t reverse_bits(uint32_t a, int n) {
    uint32_t x = 0;
    int i;

    for (i = 0; i < n; i++) {
        if (a & ((uint32_t)1 << i)) {
            x |= (uint32_t)1 << (n - i - 1);
        }
    }

    return x;
}
|
||||
|
||||
|
||||
/*
 * Build p->offsets, the table of output offsets (one per leaf, N/leafN
 * entries) ordered by ascending input offset.
 *
 * A scratch array of (input offset, output offset) pairs is filled by
 * ffts_elaborate_offsets, negative input offsets are wrapped by N, the pairs
 * are sorted on the input offset with compare_offsets, and twice the output
 * offset of each pair is copied into p->offsets. The scratch array is always
 * released before returning.
 *
 * If either allocation fails, p->offsets is left NULL so callers can detect
 * the failure instead of the routine writing through a null pointer.
 */
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN) {
    const size_t leaves = (size_t)(N / leafN);
    const size_t entries = (size_t)(2 * N / leafN);
    ptrdiff_t *offsets;
    size_t i;

    offsets = malloc(entries * sizeof(ptrdiff_t));
    if (!offsets) {
        p->offsets = NULL;
        return;
    }

    ffts_elaborate_offsets(offsets, leafN, N, 0, 0, 1, 1);

    /* Only the even slots hold input offsets; wrap the negative ones. */
    for (i = 0; i < entries; i += 2) {
        if (offsets[i] < 0) {
            offsets[i] = N + offsets[i];
        }
    }

    qsort(offsets, leaves, 2 * sizeof(ptrdiff_t), compare_offsets);

    p->offsets = malloc(leaves * sizeof(ptrdiff_t));
    if (p->offsets) {
        for (i = 0; i < leaves; i++) {
            p->offsets[i] = offsets[i * 2 + 1] * 2;
        }
    }

    free(offsets);
}
|
||||
|
||||
/*
|
||||
int tree_count(int N, int leafN, int offset) {
|
||||
|
||||
if(N <= leafN) return 0;
|
||||
int count = 0;
|
||||
count += tree_count(N/4, leafN, offset);
|
||||
count += tree_count(N/8, leafN, offset + N/4);
|
||||
count += tree_count(N/8, leafN, offset + N/4 + N/8);
|
||||
count += tree_count(N/4, leafN, offset + N/2);
|
||||
count += tree_count(N/4, leafN, offset + 3*N/4);
|
||||
|
||||
return 1 + count;
|
||||
}
|
||||
|
||||
void elaborate_tree(transform_index_t **p, int N, int leafN, int offset) {
|
||||
|
||||
if(N <= leafN) return;
|
||||
elaborate_tree(p, N/4, leafN, offset);
|
||||
elaborate_tree(p, N/8, leafN, offset + N/4);
|
||||
elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
|
||||
elaborate_tree(p, N/4, leafN, offset + N/2);
|
||||
elaborate_tree(p, N/4, leafN, offset + 3*N/4);
|
||||
|
||||
(*p)[0] = N;
|
||||
(*p)[1] = offset*2;
|
||||
|
||||
(*p)+=2;
|
||||
}
|
||||
|
||||
void ffts_init_tree(ffts_plan_t *p, int N, int leafN) {
|
||||
|
||||
int count = tree_count(N, leafN, 0) + 1;
|
||||
transform_index_t *ps = p->transforms = malloc(count * 2 * sizeof(transform_index_t));
|
||||
|
||||
//printf("count = %d\n", count);
|
||||
|
||||
elaborate_tree(&ps, N, leafN, 0);
|
||||
#ifdef __ARM_NEON__
|
||||
ps -= 2;
|
||||
#endif
|
||||
ps[0] = 0;
|
||||
ps[1] = 0;
|
||||
//int i;
|
||||
//for(i=0;i<count;i++) {
|
||||
// fprintf(stderr, "%lu %lu - %d\n", p->transforms[i*2], p->transforms[i*2+1],
|
||||
// __builtin_ctzl(p->transforms[i*2]) - 5);
|
||||
//}
|
||||
|
||||
}
|
||||
*/
|
@ -1,44 +1,520 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
Copyright (c) 2015, Jukka Ojanen <jukka.ojanen@kolumbus.fi>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef FFTS_PATTERNS_H
|
||||
#define FFTS_PATTERNS_H
|
||||
|
||||
#if defined (_MSC_VER) && (_MSC_VER >= 1020)
|
||||
#pragma once
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifndef LEAF_N
|
||||
#define LEAF_N 8
|
||||
#endif
|
||||
|
||||
#if LEAF_N == 8
|
||||
static void
|
||||
ffts_elaborate_offsets_even8(ptrdiff_t *const offsets,
|
||||
int log_N);
|
||||
|
||||
static void
|
||||
ffts_elaborate_offsets_odd8(ptrdiff_t *const offsets,
|
||||
int log_N,
|
||||
int input_offset,
|
||||
int output_offset,
|
||||
int stride);
|
||||
|
||||
static void
|
||||
ffts_hardcodedleaf_is_rec_even4(ptrdiff_t **is,
|
||||
int big_N,
|
||||
int offset,
|
||||
int stride,
|
||||
int VL);
|
||||
|
||||
static void
|
||||
ffts_hardcodedleaf_is_rec_even8(ptrdiff_t **is,
|
||||
int big_N,
|
||||
int offset,
|
||||
int stride,
|
||||
int VL);
|
||||
#else
|
||||
static void
|
||||
ffts_elaborate_offsets_even(ptrdiff_t *const offsets,
|
||||
int leaf_N,
|
||||
int N,
|
||||
int input_offset,
|
||||
int output_offset,
|
||||
int stride);
|
||||
|
||||
static void
|
||||
ffts_elaborate_offsets_odd(ptrdiff_t *const offsets,
|
||||
int leaf_N,
|
||||
int N,
|
||||
int input_offset,
|
||||
int output_offset,
|
||||
int stride);
|
||||
|
||||
static void
|
||||
ffts_hardcodedleaf_is_rec_even(ptrdiff_t **is,
|
||||
int big_N,
|
||||
int N,
|
||||
int offset,
|
||||
int stride,
|
||||
int VL);
|
||||
|
||||
static void
|
||||
ffts_hardcodedleaf_is_rec_odd(ptrdiff_t **is,
|
||||
int big_N,
|
||||
int N,
|
||||
int offset,
|
||||
int stride,
|
||||
int VL);
|
||||
#endif
|
||||
|
||||
/*
 * qsort() comparator: orders records by the ptrdiff_t stored at the start of
 * each one. Returns -1, 0 or 1.
 */
static int
ffts_compare_offsets(const void *pa, const void *pb)
{
    const ptrdiff_t lhs = *(const ptrdiff_t*) pa;
    const ptrdiff_t rhs = *(const ptrdiff_t*) pb;

    if (lhs < rhs) {
        return -1;
    }

    if (lhs > rhs) {
        return 1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Fill d[0..3] with offset + (k << stride) for k = 0, 2, 1, 3, in that order.
 * An address that comes out negative is wrapped once by adding N.
 */
static void
ffts_permute_addr(int N, int offset, int stride, int *const d)
{
    static const int order[4] = {0, 2, 1, 3};
    int k;

    for (k = 0; k < 4; ++k) {
        const int addr = offset + (order[k] << stride);

        d[k] = (addr < 0) ? addr + N : addr;
    }
}
|
||||
|
||||
#if LEAF_N == 8
|
||||
/*
 * Fill the offsets table for the "even" spine of a transform of size
 * 2^log_N with leaf size 8.
 *
 * The first four (input, output) pairs are written directly from
 * base = 1 << (log_N - 4). Then, while log_N is greater than 5, the two odd
 * sub-trees of the current level are expanded with
 * ffts_elaborate_offsets_odd8, after which log_N is decremented and the
 * input step is doubled.
 */
static void
ffts_elaborate_offsets_even8(ptrdiff_t *const offsets, int log_N)
{
    const int base = 1 << (log_N - 4);
    int step = 1;

    offsets[0] = 0;
    offsets[1] = 0;
    offsets[2] = base * 2;
    offsets[3] = 8;
    offsets[4] = base;
    offsets[5] = 16;
    offsets[6] = -base;
    offsets[7] = 24;

    while (log_N > 5) {
        ffts_elaborate_offsets_odd8(offsets, log_N - 2,
                                    step, 1 << (log_N - 1), step * 4);

        ffts_elaborate_offsets_odd8(offsets, log_N - 2,
                                    -step, 3 * (1 << (log_N - 2)), step * 4);

        --log_N;
        step *= 2;
    }
}
|
||||
|
||||
/*
 * Recursively record (input, output) offset pairs for an "odd" sub-tree of
 * size 2^log_N, starting at index output_offset / 4 of the offsets table.
 *
 * log_N > 4 : recurse into the half-size sub-tree (stride doubled) and the
 *             two quarter-size sub-trees at input_offset +/- stride (stride
 *             quadrupled).
 * log_N <= 4: write the pair (2 * input_offset, output_offset); when log_N
 *             is exactly 4 also write the pair
 *             (2 * (input_offset + stride), output_offset + 8).
 */
static void
ffts_elaborate_offsets_odd8(ptrdiff_t *const offsets,
                            int log_N,
                            int input_offset,
                            int output_offset,
                            int stride)
{
    ptrdiff_t *slot;

    if (log_N > 4) {
        ffts_elaborate_offsets_odd8(offsets, log_N - 1, input_offset,
                                    output_offset, stride * 2);

        ffts_elaborate_offsets_odd8(offsets, log_N - 2, input_offset + stride,
                                    output_offset + (1 << (log_N - 1)), stride * 4);

        ffts_elaborate_offsets_odd8(offsets, log_N - 2, input_offset - stride,
                                    output_offset + 3 * (1 << (log_N - 2)), stride * 4);
        return;
    }

    slot = offsets + (output_offset / 4);
    slot[0] = input_offset * 2;
    slot[1] = output_offset;

    if (log_N == 4) {
        slot[2] = (input_offset + stride) * 2;
        slot[3] = output_offset + 8;
    }
}
|
||||
|
||||
/*
 * Emit the input indices of one size-4 leaf.
 *
 * The four permuted addresses come from ffts_permute_addr(). They are
 * written (doubled) to *is, and *is is advanced by four, only when the
 * doubled first address is a multiple of 2 * VL; otherwise nothing is
 * written.
 *
 * is      in/out cursor into the index table
 * big_N   passed to ffts_permute_addr() as the wrap-around value
 * offset  base address of the leaf (may be negative)
 * stride  shift count forwarded to ffts_permute_addr()
 * VL      divisor used in the multiple-of check (must be non-zero)
 */
static void
ffts_hardcodedleaf_is_rec_even4(ptrdiff_t **is,
                                int big_N,
                                int offset,
                                int stride,
                                int VL)
{
    int i, perm[4];

    ffts_permute_addr(big_N, offset, stride, perm);

    if (!((2 * perm[0]) % (2 * VL))) {
        for (i = 0; i < 4; i++) {
            (*is)[i] = 2 * perm[i];
        }

        *is += 4;
    }
}
|
||||
|
||||
/*
 * Emit the input indices of one size-8 leaf.
 *
 * First the size-4 half is emitted through
 * ffts_hardcodedleaf_is_rec_even4() with stride + 1. Then four more indices
 * built from offset +/- (1 << stride) are appended, but only when the
 * wrapped and doubled value of offset + (1 << stride) is a multiple of
 * 2 * VL.
 *
 * is      in/out cursor into the index table; advanced by 4 per emitted group
 * big_N   value added to negative indices
 * offset  base address of the leaf (may be negative)
 * stride  shift count of the leaf
 * VL      divisor used in the multiple-of check (must be non-zero)
 */
static void
ffts_hardcodedleaf_is_rec_even8(ptrdiff_t **is,
                                int big_N,
                                int offset,
                                int stride,
                                int VL)
{
    int temp;

    ffts_hardcodedleaf_is_rec_even4(is, big_N, offset, stride + 1, VL);

    temp = offset + (1 << stride);
    if (temp < 0) {
        temp += big_N;
    }

    temp *= 2;

    if (!(temp % (2 * VL))) {
        int i;

        (*is)[0] = offset + (1 << stride);
        (*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
        (*is)[2] = offset - (1 << stride);
        (*is)[3] = offset - (1 << stride) + (1 << (stride + 2));

        /* wrap negative indices */
        for (i = 0; i < 4; i++) {
            if ((*is)[i] < 0) {
                (*is)[i] += big_N;
            }
        }

        /* stored indices are twice the computed address */
        for (i = 0; i < 4; i++) {
            (*is)[i] *= 2;
        }

        *is += 4;
    }
}
|
||||
#else
|
||||
/*
 * Recursively fill the offset table (generic leaf size).
 *
 * When N == leaf_N, one (input, output) pair is stored at index
 * 2 * (output_offset / leaf_N). Otherwise, for N > 4, the problem is split
 * into an "even" half of size N/2 and up to two "odd" quarters of size N/4;
 * the second quarter is only visited when N/4 >= leaf_N.
 * Nothing is written when N != leaf_N and N <= 4.
 *
 * offsets        table being filled
 * leaf_N         leaf transform size
 * N              size of the current sub-transform
 * input_offset   input position of the sub-transform (may be negative)
 * output_offset  output position of the sub-transform
 * stride         input stride of the sub-transform
 */
static void
ffts_elaborate_offsets_even(ptrdiff_t *const offsets,
                            int leaf_N,
                            int N,
                            int input_offset,
                            int output_offset,
                            int stride)
{
    if (N == leaf_N) {
        offsets[2 * (output_offset / leaf_N) + 0] = input_offset * 2;
        offsets[2 * (output_offset / leaf_N) + 1] = output_offset;
    } else if (N > 4) {
        ffts_elaborate_offsets_even(offsets, leaf_N,
                                    N/2, input_offset, output_offset, stride * 2);

        ffts_elaborate_offsets_odd(offsets, leaf_N,
                                   N/4, input_offset + stride, output_offset + N/2, stride * 4);

        if (N/4 >= leaf_N) {
            ffts_elaborate_offsets_odd(offsets, leaf_N,
                                       N/4, input_offset - stride, output_offset + 3*N/4, stride * 4);
        }
    }
}
|
||||
|
||||
/*
 * Recursive "odd" counterpart of ffts_elaborate_offsets_even().
 *
 * When N <= leaf_N, one (input, output) pair is stored at index
 * 2 * (output_offset / leaf_N). Otherwise, for N > 4, the problem is split
 * into a half of size N/2 and up to two quarters of size N/4; the second
 * quarter is only visited when N/4 >= leaf_N.
 *
 * offsets        table being filled
 * leaf_N         leaf transform size
 * N              size of the current sub-transform
 * input_offset   input position of the sub-transform (may be negative)
 * output_offset  output position of the sub-transform
 * stride         input stride of the sub-transform
 */
static void
ffts_elaborate_offsets_odd(ptrdiff_t *const offsets,
                           int leaf_N,
                           int N,
                           int input_offset,
                           int output_offset,
                           int stride)
{
    if (N <= leaf_N) {
        offsets[2 * (output_offset / leaf_N) + 0] = input_offset * 2;
        offsets[2 * (output_offset / leaf_N) + 1] = output_offset;
    } else if (N > 4) {
        ffts_elaborate_offsets_odd(offsets, leaf_N, N/2,
                                   input_offset, output_offset, stride * 2);

        ffts_elaborate_offsets_odd(offsets, leaf_N, N/4,
                                   input_offset + stride, output_offset + N/2, stride * 4);

        if (N/4 >= leaf_N) {
            ffts_elaborate_offsets_odd(offsets, leaf_N, N/4,
                                       input_offset - stride, output_offset + 3*N/4, stride * 4);
        }
    }
}
|
||||
|
||||
/*
 * Recursively emit leaf input indices (generic leaf size, "even" variant).
 *
 * N > 4:  recurse on the N/2 half (even variant, stride + 1); then either
 *         recurse on both N/4 quarters through the odd variant (when
 *         N/4 >= 4) or emit one group of four indices built from
 *         offset +/- (1 << stride) directly.
 * N == 4: emit the four addresses returned by ffts_permute_addr().
 * N < 4:  nothing is emitted.
 *
 * A group is written, and *is advanced by four, only when its doubled and
 * wrapped first address is a multiple of 2 * VL.
 *
 * is      in/out cursor into the index table
 * big_N   value added to negative indices
 * N       size of the current sub-transform
 * offset  base address (may be negative)
 * stride  shift count of the current level
 * VL      divisor used in the multiple-of check (must be non-zero)
 */
static void
ffts_hardcodedleaf_is_rec_even(ptrdiff_t **is,
                               int big_N,
                               int N,
                               int offset,
                               int stride,
                               int VL)
{
    if (N > 4) {
        ffts_hardcodedleaf_is_rec_even(is, big_N, N/2, offset, stride + 1, VL);

        if (N/4 >= 4) {
            ffts_hardcodedleaf_is_rec_odd(
                is, big_N, N/4, offset + (1 << stride), stride + 2, VL);
            ffts_hardcodedleaf_is_rec_odd(
                is, big_N, N/4, offset - (1 << stride), stride + 2, VL);
        } else {
            int temp = offset + (1 << stride);

            if (temp < 0) {
                temp += big_N;
            }

            temp *= 2;

            if (!(temp % (2 * VL))) {
                int i;

                (*is)[0] = offset + (1 << stride);
                (*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
                (*is)[2] = offset - (1 << stride);
                (*is)[3] = offset - (1 << stride) + (1 << (stride + 2));

                /* wrap negative indices */
                for (i = 0; i < 4; i++) {
                    if ((*is)[i] < 0) {
                        (*is)[i] += big_N;
                    }
                }

                /* stored indices are twice the computed address */
                for (i = 0; i < 4; i++) {
                    (*is)[i] *= 2;
                }

                *is += 4;
            }
        }
    } else if (N == 4) {
        int perm[4];

        ffts_permute_addr(big_N, offset, stride, perm);

        if (!((2 * perm[0]) % (2 * VL))) {
            int i;

            for (i = 0; i < 4; i++) {
                (*is)[i] = 2 * perm[i];
            }

            *is += 4;
        }
    }
}
|
||||
|
||||
/*
 * Recursively emit leaf input indices (generic leaf size, "odd" variant).
 *
 * Identical in structure to ffts_hardcodedleaf_is_rec_even(), except that
 * every recursive call, including the one for the N/2 half, goes to the odd
 * variant itself.
 *
 * N > 4:  recurse on the N/2 half (stride + 1); then either recurse on both
 *         N/4 quarters (when N/4 >= 4) or emit one group of four indices
 *         built from offset +/- (1 << stride) directly.
 * N == 4: emit the four addresses returned by ffts_permute_addr().
 * N < 4:  nothing is emitted.
 *
 * A group is written, and *is advanced by four, only when its doubled and
 * wrapped first address is a multiple of 2 * VL.
 *
 * is      in/out cursor into the index table
 * big_N   value added to negative indices
 * N       size of the current sub-transform
 * offset  base address (may be negative)
 * stride  shift count of the current level
 * VL      divisor used in the multiple-of check (must be non-zero)
 */
static void
ffts_hardcodedleaf_is_rec_odd(ptrdiff_t **is,
                              int big_N,
                              int N,
                              int offset,
                              int stride,
                              int VL)
{
    if (N > 4) {
        ffts_hardcodedleaf_is_rec_odd(is, big_N, N/2, offset, stride + 1, VL);

        if (N/4 >= 4) {
            ffts_hardcodedleaf_is_rec_odd(
                is, big_N, N/4, offset + (1 << stride), stride + 2, VL);
            ffts_hardcodedleaf_is_rec_odd(
                is, big_N, N/4, offset - (1 << stride), stride + 2, VL);
        } else {
            int temp = offset + (1 << stride);

            if (temp < 0) {
                temp += big_N;
            }

            temp *= 2;

            if (!(temp % (2 * VL))) {
                int i;

                (*is)[0] = offset + (1 << stride);
                (*is)[1] = offset + (1 << stride) + (1 << (stride + 2));
                (*is)[2] = offset - (1 << stride);
                (*is)[3] = offset - (1 << stride) + (1 << (stride + 2));

                /* wrap negative indices */
                for (i = 0; i < 4; i++) {
                    if ((*is)[i] < 0) {
                        (*is)[i] += big_N;
                    }
                }

                /* stored indices are twice the computed address */
                for (i = 0; i < 4; i++) {
                    (*is)[i] *= 2;
                }

                *is += 4;
            }
        }
    } else if (N == 4) {
        int perm[4];

        ffts_permute_addr(big_N, offset, stride, perm);

        if (!((2 * perm[0]) % (2 * VL))) {
            int i;

            for (i = 0; i < 4; i++) {
                (*is)[i] = 2 * perm[i];
            }

            *is += 4;
        }
    }
}
|
||||
#endif
|
||||
|
||||
/*
 * Build the table of leaf input indices.
 *
 * Allocates N / VL entries with malloc() and fills them in three passes
 * over the N / leaf_N leaves: i0 leaves starting at offset 0, then i1
 * leaves that are each emitted as two half-size groups, then i2 leaves at
 * negative offsets. i0, i1 and i2 split N / leaf_N roughly into thirds.
 *
 * N       transform size
 * leaf_N  leaf transform size (must be non-zero)
 * VL      divisor forwarded to the leaf emitters (must be non-zero)
 *
 * Returns the table, which the caller must free(), or NULL when the
 * allocation fails.
 *
 * NOTE(review): the emitters advance the cursor without a bound check;
 * presumably the groups they write never exceed N / VL entries -- confirm.
 */
static ptrdiff_t*
ffts_init_is(size_t N, size_t leaf_N, int VL)
{
    int i, i0, i1, i2;
    int stride = ffts_ctzl(N/leaf_N);
    ptrdiff_t *is, *pis;

    is = malloc(N / VL * sizeof(*is));
    if (!is) {
        return NULL;
    }

    i0 = N/leaf_N/3 + 1;
    i1 = i2 = N/leaf_N/3;
    if ((N/leaf_N) % 3 > 1) {
        i1++;
    }

    /* pis is the write cursor; is keeps the start of the table */
    pis = is;

#if LEAF_N == 8
    for (i = 0; i < i0; i++) {
        ffts_hardcodedleaf_is_rec_even8(
            &pis, N, i, stride, VL);
    }

    for (i = i0; i < i0 + i1; i++) {
        ffts_hardcodedleaf_is_rec_even4(
            &pis, N, i, stride + 1, VL);
        ffts_hardcodedleaf_is_rec_even4(
            &pis, N, i - (1 << stride), stride + 1, VL);
    }

    for (i = 0 - i2; i < 0; i++) {
        ffts_hardcodedleaf_is_rec_even8(
            &pis, N, i, stride, VL);
    }
#else
    for (i = 0; i < i0; i++) {
        ffts_hardcodedleaf_is_rec_even(
            &pis, N, leaf_N, i, stride, VL);
    }

    for (i = i0; i < i0 + i1; i++) {
        ffts_hardcodedleaf_is_rec_even(
            &pis, N, leaf_N / 2, i, stride + 1, VL);
        ffts_hardcodedleaf_is_rec_even(
            &pis, N, leaf_N / 2, i - (1 << stride), stride + 1, VL);
    }

    for (i = 0 - i2; i < 0; i++) {
        ffts_hardcodedleaf_is_rec_even(
            &pis, N, leaf_N, i, stride, VL);
    }
#endif

    return is;
}
|
||||
|
||||
static ptrdiff_t*
|
||||
ffts_init_offsets(size_t N, size_t leaf_N)
|
||||
{
|
||||
ptrdiff_t *offsets, *tmp;
|
||||
size_t i;
|
||||
|
||||
offsets = malloc(N/leaf_N * sizeof(*offsets));
|
||||
if (!offsets) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
tmp = malloc(2 * N/leaf_N * sizeof(*tmp));
|
||||
if (!tmp) {
|
||||
free(offsets);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if LEAF_N == 8
|
||||
ffts_elaborate_offsets_even8(tmp, ffts_ctzl(N));
|
||||
#else
|
||||
ffts_elaborate_offsets_even(tmp, leaf_N, N, 0, 0, 1);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < 2*N/leaf_N; i += 2) {
|
||||
if (tmp[i] < 0) {
|
||||
tmp[i] += N;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(tmp, N/leaf_N, 2 * sizeof(*tmp), ffts_compare_offsets);
|
||||
|
||||
for (i = 0; i < N/leaf_N; i++) {
|
||||
offsets[i] = 2 * tmp[2*i + 1];
|
||||
}
|
||||
|
||||
free(tmp);
|
||||
return offsets;
|
||||
}
|
||||
|
||||
#endif /* FFTS_PATTERNS_H */
|
||||
|
@ -0,0 +1,448 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
/* One symbol in a doubly linked symbol list. */
typedef struct _sym_t {
    int c;                        /* symbol value; negative values are treated
                                     as rule references by
                                     seq_compute_lengths() */
    struct _sym_t *pPrev, *pNext; /* neighbours in the list, NULL at the ends */
    struct _seq_rule_t *r;        /* rule this symbol stands for, or NULL */
    int offset;                   /* position of the symbol; stored relative to
                                     the first symbol inside a rule body */
} sym_t;

/* One grammar rule; rules form a linked list whose head is the grammar. */
typedef struct _seq_rule_t {
    int c;                             /* rule identifier */
    sym_t *ss;                         /* first symbol of the rule body */
    struct _seq_rule_t *pPrev, *pNext; /* neighbours in the rule list */
    int count;                         /* number of references to this rule */
    int length;                        /* extent computed by
                                          seq_compute_lengths() */
} seq_rule_t;
|
||||
|
||||
/*
 * Append symbol s to the end of the list whose head pointer is *ss.
 *
 * For an empty list s becomes the head and both of its links are cleared.
 * Otherwise the list is walked to its end, s->pPrev is set to the last
 * node, and s is stored in that node's pNext.
 *
 * NOTE(review): in the non-empty case s->pNext is left as the caller set
 * it; the allocators in this file initialise it to NULL.
 */
void sym_tail_insert(sym_t **ss, sym_t *s)
{
    if (!*ss) {
        *ss = s;
        s->pPrev = s->pNext = NULL;
    } else {
        /* after the loop ss points at the pNext slot of the last node */
        while (*ss) {
            s->pPrev = *ss;
            ss = &(*ss)->pNext;
        }

        *ss = s;
    }
}
|
||||
|
||||
sym_t* sym_init(int c)
|
||||
{
|
||||
sym_t *s;
|
||||
|
||||
s = (sym_t*) malloc(sizeof(*s));
|
||||
if (!s) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
s->c = c;
|
||||
s->pPrev = s->pNext = NULL;
|
||||
s->r = NULL;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/*
 * Allocate an unlinked copy of symbol s2.
 *
 * Value, rule pointer and offset are copied; the links of the copy are
 * NULL. s2 must not be NULL.
 *
 * Returns the new symbol, owned by the caller, or NULL when malloc() fails.
 */
sym_t* sym_init_from_sym(sym_t *s2)
{
    sym_t *s;

    s = (sym_t*) malloc(sizeof(*s));
    if (!s) {
        return NULL;
    }

    s->c = s2->c;
    s->pPrev = s->pNext = NULL;
    s->r = s2->r;
    s->offset = s2->offset;

    return s;
}
|
||||
|
||||
seq_rule_t* seq_init_rule(int c)
|
||||
{
|
||||
seq_rule_t *G;
|
||||
|
||||
G = (seq_rule_t *)malloc(sizeof(*G));
|
||||
if (!G) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
G->c = c;
|
||||
G->count = 2;
|
||||
G->ss = NULL;
|
||||
G->pPrev = NULL;
|
||||
G->pNext = NULL;
|
||||
|
||||
return G;
|
||||
}
|
||||
|
||||
/*
 * Append a new rule with identifier r to the end of rule list G.
 *
 * The body of the new rule consists of copies of symbols a and b. The copy
 * of a gets offset 0 and the copy of b keeps its distance to a.
 *
 * r is an int: rule identifiers are negative and decrease without bound in
 * seq_extract_hierarchy(), so a plain char would turn them positive where
 * char is unsigned and would wrap below -128.
 *
 * Returns the new rule, or NULL when an allocation fails; in that case the
 * list is left as it was.
 */
seq_rule_t* seq_grammer_insert_new_rule(seq_rule_t *G, int r, sym_t *a, sym_t *b)
{
    sym_t *sa, *sb;

    /* walk to the last rule */
    while (G->pNext) {
        G = G->pNext;
    }

    G->pNext = seq_init_rule(r);
    if (!G->pNext) {
        return NULL;
    }

    sa = sym_init_from_sym(a);
    if (!sa) {
        goto cleanup_pnext;
    }

    sb = sym_init_from_sym(b);
    if (!sb) {
        goto cleanup_sa;
    }

    /* offsets inside a rule body are relative to its first symbol */
    sb->offset = sb->offset - sa->offset;
    sa->offset = 0;
    sym_tail_insert(&G->pNext->ss, sa);
    sym_tail_insert(&G->pNext->ss, sb);
    return G->pNext;

cleanup_sa:
    free(sa);

cleanup_pnext:
    free(G->pNext);
    G->pNext = NULL;

    return NULL;
}
|
||||
|
||||
/*
 * Search the symbols from s up to, but not including, term for a pair of
 * adjacent symbols that matches the digram (a, b).
 *
 * A pair matches when both symbol values are equal and the offset distance
 * between the two symbols equals the distance between a and b.
 *
 * Returns the first symbol of the first matching pair, or NULL.
 *
 * NOTE(review): s->pNext is dereferenced without a check, so term must be
 * reachable from s.
 */
sym_t* sym_match_digram(sym_t *s, sym_t *term, sym_t *a, sym_t *b)
{
    while (s != term) {
        if (s->c == a->c && s->pNext->c == b->c &&
            s->pNext->offset - s->offset == b->offset-a->offset) {
            return s;
        }

        s = s->pNext;
    }

    return NULL;
}
|
||||
|
||||
/*
 * Search rule list R for a rule whose first two body symbols match the
 * digram (a, b): equal symbol values and equal offset distance.
 *
 * Returns the first matching rule, or NULL.
 *
 * NOTE(review): the first two body symbols are dereferenced without a
 * check, so every rule in the list must hold at least two symbols.
 */
seq_rule_t* seq_match_digram(seq_rule_t *R, sym_t *a, sym_t *b)
{
    while (R) {
        if (R->ss->c == a->c && R->ss->pNext->c == b->c &&
            R->ss->pNext->offset - R->ss->offset == b->offset - a->offset) {
            return R;
        }

        R = R->pNext;
    }

    return NULL;
}
|
||||
|
||||
/*
 * Return the last symbol of the list that contains s.
 * s must not be NULL.
 */
sym_t* sym_tail(sym_t *s)
{
    while (s->pNext) {
        s = s->pNext;
    }

    return s;
}
|
||||
|
||||
/*
 * Count the symbols from s to the end of its list.
 * Returns 0 when s is NULL.
 */
int sym_count(sym_t *s)
{
    int count = 0;

    while (s) {
        count++;
        s = s->pNext;
    }

    return count;
}
|
||||
|
||||
/*
 * Duplicate the symbol list that starts at s.
 *
 * Every node is copied with sym_init_from_sym() and the copies are linked
 * in the same order.
 *
 * Returns the head of the new list, owned by the caller. Returns NULL when
 * s is NULL or when an allocation fails; on failure the nodes copied so far
 * are released instead of being leaked.
 */
sym_t* sym_copylist(sym_t *s)
{
    sym_t *head = NULL;
    sym_t *prev = head;

    while (s) {
        sym_t *copy = sym_init_from_sym(s);
        if (!copy) {
            /* the partial copy is NULL-terminated, so it can be walked */
            while (head) {
                sym_t *next = head->pNext;
                free(head);
                head = next;
            }

            return NULL;
        }

        copy->pPrev = prev;

        if (prev) {
            prev->pNext = copy;
        }

        if (!head) {
            head = copy;
        }

        prev = copy;
        s = s->pNext;
    }

    return head;
}
|
||||
|
||||
/*
 * Inline every rule that is referenced exactly once.
 *
 * The rule list is walked from G. In G itself and in every rule with a
 * count above 1, each symbol that references a rule with count == 1 is
 * replaced in place by that rule's body; the body offsets are shifted by
 * the offset of the replaced symbol. Rules other than G whose count is
 * not above 1 are unlinked from the rule list.
 *
 * NOTE(review): neither the replaced symbols nor the unlinked rules are
 * freed here -- confirm whether they are released elsewhere.
 */
void seq_enforce_uniqueness(seq_rule_t *G)
{
    seq_rule_t *R = G;
    seq_rule_t **ppr = &G->pNext;

    while (R) {
        if (R == G || R->count > 1) {
            sym_t *s = R->ss;
            sym_t **pp = &R->ss;  /* slot that currently points at s */

            while (s) {
                if (s->r && s->r->count == 1) {
                    sym_t *temp_itr;

                    /* splice the rule body in where s was */
                    *pp = s->r->ss;

                    temp_itr = s->r->ss;
                    while (temp_itr) {
                        temp_itr->offset += s->offset;
                        temp_itr = temp_itr->pNext;
                    }

                    s->r->ss->pPrev = s->pPrev;
                    if (s->pNext) {
                        s->pNext->pPrev = sym_tail(s->r->ss);
                    }

                    sym_tail(s->r->ss)->pNext = s->pNext;

                    /* re-examine the spliced symbols; pp is still valid */
                    s = s->r->ss;
                    continue;
                }

                pp = &s->pNext;
                s = s->pNext;
            }

            ppr = &R->pNext;
        } else {
            /* drop R from the rule list */
            *ppr = R->pNext;
        }

        R = R->pNext;
    }
}
|
||||
|
||||
/*
 * Expand rule references inside short rules.
 *
 * For every rule in list G whose body holds at most thresh symbols, each
 * symbol that references a rule is replaced by a copy of that rule's body
 * (offsets shifted by the offset of the replaced symbol) and the referenced
 * rule's count is decremented. The copied symbols are not re-examined.
 * Finally seq_enforce_uniqueness() is run on G.
 *
 * NOTE(review): when a copy cannot be allocated the function returns
 * immediately, after the count was already decremented and without running
 * seq_enforce_uniqueness(); the replaced symbols are not freed here.
 */
void seq_merge_small_rules(seq_rule_t *G, int thresh)
{
    seq_rule_t *R = G;

    while (R) {
        if (sym_count(R->ss) <= thresh) {
            sym_t *s = R->ss;
            sym_t **pp = &R->ss;  /* slot that currently points at s */

            while (s) {
                if (s->r) {
                    sym_t *copylist;
                    sym_t *copylist_itr;

                    s->r->count--;

                    copylist = sym_copylist(s->r->ss);
                    if (!copylist) {
                        return;
                    }

                    copylist_itr = copylist;
                    while (copylist_itr) {
                        copylist_itr->offset += s->offset;
                        copylist_itr = copylist_itr->pNext;
                    }

                    /* splice the copy in where s was */
                    *pp = copylist;
                    copylist->pPrev = s->pPrev;
                    if (s->pNext) {
                        s->pNext->pPrev = sym_tail(copylist);
                    }

                    sym_tail(copylist)->pNext = s->pNext;

                    /* continue after the spliced copy */
                    pp = &(sym_tail(copylist)->pNext);
                    s = sym_tail(copylist)->pNext;
                    continue;
                }

                pp = &s->pNext;
                s = s->pNext;
            }
        }

        R = R->pNext;
    }

    seq_enforce_uniqueness(G);
}
|
||||
|
||||
/*
 * Replace repeated digrams in the body of G by rule references.
 *
 * For every symbol that has at least two predecessors, the digram formed by
 * its predecessor and itself is
 *   1. looked up among the existing rules (G->pNext onwards); on a hit the
 *      digram is collapsed into one symbol that references that rule;
 *   2. looked up earlier in the body of G; on a hit a new rule is created
 *      from the earlier occurrence and both occurrences are collapsed into
 *      references to it. New rule identifiers count down from -2.
 * When neither lookup hits, the cursor advances to the next symbol.
 * Afterwards seq_enforce_uniqueness() and seq_merge_small_rules(G, 2) run.
 *
 * A digram that ends at the last symbol of the list is collapsed without
 * touching the (non-existent) successor; this used to dereference NULL.
 *
 * NOTE(review): after step 1 the cursor has moved one symbol back, so the
 * `term` passed to sym_match_digram() in step 2 may be NULL -- behaviour in
 * that case is unchanged here and should be confirmed. Symbols that are
 * unlinked by a collapse are not freed. When creating a rule fails the
 * function returns early without the two post-processing passes.
 */
void seq_extract_hierarchy(seq_rule_t *G)
{
    int next_rule = -2;
    sym_t *cursym = G->ss;

    while (cursym) {
        sym_t *m = NULL;
        seq_rule_t *mr = NULL;

        if (cursym->pPrev && cursym->pPrev->pPrev) {
            mr = seq_match_digram(G->pNext, cursym->pPrev, cursym);
            if (mr) {
                if (cursym->pPrev->r) {
                    cursym->pPrev->r->count--;
                }

                if (cursym->r) {
                    cursym->r->count--;
                }

                mr->count++;

                /* collapse (pPrev, cursym) into pPrev */
                cursym->pPrev->r = mr;
                cursym->pPrev->c = mr->c;
                cursym->pPrev->pNext = cursym->pNext;
                if (cursym->pNext) {
                    cursym->pNext->pPrev = cursym->pPrev;
                }
                cursym = cursym->pPrev;
            }

            m = sym_match_digram(G->ss, cursym->pPrev->pPrev, cursym->pPrev, cursym);
            if (m) {
                seq_rule_t *newr;

                if (cursym->pPrev->r) {
                    cursym->pPrev->r->count--;
                }

                if (cursym->r) {
                    cursym->r->count--;
                }

                newr = seq_grammer_insert_new_rule(G, next_rule, m, m->pNext);
                if (!newr) {
                    return;
                }

                /* collapse the earlier occurrence into m */
                m->r = newr;
                m->c = next_rule;
                m->pNext = m->pNext->pNext;
                m->pNext->pPrev = m;

                /* collapse the current occurrence into pPrev */
                cursym->pPrev->r = newr;
                cursym->pPrev->c = next_rule;
                cursym->pPrev->pNext = cursym->pNext;
                if (cursym->pNext) {
                    cursym->pNext->pPrev = cursym->pPrev;
                }
                cursym = cursym->pPrev;

                next_rule--;
            }
        }

        if (!m && !mr) {
            cursym = cursym->pNext;
        }
    }

    seq_enforce_uniqueness(G);
    seq_merge_small_rules(G, 2);
}
|
||||
|
||||
/*
 * Compute the `length` field of every rule after G, then of G itself.
 *
 * The length of a rule is the largest value, over all symbols of its body,
 * of
 *     offset + c          for symbols with c >= 0
 *     offset + r->length  for symbols with c < 0
 * and is never less than 0.
 *
 * NOTE(review): rules are processed in list order, so a symbol that refers
 * to a rule later in the list reads a length that has not been computed in
 * this pass; symbols with c < 0 are assumed to have a non-NULL r.
 */
void seq_compute_lengths(seq_rule_t *G)
{
    seq_rule_t *R = G->pNext;
    sym_t *s;
    int sum;

    while (R) {
        sum = 0;
        s = R->ss;

        while (s) {
            if (s->c >= 0) {
                if (s->offset + s->c > sum) {
                    sum = s->offset + s->c;
                }
            }

            if (s->c < 0) {
                if (s->offset + s->r->length > sum) {
                    sum = s->offset + s->r->length;
                }
            }

            s = s->pNext;
        }

        R->length = sum;
        R = R->pNext;
    }

    /* the head rule is handled last, after all other rules */
    sum = 0;
    s = G->ss;

    while (s) {
        if (s->c >= 0) {
            if (s->offset + s->c > sum) {
                sum = s->offset + s->c;
            }
        }

        if (s->c < 0) {
            if (s->offset + s->r->length > sum) {
                sum = s->offset + s->r->length;
            }
        }

        s = s->pNext;
    }

    G->length = sum;
}
|
@ -1,878 +0,0 @@
|
||||
/*
|
||||
|
||||
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||
|
||||
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||
Copyright (c) 2012, The University of Waikato
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the organization nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*
 * Exported labels _neon_x4, _neon_x8 and _neon_x8_t.
 * No instructions are emitted for any of them in this file; each label is
 * only aligned and declared global.
 * NOTE(review): presumably placeholders so that references to the NEON
 * entry points still link on this target -- confirm.
 */
	.globl	_neon_x4
	.align	4
_neon_x4:

	.globl	_neon_x8
	.align	4
_neon_x8:

	.globl	_neon_x8_t
	.align	4
_neon_x8_t:
|
||||
|
||||
|
||||
/*
 * leaf_ee_init: loads the constants pointer from 0xe0(%rdi) into %r9 and
 * clears the loop counter %eax, then falls through into leaf_ee.
 */
#ifdef __APPLE__
	.globl	_leaf_ee_init
_leaf_ee_init:
#else
	.globl	leaf_ee_init
leaf_ee_init:
#endif
	#lea L_sse_constants(%rip), %r9
	movq 0xe0(%rdi), %r9
	xorl %eax, %eax
# eax is loop counter (init to 0)
# rcx is loop max count
# rsi is 'in' base pointer
# rdx is 'out' base pointer
# r8 is offsets pointer
# r9 is constants pointer
# scratch: rax r11 r12
# .align 4, 0x90

/*
 * leaf_ee: loop body LEAF_EE_1. Each iteration loads eight vectors from
 * the input, reads two output indices from (%r8,%rax,4) and 8(%r8,%rax,4),
 * stores four vectors at each of the two output positions, and advances
 * %rax by 4 until it equals %rcx.
 * There is no ret: after the loop, execution continues at the next label.
 * NOTE(review): the 0xFECA displacements under the LEAF_EE_const_N labels
 * look like placeholders that are patched elsewhere -- confirm before
 * changing any instruction, since that would move the labels.
 */
# _leaf_ee + 9 needs 16 byte alignment
#ifdef __APPLE__
	.globl	_leaf_ee
_leaf_ee:
#else
	.globl	leaf_ee
leaf_ee:
#endif
	movaps 32(%r9), %xmm0 #83.5
	movaps (%r9), %xmm8 #83.5
LEAF_EE_1:
LEAF_EE_const_0:
	movaps 0xFECA(%rsi,%rax,4), %xmm7 #83.5
LEAF_EE_const_2:
	movaps 0xFECA(%rsi,%rax,4), %xmm12 #83.5
	movaps %xmm7, %xmm6 #83.5
LEAF_EE_const_3:
	movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
	movaps %xmm12, %xmm11 #83.5
	subps %xmm10, %xmm12 #83.5
	addps %xmm10, %xmm11 #83.5
	xorps %xmm8, %xmm12 #83.5
LEAF_EE_const_1:
	movaps 0xFECA(%rsi,%rax,4), %xmm9 #83.5
LEAF_EE_const_4:
	movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
	addps %xmm9, %xmm6 #83.5
	subps %xmm9, %xmm7 #83.5
LEAF_EE_const_5:
	movaps 0xFECA(%rsi,%rax,4), %xmm13 #83.5
	movaps %xmm10, %xmm9 #83.5
LEAF_EE_const_6:
	movaps 0xFECA(%rsi,%rax,4), %xmm3 #83.5
	movaps %xmm6, %xmm5 #83.5
LEAF_EE_const_7:
	movaps 0xFECA(%rsi,%rax,4), %xmm14 #83.5
	movaps %xmm3, %xmm15 #83.5
	shufps $177, %xmm12, %xmm12 #83.5
	movaps %xmm7, %xmm4 #83.5
	movslq (%r8, %rax, 4), %r11 #83.44
	subps %xmm13, %xmm10 #83.5
	subps %xmm14, %xmm3 #83.5
	addps %xmm11, %xmm5 #83.5
	subps %xmm11, %xmm6 #83.5
	subps %xmm12, %xmm4 #83.5
	addps %xmm12, %xmm7 #83.5
	addps %xmm13, %xmm9 #83.5
	addps %xmm14, %xmm15 #83.5
	movaps 16(%r9), %xmm12 #83.5
	movaps %xmm9, %xmm1 #83.5
	movaps 16(%r9), %xmm11 #83.5
	movaps %xmm5, %xmm2 #83.5
	mulps %xmm10, %xmm12 #83.5
	subps %xmm15, %xmm9 #83.5
	addps %xmm15, %xmm1 #83.5
	mulps %xmm3, %xmm11 #83.5
	addps %xmm1, %xmm2 #83.5
	subps %xmm1, %xmm5 #83.5
	shufps $177, %xmm10, %xmm10 #83.5
	xorps %xmm8, %xmm9 #83.5
	shufps $177, %xmm3, %xmm3 #83.5
	movaps %xmm6, %xmm1 #83.5
	mulps %xmm0, %xmm10 #83.5
	movaps %xmm4, %xmm13 #83.5
	mulps %xmm0, %xmm3 #83.5
	subps %xmm10, %xmm12 #83.5
	addps %xmm3, %xmm11 #83.5
	movaps %xmm12, %xmm3 #83.5
	movaps %xmm7, %xmm14 #83.5
	shufps $177, %xmm9, %xmm9 #83.5
	subps %xmm11, %xmm12 #83.5
	addps %xmm11, %xmm3 #83.5
	subps %xmm9, %xmm1 #83.5
	addps %xmm9, %xmm6 #83.5
	addps %xmm3, %xmm4 #83.5
	subps %xmm3, %xmm13 #83.5
	xorps %xmm8, %xmm12 #83.5
	movaps %xmm2, %xmm3 #83.5
	shufps $177, %xmm12, %xmm12 #83.5
	movaps %xmm6, %xmm9 #83.5
	movslq 8(%r8, %rax, 4), %r12 #83.59
	movlhps %xmm4, %xmm3 #83.5
	addq $4, %rax
	shufps $238, %xmm4, %xmm2 #83.5
	movaps %xmm1, %xmm4 #83.5
	#movntdq %xmm3, (%rdx,%r11,4) #83.5
	subps %xmm12, %xmm7 #83.5
	addps %xmm12, %xmm14 #83.5
	movlhps %xmm7, %xmm4 #83.5
	shufps $238, %xmm7, %xmm1 #83.5
	movaps %xmm5, %xmm7 #83.5
	movlhps %xmm13, %xmm7 #83.5
	movlhps %xmm14, %xmm9 #83.5
	shufps $238, %xmm13, %xmm5 #83.5
	shufps $238, %xmm14, %xmm6 #83.5
	movaps %xmm3, (%rdx,%r11,4) #83.5
	movaps %xmm4, 16(%rdx,%r11,4) #83.5
	movaps %xmm7, 32(%rdx,%r11,4) #83.5
	movaps %xmm9, 48(%rdx,%r11,4) #83.5
	movaps %xmm2, (%rdx,%r12,4) #83.5
	movaps %xmm1, 16(%rdx,%r12,4) #83.5
	movaps %xmm5, 32(%rdx,%r12,4) #83.5
	movaps %xmm6, 48(%rdx,%r12,4) #83.5
	cmpq %rcx, %rax
	jne LEAF_EE_1
|
||||
|
||||
|
||||
|
||||
# _leaf_oo + 4 needs to be 16 byte aligned
|
||||
/*
 * leaf_oo: loop body LEAF_OO_1. Uses the register roles set up before this
 * label (%rsi input base, %rdx output base, %r8 offsets, %r9 constants,
 * %rax counter, %rcx limit). Each iteration loads eight vectors from the
 * input, reads two output indices from (%r8,%rax,4) and 8(%r8,%rax,4),
 * stores four vectors at each of the two output positions, and advances
 * %rax by 4 until it equals %rcx.
 * There is no ret: after the loop, execution continues at the next label.
 * NOTE(review): the 0xFECA displacements under the LEAF_OO_const_N labels
 * look like placeholders that are patched elsewhere -- confirm before
 * changing any instruction, since that would move the labels.
 */
#ifdef __APPLE__
	.globl	_leaf_oo
_leaf_oo:
#else
	.globl	leaf_oo
leaf_oo:
#endif
	movaps (%r9), %xmm5 #92.7
LEAF_OO_1:
LEAF_OO_const_0:
	movaps 0xFECA(%rsi,%rax,4), %xmm4 #93.5
	movaps %xmm4, %xmm6 #93.5
LEAF_OO_const_1:
	movaps 0xFECA(%rsi,%rax,4), %xmm7 #93.5
LEAF_OO_const_2:
	movaps 0xFECA(%rsi,%rax,4), %xmm10 #93.5
	addps %xmm7, %xmm6 #93.5
	subps %xmm7, %xmm4 #93.5
LEAF_OO_const_3:
	movaps 0xFECA(%rsi,%rax,4), %xmm8 #93.5
	movaps %xmm10, %xmm9 #93.5
LEAF_OO_const_4:
	movaps 0xFECA(%rsi,%rax,4), %xmm1 #93.5
	movaps %xmm6, %xmm3 #93.5
LEAF_OO_const_5:
	movaps 0xFECA(%rsi,%rax,4), %xmm11 #93.5
	movaps %xmm1, %xmm2 #93.5
LEAF_OO_const_6:
	movaps 0xFECA(%rsi,%rax,4), %xmm14 #93.5
	movaps %xmm4, %xmm15 #93.5
LEAF_OO_const_7:
	movaps 0xFECA(%rsi,%rax,4), %xmm12 #93.5
	movaps %xmm14, %xmm13 #93.5
	movslq (%r8, %rax, 4), %r11 #83.44
	subps %xmm8, %xmm10 #93.5
	addps %xmm8, %xmm9 #93.5
	addps %xmm11, %xmm2 #93.5
	subps %xmm12, %xmm14 #93.5
	subps %xmm11, %xmm1 #93.5
	addps %xmm12, %xmm13 #93.5
	addps %xmm9, %xmm3 #93.5
	subps %xmm9, %xmm6 #93.5
	xorps %xmm5, %xmm10 #93.5
	xorps %xmm5, %xmm14 #93.5
	shufps $177, %xmm10, %xmm10 #93.5
	movaps %xmm2, %xmm9 #93.5
	shufps $177, %xmm14, %xmm14 #93.5
	movaps %xmm6, %xmm7 #93.5
	movslq 8(%r8, %rax, 4), %r12 #83.59
	addq $4, %rax #92.18
	addps %xmm10, %xmm4 #93.5
	addps %xmm13, %xmm9 #93.5
	subps %xmm13, %xmm2 #93.5
	subps %xmm10, %xmm15 #93.5
	movaps %xmm1, %xmm13 #93.5
	movaps %xmm2, %xmm8 #93.5
	movlhps %xmm4, %xmm7 #93.5
	subps %xmm14, %xmm13 #93.5
	addps %xmm14, %xmm1 #93.5
	shufps $238, %xmm4, %xmm6 #93.5
	movaps %xmm3, %xmm14 #93.5
	movaps %xmm9, %xmm4 #93.5
	movlhps %xmm15, %xmm14 #93.5
	movlhps %xmm13, %xmm4 #93.5
	movlhps %xmm1, %xmm8 #93.5
	shufps $238, %xmm15, %xmm3 #93.5
	shufps $238, %xmm13, %xmm9 #93.5
	shufps $238, %xmm1, %xmm2 #93.5
	movaps %xmm14, (%rdx,%r11,4) #93.5
	movaps %xmm7, 16(%rdx,%r11,4) #93.5
	movaps %xmm4, 32(%rdx,%r11,4) #93.5
	movaps %xmm8, 48(%rdx,%r11,4) #93.5
	movaps %xmm3, (%rdx,%r12,4) #93.5
	movaps %xmm6, 16(%rdx,%r12,4) #93.5
	movaps %xmm9, 32(%rdx,%r12,4) #93.5
	movaps %xmm2, 48(%rdx,%r12,4) #93.5
	cmpq %rcx, %rax
	jne LEAF_OO_1 # Prob 95% #92.14
|
||||
|
||||
/*
 * leaf_eo: straight-line block (no loop, no ret). Uses the register roles
 * set up before this label (%rsi input base, %rdx output base, %r8 offsets,
 * %r9 constants, %rax counter). It loads eight vectors from the input,
 * reads two output indices from (%r8,%rax,4) and 8(%r8,%rax,4), stores
 * four vectors at each of the two output positions and advances %rax by 4
 * once. Execution then continues at the next label.
 * NOTE(review): the 0xFECA displacements under the LEAF_EO_const_N labels
 * look like placeholders that are patched elsewhere -- confirm before
 * changing any instruction, since that would move the labels.
 */
#ifdef __APPLE__
	.globl	_leaf_eo
_leaf_eo:
#else
	.globl	leaf_eo
leaf_eo:
#endif
LEAF_EO_const_0:
	movaps 0xFECA(%rsi,%rax,4), %xmm9 #88.5
LEAF_EO_const_2:
	movaps 0xFECA(%rsi,%rax,4), %xmm7 #88.5
	movaps %xmm9, %xmm11 #88.5
LEAF_EO_const_3:
	movaps 0xFECA(%rsi,%rax,4), %xmm5 #88.5
	movaps %xmm7, %xmm6 #88.5
LEAF_EO_const_1:
	movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
	subps %xmm5, %xmm7 #88.5
	addps %xmm4, %xmm11 #88.5
	subps %xmm4, %xmm9 #88.5
	addps %xmm5, %xmm6 #88.5
	movaps (%r9), %xmm3 #88.5
	movaps %xmm11, %xmm10 #88.5
	xorps %xmm3, %xmm7 #88.5
	movaps %xmm9, %xmm8 #88.5
	shufps $177, %xmm7, %xmm7 #88.5
	addps %xmm6, %xmm10 #88.5
	subps %xmm6, %xmm11 #88.5
	subps %xmm7, %xmm8 #88.5
	addps %xmm7, %xmm9 #88.5
	movslq 8(%r8, %rax, 4), %r12 #83.59
	movaps %xmm10, %xmm2 #88.5
	movslq (%r8, %rax, 4), %r11 #83.44
	movaps %xmm11, %xmm1 #88.5
	shufps $238, %xmm8, %xmm10 #88.5
	shufps $238, %xmm9, %xmm11 #88.5
	movaps %xmm10, (%rdx,%r12,4) #88.5
	movaps %xmm11, 16(%rdx,%r12,4) #88.5
LEAF_EO_const_4:
	movaps 0xFECA(%rsi,%rax,4), %xmm15 #88.5
LEAF_EO_const_5:
	movaps 0xFECA(%rsi,%rax,4), %xmm12 #88.5
	movaps %xmm15, %xmm14 #88.5
LEAF_EO_const_6:
	movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
	addps %xmm12, %xmm14 #88.5
	subps %xmm12, %xmm15 #88.5
LEAF_EO_const_7:
	movaps 0xFECA(%rsi,%rax,4), %xmm13 #88.5
	movaps %xmm4, %xmm5 #88.5
	movaps %xmm14, %xmm7 #88.5
	addps %xmm13, %xmm5 #88.5
	subps %xmm13, %xmm4 #88.5
	movlhps %xmm8, %xmm2 #88.5
	movaps %xmm5, %xmm8 #88.5
	movlhps %xmm15, %xmm7 #88.5
	xorps %xmm3, %xmm15 #88.5
	movaps %xmm5, %xmm6 #88.5
	subps %xmm14, %xmm5 #88.5
	addps %xmm14, %xmm6 #88.5
	movlhps %xmm9, %xmm1 #88.5
	movaps %xmm4, %xmm14 #88.5
	movlhps %xmm4, %xmm8 #88.5
	movaps %xmm1, %xmm12 #88.5
	shufps $177, %xmm15, %xmm15 #88.5
	movaps 0x30(%r9), %xmm11 #88.5
	addq $4, %rax #90.5
	subps %xmm15, %xmm14 #88.5
	mulps %xmm7, %xmm11 #88.5
	addps %xmm15, %xmm4 #88.5
	movaps 0x30(%r9), %xmm9 #88.5
	movaps 0x40(%r9), %xmm15 #88.5
	shufps $177, %xmm7, %xmm7 #88.5
	mulps %xmm8, %xmm9 #88.5
	mulps %xmm15, %xmm7 #88.5
	shufps $177, %xmm8, %xmm8 #88.5
	subps %xmm7, %xmm11 #88.5
	mulps %xmm15, %xmm8 #88.5
	movaps %xmm11, %xmm10 #88.5
	addps %xmm8, %xmm9 #88.5
	shufps $238, %xmm14, %xmm6 #88.5
	subps %xmm9, %xmm11 #88.5
	addps %xmm9, %xmm10 #88.5
	xorps %xmm3, %xmm11 #88.5
	movaps %xmm2, %xmm3 #88.5
	shufps $177, %xmm11, %xmm11 #88.5
	subps %xmm10, %xmm3 #88.5
	addps %xmm10, %xmm2 #88.5
	addps %xmm11, %xmm12 #88.5
	subps %xmm11, %xmm1 #88.5
	shufps $238, %xmm4, %xmm5 #88.5
	movaps %xmm5, 48(%rdx,%r12,4) #88.5
	movaps %xmm6, 32(%rdx,%r12,4) #88.5
	movaps %xmm2, (%rdx,%r11,4) #88.5
	movaps %xmm1, 16(%rdx,%r11,4) #88.5
	movaps %xmm3, 32(%rdx,%r11,4) #88.5
	movaps %xmm12, 48(%rdx,%r11,4) #88.5
|
||||
|
||||
|
||||
# Routine leaf_oe (exported as _leaf_oe on Apple targets).
# Reads eight 16-byte vectors through the (%rsi,%rax,4) loads tagged
# LEAF_OE_const_0..7, combines them with packed single-precision add, sub,
# mul, xor and shuffle steps, and stores eight result vectors relative to
# %rdx: four at the index read from (%r8,%rax,4) and four at the index read
# from 8(%r8,%rax,4).
# The 0xFECA displacements are placeholders. The sse_leaf_oe_offsets table
# records the position of every tagged label plus 4 or 5 bytes -- presumably
# the location of the 32-bit displacement so it can be patched at run time
# (TODO confirm against the code generator).
# There is no ret here: control continues at the leaf_end label that follows.
#ifdef __APPLE__
|
||||
.globl _leaf_oe
|
||||
_leaf_oe:
|
||||
#else
|
||||
.globl leaf_oe
|
||||
leaf_oe:
|
||||
#endif
|
||||
# Load 16 bytes from (%r9) into xmm0; it is used below only as an xorps mask.
movaps (%r9), %xmm0 #59.5
|
||||
#movaps 0x20(%r9), %xmm1 #59.5
|
||||
LEAF_OE_const_2:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm6 #70.5
|
||||
LEAF_OE_const_3:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm8 #70.5
|
||||
movaps %xmm6, %xmm10 #70.5
|
||||
shufps $228, %xmm8, %xmm10 #70.5
|
||||
movaps %xmm10, %xmm9 #70.5
|
||||
shufps $228, %xmm6, %xmm8 #70.5
|
||||
LEAF_OE_const_0:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm12 #70.5
|
||||
LEAF_OE_const_1:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
|
||||
movaps %xmm12, %xmm14 #70.5
|
||||
# Sign-extended 32-bit index into %rdx for the first four stores.
movslq (%r8, %rax, 4), %r11 #83.44
|
||||
addps %xmm8, %xmm9 #70.5
|
||||
subps %xmm8, %xmm10 #70.5
|
||||
addps %xmm7, %xmm14 #70.5
|
||||
subps %xmm7, %xmm12 #70.5
|
||||
movaps %xmm9, %xmm4 #70.5
|
||||
movaps %xmm14, %xmm13 #70.5
|
||||
shufps $238, %xmm10, %xmm4 #70.5
|
||||
xorps %xmm0, %xmm10 #70.5
|
||||
shufps $177, %xmm10, %xmm10 #70.5
|
||||
movaps %xmm12, %xmm11 #70.5
|
||||
movaps %xmm14, %xmm5 #70.5
|
||||
addps %xmm9, %xmm13 #70.5
|
||||
subps %xmm10, %xmm11 #70.5
|
||||
subps %xmm9, %xmm14 #70.5
|
||||
shufps $238, %xmm12, %xmm5 #70.5
|
||||
addps %xmm10, %xmm12 #70.5
|
||||
# Sign-extended 32-bit index into %rdx for the last four stores.
movslq 8(%r8, %rax, 4), %r12 #83.59
|
||||
movlhps %xmm11, %xmm13 #70.5
|
||||
movaps %xmm13, (%rdx,%r11,4) #70.5
|
||||
# Multiplier vectors are taken from 0x30(%r9) and 0x40(%r9).
movaps 0x30(%r9), %xmm13 #70.5
|
||||
movlhps %xmm12, %xmm14 #70.5
|
||||
movaps 0x40(%r9), %xmm12 #70.5
|
||||
mulps %xmm5, %xmm13 #70.5
|
||||
shufps $177, %xmm5, %xmm5 #70.5
|
||||
mulps %xmm12, %xmm5 #70.5
|
||||
movaps %xmm14, 16(%rdx,%r11,4) #70.5
|
||||
subps %xmm5, %xmm13 #70.5
|
||||
movaps 0x30(%r9), %xmm5 #70.5
|
||||
mulps %xmm4, %xmm5 #70.5
|
||||
shufps $177, %xmm4, %xmm4 #70.5
|
||||
mulps %xmm12, %xmm4 #70.5
|
||||
LEAF_OE_const_4:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm9 #70.5
|
||||
addps %xmm4, %xmm5 #70.5
|
||||
LEAF_OE_const_6:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
|
||||
movaps %xmm9, %xmm3 #70.5
|
||||
LEAF_OE_const_7:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm2 #70.5
|
||||
movaps %xmm7, %xmm6 #70.5
|
||||
LEAF_OE_const_5:
|
||||
movaps 0xFECA(%rsi,%rax,4), %xmm15 #70.5
|
||||
movaps %xmm13, %xmm4 #70.5
|
||||
subps %xmm2, %xmm7 #70.5
|
||||
addps %xmm15, %xmm3 #70.5
|
||||
subps %xmm15, %xmm9 #70.5
|
||||
addps %xmm2, %xmm6 #70.5
|
||||
subps %xmm5, %xmm13 #70.5
|
||||
addps %xmm5, %xmm4 #70.5
|
||||
xorps %xmm0, %xmm7 #70.5
|
||||
# Step %rax by 4; all loads indexed by %rax have been issued by this point.
addq $4, %rax #72.5
|
||||
movaps %xmm3, %xmm2 #70.5
|
||||
shufps $177, %xmm7, %xmm7 #70.5
|
||||
movaps %xmm9, %xmm8 #70.5
|
||||
xorps %xmm0, %xmm13 #70.5
|
||||
addps %xmm6, %xmm2 #70.5
|
||||
subps %xmm7, %xmm8 #70.5
|
||||
subps %xmm6, %xmm3 #70.5
|
||||
addps %xmm7, %xmm9 #70.5
|
||||
movaps %xmm2, %xmm10 #70.5
|
||||
movaps %xmm3, %xmm11 #70.5
|
||||
shufps $238, %xmm8, %xmm2 #70.5
|
||||
shufps $238, %xmm9, %xmm3 #70.5
|
||||
movaps %xmm2, %xmm14 #70.5
|
||||
shufps $177, %xmm13, %xmm13 #70.5
|
||||
subps %xmm4, %xmm14 #70.5
|
||||
addps %xmm4, %xmm2 #70.5
|
||||
movaps %xmm3, %xmm4 #70.5
|
||||
subps %xmm13, %xmm3 #70.5
|
||||
addps %xmm13, %xmm4 #70.5
|
||||
movlhps %xmm8, %xmm10 #70.5
|
||||
movlhps %xmm9, %xmm11 #70.5
|
||||
# Remaining six stores: two more at index %r11, then four at index %r12.
movaps %xmm10, 32(%rdx,%r11,4) #70.5
|
||||
movaps %xmm11, 48(%rdx,%r11,4) #70.5
|
||||
movaps %xmm2, (%rdx,%r12,4) #70.5
|
||||
movaps %xmm3, 16(%rdx,%r12,4) #70.5
|
||||
movaps %xmm14, 32(%rdx,%r12,4) #70.5
|
||||
movaps %xmm4, 48(%rdx,%r12,4) #70.5
|
||||
|
||||
|
||||
# Marker label leaf_end (exported as _leaf_end on Apple targets).
# It emits no instructions; it names the address directly after the last
# store of the preceding leaf routine and directly before x_init.
# NOTE(review): presumably used to measure or delimit the leaf code -- confirm
# against the callers.
#ifdef __APPLE__
|
||||
.globl _leaf_end
|
||||
_leaf_end:
|
||||
#else
|
||||
.globl leaf_end
|
||||
leaf_end:
|
||||
#endif
|
||||
|
||||
# Entry x_init (exported as _x_init on Apple targets).
# Loads 16 bytes from (%r9) into %xmm3 and the 64-bit value stored at
# 0x20(%rdi) into %r8. There is no ret or jump afterwards, so control
# continues into the x4 label that follows, which reads both registers.
#ifdef __APPLE__
|
||||
.globl _x_init
|
||||
_x_init:
|
||||
#else
|
||||
.globl x_init
|
||||
x_init:
|
||||
#endif
|
||||
#movaps L_sse_constants(%rip), %xmm3 #34.3
|
||||
# The xorps mask now comes from (%r9) rather than the RIP-relative load above.
movaps (%r9), %xmm3 #34.3
|
||||
movq 0x20(%rdi),%r8
|
||||
# Routine x4 (exported as _x4 on Apple targets).
# Updates in place the eight 16-byte vectors at (%rdx) .. 112(%rdx).
# Inputs read but not set here: %rdx (data pointer), %r8 (pointer to four
# 16-byte multiplier vectors at offsets 0, 16, 32, 48) and %xmm3 (xorps mask).
# The first half handles the vectors at offsets 0, 32, 64, 96 and the second
# half those at 16, 48, 80, 112. Each half forms products of the form
# a*c - swap(a)*d and b*c + swap(b)*d (shufps $177 swaps adjacent lanes),
# then adds and subtracts their sum and masked, swapped difference to the
# remaining two vectors. Ends with ret.
#ifdef __APPLE__
|
||||
.globl _x4
|
||||
_x4:
|
||||
#else
|
||||
.globl x4
|
||||
x4:
|
||||
#endif
|
||||
# First half: inputs at 64(%rdx), 96(%rdx), (%rdx) and later 32(%rdx).
movaps 64(%rdx), %xmm0 #34.3
|
||||
movaps 96(%rdx), %xmm1 #34.3
|
||||
movaps (%rdx), %xmm7 #34.3
|
||||
movaps (%r8), %xmm4 #const
|
||||
movaps %xmm7, %xmm9 #34.3
|
||||
movaps %xmm4, %xmm6 #34.3
|
||||
movaps 16(%r8), %xmm2 #const
|
||||
mulps %xmm0, %xmm6 #34.3
|
||||
mulps %xmm1, %xmm4 #34.3
|
||||
shufps $177, %xmm0, %xmm0 #34.3
|
||||
shufps $177, %xmm1, %xmm1 #34.3
|
||||
mulps %xmm2, %xmm0 #34.3
|
||||
mulps %xmm1, %xmm2 #34.3
|
||||
subps %xmm0, %xmm6 #34.3
|
||||
addps %xmm2, %xmm4 #34.3
|
||||
movaps %xmm6, %xmm5 #34.3
|
||||
subps %xmm4, %xmm6 #34.3
|
||||
addps %xmm4, %xmm5 #34.3
|
||||
movaps 32(%rdx), %xmm8 #34.3
|
||||
xorps %xmm3, %xmm6 #34.3
|
||||
shufps $177, %xmm6, %xmm6 #34.3
|
||||
movaps %xmm8, %xmm10 #34.3
|
||||
# Early load of 112(%rdx) for the second half.
movaps 112(%rdx), %xmm12 #34.3
|
||||
subps %xmm5, %xmm9 #34.3
|
||||
addps %xmm5, %xmm7 #34.3
|
||||
addps %xmm6, %xmm10 #34.3
|
||||
subps %xmm6, %xmm8 #34.3
|
||||
movaps %xmm7, (%rdx) #34.3
|
||||
movaps %xmm8, 32(%rdx) #34.3
|
||||
movaps %xmm9, 64(%rdx) #34.3
|
||||
movaps %xmm10, 96(%rdx) #34.3
|
||||
# Second half: same pattern with the multipliers at 32(%r8) and 48(%r8).
movaps 32(%r8), %xmm14 #const #34.3
|
||||
movaps 80(%rdx), %xmm11 #34.3
|
||||
movaps %xmm14, %xmm0 #34.3
|
||||
movaps 48(%r8), %xmm13 #const #34.3
|
||||
mulps %xmm11, %xmm0 #34.3
|
||||
mulps %xmm12, %xmm14 #34.3
|
||||
shufps $177, %xmm11, %xmm11 #34.3
|
||||
shufps $177, %xmm12, %xmm12 #34.3
|
||||
mulps %xmm13, %xmm11 #34.3
|
||||
mulps %xmm12, %xmm13 #34.3
|
||||
subps %xmm11, %xmm0 #34.3
|
||||
addps %xmm13, %xmm14 #34.3
|
||||
movaps %xmm0, %xmm15 #34.3
|
||||
subps %xmm14, %xmm0 #34.3
|
||||
addps %xmm14, %xmm15 #34.3
|
||||
xorps %xmm3, %xmm0 #34.3
|
||||
movaps 16(%rdx), %xmm1 #34.3
|
||||
movaps 48(%rdx), %xmm2 #34.3
|
||||
movaps %xmm1, %xmm4 #34.3
|
||||
shufps $177, %xmm0, %xmm0 #34.3
|
||||
movaps %xmm2, %xmm5 #34.3
|
||||
addps %xmm15, %xmm1 #34.3
|
||||
subps %xmm0, %xmm2 #34.3
|
||||
subps %xmm15, %xmm4 #34.3
|
||||
addps %xmm0, %xmm5 #34.3
|
||||
movaps %xmm1, 16(%rdx) #34.3
|
||||
movaps %xmm2, 48(%rdx) #34.3
|
||||
movaps %xmm4, 80(%rdx) #34.3
|
||||
movaps %xmm5, 112(%rdx) #34.3
|
||||
ret
|
||||
|
||||
# _x8_soft + 5 needs to be 16 byte aligned
# Routine x8_soft (exported as _x8_soft on Apple targets).
# Inputs read here: %rdx (base data pointer), %rcx (loop bound, also used
# scaled by 4 as the byte distance between the eight data streams), %r8
# (pointer to the multiplier vectors) and %xmm3 (xorps mask; not loaded in
# this routine).
# Setup zeroes %rax, copies %rdx to %rbx and %r8 to %rsi, and derives seven
# further stream pointers %r9..%r15, each the previous one plus %rcx*4 bytes.
# Every loop pass reads one 16-byte vector from each of the eight streams at
# byte offset %rax*4, reads six 16-byte multiplier vectors from (%rsi) ..
# 80(%rsi), writes eight result vectors back to the same stream positions,
# then advances %rsi by 96 and %rax by 4.
# The loop ends only when %rax equals %rcx exactly (jne), so %rcx is assumed
# to be a positive multiple of 4 -- TODO confirm at the call sites.
# NOTE(review): %rbx and %r12-%r15 are overwritten without being saved; they
# are callee-saved in the System V AMD64 ABI, so callers presumably preserve
# them -- confirm.
#ifdef __APPLE__
|
||||
.globl _x8_soft
|
||||
_x8_soft:
|
||||
#else
|
||||
.globl x8_soft
|
||||
x8_soft:
|
||||
#endif
|
||||
# Writing %eax clears the whole of %rax.
xorl %eax, %eax
|
||||
movq %rdx, %rbx
|
||||
movq %r8, %rsi
|
||||
# Stream pointers: %r9 = %rdx + %rcx*4, and each next one adds %rcx*4 again.
leaq (%rdx,%rcx,4), %r9
|
||||
leaq (%r9,%rcx,4), %r10
|
||||
leaq (%r10,%rcx,4), %r11
|
||||
leaq (%r11,%rcx,4), %r12
|
||||
leaq (%r12,%rcx,4), %r13
|
||||
leaq (%r13,%rcx,4), %r14
|
||||
leaq (%r14,%rcx,4), %r15
|
||||
X8_soft_loop:
|
||||
movaps (%rsi), %xmm9
|
||||
movaps (%r10,%rax,4), %xmm6
|
||||
movaps %xmm9, %xmm11
|
||||
movaps (%r11,%rax,4), %xmm7
|
||||
movaps 16(%rsi), %xmm8
|
||||
mulps %xmm6, %xmm11
|
||||
mulps %xmm7, %xmm9
|
||||
shufps $177, %xmm6, %xmm6
|
||||
mulps %xmm8, %xmm6
|
||||
shufps $177, %xmm7, %xmm7
|
||||
subps %xmm6, %xmm11
|
||||
mulps %xmm7, %xmm8
|
||||
movaps %xmm11, %xmm10
|
||||
addps %xmm8, %xmm9
|
||||
movaps 32(%rsi), %xmm15
|
||||
addps %xmm9, %xmm10
|
||||
subps %xmm9, %xmm11
|
||||
movaps (%rbx,%rax,4), %xmm5
|
||||
movaps %xmm15, %xmm6
|
||||
movaps (%r12,%rax,4), %xmm12
|
||||
movaps %xmm5, %xmm2
|
||||
movaps (%r14,%rax,4), %xmm13
|
||||
xorps %xmm3, %xmm11 #const
|
||||
movaps 48(%rsi), %xmm14
|
||||
subps %xmm10, %xmm2
|
||||
mulps %xmm12, %xmm6
|
||||
addps %xmm10, %xmm5
|
||||
mulps %xmm13, %xmm15
|
||||
movaps 64(%rsi), %xmm10
|
||||
movaps %xmm5, %xmm0
|
||||
shufps $177, %xmm12, %xmm12
|
||||
shufps $177, %xmm13, %xmm13
|
||||
mulps %xmm14, %xmm12
|
||||
mulps %xmm13, %xmm14
|
||||
subps %xmm12, %xmm6
|
||||
addps %xmm14, %xmm15
|
||||
movaps (%r13,%rax,4), %xmm7
|
||||
movaps %xmm10, %xmm13
|
||||
movaps (%r15,%rax,4), %xmm8
|
||||
movaps %xmm6, %xmm12
|
||||
movaps 80(%rsi), %xmm9
|
||||
# All six multiplier vectors of this pass are loaded; move to the next set.
addq $96, %rsi
|
||||
mulps %xmm7, %xmm13
|
||||
subps %xmm15, %xmm6
|
||||
addps %xmm15, %xmm12
|
||||
mulps %xmm8, %xmm10
|
||||
subps %xmm12, %xmm0
|
||||
addps %xmm12, %xmm5
|
||||
shufps $177, %xmm7, %xmm7
|
||||
xorps %xmm3, %xmm6 #const
|
||||
shufps $177, %xmm8, %xmm8
|
||||
movaps %xmm2, %xmm12
|
||||
mulps %xmm9, %xmm7
|
||||
mulps %xmm8, %xmm9
|
||||
subps %xmm7, %xmm13
|
||||
addps %xmm9, %xmm10
|
||||
movaps (%r9,%rax,4), %xmm4
|
||||
shufps $177, %xmm11, %xmm11
|
||||
movaps %xmm4, %xmm1
|
||||
shufps $177, %xmm6, %xmm6
|
||||
addps %xmm11, %xmm1
|
||||
subps %xmm11, %xmm4
|
||||
addps %xmm6, %xmm12
|
||||
subps %xmm6, %xmm2
|
||||
movaps %xmm13, %xmm11
|
||||
movaps %xmm4, %xmm14
|
||||
movaps %xmm1, %xmm6
|
||||
subps %xmm10, %xmm13
|
||||
addps %xmm10, %xmm11
|
||||
xorps %xmm3, %xmm13 #const
|
||||
addps %xmm11, %xmm4
|
||||
subps %xmm11, %xmm14
|
||||
shufps $177, %xmm13, %xmm13
|
||||
# Write the eight results back to the stream positions they were read from.
movaps %xmm5, (%rbx,%rax,4)
|
||||
movaps %xmm4, (%r9,%rax,4)
|
||||
movaps %xmm2, (%r10,%rax,4)
|
||||
subps %xmm13, %xmm1
|
||||
addps %xmm13, %xmm6
|
||||
movaps %xmm1, (%r11,%rax,4)
|
||||
movaps %xmm0, (%r12,%rax,4)
|
||||
movaps %xmm14, (%r13,%rax,4)
|
||||
movaps %xmm12, (%r14,%rax,4)
|
||||
movaps %xmm6, (%r15,%rax,4)
|
||||
addq $4, %rax
|
||||
cmpq %rcx, %rax
|
||||
jne X8_soft_loop
|
||||
ret
|
||||
|
||||
# Routine x8_hard (exported as _x8_hard on Apple targets).
# Same arithmetic sequence as the X8_soft_loop body, but all eight data
# streams are addressed as 0xFECA(%rdx,%rax,4): the 0xFECA displacements at
# the X8_const_0..7 loads and X8_const1_0..7 stores are placeholders,
# presumably patched with the real stream offsets at run time (TODO confirm
# against the code generator).
# Inputs read here: %r9 (16-byte xorps mask, loaded into %xmm5), %r8
# (multiplier vectors, advanced by 96 per pass), %rdx, %rcx (loop bound) and
# %rax, which is NOT initialised in this block.
# NOTE(review): no ret follows the final jne; run in place, control would
# continue into the offset tables below. Presumably this body is only used as
# a template that is copied and completed elsewhere -- confirm.
#ifdef __APPLE__
|
||||
.globl _x8_hard
|
||||
_x8_hard:
|
||||
#else
|
||||
.globl x8_hard
|
||||
x8_hard:
|
||||
#endif
|
||||
# Loaded once, outside the loop; used only by the xorps instructions below.
movaps (%r9), %xmm5
|
||||
X8_loop:
|
||||
movaps (%r8), %xmm9
|
||||
X8_const_2:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm6
|
||||
movaps %xmm9, %xmm11
|
||||
X8_const_3:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm7
|
||||
movaps 16(%r8), %xmm8
|
||||
mulps %xmm6, %xmm11
|
||||
mulps %xmm7, %xmm9
|
||||
shufps $177, %xmm6, %xmm6
|
||||
mulps %xmm8, %xmm6
|
||||
shufps $177, %xmm7, %xmm7
|
||||
subps %xmm6, %xmm11
|
||||
mulps %xmm7, %xmm8
|
||||
movaps %xmm11, %xmm10
|
||||
addps %xmm8, %xmm9
|
||||
movaps 32(%r8), %xmm15
|
||||
addps %xmm9, %xmm10
|
||||
subps %xmm9, %xmm11
|
||||
X8_const_0:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm3
|
||||
movaps %xmm15, %xmm6
|
||||
X8_const_4:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm12
|
||||
movaps %xmm3, %xmm2
|
||||
X8_const_6:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm13
|
||||
xorps %xmm5, %xmm11
|
||||
movaps 48(%r8), %xmm14
|
||||
subps %xmm10, %xmm2
|
||||
mulps %xmm12, %xmm6
|
||||
addps %xmm10, %xmm3
|
||||
mulps %xmm13, %xmm15
|
||||
movaps 64(%r8), %xmm10
|
||||
movaps %xmm3, %xmm0
|
||||
shufps $177, %xmm12, %xmm12
|
||||
shufps $177, %xmm13, %xmm13
|
||||
mulps %xmm14, %xmm12
|
||||
mulps %xmm13, %xmm14
|
||||
subps %xmm12, %xmm6
|
||||
addps %xmm14, %xmm15
|
||||
X8_const_5:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm7
|
||||
movaps %xmm10, %xmm13
|
||||
X8_const_7:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm8
|
||||
movaps %xmm6, %xmm12
|
||||
movaps 80(%r8), %xmm9
|
||||
# All six multiplier vectors of this pass are loaded; move to the next set.
addq $96, %r8
|
||||
mulps %xmm7, %xmm13
|
||||
subps %xmm15, %xmm6
|
||||
addps %xmm15, %xmm12
|
||||
mulps %xmm8, %xmm10
|
||||
subps %xmm12, %xmm0
|
||||
addps %xmm12, %xmm3
|
||||
shufps $177, %xmm7, %xmm7
|
||||
xorps %xmm5, %xmm6
|
||||
shufps $177, %xmm8, %xmm8
|
||||
movaps %xmm2, %xmm12
|
||||
mulps %xmm9, %xmm7
|
||||
mulps %xmm8, %xmm9
|
||||
subps %xmm7, %xmm13
|
||||
addps %xmm9, %xmm10
|
||||
X8_const_1:
|
||||
movaps 0xFECA(%rdx,%rax,4), %xmm4
|
||||
shufps $177, %xmm11, %xmm11
|
||||
movaps %xmm4, %xmm1
|
||||
shufps $177, %xmm6, %xmm6
|
||||
addps %xmm11, %xmm1
|
||||
subps %xmm11, %xmm4
|
||||
addps %xmm6, %xmm12
|
||||
subps %xmm6, %xmm2
|
||||
movaps %xmm13, %xmm11
|
||||
movaps %xmm4, %xmm14
|
||||
movaps %xmm1, %xmm6
|
||||
subps %xmm10, %xmm13
|
||||
addps %xmm10, %xmm11
|
||||
xorps %xmm5, %xmm13
|
||||
addps %xmm11, %xmm4
|
||||
subps %xmm11, %xmm14
|
||||
shufps $177, %xmm13, %xmm13
|
||||
# Eight stores, each with its own placeholder displacement.
X8_const1_0:
|
||||
movaps %xmm3, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_1:
|
||||
movaps %xmm4, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_2:
|
||||
movaps %xmm2, 0xFECA(%rdx,%rax,4)
|
||||
subps %xmm13, %xmm1
|
||||
addps %xmm13, %xmm6
|
||||
X8_const1_3:
|
||||
movaps %xmm1, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_4:
|
||||
movaps %xmm0, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_5:
|
||||
movaps %xmm14, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_6:
|
||||
movaps %xmm12, 0xFECA(%rdx,%rax,4)
|
||||
X8_const1_7:
|
||||
movaps %xmm6, 0xFECA(%rdx,%rax,4)
|
||||
# Loop until %rax equals %rcx exactly.
addq $4, %rax
|
||||
cmpq %rcx, %rax
|
||||
jne X8_loop
|
||||
|
||||
# Offset tables sse_leaf_ee_offsets, sse_leaf_oo_offsets, sse_leaf_eo_offsets
# and sse_leaf_oe_offsets (underscore-prefixed on Apple targets).
# Each table holds eight 32-bit entries, listed in label-number order 0..7.
# An entry is the distance from the start of the matching leaf routine to its
# LEAF_xx_const_N label, plus 4 or 5. The added value presumably skips the
# opcode bytes so the entry points at the 32-bit 0xFECA placeholder inside
# that instruction; it depends on the exact instruction encoding, so it must
# be rechecked whenever a tagged instruction changes (TODO confirm against
# the code generator).
# The two preprocessor branches are identical apart from the leading
# underscore on the table names and routine symbols.
# NOTE(review): .align 4 is a byte count on x86 ELF targets but a power of
# two on Mach-O, so the two branches may not align identically -- confirm
# this is intended.
#ifdef __APPLE__
|
||||
.globl _sse_leaf_ee_offsets
|
||||
.globl _sse_leaf_oo_offsets
|
||||
.globl _sse_leaf_eo_offsets
|
||||
.globl _sse_leaf_oe_offsets
|
||||
.align 4
|
||||
_sse_leaf_ee_offsets:
|
||||
.long LEAF_EE_const_0-_leaf_ee+0x4
|
||||
.long LEAF_EE_const_1-_leaf_ee+0x5
|
||||
.long LEAF_EE_const_2-_leaf_ee+0x5
|
||||
.long LEAF_EE_const_3-_leaf_ee+0x5
|
||||
.long LEAF_EE_const_4-_leaf_ee+0x5
|
||||
.long LEAF_EE_const_5-_leaf_ee+0x5
|
||||
.long LEAF_EE_const_6-_leaf_ee+0x4
|
||||
.long LEAF_EE_const_7-_leaf_ee+0x5
|
||||
_sse_leaf_oo_offsets:
|
||||
.long LEAF_OO_const_0-_leaf_oo+0x4
|
||||
.long LEAF_OO_const_1-_leaf_oo+0x4
|
||||
.long LEAF_OO_const_2-_leaf_oo+0x5
|
||||
.long LEAF_OO_const_3-_leaf_oo+0x5
|
||||
.long LEAF_OO_const_4-_leaf_oo+0x4
|
||||
.long LEAF_OO_const_5-_leaf_oo+0x5
|
||||
.long LEAF_OO_const_6-_leaf_oo+0x5
|
||||
.long LEAF_OO_const_7-_leaf_oo+0x5
|
||||
_sse_leaf_eo_offsets:
|
||||
.long LEAF_EO_const_0-_leaf_eo+0x5
|
||||
.long LEAF_EO_const_1-_leaf_eo+0x4
|
||||
.long LEAF_EO_const_2-_leaf_eo+0x4
|
||||
.long LEAF_EO_const_3-_leaf_eo+0x4
|
||||
.long LEAF_EO_const_4-_leaf_eo+0x5
|
||||
.long LEAF_EO_const_5-_leaf_eo+0x5
|
||||
.long LEAF_EO_const_6-_leaf_eo+0x4
|
||||
.long LEAF_EO_const_7-_leaf_eo+0x5
|
||||
_sse_leaf_oe_offsets:
|
||||
.long LEAF_OE_const_0-_leaf_oe+0x5
|
||||
.long LEAF_OE_const_1-_leaf_oe+0x4
|
||||
.long LEAF_OE_const_2-_leaf_oe+0x4
|
||||
.long LEAF_OE_const_3-_leaf_oe+0x5
|
||||
.long LEAF_OE_const_4-_leaf_oe+0x5
|
||||
.long LEAF_OE_const_5-_leaf_oe+0x5
|
||||
.long LEAF_OE_const_6-_leaf_oe+0x4
|
||||
.long LEAF_OE_const_7-_leaf_oe+0x4
|
||||
#else
|
||||
.globl sse_leaf_ee_offsets
|
||||
.globl sse_leaf_oo_offsets
|
||||
.globl sse_leaf_eo_offsets
|
||||
.globl sse_leaf_oe_offsets
|
||||
.align 4
|
||||
sse_leaf_ee_offsets:
|
||||
.long LEAF_EE_const_0-leaf_ee+0x4
|
||||
.long LEAF_EE_const_1-leaf_ee+0x5
|
||||
.long LEAF_EE_const_2-leaf_ee+0x5
|
||||
.long LEAF_EE_const_3-leaf_ee+0x5
|
||||
.long LEAF_EE_const_4-leaf_ee+0x5
|
||||
.long LEAF_EE_const_5-leaf_ee+0x5
|
||||
.long LEAF_EE_const_6-leaf_ee+0x4
|
||||
.long LEAF_EE_const_7-leaf_ee+0x5
|
||||
sse_leaf_oo_offsets:
|
||||
.long LEAF_OO_const_0-leaf_oo+0x4
|
||||
.long LEAF_OO_const_1-leaf_oo+0x4
|
||||
.long LEAF_OO_const_2-leaf_oo+0x5
|
||||
.long LEAF_OO_const_3-leaf_oo+0x5
|
||||
.long LEAF_OO_const_4-leaf_oo+0x4
|
||||
.long LEAF_OO_const_5-leaf_oo+0x5
|
||||
.long LEAF_OO_const_6-leaf_oo+0x5
|
||||
.long LEAF_OO_const_7-leaf_oo+0x5
|
||||
sse_leaf_eo_offsets:
|
||||
.long LEAF_EO_const_0-leaf_eo+0x5
|
||||
.long LEAF_EO_const_1-leaf_eo+0x4
|
||||
.long LEAF_EO_const_2-leaf_eo+0x4
|
||||
.long LEAF_EO_const_3-leaf_eo+0x4
|
||||
.long LEAF_EO_const_4-leaf_eo+0x5
|
||||
.long LEAF_EO_const_5-leaf_eo+0x5
|
||||
.long LEAF_EO_const_6-leaf_eo+0x4
|
||||
.long LEAF_EO_const_7-leaf_eo+0x5
|
||||
sse_leaf_oe_offsets:
|
||||
.long LEAF_OE_const_0-leaf_oe+0x5
|
||||
.long LEAF_OE_const_1-leaf_oe+0x4
|
||||
.long LEAF_OE_const_2-leaf_oe+0x4
|
||||
.long LEAF_OE_const_3-leaf_oe+0x5
|
||||
.long LEAF_OE_const_4-leaf_oe+0x5
|
||||
.long LEAF_OE_const_5-leaf_oe+0x5
|
||||
.long LEAF_OE_const_6-leaf_oe+0x4
|
||||
.long LEAF_OE_const_7-leaf_oe+0x4
|
||||
#endif
|
||||
|
||||
# Constant tables sse_constants and sse_constants_inv (underscore-prefixed on
# Apple targets), placed in the data section on a 16-byte boundary
# (.p2align 4). Each table is five rows of four 32-bit words, i.e. five
# 16-byte vectors at byte offsets 0x00, 0x10, 0x20, 0x30 and 0x40.
# Read as IEEE-754 single precision: 0x3f800000 is 1.0, 0x3f3504f3 is about
# 0.70710677 (the square root of one half), 0xbf3504f3 is its negative, and
# 0x80000000 has only the sign bit set.
# NOTE(review): the routines above read (%r9), 0x30(%r9) and 0x40(%r9);
# presumably %r9 points at one of these two tables -- confirm at the callers.
#ifdef __APPLE__
|
||||
.data
|
||||
#else
|
||||
.section .data
|
||||
#endif
|
||||
.p2align 4
|
||||
#ifdef __APPLE__
|
||||
.globl _sse_constants
|
||||
_sse_constants:
|
||||
#else
|
||||
.globl sse_constants
|
||||
sse_constants:
|
||||
#endif
|
||||
# Row 0: sign-bit mask on the odd lanes (flips the sign of lanes 1 and 3 under xorps).
.long 0x00000000,0x80000000,0x00000000,0x80000000
|
||||
# Row 1: the square root of one half in every lane.
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
|
||||
# Row 2: minus/plus the square root of one half, alternating.
.long 0xbf3504f3,0x3f3504f3,0xbf3504f3,0x3f3504f3
|
||||
# Row 3: 1.0, 1.0, then the square root of one half twice.
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
|
||||
# Row 4: 0.0, 0.0, then minus/plus the square root of one half.
.long 0x00000000,0x00000000,0xbf3504f3,0x3f3504f3
|
||||
# Second table: rows 1 and 3 match sse_constants; rows 0, 2 and 4 carry the
# sign bits on the opposite lanes.
#ifdef __APPLE__
|
||||
.globl _sse_constants_inv
|
||||
_sse_constants_inv:
|
||||
#else
|
||||
.globl sse_constants_inv
|
||||
sse_constants_inv:
|
||||
#endif
|
||||
# Row 0: sign-bit mask on the even lanes (lanes 0 and 2).
.long 0x80000000,0x00000000,0x80000000,0x00000000
|
||||
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
|
||||
# Row 2: plus/minus the square root of one half, alternating.
.long 0x3f3504f3,0xbf3504f3,0x3f3504f3,0xbf3504f3
|
||||
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
|
||||
# Row 4: 0.0, 0.0, then plus/minus the square root of one half.
.long 0x00000000,0x00000000,0x3f3504f3,0xbf3504f3
|
Loading…
Reference in new issue