parent
c223a6a35f
commit
14d918151b
@ -0,0 +1,6 @@
|
|||||||
|
FFTS was developed at the University of Waikato by Anthony Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
The following authors have also graciously contributed code:
|
||||||
|
|
||||||
|
Michael Zucchi <notzed@gmail.com> -- JNI java/android support
|
||||||
|
Michael Cree <mcree@orcon.net.nz> -- Architecture specific code, including support for Altivec and DEC Alpha
|
@ -0,0 +1,31 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
@ -0,0 +1,11 @@
|
|||||||
|
AUTOMAKE_OPTIONS = foreign
|
||||||
|
SUBDIRS = src tests
|
||||||
|
EXTRA_DIST=COPYRIGHT ffts.pc.in build_iphone.sh build_android.sh
|
||||||
|
ACLOCAL_AMFLAGS = -Im4
|
||||||
|
|
||||||
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
|
pkgconfig_DATA = ffts.pc
|
||||||
|
|
||||||
|
if ENABLE_JNI
|
||||||
|
SUBDIRS += java
|
||||||
|
endif
|
@ -0,0 +1,842 @@
|
|||||||
|
# Makefile.in generated by automake 1.12.4 from Makefile.am.
|
||||||
|
# @configure_input@
|
||||||
|
|
||||||
|
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
@SET_MAKE@
|
||||||
|
|
||||||
|
VPATH = @srcdir@
|
||||||
|
am__make_dryrun = \
|
||||||
|
{ \
|
||||||
|
am__dry=no; \
|
||||||
|
case $$MAKEFLAGS in \
|
||||||
|
*\\[\ \ ]*) \
|
||||||
|
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
|
||||||
|
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
|
||||||
|
*) \
|
||||||
|
for am__flg in $$MAKEFLAGS; do \
|
||||||
|
case $$am__flg in \
|
||||||
|
*=*|--*) ;; \
|
||||||
|
*n*) am__dry=yes; break;; \
|
||||||
|
esac; \
|
||||||
|
done;; \
|
||||||
|
esac; \
|
||||||
|
test $$am__dry = yes; \
|
||||||
|
}
|
||||||
|
pkgdatadir = $(datadir)/@PACKAGE@
|
||||||
|
pkgincludedir = $(includedir)/@PACKAGE@
|
||||||
|
pkglibdir = $(libdir)/@PACKAGE@
|
||||||
|
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||||
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||||
|
install_sh_DATA = $(install_sh) -c -m 644
|
||||||
|
install_sh_PROGRAM = $(install_sh) -c
|
||||||
|
install_sh_SCRIPT = $(install_sh) -c
|
||||||
|
INSTALL_HEADER = $(INSTALL_DATA)
|
||||||
|
transform = $(program_transform_name)
|
||||||
|
NORMAL_INSTALL = :
|
||||||
|
PRE_INSTALL = :
|
||||||
|
POST_INSTALL = :
|
||||||
|
NORMAL_UNINSTALL = :
|
||||||
|
PRE_UNINSTALL = :
|
||||||
|
POST_UNINSTALL = :
|
||||||
|
build_triplet = @build@
|
||||||
|
host_triplet = @host@
|
||||||
|
@ENABLE_JNI_TRUE@am__append_1 = java
|
||||||
|
subdir = .
|
||||||
|
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
|
||||||
|
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
|
||||||
|
$(srcdir)/ffts.pc.in $(top_srcdir)/configure AUTHORS \
|
||||||
|
config.guess config.sub depcomp install-sh ltmain.sh missing
|
||||||
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
|
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_check_java_home.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_java_options.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_jar.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
|
||||||
|
$(top_srcdir)/configure.ac
|
||||||
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
|
$(ACLOCAL_M4)
|
||||||
|
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
|
||||||
|
configure.lineno config.status.lineno
|
||||||
|
mkinstalldirs = $(install_sh) -d
|
||||||
|
CONFIG_HEADER = config.h
|
||||||
|
CONFIG_CLEAN_FILES = ffts.pc
|
||||||
|
CONFIG_CLEAN_VPATH_FILES =
|
||||||
|
SOURCES =
|
||||||
|
DIST_SOURCES =
|
||||||
|
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
|
||||||
|
html-recursive info-recursive install-data-recursive \
|
||||||
|
install-dvi-recursive install-exec-recursive \
|
||||||
|
install-html-recursive install-info-recursive \
|
||||||
|
install-pdf-recursive install-ps-recursive install-recursive \
|
||||||
|
installcheck-recursive installdirs-recursive pdf-recursive \
|
||||||
|
ps-recursive uninstall-recursive
|
||||||
|
am__can_run_installinfo = \
|
||||||
|
case $$AM_UPDATE_INFO_DIR in \
|
||||||
|
n|no|NO) false;; \
|
||||||
|
*) (install-info --version) >/dev/null 2>&1;; \
|
||||||
|
esac
|
||||||
|
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||||
|
am__vpath_adj = case $$p in \
|
||||||
|
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
||||||
|
*) f=$$p;; \
|
||||||
|
esac;
|
||||||
|
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
|
||||||
|
am__install_max = 40
|
||||||
|
am__nobase_strip_setup = \
|
||||||
|
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
|
||||||
|
am__nobase_strip = \
|
||||||
|
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
|
||||||
|
am__nobase_list = $(am__nobase_strip_setup); \
|
||||||
|
for p in $$list; do echo "$$p $$p"; done | \
|
||||||
|
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
|
||||||
|
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
|
||||||
|
if (++n[$$2] == $(am__install_max)) \
|
||||||
|
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
|
||||||
|
END { for (dir in files) print dir, files[dir] }'
|
||||||
|
am__base_list = \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
|
||||||
|
am__uninstall_files_from_dir = { \
|
||||||
|
test -z "$$files" \
|
||||||
|
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|
||||||
|
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
|
||||||
|
$(am__cd) "$$dir" && rm -f $$files; }; \
|
||||||
|
}
|
||||||
|
am__installdirs = "$(DESTDIR)$(pkgconfigdir)"
|
||||||
|
DATA = $(pkgconfig_DATA)
|
||||||
|
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
|
||||||
|
distclean-recursive maintainer-clean-recursive
|
||||||
|
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
|
||||||
|
$(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
|
||||||
|
cscope distdir dist dist-all distcheck
|
||||||
|
ETAGS = etags
|
||||||
|
CTAGS = ctags
|
||||||
|
CSCOPE = cscope
|
||||||
|
DIST_SUBDIRS = src tests java
|
||||||
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
|
distdir = $(PACKAGE)-$(VERSION)
|
||||||
|
top_distdir = $(distdir)
|
||||||
|
am__remove_distdir = \
|
||||||
|
if test -d "$(distdir)"; then \
|
||||||
|
find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
|
||||||
|
&& rm -rf "$(distdir)" \
|
||||||
|
|| { sleep 5 && rm -rf "$(distdir)"; }; \
|
||||||
|
else :; fi
|
||||||
|
am__post_remove_distdir = $(am__remove_distdir)
|
||||||
|
am__relativize = \
|
||||||
|
dir0=`pwd`; \
|
||||||
|
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
|
||||||
|
sed_rest='s,^[^/]*/*,,'; \
|
||||||
|
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
|
||||||
|
sed_butlast='s,/*[^/]*$$,,'; \
|
||||||
|
while test -n "$$dir1"; do \
|
||||||
|
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
|
||||||
|
if test "$$first" != "."; then \
|
||||||
|
if test "$$first" = ".."; then \
|
||||||
|
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
|
||||||
|
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
|
||||||
|
else \
|
||||||
|
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
|
||||||
|
if test "$$first2" = "$$first"; then \
|
||||||
|
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
|
||||||
|
else \
|
||||||
|
dir2="../$$dir2"; \
|
||||||
|
fi; \
|
||||||
|
dir0="$$dir0"/"$$first"; \
|
||||||
|
fi; \
|
||||||
|
fi; \
|
||||||
|
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
|
||||||
|
done; \
|
||||||
|
reldir="$$dir2"
|
||||||
|
DIST_ARCHIVES = $(distdir).tar.gz
|
||||||
|
GZIP_ENV = --best
|
||||||
|
DIST_TARGETS = dist-gzip
|
||||||
|
distuninstallcheck_listfiles = find . -type f -print
|
||||||
|
am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
|
||||||
|
| sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
|
||||||
|
distcleancheck_listfiles = find . -type f -print
|
||||||
|
ACLOCAL = @ACLOCAL@
|
||||||
|
AMTAR = @AMTAR@
|
||||||
|
AR = @AR@
|
||||||
|
AUTOCONF = @AUTOCONF@
|
||||||
|
AUTOHEADER = @AUTOHEADER@
|
||||||
|
AUTOMAKE = @AUTOMAKE@
|
||||||
|
AWK = @AWK@
|
||||||
|
CC = @CC@
|
||||||
|
CCAS = @CCAS@
|
||||||
|
CCASDEPMODE = @CCASDEPMODE@
|
||||||
|
CCASFLAGS = @CCASFLAGS@
|
||||||
|
CCDEPMODE = @CCDEPMODE@
|
||||||
|
CFLAGS = @CFLAGS@
|
||||||
|
CPP = @CPP@
|
||||||
|
CPPFLAGS = @CPPFLAGS@
|
||||||
|
CXX = @CXX@
|
||||||
|
CXXCPP = @CXXCPP@
|
||||||
|
CXXDEPMODE = @CXXDEPMODE@
|
||||||
|
CXXFLAGS = @CXXFLAGS@
|
||||||
|
CYGPATH_W = @CYGPATH_W@
|
||||||
|
DEFS = @DEFS@
|
||||||
|
DEPDIR = @DEPDIR@
|
||||||
|
DLLTOOL = @DLLTOOL@
|
||||||
|
DSYMUTIL = @DSYMUTIL@
|
||||||
|
DUMPBIN = @DUMPBIN@
|
||||||
|
ECHO_C = @ECHO_C@
|
||||||
|
ECHO_N = @ECHO_N@
|
||||||
|
ECHO_T = @ECHO_T@
|
||||||
|
EGREP = @EGREP@
|
||||||
|
EXEEXT = @EXEEXT@
|
||||||
|
FGREP = @FGREP@
|
||||||
|
GREP = @GREP@
|
||||||
|
INSTALL = @INSTALL@
|
||||||
|
INSTALL_DATA = @INSTALL_DATA@
|
||||||
|
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||||
|
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||||
|
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||||
|
JAR = @JAR@
|
||||||
|
JAVA = @JAVA@
|
||||||
|
JAVAC = @JAVAC@
|
||||||
|
JAVACFLAGS = @JAVACFLAGS@
|
||||||
|
JAVAFLAGS = @JAVAFLAGS@
|
||||||
|
JAVAPREFIX = @JAVAPREFIX@
|
||||||
|
JAVA_PATH_NAME = @JAVA_PATH_NAME@
|
||||||
|
JNI_CPPFLAGS = @JNI_CPPFLAGS@
|
||||||
|
LD = @LD@
|
||||||
|
LDFLAGS = @LDFLAGS@
|
||||||
|
LIBOBJS = @LIBOBJS@
|
||||||
|
LIBS = @LIBS@
|
||||||
|
LIBTOOL = @LIBTOOL@
|
||||||
|
LIPO = @LIPO@
|
||||||
|
LN_S = @LN_S@
|
||||||
|
LTLIBOBJS = @LTLIBOBJS@
|
||||||
|
MAKEINFO = @MAKEINFO@
|
||||||
|
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||||
|
MKDIR_P = @MKDIR_P@
|
||||||
|
NM = @NM@
|
||||||
|
NMEDIT = @NMEDIT@
|
||||||
|
OBJDUMP = @OBJDUMP@
|
||||||
|
OBJEXT = @OBJEXT@
|
||||||
|
OTOOL = @OTOOL@
|
||||||
|
OTOOL64 = @OTOOL64@
|
||||||
|
PACKAGE = @PACKAGE@
|
||||||
|
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||||
|
PACKAGE_NAME = @PACKAGE_NAME@
|
||||||
|
PACKAGE_STRING = @PACKAGE_STRING@
|
||||||
|
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||||
|
PACKAGE_URL = @PACKAGE_URL@
|
||||||
|
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||||
|
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||||
|
RANLIB = @RANLIB@
|
||||||
|
SED = @SED@
|
||||||
|
SET_MAKE = @SET_MAKE@
|
||||||
|
SHELL = @SHELL@
|
||||||
|
STRIP = @STRIP@
|
||||||
|
VERSION = @VERSION@
|
||||||
|
_ACJNI_JAVAC = @_ACJNI_JAVAC@
|
||||||
|
abs_builddir = @abs_builddir@
|
||||||
|
abs_srcdir = @abs_srcdir@
|
||||||
|
abs_top_builddir = @abs_top_builddir@
|
||||||
|
abs_top_srcdir = @abs_top_srcdir@
|
||||||
|
ac_ct_AR = @ac_ct_AR@
|
||||||
|
ac_ct_CC = @ac_ct_CC@
|
||||||
|
ac_ct_CXX = @ac_ct_CXX@
|
||||||
|
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||||
|
am__include = @am__include@
|
||||||
|
am__leading_dot = @am__leading_dot@
|
||||||
|
am__quote = @am__quote@
|
||||||
|
am__tar = @am__tar@
|
||||||
|
am__untar = @am__untar@
|
||||||
|
bindir = @bindir@
|
||||||
|
build = @build@
|
||||||
|
build_alias = @build_alias@
|
||||||
|
build_cpu = @build_cpu@
|
||||||
|
build_os = @build_os@
|
||||||
|
build_vendor = @build_vendor@
|
||||||
|
builddir = @builddir@
|
||||||
|
datadir = @datadir@
|
||||||
|
datarootdir = @datarootdir@
|
||||||
|
docdir = @docdir@
|
||||||
|
dvidir = @dvidir@
|
||||||
|
exec_prefix = @exec_prefix@
|
||||||
|
host = @host@
|
||||||
|
host_alias = @host_alias@
|
||||||
|
host_cpu = @host_cpu@
|
||||||
|
host_os = @host_os@
|
||||||
|
host_vendor = @host_vendor@
|
||||||
|
htmldir = @htmldir@
|
||||||
|
includedir = @includedir@
|
||||||
|
infodir = @infodir@
|
||||||
|
install_sh = @install_sh@
|
||||||
|
libdir = @libdir@
|
||||||
|
libexecdir = @libexecdir@
|
||||||
|
localedir = @localedir@
|
||||||
|
localstatedir = @localstatedir@
|
||||||
|
mandir = @mandir@
|
||||||
|
mkdir_p = @mkdir_p@
|
||||||
|
oldincludedir = @oldincludedir@
|
||||||
|
pdfdir = @pdfdir@
|
||||||
|
prefix = @prefix@
|
||||||
|
program_transform_name = @program_transform_name@
|
||||||
|
psdir = @psdir@
|
||||||
|
sbindir = @sbindir@
|
||||||
|
sharedstatedir = @sharedstatedir@
|
||||||
|
srcdir = @srcdir@
|
||||||
|
sysconfdir = @sysconfdir@
|
||||||
|
target_alias = @target_alias@
|
||||||
|
top_build_prefix = @top_build_prefix@
|
||||||
|
top_builddir = @top_builddir@
|
||||||
|
top_srcdir = @top_srcdir@
|
||||||
|
AUTOMAKE_OPTIONS = foreign
|
||||||
|
SUBDIRS = src tests $(am__append_1)
|
||||||
|
EXTRA_DIST = COPYRIGHT ffts.pc.in build_iphone.sh build_android.sh
|
||||||
|
ACLOCAL_AMFLAGS = -Im4
|
||||||
|
pkgconfigdir = $(libdir)/pkgconfig
|
||||||
|
pkgconfig_DATA = ffts.pc
|
||||||
|
all: config.h
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) all-recursive
|
||||||
|
|
||||||
|
.SUFFIXES:
|
||||||
|
am--refresh: Makefile
|
||||||
|
@:
|
||||||
|
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
||||||
|
@for dep in $?; do \
|
||||||
|
case '$(am__configure_deps)' in \
|
||||||
|
*$$dep*) \
|
||||||
|
echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \
|
||||||
|
$(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \
|
||||||
|
&& exit 0; \
|
||||||
|
exit 1;; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
|
||||||
|
$(am__cd) $(top_srcdir) && \
|
||||||
|
$(AUTOMAKE) --foreign Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
|
@case '$?' in \
|
||||||
|
*config.status*) \
|
||||||
|
echo ' $(SHELL) ./config.status'; \
|
||||||
|
$(SHELL) ./config.status;; \
|
||||||
|
*) \
|
||||||
|
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
|
||||||
|
esac;
|
||||||
|
|
||||||
|
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||||
|
$(SHELL) ./config.status --recheck
|
||||||
|
|
||||||
|
$(top_srcdir)/configure: $(am__configure_deps)
|
||||||
|
$(am__cd) $(srcdir) && $(AUTOCONF)
|
||||||
|
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
||||||
|
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
|
||||||
|
$(am__aclocal_m4_deps):
|
||||||
|
|
||||||
|
config.h: stamp-h1
|
||||||
|
@if test ! -f $@; then rm -f stamp-h1; else :; fi
|
||||||
|
@if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
|
||||||
|
|
||||||
|
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
|
||||||
|
@rm -f stamp-h1
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status config.h
|
||||||
|
$(srcdir)/config.h.in: $(am__configure_deps)
|
||||||
|
($(am__cd) $(top_srcdir) && $(AUTOHEADER))
|
||||||
|
rm -f stamp-h1
|
||||||
|
touch $@
|
||||||
|
|
||||||
|
distclean-hdr:
|
||||||
|
-rm -f config.h stamp-h1
|
||||||
|
ffts.pc: $(top_builddir)/config.status $(srcdir)/ffts.pc.in
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $@
|
||||||
|
|
||||||
|
mostlyclean-libtool:
|
||||||
|
-rm -f *.lo
|
||||||
|
|
||||||
|
clean-libtool:
|
||||||
|
-rm -rf .libs _libs
|
||||||
|
|
||||||
|
distclean-libtool:
|
||||||
|
-rm -f libtool config.lt
|
||||||
|
install-pkgconfigDATA: $(pkgconfig_DATA)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
|
||||||
|
if test -n "$$list"; then \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \
|
||||||
|
fi; \
|
||||||
|
for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
echo "$$d$$p"; \
|
||||||
|
done | $(am__base_list) | \
|
||||||
|
while read files; do \
|
||||||
|
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \
|
||||||
|
$(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-pkgconfigDATA:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
|
||||||
|
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||||
|
dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir)
|
||||||
|
|
||||||
|
# This directory's subdirectories are mostly independent; you can cd
|
||||||
|
# into them and run 'make' without going through this Makefile.
|
||||||
|
# To change the values of 'make' variables: instead of editing Makefiles,
|
||||||
|
# (1) if the variable is set in 'config.status', edit 'config.status'
|
||||||
|
# (which will cause the Makefiles to be regenerated when you run 'make');
|
||||||
|
# (2) otherwise, pass the desired values on the 'make' command line.
|
||||||
|
$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS):
|
||||||
|
@fail= failcom='exit 1'; \
|
||||||
|
for f in x $$MAKEFLAGS; do \
|
||||||
|
case $$f in \
|
||||||
|
*=* | --[!k]*);; \
|
||||||
|
*k*) failcom='fail=yes';; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
dot_seen=no; \
|
||||||
|
target=`echo $@ | sed s/-recursive//`; \
|
||||||
|
case "$@" in \
|
||||||
|
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
|
||||||
|
*) list='$(SUBDIRS)' ;; \
|
||||||
|
esac; \
|
||||||
|
for subdir in $$list; do \
|
||||||
|
echo "Making $$target in $$subdir"; \
|
||||||
|
if test "$$subdir" = "."; then \
|
||||||
|
dot_seen=yes; \
|
||||||
|
local_target="$$target-am"; \
|
||||||
|
else \
|
||||||
|
local_target="$$target"; \
|
||||||
|
fi; \
|
||||||
|
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|
||||||
|
|| eval $$failcom; \
|
||||||
|
done; \
|
||||||
|
if test "$$dot_seen" = "no"; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
|
||||||
|
fi; test -z "$$fail"
|
||||||
|
tags-recursive:
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
|
||||||
|
done
|
||||||
|
ctags-recursive:
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
|
||||||
|
done
|
||||||
|
cscopelist-recursive:
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \
|
||||||
|
done
|
||||||
|
|
||||||
|
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
mkid -fID $$unique
|
||||||
|
tags: TAGS
|
||||||
|
|
||||||
|
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
set x; \
|
||||||
|
here=`pwd`; \
|
||||||
|
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
|
||||||
|
include_option=--etags-include; \
|
||||||
|
empty_fix=.; \
|
||||||
|
else \
|
||||||
|
include_option=--include; \
|
||||||
|
empty_fix=; \
|
||||||
|
fi; \
|
||||||
|
list='$(SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
if test "$$subdir" = .; then :; else \
|
||||||
|
test ! -f $$subdir/TAGS || \
|
||||||
|
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
|
||||||
|
fi; \
|
||||||
|
done; \
|
||||||
|
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
shift; \
|
||||||
|
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||||
|
test -n "$$unique" || unique=$$empty_fix; \
|
||||||
|
if test $$# -gt 0; then \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
"$$@" $$unique; \
|
||||||
|
else \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
$$unique; \
|
||||||
|
fi; \
|
||||||
|
fi
|
||||||
|
ctags: CTAGS
|
||||||
|
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
test -z "$(CTAGS_ARGS)$$unique" \
|
||||||
|
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||||
|
$$unique
|
||||||
|
|
||||||
|
GTAGS:
|
||||||
|
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||||
|
&& $(am__cd) $(top_srcdir) \
|
||||||
|
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||||
|
|
||||||
|
cscope: cscope.files
|
||||||
|
test ! -s cscope.files \
|
||||||
|
|| $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
|
||||||
|
|
||||||
|
clean-cscope:
|
||||||
|
-rm -f cscope.files
|
||||||
|
|
||||||
|
cscope.files: clean-cscope cscopelist-recursive cscopelist
|
||||||
|
|
||||||
|
cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP)'; \
|
||||||
|
case "$(srcdir)" in \
|
||||||
|
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||||
|
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||||
|
esac; \
|
||||||
|
for i in $$list; do \
|
||||||
|
if test -f "$$i"; then \
|
||||||
|
echo "$(subdir)/$$i"; \
|
||||||
|
else \
|
||||||
|
echo "$$sdir/$$i"; \
|
||||||
|
fi; \
|
||||||
|
done >> $(top_builddir)/cscope.files
|
||||||
|
|
||||||
|
distclean-tags:
|
||||||
|
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||||
|
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
|
||||||
|
|
||||||
|
distdir: $(DISTFILES)
|
||||||
|
$(am__remove_distdir)
|
||||||
|
test -d "$(distdir)" || mkdir "$(distdir)"
|
||||||
|
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
list='$(DISTFILES)'; \
|
||||||
|
dist_files=`for file in $$list; do echo $$file; done | \
|
||||||
|
sed -e "s|^$$srcdirstrip/||;t" \
|
||||||
|
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||||
|
case $$dist_files in \
|
||||||
|
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||||
|
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||||
|
sort -u` ;; \
|
||||||
|
esac; \
|
||||||
|
for file in $$dist_files; do \
|
||||||
|
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||||
|
if test -d $$d/$$file; then \
|
||||||
|
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||||
|
if test -d "$(distdir)/$$file"; then \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||||
|
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
else \
|
||||||
|
test -f "$(distdir)/$$file" \
|
||||||
|
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
|
||||||
|
if test "$$subdir" = .; then :; else \
|
||||||
|
$(am__make_dryrun) \
|
||||||
|
|| test -d "$(distdir)/$$subdir" \
|
||||||
|
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|
||||||
|
|| exit 1; \
|
||||||
|
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
|
||||||
|
$(am__relativize); \
|
||||||
|
new_distdir=$$reldir; \
|
||||||
|
dir1=$$subdir; dir2="$(top_distdir)"; \
|
||||||
|
$(am__relativize); \
|
||||||
|
new_top_distdir=$$reldir; \
|
||||||
|
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
|
||||||
|
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
|
||||||
|
($(am__cd) $$subdir && \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) \
|
||||||
|
top_distdir="$$new_top_distdir" \
|
||||||
|
distdir="$$new_distdir" \
|
||||||
|
am__remove_distdir=: \
|
||||||
|
am__skip_length_check=: \
|
||||||
|
am__skip_mode_fix=: \
|
||||||
|
distdir) \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
-test -n "$(am__skip_mode_fix)" \
|
||||||
|
|| find "$(distdir)" -type d ! -perm -755 \
|
||||||
|
-exec chmod u+rwx,go+rx {} \; -o \
|
||||||
|
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
|
||||||
|
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
|
||||||
|
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|
||||||
|
|| chmod -R a+r "$(distdir)"
|
||||||
|
dist-gzip: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-bzip2: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-lzip: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-xz: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-tarZ: distdir
|
||||||
|
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-shar: distdir
|
||||||
|
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist-zip: distdir
|
||||||
|
-rm -f $(distdir).zip
|
||||||
|
zip -rq $(distdir).zip $(distdir)
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
dist dist-all:
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
|
||||||
|
# This target untars the dist file and tries a VPATH configuration. Then
|
||||||
|
# it guarantees that the distribution is self-contained by making another
|
||||||
|
# tarfile.
|
||||||
|
distcheck: dist
|
||||||
|
case '$(DIST_ARCHIVES)' in \
|
||||||
|
*.tar.gz*) \
|
||||||
|
GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
|
||||||
|
*.tar.bz2*) \
|
||||||
|
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
|
||||||
|
*.tar.lz*) \
|
||||||
|
lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
|
||||||
|
*.tar.xz*) \
|
||||||
|
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
|
||||||
|
*.tar.Z*) \
|
||||||
|
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
|
||||||
|
*.shar.gz*) \
|
||||||
|
GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
|
||||||
|
*.zip*) \
|
||||||
|
unzip $(distdir).zip ;;\
|
||||||
|
esac
|
||||||
|
chmod -R a-w $(distdir)
|
||||||
|
chmod u+w $(distdir)
|
||||||
|
mkdir $(distdir)/_build $(distdir)/_inst
|
||||||
|
chmod a-w $(distdir)
|
||||||
|
test -d $(distdir)/_build || exit 0; \
|
||||||
|
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
|
||||||
|
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
|
||||||
|
&& am__cwd=`pwd` \
|
||||||
|
&& $(am__cd) $(distdir)/_build \
|
||||||
|
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
|
||||||
|
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
|
||||||
|
$(DISTCHECK_CONFIGURE_FLAGS) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) check \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) install \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
|
||||||
|
distuninstallcheck \
|
||||||
|
&& chmod -R a-w "$$dc_install_base" \
|
||||||
|
&& ({ \
|
||||||
|
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
|
||||||
|
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
|
||||||
|
} || { rm -rf "$$dc_destdir"; exit 1; }) \
|
||||||
|
&& rm -rf "$$dc_destdir" \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) dist \
|
||||||
|
&& rm -rf $(DIST_ARCHIVES) \
|
||||||
|
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
|
||||||
|
&& cd "$$am__cwd" \
|
||||||
|
|| exit 1
|
||||||
|
$(am__post_remove_distdir)
|
||||||
|
@(echo "$(distdir) archives ready for distribution: "; \
|
||||||
|
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
|
||||||
|
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
|
||||||
|
distuninstallcheck:
|
||||||
|
@test -n '$(distuninstallcheck_dir)' || { \
|
||||||
|
echo 'ERROR: trying to run $@ with an empty' \
|
||||||
|
'$$(distuninstallcheck_dir)' >&2; \
|
||||||
|
exit 1; \
|
||||||
|
}; \
|
||||||
|
$(am__cd) '$(distuninstallcheck_dir)' || { \
|
||||||
|
echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
|
||||||
|
exit 1; \
|
||||||
|
}; \
|
||||||
|
test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|
||||||
|
|| { echo "ERROR: files left after uninstall:" ; \
|
||||||
|
if test -n "$(DESTDIR)"; then \
|
||||||
|
echo " (check DESTDIR support)"; \
|
||||||
|
fi ; \
|
||||||
|
$(distuninstallcheck_listfiles) ; \
|
||||||
|
exit 1; } >&2
|
||||||
|
distcleancheck: distclean
|
||||||
|
@if test '$(srcdir)' = . ; then \
|
||||||
|
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
|
||||||
|
exit 1 ; \
|
||||||
|
fi
|
||||||
|
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|
||||||
|
|| { echo "ERROR: files left in build directory after distclean:" ; \
|
||||||
|
$(distcleancheck_listfiles) ; \
|
||||||
|
exit 1; } >&2
|
||||||
|
check-am: all-am
|
||||||
|
check: check-recursive
|
||||||
|
all-am: Makefile $(DATA) config.h
|
||||||
|
installdirs: installdirs-recursive
|
||||||
|
installdirs-am:
|
||||||
|
for dir in "$(DESTDIR)$(pkgconfigdir)"; do \
|
||||||
|
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||||
|
done
|
||||||
|
install: install-recursive
|
||||||
|
install-exec: install-exec-recursive
|
||||||
|
install-data: install-data-recursive
|
||||||
|
uninstall: uninstall-recursive
|
||||||
|
|
||||||
|
install-am: all-am
|
||||||
|
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||||
|
|
||||||
|
installcheck: installcheck-recursive
|
||||||
|
install-strip:
|
||||||
|
if test -z '$(STRIP)'; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
install; \
|
||||||
|
else \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||||
|
fi
|
||||||
|
mostlyclean-generic:
|
||||||
|
|
||||||
|
clean-generic:
|
||||||
|
|
||||||
|
distclean-generic:
|
||||||
|
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||||
|
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||||
|
|
||||||
|
maintainer-clean-generic:
|
||||||
|
@echo "This command is intended for maintainers to use"
|
||||||
|
@echo "it deletes files that may require special tools to rebuild."
|
||||||
|
clean: clean-recursive
|
||||||
|
|
||||||
|
clean-am: clean-generic clean-libtool mostlyclean-am
|
||||||
|
|
||||||
|
distclean: distclean-recursive
|
||||||
|
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||||
|
-rm -f Makefile
|
||||||
|
distclean-am: clean-am distclean-generic distclean-hdr \
|
||||||
|
distclean-libtool distclean-tags
|
||||||
|
|
||||||
|
dvi: dvi-recursive
|
||||||
|
|
||||||
|
dvi-am:
|
||||||
|
|
||||||
|
html: html-recursive
|
||||||
|
|
||||||
|
html-am:
|
||||||
|
|
||||||
|
info: info-recursive
|
||||||
|
|
||||||
|
info-am:
|
||||||
|
|
||||||
|
install-data-am: install-pkgconfigDATA
|
||||||
|
|
||||||
|
install-dvi: install-dvi-recursive
|
||||||
|
|
||||||
|
install-dvi-am:
|
||||||
|
|
||||||
|
install-exec-am:
|
||||||
|
|
||||||
|
install-html: install-html-recursive
|
||||||
|
|
||||||
|
install-html-am:
|
||||||
|
|
||||||
|
install-info: install-info-recursive
|
||||||
|
|
||||||
|
install-info-am:
|
||||||
|
|
||||||
|
install-man:
|
||||||
|
|
||||||
|
install-pdf: install-pdf-recursive
|
||||||
|
|
||||||
|
install-pdf-am:
|
||||||
|
|
||||||
|
install-ps: install-ps-recursive
|
||||||
|
|
||||||
|
install-ps-am:
|
||||||
|
|
||||||
|
installcheck-am:
|
||||||
|
|
||||||
|
maintainer-clean: maintainer-clean-recursive
|
||||||
|
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
|
||||||
|
-rm -rf $(top_srcdir)/autom4te.cache
|
||||||
|
-rm -f Makefile
|
||||||
|
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||||
|
|
||||||
|
mostlyclean: mostlyclean-recursive
|
||||||
|
|
||||||
|
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
|
||||||
|
|
||||||
|
pdf: pdf-recursive
|
||||||
|
|
||||||
|
pdf-am:
|
||||||
|
|
||||||
|
ps: ps-recursive
|
||||||
|
|
||||||
|
ps-am:
|
||||||
|
|
||||||
|
uninstall-am: uninstall-pkgconfigDATA
|
||||||
|
|
||||||
|
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all \
|
||||||
|
cscopelist-recursive ctags-recursive install-am install-strip \
|
||||||
|
tags-recursive
|
||||||
|
|
||||||
|
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
|
||||||
|
all all-am am--refresh check check-am clean clean-cscope \
|
||||||
|
clean-generic clean-libtool cscope cscopelist \
|
||||||
|
cscopelist-recursive ctags ctags-recursive dist dist-all \
|
||||||
|
dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ dist-xz \
|
||||||
|
dist-zip distcheck distclean distclean-generic distclean-hdr \
|
||||||
|
distclean-libtool distclean-tags distcleancheck distdir \
|
||||||
|
distuninstallcheck dvi dvi-am html html-am info info-am \
|
||||||
|
install install-am install-data install-data-am install-dvi \
|
||||||
|
install-dvi-am install-exec install-exec-am install-html \
|
||||||
|
install-html-am install-info install-info-am install-man \
|
||||||
|
install-pdf install-pdf-am install-pkgconfigDATA install-ps \
|
||||||
|
install-ps-am install-strip installcheck installcheck-am \
|
||||||
|
installdirs installdirs-am maintainer-clean \
|
||||||
|
maintainer-clean-generic mostlyclean mostlyclean-generic \
|
||||||
|
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
|
||||||
|
uninstall uninstall-am uninstall-pkgconfigDATA
|
||||||
|
|
||||||
|
|
||||||
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
.NOEXPORT:
|
@ -0,0 +1,27 @@
|
|||||||
|
FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
by Anthony Blake <anthonix@me.com>
|
||||||
|
|
||||||
|
To build for Android, edit and run build_android.sh
|
||||||
|
|
||||||
|
To build for iOS, edit and run build_iphone.sh
|
||||||
|
|
||||||
|
To build for Linux or OS X on x86, run
|
||||||
|
./configure --enable-sse --enable-single --prefix=/usr/local
|
||||||
|
make
|
||||||
|
make install
|
||||||
|
|
||||||
|
FFTS dynamically generates code at runtime. This can be disabled with
|
||||||
|
--disable-dynamic-code
|
||||||
|
|
||||||
|
For JNI targets: --enable-jni will build the JNI bindings automatically for
|
||||||
|
the host target, and --enable-shared must also be added manually for it to
|
||||||
|
work.
|
||||||
|
|
||||||
|
If you like FFTS, please show your support by sending a postcard to:
|
||||||
|
|
||||||
|
Anthony Blake
|
||||||
|
Department of Computer Science
|
||||||
|
The University of Waikato
|
||||||
|
Private Bag 3105
|
||||||
|
Hamilton 3240
|
||||||
|
NEW ZEALAND
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,80 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Compiles ffts for Android
|
||||||
|
# Make sure you have NDK_ROOT defined in .bashrc or .bash_profile
|
||||||
|
# Modify INSTALL_DIR to suit your situation
|
||||||
|
|
||||||
|
INSTALL_DIR="`pwd`/java/android/bin"
|
||||||
|
|
||||||
|
PLATFORM=android-8
|
||||||
|
TOOL="4.6"
|
||||||
|
|
||||||
|
case $(uname -s) in
|
||||||
|
Darwin)
|
||||||
|
CONFBUILD=i386-apple-darwin`uname -r`
|
||||||
|
HOSTPLAT=darwin-x86
|
||||||
|
;;
|
||||||
|
Linux)
|
||||||
|
CONFBUILD=x86-unknown-linux
|
||||||
|
HOSTPLAT=linux-`uname -m`
|
||||||
|
;;
|
||||||
|
# Unsupported build host: report on stderr and fail with a non-zero status.
*) echo "$0: Unknown platform" >&2; exit 1
|
||||||
|
esac
|
||||||
|
|
||||||
|
case arm in
|
||||||
|
arm)
|
||||||
|
TARGPLAT=arm-linux-androideabi
|
||||||
|
ARCH=arm
|
||||||
|
CONFTARG=arm-eabi
|
||||||
|
;;
|
||||||
|
x86)
|
||||||
|
TARGPLAT=x86
|
||||||
|
ARCH=x86
|
||||||
|
CONFTARG=x86
|
||||||
|
;;
|
||||||
|
mips)
|
||||||
|
## probably wrong
|
||||||
|
TARGPLAT=mipsel-linux-android
|
||||||
|
ARCH=mips
|
||||||
|
CONFTARG=mips
|
||||||
|
;;
|
||||||
|
# Unsupported target architecture: report on stderr and fail with a non-zero status.
*) echo "$0: Unknown target" >&2; exit 1
|
||||||
|
esac
|
||||||
|
|
||||||
|
: ${NDK_ROOT:?}
|
||||||
|
|
||||||
|
echo "Using: $NDK_ROOT/toolchains/${TARGPLAT}-${TOOL}/prebuilt/${HOSTPLAT}/bin"
|
||||||
|
|
||||||
|
export PATH="$NDK_ROOT/toolchains/${TARGPLAT}-${TOOL}/prebuilt/${HOSTPLAT}/bin/:$PATH"
|
||||||
|
export SYS_ROOT="$NDK_ROOT/platforms/${PLATFORM}/arch-${ARCH}/"
|
||||||
|
export CC="${TARGPLAT}-gcc --sysroot=$SYS_ROOT"
|
||||||
|
export LD="${TARGPLAT}-ld"
|
||||||
|
export AR="${TARGPLAT}-ar"
|
||||||
|
export RANLIB="${TARGPLAT}-ranlib"
|
||||||
|
export STRIP="${TARGPLAT}-strip"
|
||||||
|
export CFLAGS="-Os"
|
||||||
|
|
||||||
|
# Create the install prefix, then configure, build and install the library.
# Paths are quoted so a working directory containing spaces is handled, and
# each essential step aborts the script on failure instead of carrying on.
mkdir -p "$INSTALL_DIR" || exit 1
./configure --enable-neon --build="${CONFBUILD}" --host="${CONFTARG}" --prefix="$INSTALL_DIR" LIBS="-lc -lgcc" || exit 1

# Start from a clean tree; a failing clean is not fatal.
make clean
make || exit 1
make install || exit 1
|
||||||
|
|
||||||
|
if [ -z "$ANDROID_HOME" ] ; then
|
||||||
|
echo ""
|
||||||
|
echo " No ANDROID_HOME defined"
|
||||||
|
echo " Android JNI interfaces will not be built"
|
||||||
|
echo
|
||||||
|
else
|
||||||
|
echo
|
||||||
|
echo "Using android_home ${ANDROID_HOME}"
|
||||||
|
echo
|
||||||
|
# Each step runs in a subshell so the caller's working directory is untouched.
# "&&" ensures the tool only runs if the cd succeeded; tool paths are quoted.
( cd java/android && "${ANDROID_HOME}/tools/android" update lib-project -p . ) || exit 1
( cd java/android/jni && "${NDK_ROOT}/ndk-build" V=1 ) || exit 1
( cd java/android && ant release ) || exit 1
|
||||||
|
echo
|
||||||
|
echo "Android library project location:"
|
||||||
|
echo " `pwd`/java/android"
|
||||||
|
echo
|
||||||
|
fi
|
||||||
|
exit 0
|
@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Compiles ffts for iOS
|
||||||
|
# Modify INSTALL_DIR, SDKVER and DEVROOT to suit your situation
|
||||||
|
|
||||||
|
INSTALL_DIR="`pwd`/build"
|
||||||
|
|
||||||
|
export SDKVER="6.1"
|
||||||
|
export DEVROOT="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer"
|
||||||
|
export SDKROOT="$DEVROOT/SDKs/iPhoneOS$SDKVER.sdk"
|
||||||
|
export CFLAGS="-O3 -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wno-unused-variable -Wunused-value -Wno-shorten-64-to-32 -Wno-trigraphs -fpascal-strings -miphoneos-version-min=5.0 -mcpu=cortex-a9 -arch armv7 -mfpu=neon -pipe -isysroot $SDKROOT -isystem $SDKROOT/usr/include -isystem $DEVROOT/usr/include -mno-thumb -no-integrated-as"
|
||||||
|
export AR="$DEVROOT/usr/bin/ar"
|
||||||
|
export CC="clang"
|
||||||
|
|
||||||
|
|
||||||
|
# Create the install prefix, then configure, build and install the library.
# Paths are quoted so a working directory containing spaces is handled, and
# each essential step aborts the script on failure instead of carrying on.
mkdir -p "$INSTALL_DIR" || exit 1
./configure --enable-neon --build="i386-apple-darwin`uname -r`" --host=arm-eabi --prefix="$INSTALL_DIR" || exit 1

# Start from a clean tree; a failing clean is not fatal.
make clean
make || exit 1
make install || exit 1
|
||||||
|
|
||||||
|
exit 0
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,148 @@
|
|||||||
|
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||||
|
|
||||||
|
/* Define to disable dynamic code generation. */
|
||||||
|
#undef DYNAMIC_DISABLED
|
||||||
|
|
||||||
|
/* JNI being built. */
|
||||||
|
#undef ENABLE_JNI
|
||||||
|
|
||||||
|
/* Define to FFT in single precision. */
|
||||||
|
#undef FFTS_PREC_SINGLE
|
||||||
|
|
||||||
|
/* Set ARM float abi. */
|
||||||
|
#undef FLOAT_ABI
|
||||||
|
|
||||||
|
/* Define to 1 if you have the declaration of `memalign', and to 0 if you
|
||||||
|
don't. */
|
||||||
|
#undef HAVE_DECL_MEMALIGN
|
||||||
|
|
||||||
|
/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if
|
||||||
|
you don't. */
|
||||||
|
#undef HAVE_DECL_POSIX_MEMALIGN
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||||
|
#undef HAVE_DLFCN_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `gettimeofday' function. */
|
||||||
|
#undef HAVE_GETTIMEOFDAY
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||||
|
#undef HAVE_INTTYPES_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `m' library (-lm). */
|
||||||
|
#undef HAVE_LIBM
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
|
#undef HAVE_MEMORY_H
|
||||||
|
|
||||||
|
/* Define to FFT with ARM NEON. */
|
||||||
|
#undef HAVE_NEON
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `pow' function. */
|
||||||
|
#undef HAVE_POW
|
||||||
|
|
||||||
|
/* Define to FFT with SSE. */
|
||||||
|
#undef HAVE_SSE
|
||||||
|
|
||||||
|
/* Define to 1 if stdbool.h conforms to C99. */
|
||||||
|
#undef HAVE_STDBOOL_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
|
#undef HAVE_STDINT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||||
|
#undef HAVE_STDLIB_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <strings.h> header file. */
|
||||||
|
#undef HAVE_STRINGS_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <string.h> header file. */
|
||||||
|
#undef HAVE_STRING_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/socket.h> header file. */
|
||||||
|
#undef HAVE_SYS_SOCKET_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||||
|
#undef HAVE_SYS_STAT_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/time.h> header file. */
|
||||||
|
#undef HAVE_SYS_TIME_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||||
|
#undef HAVE_SYS_TYPES_H
|
||||||
|
|
||||||
|
/* Define to 1 if you have the <unistd.h> header file. */
|
||||||
|
#undef HAVE_UNISTD_H
|
||||||
|
|
||||||
|
/* Define to FFT with ARM VFP. */
|
||||||
|
#undef HAVE_VFP
|
||||||
|
|
||||||
|
/* Define to 1 if the system has the type `_Bool'. */
|
||||||
|
#undef HAVE__BOOL
|
||||||
|
|
||||||
|
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||||
|
*/
|
||||||
|
#undef LT_OBJDIR
|
||||||
|
|
||||||
|
/* Name of package */
|
||||||
|
#undef PACKAGE
|
||||||
|
|
||||||
|
/* Define to the address where bug reports for this package should be sent. */
|
||||||
|
#undef PACKAGE_BUGREPORT
|
||||||
|
|
||||||
|
/* Define to the full name of this package. */
|
||||||
|
#undef PACKAGE_NAME
|
||||||
|
|
||||||
|
/* Define to the full name and version of this package. */
|
||||||
|
#undef PACKAGE_STRING
|
||||||
|
|
||||||
|
/* Define to the one symbol short name of this package. */
|
||||||
|
#undef PACKAGE_TARNAME
|
||||||
|
|
||||||
|
/* Define to the home page for this package. */
|
||||||
|
#undef PACKAGE_URL
|
||||||
|
|
||||||
|
/* Define to the version of this package. */
|
||||||
|
#undef PACKAGE_VERSION
|
||||||
|
|
||||||
|
/* Define to 1 if you have the ANSI C header files. */
|
||||||
|
#undef STDC_HEADERS
|
||||||
|
|
||||||
|
/* Version number of package */
|
||||||
|
#undef VERSION
|
||||||
|
|
||||||
|
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
|
||||||
|
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
|
||||||
|
#define below would cause a syntax error. */
|
||||||
|
#undef _UINT64_T
|
||||||
|
|
||||||
|
/* Define to `__inline__' or `__inline' if that's what the C compiler
|
||||||
|
calls it, or to nothing if 'inline' is not supported under any name. */
|
||||||
|
#ifndef __cplusplus
|
||||||
|
#undef inline
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to the type of a signed integer type of width exactly 32 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
#undef int32_t
|
||||||
|
|
||||||
|
/* Define to the equivalent of the C99 'restrict' keyword, or to
|
||||||
|
nothing if this is not supported. Do not define if restrict is
|
||||||
|
supported directly. */
|
||||||
|
#undef restrict
|
||||||
|
/* Work around a bug in Sun C++: it does not support _Restrict or
|
||||||
|
__restrict__, even though the corresponding Sun C compiler ends up with
|
||||||
|
"#define restrict _Restrict" or "#define restrict __restrict__" in the
|
||||||
|
previous line. Perhaps some future version of Sun C++ will work with
|
||||||
|
restrict; if so, hopefully it defines __RESTRICT like Sun C does. */
|
||||||
|
#if defined __SUNPRO_CC && !defined __RESTRICT
|
||||||
|
# define _Restrict
|
||||||
|
# define __restrict__
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||||
|
#undef size_t
|
||||||
|
|
||||||
|
/* Define to the type of an unsigned integer type of width exactly 64 bits if
|
||||||
|
such a type exists and the standard includes do not define it. */
|
||||||
|
#undef uint64_t
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,137 @@
|
|||||||
|
# -*- Autoconf -*-
|
||||||
|
# Process this file with autoconf to produce a configure script.
|
||||||
|
|
||||||
|
AC_PREREQ([2.68])
|
||||||
|
AC_INIT(ffts, 0.7, amb@anthonix.com)
|
||||||
|
# Package name and version are taken from AC_INIT; the two-argument form of
# AM_INIT_AUTOMAKE is obsolete.
AM_INIT_AUTOMAKE
|
||||||
|
|
||||||
|
AC_CONFIG_MACRO_DIR([m4])
|
||||||
|
|
||||||
|
# AC_CONFIG_SRCDIR([include/common.h])
|
||||||
|
AC_CONFIG_HEADERS([config.h])
|
||||||
|
|
||||||
|
AC_CANONICAL_HOST
|
||||||
|
|
||||||
|
# Checks for programs.
|
||||||
|
AC_PROG_CXX
|
||||||
|
AC_PROG_CC
|
||||||
|
#AX_COMPILER_VENDOR
|
||||||
|
LT_INIT([disable-shared])
|
||||||
|
AM_PROG_AS
|
||||||
|
#CXX="clang++"
|
||||||
|
#CXXFLAGS="$CXXFLAGS -stdlib=libc++"
|
||||||
|
|
||||||
|
#SFFT_AR="/usr/bin/ar"
|
||||||
|
#SFFT_CFLAGS="$CFLAGS"
|
||||||
|
#SFFT_CC="$CC"
|
||||||
|
# Runtime code generation is enabled by default, so the help text advertises
# the --disable form. DYNAMIC_DISABLED is defined (and the matching automake
# conditional is true) only when the option is explicitly set to "no".
AC_ARG_ENABLE([dynamic-code],
  [AS_HELP_STRING([--disable-dynamic-code],
    [do not dynamically generate code at runtime])],
  [sfft_dynamic=$enableval], [sfft_dynamic=yes])
if test "$sfft_dynamic" = "no"; then
  AC_DEFINE([DYNAMIC_DISABLED], [1], [Define to disable dynamic code generation.])
fi
AM_CONDITIONAL([DYNAMIC_DISABLED], [test "$sfft_dynamic" = "no"])
|
||||||
|
|
||||||
|
# FFTS_PREC_SINGLE is always defined: 1 for --enable-single, 0 for anything
# else, so the macro is never left undefined by an unexpected option value.
AC_ARG_ENABLE([single],
  [AS_HELP_STRING([--enable-single], [compile single-precision library])],
  [sfft_single=$enableval], [sfft_single=no])
if test "$sfft_single" = "yes"; then
  AC_DEFINE([FFTS_PREC_SINGLE], [1], [Define to FFT in single precision.])
else
  AC_DEFINE([FFTS_PREC_SINGLE], [0], [Define to FFT in single precision.])
fi
|
||||||
|
|
||||||
|
# Optional SSE code paths (off by default). Sets SIMD, defines HAVE_SSE and
# enables the HAVE_SSE automake conditional when requested.
AC_ARG_ENABLE([sse],
  [AS_HELP_STRING([--enable-sse], [enable SSE extensions])],
  [have_sse=$enableval], [have_sse=no])
if test "$have_sse" = "yes"; then
  SIMD=sse
  AC_DEFINE([HAVE_SSE], [1], [Define to FFT with SSE.])
fi
AM_CONDITIONAL([HAVE_SSE], [test "$have_sse" = "yes"])
|
||||||
|
|
||||||
|
# Optional ARM NEON code paths (off by default). Defines HAVE_NEON and enables
# the HAVE_NEON automake conditional when requested.
AC_ARG_ENABLE([neon],
  [AS_HELP_STRING([--enable-neon], [enable NEON extensions])],
  [have_neon=$enableval], [have_neon=no])
if test "$have_neon" = "yes"; then
  AC_DEFINE([HAVE_NEON], [1], [Define to FFT with ARM NEON.])
fi
AM_CONDITIONAL([HAVE_NEON], [test "$have_neon" = "yes"])
|
||||||
|
|
||||||
|
# Optional ARM VFP code paths (off by default). Defines HAVE_VFP and enables
# the HAVE_VFP automake conditional when requested.
AC_ARG_ENABLE([vfp],
  [AS_HELP_STRING([--enable-vfp], [enable VFP extensions])],
  [have_vfp=$enableval], [have_vfp=no])
if test "$have_vfp" = "yes"; then
  AC_DEFINE([HAVE_VFP], [1], [Define to FFT with ARM VFP.])
fi
AM_CONDITIONAL([HAVE_VFP], [test "$have_vfp" = "yes"])
|
||||||
|
|
||||||
|
AC_ARG_WITH(float-abi, [AS_HELP_STRING([--with-float-abi=ABI],[set float abi for arm, hard or softfp (default is softfp)])],
|
||||||
|
float_abi=$withval, float_abi=softfp)
|
||||||
|
|
||||||
|
# Optional JNI binding (off by default); have_jni drives the checks below.
AC_ARG_ENABLE([jni], [AS_HELP_STRING([--enable-jni],[enable JNI binding])], [have_jni=$enableval], [have_jni=no])
|
||||||
|
if test "$have_jni" = "yes"; then
|
||||||
|
# Java stuff
|
||||||
|
AX_JAVA_OPTIONS
|
||||||
|
AC_CHECK_JAVA_HOME
|
||||||
|
AC_CHECK_CLASSPATH
|
||||||
|
AC_PROG_JAVAC
|
||||||
|
# blah this whinges about something
|
||||||
|
#AC_PROG_JAVAH
|
||||||
|
AC_PROG_JAR
|
||||||
|
AX_JNI_INCLUDE_DIR
|
||||||
|
for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS
|
||||||
|
do
|
||||||
|
JNI_CPPFLAGS="$JNI_CPPFLAGS -I$JNI_INCLUDE_DIR"
|
||||||
|
done
|
||||||
|
AC_SUBST(JNI_CPPFLAGS, [$JNI_CPPFLAGS])
|
||||||
|
|
||||||
|
AC_DEFINE(ENABLE_JNI,1,[JNI being built.])
|
||||||
|
fi
|
||||||
|
AM_CONDITIONAL(ENABLE_JNI, test "$have_jni" = "yes")
|
||||||
|
|
||||||
|
fpu=""
|
||||||
|
AS_IF([test "$have_vfp" = "yes"],[fpu="-mfpu=vfp"],
|
||||||
|
[test "$have_neon" = "yes"],[fpu="-mfpu=neon"],
|
||||||
|
[])
|
||||||
|
|
||||||
|
AC_MSG_NOTICE([host is "${host}"])
|
||||||
|
case "${host}" in
|
||||||
|
arm* )
|
||||||
|
CFLAGS="$CFLAGS -mfloat-abi=${float_abi} ${fpu} -std=c99"
|
||||||
|
CCASFLAGS="$CCASFLAGS -mfloat-abi=${float_abi} ${fpu}"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
#if test "$ord_sr" = "no"; then
|
||||||
|
# AC_DEFINE(SFFT_ORD_SR,0,[Define to enable ordinary split radix.])
|
||||||
|
#fi
|
||||||
|
|
||||||
|
# Checks for libraries.
|
||||||
|
AC_CHECK_LIB([m], [cos])
|
||||||
|
AC_CHECK_DECLS([posix_memalign,
|
||||||
|
memalign],,,
|
||||||
|
[#define _XOPEN_SOURCE 600
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <malloc.h>])
|
||||||
|
|
||||||
|
# Checks for header files.
|
||||||
|
AC_CHECK_HEADERS([stdint.h stdlib.h string.h sys/socket.h sys/time.h unistd.h])
|
||||||
|
|
||||||
|
# Checks for typedefs, structures, and compiler characteristics.
|
||||||
|
AC_HEADER_STDBOOL
|
||||||
|
AC_C_INLINE
|
||||||
|
AC_TYPE_INT32_T
|
||||||
|
AC_C_RESTRICT
|
||||||
|
AC_TYPE_SIZE_T
|
||||||
|
AC_TYPE_UINT64_T
|
||||||
|
AC_PROG_CC_STDC
|
||||||
|
AC_PROG_INSTALL
|
||||||
|
AC_PROG_LN_S
|
||||||
|
# Libtool is already initialised by the LT_INIT call earlier in this file;
# the obsolete AC_PROG_LIBTOOL alias is redundant and has been dropped.
|
||||||
|
|
||||||
|
# Checks for library functions.
|
||||||
|
#AC_FUNC_MALLOC
|
||||||
|
AC_CHECK_FUNCS([gettimeofday pow])
|
||||||
|
|
||||||
|
|
||||||
|
AC_CONFIG_FILES([Makefile
|
||||||
|
src/Makefile
|
||||||
|
tests/Makefile
|
||||||
|
ffts.pc
|
||||||
|
java/Makefile
|
||||||
|
])
|
||||||
|
AC_OUTPUT
|
@ -0,0 +1,780 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
# depcomp - compile a program generating dependencies as side-effects
|
||||||
|
|
||||||
|
scriptversion=2012-07-12.20; # UTC
|
||||||
|
|
||||||
|
# Copyright (C) 1999-2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# As a special exception to the GNU General Public License, if you
|
||||||
|
# distribute this file as part of a program that contains a
|
||||||
|
# configuration script generated by Autoconf, you may include it under
|
||||||
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
'')
|
||||||
|
echo "$0: No command. Try '$0 --help' for more information." 1>&2
|
||||||
|
exit 1;
|
||||||
|
;;
|
||||||
|
-h | --h*)
|
||||||
|
cat <<\EOF
|
||||||
|
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
|
||||||
|
|
||||||
|
Run PROGRAMS ARGS to compile a file, generating dependencies
|
||||||
|
as side-effects.
|
||||||
|
|
||||||
|
Environment variables:
|
||||||
|
depmode Dependency tracking mode.
|
||||||
|
source Source file read by 'PROGRAMS ARGS'.
|
||||||
|
object Object file output by 'PROGRAMS ARGS'.
|
||||||
|
DEPDIR directory where to store dependencies.
|
||||||
|
depfile Dependency file to output.
|
||||||
|
tmpdepfile Temporary file to use when outputting dependencies.
|
||||||
|
libtool Whether libtool is used (yes/no).
|
||||||
|
|
||||||
|
Report bugs to <bug-automake@gnu.org>.
|
||||||
|
EOF
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
-v | --v*)
|
||||||
|
echo "depcomp $scriptversion"
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# A tabulation character.
|
||||||
|
tab=' '
|
||||||
|
# A newline character.
|
||||||
|
nl='
|
||||||
|
'
|
||||||
|
|
||||||
|
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
|
||||||
|
echo "depcomp: Variables source, object and depmode must be set" 1>&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
|
||||||
|
depfile=${depfile-`echo "$object" |
|
||||||
|
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
|
||||||
|
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
|
||||||
|
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
|
||||||
|
# Some modes work just like other modes, but use different flags. We
|
||||||
|
# parameterize here, but still list the modes in the big case below,
|
||||||
|
# to make depend.m4 easier to write. Note that we *cannot* use a case
|
||||||
|
# here, because this file can only contain one case statement.
|
||||||
|
if test "$depmode" = hp; then
|
||||||
|
# HP compiler uses -M and no extra arg.
|
||||||
|
gccflag=-M
|
||||||
|
depmode=gcc
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$depmode" = dashXmstdout; then
|
||||||
|
# This is just like dashmstdout with a different argument.
|
||||||
|
dashmflag=-xM
|
||||||
|
depmode=dashmstdout
|
||||||
|
fi
|
||||||
|
|
||||||
|
cygpath_u="cygpath -u -f -"
|
||||||
|
if test "$depmode" = msvcmsys; then
|
||||||
|
# This is just like msvisualcpp but w/o cygpath translation.
|
||||||
|
# Just convert the backslash-escaped backslashes to single forward
|
||||||
|
# slashes to satisfy depend.m4
|
||||||
|
cygpath_u='sed s,\\\\,/,g'
|
||||||
|
depmode=msvisualcpp
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$depmode" = msvc7msys; then
|
||||||
|
# This is just like msvc7 but w/o cygpath translation.
|
||||||
|
# Just convert the backslash-escaped backslashes to single forward
|
||||||
|
# slashes to satisfy depend.m4
|
||||||
|
cygpath_u='sed s,\\\\,/,g'
|
||||||
|
depmode=msvc7
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$depmode" = xlc; then
|
||||||
|
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
|
||||||
|
gccflag=-qmakedep=gcc,-MF
|
||||||
|
depmode=gcc
|
||||||
|
fi
|
||||||
|
|
||||||
|
case "$depmode" in
|
||||||
|
gcc3)
|
||||||
|
## gcc 3 implements dependency tracking that does exactly what
|
||||||
|
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
|
||||||
|
## it if -MD -MP comes after the -MF stuff. Hmm.
|
||||||
|
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
|
||||||
|
## the command line argument order; so add the flags where they
|
||||||
|
## appear in depend2.am. Note that the slowdown incurred here
|
||||||
|
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
case $arg in
|
||||||
|
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
|
||||||
|
*) set fnord "$@" "$arg" ;;
|
||||||
|
esac
|
||||||
|
shift # fnord
|
||||||
|
shift # $arg
|
||||||
|
done
|
||||||
|
"$@"
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
mv "$tmpdepfile" "$depfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
gcc)
|
||||||
|
## There are various ways to get dependency output from gcc. Here's
|
||||||
|
## why we pick this rather obscure method:
|
||||||
|
## - Don't want to use -MD because we'd like the dependencies to end
|
||||||
|
## up in a subdir. Having to rename by hand is ugly.
|
||||||
|
## (We might end up doing this anyway to support other compilers.)
|
||||||
|
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
|
||||||
|
## -MM, not -M (despite what the docs say).
|
||||||
|
## - Using -M directly means running the compiler twice (even worse
|
||||||
|
## than renaming).
|
||||||
|
if test -z "$gccflag"; then
|
||||||
|
gccflag=-MD,
|
||||||
|
fi
|
||||||
|
"$@" -Wp,"$gccflag$tmpdepfile"
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||||
|
## The second -e expression handles DOS-style file names with drive letters.
|
||||||
|
sed -e 's/^[^:]*: / /' \
|
||||||
|
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
|
||||||
|
## This next piece of magic avoids the "deleted header file" problem.
|
||||||
|
## The problem is that when a header file which appears in a .P file
|
||||||
|
## is deleted, the dependency causes make to die (because there is
|
||||||
|
## typically no way to rebuild the header). We avoid this by adding
|
||||||
|
## dummy dependencies for each header file. Too bad gcc doesn't do
|
||||||
|
## this for us directly.
|
||||||
|
tr ' ' "$nl" < "$tmpdepfile" |
|
||||||
|
## Some versions of gcc put a space before the ':'. On the theory
|
||||||
|
## that the space means something, we add a space to the output as
|
||||||
|
## well. hp depmode also adds that space, but also prefixes the VPATH
|
||||||
|
## to the object. Take care to not repeat it in the output.
|
||||||
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
|
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
|
||||||
|
| sed -e 's/$/ :/' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
hp)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
sgi)
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
"$@" "-Wp,-MDupdate,$tmpdepfile"
|
||||||
|
else
|
||||||
|
"$@" -MDupdate "$tmpdepfile"
|
||||||
|
fi
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
|
||||||
|
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
|
||||||
|
# Clip off the initial element (the dependent). Don't try to be
|
||||||
|
# clever and replace this with sed code, as IRIX sed won't handle
|
||||||
|
# lines with more than a fixed number of characters (4096 in
|
||||||
|
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
|
||||||
|
# the IRIX cc adds comments like '#:fec' to the end of the
|
||||||
|
# dependency line.
|
||||||
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
|
||||||
|
tr "$nl" ' ' >> "$depfile"
|
||||||
|
echo >> "$depfile"
|
||||||
|
|
||||||
|
# The second pass generates a dummy entry for each header file.
|
||||||
|
tr ' ' "$nl" < "$tmpdepfile" \
|
||||||
|
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
|
||||||
|
>> "$depfile"
|
||||||
|
else
|
||||||
|
# The sourcefile does not contain any dependencies, so just
|
||||||
|
# store a dummy comment line, to avoid errors with the Makefile
|
||||||
|
# "include basename.Plo" scheme.
|
||||||
|
echo "#dummy" > "$depfile"
|
||||||
|
fi
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
xlc)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
aix)
|
||||||
|
# The C for AIX Compiler uses -M and outputs the dependencies
|
||||||
|
# in a .u file. In older versions, this file always lives in the
|
||||||
|
# current directory. Also, the AIX compiler puts '$object:' at the
|
||||||
|
# start of each line; $object doesn't have directory information.
|
||||||
|
# Version 6 uses the directory in both cases.
|
||||||
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
|
test "x$dir" = "x$object" && dir=
|
||||||
|
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
tmpdepfile1=$dir$base.u
|
||||||
|
tmpdepfile2=$base.u
|
||||||
|
tmpdepfile3=$dir.libs/$base.u
|
||||||
|
"$@" -Wc,-M
|
||||||
|
else
|
||||||
|
tmpdepfile1=$dir$base.u
|
||||||
|
tmpdepfile2=$dir$base.u
|
||||||
|
tmpdepfile3=$dir$base.u
|
||||||
|
"$@" -M
|
||||||
|
fi
|
||||||
|
stat=$?
|
||||||
|
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
|
||||||
|
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
|
||||||
|
do
|
||||||
|
test -f "$tmpdepfile" && break
|
||||||
|
done
|
||||||
|
if test -f "$tmpdepfile"; then
|
||||||
|
# Each line is of the form 'foo.o: dependent.h'.
|
||||||
|
# Do two passes, one to just change these to
|
||||||
|
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||||
|
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||||
|
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||||
|
else
|
||||||
|
# The sourcefile does not contain any dependencies, so just
|
||||||
|
# store a dummy comment line, to avoid errors with the Makefile
|
||||||
|
# "include basename.Plo" scheme.
|
||||||
|
echo "#dummy" > "$depfile"
|
||||||
|
fi
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
icc)
|
||||||
|
# Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
|
||||||
|
# However on
|
||||||
|
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
|
||||||
|
# ICC 7.0 will fill foo.d with something like
|
||||||
|
# foo.o: sub/foo.c
|
||||||
|
# foo.o: sub/foo.h
|
||||||
|
# which is wrong. We want
|
||||||
|
# sub/foo.o: sub/foo.c
|
||||||
|
# sub/foo.o: sub/foo.h
|
||||||
|
# sub/foo.c:
|
||||||
|
# sub/foo.h:
|
||||||
|
# ICC 7.1 will output
|
||||||
|
# foo.o: sub/foo.c sub/foo.h
|
||||||
|
# and will wrap long lines using '\':
|
||||||
|
# foo.o: sub/foo.c ... \
|
||||||
|
# sub/foo.h ... \
|
||||||
|
# ...
|
||||||
|
# tcc 0.9.26 (FIXME still under development at the moment of writing)
|
||||||
|
# will emit a similar output, but also prepend the continuation lines
|
||||||
|
# with horizontal tabulation characters.
|
||||||
|
"$@" -MD -MF "$tmpdepfile"
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
# Each line is of the form 'foo.o: dependent.h',
|
||||||
|
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
|
||||||
|
# Do two passes, one to just change these to
|
||||||
|
# '$object: dependent.h' and one to simply 'dependent.h:'.
|
||||||
|
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
|
||||||
|
< "$tmpdepfile" > "$depfile"
|
||||||
|
sed '
|
||||||
|
s/[ '"$tab"'][ '"$tab"']*/ /g
|
||||||
|
s/^ *//
|
||||||
|
s/ *\\*$//
|
||||||
|
s/^[^:]*: *//
|
||||||
|
/^$/d
|
||||||
|
/:$/d
|
||||||
|
s/$/ :/
|
||||||
|
' < "$tmpdepfile" >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
## The order of this option in the case statement is important, since the
|
||||||
|
## shell code in configure will try each of these formats in the order
|
||||||
|
## listed in this file. A plain '-MD' option would be understood by many
|
||||||
|
## compilers, so we must ensure this comes after the gcc and icc options.
|
||||||
|
pgcc)
|
||||||
|
# Portland's C compiler understands '-MD'.
|
||||||
|
# Will always output deps to 'file.d' where file is the root name of the
|
||||||
|
# source file under compilation, even if file resides in a subdirectory.
|
||||||
|
# The object file name does not affect the name of the '.d' file.
|
||||||
|
# pgcc 10.2 will output
|
||||||
|
# foo.o: sub/foo.c sub/foo.h
|
||||||
|
# and will wrap long lines using '\' :
|
||||||
|
# foo.o: sub/foo.c ... \
|
||||||
|
# sub/foo.h ... \
|
||||||
|
# ...
|
||||||
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
|
test "x$dir" = "x$object" && dir=
|
||||||
|
# Use the source, not the object, to determine the base name, since
|
||||||
|
# that's sadly what pgcc will do too.
|
||||||
|
base=`echo "$source" | sed -e 's|^.*/||' -e 's/\.[-_a-zA-Z0-9]*$//'`
|
||||||
|
tmpdepfile="$base.d"
|
||||||
|
|
||||||
|
# For projects that build the same source file twice into different object
|
||||||
|
# files, the pgcc approach of using the *source* file root name can cause
|
||||||
|
# problems in parallel builds. Use a locking strategy to avoid stomping on
|
||||||
|
# the same $tmpdepfile.
|
||||||
|
lockdir="$base.d-lock"
|
||||||
|
trap "echo '$0: caught signal, cleaning up...' >&2; rm -rf $lockdir" 1 2 13 15
|
||||||
|
numtries=100
|
||||||
|
i=$numtries
|
||||||
|
while test $i -gt 0 ; do
|
||||||
|
# mkdir is a portable test-and-set.
|
||||||
|
if mkdir $lockdir 2>/dev/null; then
|
||||||
|
# This process acquired the lock.
|
||||||
|
"$@" -MD
|
||||||
|
stat=$?
|
||||||
|
# Release the lock.
|
||||||
|
rm -rf $lockdir
|
||||||
|
break
|
||||||
|
else
|
||||||
|
## the lock is being held by a different process,
|
||||||
|
## wait until the winning process is done or we timeout
|
||||||
|
while test -d $lockdir && test $i -gt 0; do
|
||||||
|
sleep 1
|
||||||
|
i=`expr $i - 1`
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
i=`expr $i - 1`
|
||||||
|
done
|
||||||
|
trap - 1 2 13 15
|
||||||
|
if test $i -le 0; then
|
||||||
|
echo "$0: failed to acquire lock after $numtries attempts" >&2
|
||||||
|
echo "$0: check lockdir '$lockdir'" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $stat -ne 0; then
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
# Each line is of the form `foo.o: dependent.h',
|
||||||
|
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
|
||||||
|
# Do two passes, one to just change these to
|
||||||
|
# `$object: dependent.h' and one to simply `dependent.h:'.
|
||||||
|
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
|
||||||
|
# Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
|
# correctly. Breaking it into two sed invocations is a workaround.
|
||||||
|
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
|
||||||
|
sed -e 's/$/ :/' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
hp2)
|
||||||
|
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
|
||||||
|
# compilers, which have integrated preprocessors. The correct option
|
||||||
|
# to use with these is +Maked; it writes dependencies to a file named
|
||||||
|
# 'foo.d', which lands next to the object file, wherever that
|
||||||
|
# happens to be.
|
||||||
|
# Much of this is similar to the tru64 case; see comments there.
|
||||||
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
|
test "x$dir" = "x$object" && dir=
|
||||||
|
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
tmpdepfile1=$dir$base.d
|
||||||
|
tmpdepfile2=$dir.libs/$base.d
|
||||||
|
"$@" -Wc,+Maked
|
||||||
|
else
|
||||||
|
tmpdepfile1=$dir$base.d
|
||||||
|
tmpdepfile2=$dir$base.d
|
||||||
|
"$@" +Maked
|
||||||
|
fi
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile1" "$tmpdepfile2"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
|
||||||
|
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
|
||||||
|
do
|
||||||
|
test -f "$tmpdepfile" && break
|
||||||
|
done
|
||||||
|
if test -f "$tmpdepfile"; then
|
||||||
|
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
|
||||||
|
# Add 'dependent.h:' lines.
|
||||||
|
sed -ne '2,${
|
||||||
|
s/^ *//
|
||||||
|
s/ \\*$//
|
||||||
|
s/$/:/
|
||||||
|
p
|
||||||
|
}' "$tmpdepfile" >> "$depfile"
|
||||||
|
else
|
||||||
|
echo "#dummy" > "$depfile"
|
||||||
|
fi
|
||||||
|
rm -f "$tmpdepfile" "$tmpdepfile2"
|
||||||
|
;;
|
||||||
|
|
||||||
|
tru64)
|
||||||
|
# The Tru64 compiler uses -MD to generate dependencies as a side
|
||||||
|
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
|
||||||
|
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
|
||||||
|
# dependencies in 'foo.d' instead, so we check for that too.
|
||||||
|
# Subdirectories are respected.
|
||||||
|
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
|
||||||
|
test "x$dir" = "x$object" && dir=
|
||||||
|
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
|
||||||
|
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
# With Tru64 cc, shared objects can also be used to make a
|
||||||
|
# static library. This mechanism is used in libtool 1.4 series to
|
||||||
|
# handle both shared and static libraries in a single compilation.
|
||||||
|
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
|
||||||
|
#
|
||||||
|
# With libtool 1.5 this exception was removed, and libtool now
|
||||||
|
# generates 2 separate objects for the 2 libraries. These two
|
||||||
|
# compilations output dependencies in $dir.libs/$base.o.d and
|
||||||
|
# in $dir$base.o.d. We have to check for both files, because
|
||||||
|
# one of the two compilations can be disabled. We should prefer
|
||||||
|
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
|
||||||
|
# automatically cleaned when .libs/ is deleted, while ignoring
|
||||||
|
# the former would cause a distcleancheck panic.
|
||||||
|
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
|
||||||
|
tmpdepfile2=$dir$base.o.d # libtool 1.5
|
||||||
|
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
|
||||||
|
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
|
||||||
|
"$@" -Wc,-MD
|
||||||
|
else
|
||||||
|
tmpdepfile1=$dir$base.o.d
|
||||||
|
tmpdepfile2=$dir$base.d
|
||||||
|
tmpdepfile3=$dir$base.d
|
||||||
|
tmpdepfile4=$dir$base.d
|
||||||
|
"$@" -MD
|
||||||
|
fi
|
||||||
|
|
||||||
|
stat=$?
|
||||||
|
if test $stat -eq 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
|
||||||
|
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
|
||||||
|
do
|
||||||
|
test -f "$tmpdepfile" && break
|
||||||
|
done
|
||||||
|
if test -f "$tmpdepfile"; then
|
||||||
|
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
|
||||||
|
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
|
||||||
|
else
|
||||||
|
echo "#dummy" > "$depfile"
|
||||||
|
fi
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
msvc7)
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
showIncludes=-Wc,-showIncludes
|
||||||
|
else
|
||||||
|
showIncludes=-showIncludes
|
||||||
|
fi
|
||||||
|
"$@" $showIncludes > "$tmpdepfile"
|
||||||
|
stat=$?
|
||||||
|
grep -v '^Note: including file: ' "$tmpdepfile"
|
||||||
|
if test "$stat" = 0; then :
|
||||||
|
else
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
exit $stat
|
||||||
|
fi
|
||||||
|
rm -f "$depfile"
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
# The first sed program below extracts the file names and escapes
|
||||||
|
# backslashes for cygpath. The second sed program outputs the file
|
||||||
|
# name when reading, but also accumulates all include files in the
|
||||||
|
# hold buffer in order to output them again at the end. This only
|
||||||
|
# works with sed implementations that can handle large buffers.
|
||||||
|
sed < "$tmpdepfile" -n '
|
||||||
|
/^Note: including file: *\(.*\)/ {
|
||||||
|
s//\1/
|
||||||
|
s/\\/\\\\/g
|
||||||
|
p
|
||||||
|
}' | $cygpath_u | sort -u | sed -n '
|
||||||
|
s/ /\\ /g
|
||||||
|
s/\(.*\)/'"$tab"'\1 \\/p
|
||||||
|
s/.\(.*\) \\/\1:/
|
||||||
|
H
|
||||||
|
$ {
|
||||||
|
s/.*/'"$tab"'/
|
||||||
|
G
|
||||||
|
p
|
||||||
|
}' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
msvc7msys)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
#nosideeffect)
|
||||||
|
# This comment above is used by automake to tell side-effect
|
||||||
|
# dependency tracking mechanisms from slower ones.
|
||||||
|
|
||||||
|
dashmstdout)
|
||||||
|
# Important note: in order to support this mode, a compiler *must*
|
||||||
|
# always write the preprocessed file to stdout, regardless of -o.
|
||||||
|
"$@" || exit $?
|
||||||
|
|
||||||
|
# Remove the call to Libtool.
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Remove '-o $object'.
|
||||||
|
IFS=" "
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
case $arg in
|
||||||
|
-o)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
$object)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
set fnord "$@" "$arg"
|
||||||
|
shift # fnord
|
||||||
|
shift # $arg
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
test -z "$dashmflag" && dashmflag=-M
|
||||||
|
# Require at least two characters before searching for ':'
|
||||||
|
# in the target name. This is to cope with DOS-style filenames:
|
||||||
|
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
|
||||||
|
"$@" $dashmflag |
|
||||||
|
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
|
||||||
|
rm -f "$depfile"
|
||||||
|
cat < "$tmpdepfile" > "$depfile"
|
||||||
|
tr ' ' "$nl" < "$tmpdepfile" | \
|
||||||
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
|
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
dashXmstdout)
|
||||||
|
# This case only exists to satisfy depend.m4. It is never actually
|
||||||
|
# run, as this mode is specially recognized in the preamble.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
makedepend)
|
||||||
|
"$@" || exit $?
|
||||||
|
# Remove any Libtool call
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
# X makedepend
|
||||||
|
shift
|
||||||
|
cleared=no eat=no
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
case $cleared in
|
||||||
|
no)
|
||||||
|
set ""; shift
|
||||||
|
cleared=yes ;;
|
||||||
|
esac
|
||||||
|
if test $eat = yes; then
|
||||||
|
eat=no
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
case "$arg" in
|
||||||
|
-D*|-I*)
|
||||||
|
set fnord "$@" "$arg"; shift ;;
|
||||||
|
# Strip any option that makedepend may not understand. Remove
|
||||||
|
# the object too, otherwise makedepend will parse it as a source file.
|
||||||
|
-arch)
|
||||||
|
eat=yes ;;
|
||||||
|
-*|$object)
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
set fnord "$@" "$arg"; shift ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
obj_suffix=`echo "$object" | sed 's/^.*\././'`
|
||||||
|
touch "$tmpdepfile"
|
||||||
|
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
|
||||||
|
rm -f "$depfile"
|
||||||
|
# makedepend may prepend the VPATH from the source file name to the object.
|
||||||
|
# No need to regex-escape $object, excess matching of '.' is harmless.
|
||||||
|
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
|
||||||
|
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
|
||||||
|
## Some versions of the HPUX 10.20 sed can't process this invocation
|
||||||
|
## correctly. Breaking it into two sed invocations is a workaround.
|
||||||
|
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile" "$tmpdepfile".bak
|
||||||
|
;;
|
||||||
|
|
||||||
|
cpp)
|
||||||
|
# Important note: in order to support this mode, a compiler *must*
|
||||||
|
# always write the preprocessed file to stdout.
|
||||||
|
"$@" || exit $?
|
||||||
|
|
||||||
|
# Remove the call to Libtool.
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Remove '-o $object'.
|
||||||
|
IFS=" "
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
case $arg in
|
||||||
|
-o)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
$object)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
set fnord "$@" "$arg"
|
||||||
|
shift # fnord
|
||||||
|
shift # $arg
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
"$@" -E |
|
||||||
|
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
|
||||||
|
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
|
||||||
|
sed '$ s: \\$::' > "$tmpdepfile"
|
||||||
|
rm -f "$depfile"
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
cat < "$tmpdepfile" >> "$depfile"
|
||||||
|
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
msvisualcpp)
|
||||||
|
# Important note: in order to support this mode, a compiler *must*
|
||||||
|
# always write the preprocessed file to stdout.
|
||||||
|
"$@" || exit $?
|
||||||
|
|
||||||
|
# Remove the call to Libtool.
|
||||||
|
if test "$libtool" = yes; then
|
||||||
|
while test "X$1" != 'X--mode=compile'; do
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
shift
|
||||||
|
fi
|
||||||
|
|
||||||
|
IFS=" "
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
case "$arg" in
|
||||||
|
-o)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
$object)
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
|
||||||
|
set fnord "$@"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
set fnord "$@" "$arg"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
"$@" -E 2>/dev/null |
|
||||||
|
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
|
||||||
|
rm -f "$depfile"
|
||||||
|
echo "$object : \\" > "$depfile"
|
||||||
|
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
|
||||||
|
echo "$tab" >> "$depfile"
|
||||||
|
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
|
||||||
|
rm -f "$tmpdepfile"
|
||||||
|
;;
|
||||||
|
|
||||||
|
msvcmsys)
|
||||||
|
# This case exists only to let depend.m4 do its work. It works by
|
||||||
|
# looking at the text of this script. This case will never be run,
|
||||||
|
# since it is checked for above.
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
none)
|
||||||
|
exec "$@"
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo "Unknown depmode $depmode" 1>&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# mode: shell-script
|
||||||
|
# sh-indentation: 2
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-time-zone: "UTC"
|
||||||
|
# time-stamp-end: "; # UTC"
|
||||||
|
# End:
|
@ -0,0 +1,10 @@
|
|||||||
|
prefix=@prefix@
|
||||||
|
exec_prefix=@exec_prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
includedir=@includedir@
|
||||||
|
|
||||||
|
Name: FFTS
|
||||||
|
Description: fast Fourier transform library
|
||||||
|
Version: @VERSION@
|
||||||
|
Libs: -L${libdir} -lffts -lm
|
||||||
|
Cflags: -I${includedir}/ffts
|
@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS.
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FFTS_H__
|
||||||
|
#define __FFTS_H__
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
// Plan type. Only a forward declaration appears in this header, so the
// structure's members are not visible here; every function below takes or
// returns a plan through a pointer.
struct _ffts_plan_t;
|
||||||
|
typedef struct _ffts_plan_t ffts_plan_t;
|
||||||
|
|
||||||
|
// Creates a plan for a one-dimensional transform of size N.
// NOTE(review): `sign` presumably selects the transform direction (-1 forward,
// 1 backward), matching the convention documented for the real-transform
// variants in this header -- confirm.
// NOTE(review): the returned plan is presumably released with ffts_free();
// the value returned on failure is not stated in this header -- confirm.
ffts_plan_t *ffts_init_1d(size_t N, int sign);
|
||||||
|
// Creates a plan for a two-dimensional transform with sizes N1 and N2.
// NOTE(review): which of N1/N2 is the fastest-varying dimension is not stated
// in this header -- confirm. `sign` presumably has the same meaning as for
// ffts_init_1d.
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
|
||||||
|
// Creates a plan for a transform over `rank` dimensions.
// NOTE(review): Ns presumably points to `rank` per-dimension sizes -- confirm;
// whether the array must remain valid after the call is not stated here.
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);
|
||||||
|
|
||||||
|
// For real transforms, sign == -1 implies a real-to-complex forward transform,
|
||||||
|
// and sign == 1 implies a complex-to-real backwards transform
|
||||||
|
// The output of a real-to-complex transform is N/2+1 complex numbers, where the
|
||||||
|
// redundant outputs have been omitted.
|
||||||
|
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
|
||||||
|
// Two-dimensional counterpart of ffts_init_1d_real, with sizes N1 and N2.
// NOTE(review): presumably follows the same sign convention as the 1-D real
// transform; the exact output layout for the 2-D case is not stated in this
// header -- confirm.
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign);
|
||||||
|
// Counterpart of ffts_init_1d_real for `rank` dimensions.
// NOTE(review): Ns presumably points to `rank` per-dimension sizes, and the
// sign convention presumably matches the 1-D real transform -- confirm.
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign);
|
||||||
|
|
||||||
|
// Executes the transform described by the given plan, taking its data from
// `input` and storing the result in `output`.
// NOTE(review): element type, required buffer sizes, alignment requirements
// and whether input and output may alias are not stated in this header --
// confirm against the implementation.
void ffts_execute(ffts_plan_t * , const void *input, void *output);
|
||||||
|
// Releases a plan.
// NOTE(review): presumably accepts any plan returned by the ffts_init_*
// functions; behaviour for a NULL argument is not stated here -- confirm.
void ffts_free(ffts_plan_t *);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} /* extern "C" */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,527 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# install - install a program, script, or datafile
|
||||||
|
|
||||||
|
scriptversion=2011-11-20.07; # UTC
|
||||||
|
|
||||||
|
# This originates from X11R5 (mit/util/scripts/install.sh), which was
|
||||||
|
# later released in X11R6 (xc/config/util/install.sh) with the
|
||||||
|
# following copyright and license.
|
||||||
|
#
|
||||||
|
# Copyright (C) 1994 X Consortium
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to
|
||||||
|
# deal in the Software without restriction, including without limitation the
|
||||||
|
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
# sell copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||||
|
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
|
||||||
|
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
#
|
||||||
|
# Except as contained in this notice, the name of the X Consortium shall not
|
||||||
|
# be used in advertising or otherwise to promote the sale, use or other deal-
|
||||||
|
# ings in this Software without prior written authorization from the X Consor-
|
||||||
|
# tium.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# FSF changes to this file are in the public domain.
|
||||||
|
#
|
||||||
|
# Calling this script install-sh is preferred over install.sh, to prevent
|
||||||
|
# 'make' implicit rules from creating a file called install from it
|
||||||
|
# when there is no Makefile.
|
||||||
|
#
|
||||||
|
# This script is compatible with the BSD install script, but was written
|
||||||
|
# from scratch.
|
||||||
|
|
||||||
|
nl='
|
||||||
|
'
|
||||||
|
IFS=" "" $nl"
|
||||||
|
|
||||||
|
# set DOITPROG to echo to test this script
|
||||||
|
|
||||||
|
# Don't use :- since 4.3BSD and earlier shells don't like it.
|
||||||
|
doit=${DOITPROG-}
|
||||||
|
if test -z "$doit"; then
|
||||||
|
doit_exec=exec
|
||||||
|
else
|
||||||
|
doit_exec=$doit
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Put in absolute file names if you don't have them in your path;
|
||||||
|
# or use environment vars.
|
||||||
|
|
||||||
|
chgrpprog=${CHGRPPROG-chgrp}
|
||||||
|
chmodprog=${CHMODPROG-chmod}
|
||||||
|
chownprog=${CHOWNPROG-chown}
|
||||||
|
cmpprog=${CMPPROG-cmp}
|
||||||
|
cpprog=${CPPROG-cp}
|
||||||
|
mkdirprog=${MKDIRPROG-mkdir}
|
||||||
|
mvprog=${MVPROG-mv}
|
||||||
|
rmprog=${RMPROG-rm}
|
||||||
|
stripprog=${STRIPPROG-strip}
|
||||||
|
|
||||||
|
posix_glob='?'
|
||||||
|
initialize_posix_glob='
|
||||||
|
test "$posix_glob" != "?" || {
|
||||||
|
if (set -f) 2>/dev/null; then
|
||||||
|
posix_glob=
|
||||||
|
else
|
||||||
|
posix_glob=:
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
'
|
||||||
|
|
||||||
|
posix_mkdir=
|
||||||
|
|
||||||
|
# Desired mode of installed file.
|
||||||
|
mode=0755
|
||||||
|
|
||||||
|
chgrpcmd=
|
||||||
|
chmodcmd=$chmodprog
|
||||||
|
chowncmd=
|
||||||
|
mvcmd=$mvprog
|
||||||
|
rmcmd="$rmprog -f"
|
||||||
|
stripcmd=
|
||||||
|
|
||||||
|
src=
|
||||||
|
dst=
|
||||||
|
dir_arg=
|
||||||
|
dst_arg=
|
||||||
|
|
||||||
|
copy_on_change=false
|
||||||
|
no_target_directory=
|
||||||
|
|
||||||
|
usage="\
|
||||||
|
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
|
||||||
|
or: $0 [OPTION]... SRCFILES... DIRECTORY
|
||||||
|
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
|
||||||
|
or: $0 [OPTION]... -d DIRECTORIES...
|
||||||
|
|
||||||
|
In the 1st form, copy SRCFILE to DSTFILE.
|
||||||
|
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
|
||||||
|
In the 4th, create DIRECTORIES.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--help display this help and exit.
|
||||||
|
--version display version info and exit.
|
||||||
|
|
||||||
|
-c (ignored)
|
||||||
|
-C install only if different (preserve the last data modification time)
|
||||||
|
-d create directories instead of installing files.
|
||||||
|
-g GROUP $chgrpprog installed files to GROUP.
|
||||||
|
-m MODE $chmodprog installed files to MODE.
|
||||||
|
-o USER $chownprog installed files to USER.
|
||||||
|
-s $stripprog installed files.
|
||||||
|
-t DIRECTORY install into DIRECTORY.
|
||||||
|
-T report an error if DSTFILE is a directory.
|
||||||
|
|
||||||
|
Environment variables override the default commands:
|
||||||
|
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
|
||||||
|
RMPROG STRIPPROG
|
||||||
|
"
|
||||||
|
|
||||||
|
while test $# -ne 0; do
|
||||||
|
case $1 in
|
||||||
|
-c) ;;
|
||||||
|
|
||||||
|
-C) copy_on_change=true;;
|
||||||
|
|
||||||
|
-d) dir_arg=true;;
|
||||||
|
|
||||||
|
-g) chgrpcmd="$chgrpprog $2"
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
--help) echo "$usage"; exit $?;;
|
||||||
|
|
||||||
|
-m) mode=$2
|
||||||
|
case $mode in
|
||||||
|
*' '* | *' '* | *'
|
||||||
|
'* | *'*'* | *'?'* | *'['*)
|
||||||
|
echo "$0: invalid mode: $mode" >&2
|
||||||
|
exit 1;;
|
||||||
|
esac
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-o) chowncmd="$chownprog $2"
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-s) stripcmd=$stripprog;;
|
||||||
|
|
||||||
|
-t) dst_arg=$2
|
||||||
|
# Protect names problematic for 'test' and other utilities.
|
||||||
|
case $dst_arg in
|
||||||
|
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||||
|
esac
|
||||||
|
shift;;
|
||||||
|
|
||||||
|
-T) no_target_directory=true;;
|
||||||
|
|
||||||
|
--version) echo "$0 $scriptversion"; exit $?;;
|
||||||
|
|
||||||
|
--) shift
|
||||||
|
break;;
|
||||||
|
|
||||||
|
-*) echo "$0: invalid option: $1" >&2
|
||||||
|
exit 1;;
|
||||||
|
|
||||||
|
*) break;;
|
||||||
|
esac
|
||||||
|
shift
|
||||||
|
done
|
||||||
|
|
||||||
|
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
|
||||||
|
# When -d is used, all remaining arguments are directories to create.
|
||||||
|
# When -t is used, the destination is already specified.
|
||||||
|
# Otherwise, the last argument is the destination. Remove it from $@.
|
||||||
|
for arg
|
||||||
|
do
|
||||||
|
if test -n "$dst_arg"; then
|
||||||
|
# $@ is not empty: it contains at least $arg.
|
||||||
|
set fnord "$@" "$dst_arg"
|
||||||
|
shift # fnord
|
||||||
|
fi
|
||||||
|
shift # arg
|
||||||
|
dst_arg=$arg
|
||||||
|
# Protect names problematic for 'test' and other utilities.
|
||||||
|
case $dst_arg in
|
||||||
|
-* | [=\(\)!]) dst_arg=./$dst_arg;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test $# -eq 0; then
|
||||||
|
if test -z "$dir_arg"; then
|
||||||
|
echo "$0: no input file specified." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# It's OK to call 'install-sh -d' without argument.
|
||||||
|
# This can happen when creating conditional directories.
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -z "$dir_arg"; then
|
||||||
|
do_exit='(exit $ret); exit $ret'
|
||||||
|
trap "ret=129; $do_exit" 1
|
||||||
|
trap "ret=130; $do_exit" 2
|
||||||
|
trap "ret=141; $do_exit" 13
|
||||||
|
trap "ret=143; $do_exit" 15
|
||||||
|
|
||||||
|
# Set umask so as not to create temps with too-generous modes.
|
||||||
|
# However, 'strip' requires both read and write access to temps.
|
||||||
|
case $mode in
|
||||||
|
# Optimize common cases.
|
||||||
|
*644) cp_umask=133;;
|
||||||
|
*755) cp_umask=22;;
|
||||||
|
|
||||||
|
*[0-7])
|
||||||
|
if test -z "$stripcmd"; then
|
||||||
|
u_plus_rw=
|
||||||
|
else
|
||||||
|
u_plus_rw='% 200'
|
||||||
|
fi
|
||||||
|
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
|
||||||
|
*)
|
||||||
|
if test -z "$stripcmd"; then
|
||||||
|
u_plus_rw=
|
||||||
|
else
|
||||||
|
u_plus_rw=,u+rw
|
||||||
|
fi
|
||||||
|
cp_umask=$mode$u_plus_rw;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
for src
|
||||||
|
do
|
||||||
|
# Protect names problematic for 'test' and other utilities.
|
||||||
|
case $src in
|
||||||
|
-* | [=\(\)!]) src=./$src;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
dst=$src
|
||||||
|
dstdir=$dst
|
||||||
|
test -d "$dstdir"
|
||||||
|
dstdir_status=$?
|
||||||
|
else
|
||||||
|
|
||||||
|
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
|
||||||
|
# might cause directories to be created, which would be especially bad
|
||||||
|
# if $src (and thus $dsttmp) contains '*'.
|
||||||
|
if test ! -f "$src" && test ! -d "$src"; then
|
||||||
|
echo "$0: $src does not exist." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -z "$dst_arg"; then
|
||||||
|
echo "$0: no destination specified." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
dst=$dst_arg
|
||||||
|
|
||||||
|
# If destination is a directory, append the input filename; won't work
|
||||||
|
# if double slashes aren't ignored.
|
||||||
|
if test -d "$dst"; then
|
||||||
|
if test -n "$no_target_directory"; then
|
||||||
|
echo "$0: $dst_arg: Is a directory" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
dstdir=$dst
|
||||||
|
dst=$dstdir/`basename "$src"`
|
||||||
|
dstdir_status=0
|
||||||
|
else
|
||||||
|
# Prefer dirname, but fall back on a substitute if dirname fails.
|
||||||
|
dstdir=`
|
||||||
|
(dirname "$dst") 2>/dev/null ||
|
||||||
|
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
|
||||||
|
X"$dst" : 'X\(//\)[^/]' \| \
|
||||||
|
X"$dst" : 'X\(//\)$' \| \
|
||||||
|
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
|
||||||
|
echo X"$dst" |
|
||||||
|
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\/\)[^/].*/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\/\)$/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
/^X\(\/\).*/{
|
||||||
|
s//\1/
|
||||||
|
q
|
||||||
|
}
|
||||||
|
s/.*/./; q'
|
||||||
|
`
|
||||||
|
|
||||||
|
test -d "$dstdir"
|
||||||
|
dstdir_status=$?
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
obsolete_mkdir_used=false
|
||||||
|
|
||||||
|
if test $dstdir_status != 0; then
|
||||||
|
case $posix_mkdir in
|
||||||
|
'')
|
||||||
|
# Create intermediate dirs using mode 755 as modified by the umask.
|
||||||
|
# This is like FreeBSD 'install' as of 1997-10-28.
|
||||||
|
umask=`umask`
|
||||||
|
case $stripcmd.$umask in
|
||||||
|
# Optimize common cases.
|
||||||
|
*[2367][2367]) mkdir_umask=$umask;;
|
||||||
|
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
|
||||||
|
|
||||||
|
*[0-7])
|
||||||
|
mkdir_umask=`expr $umask + 22 \
|
||||||
|
- $umask % 100 % 40 + $umask % 20 \
|
||||||
|
- $umask % 10 % 4 + $umask % 2
|
||||||
|
`;;
|
||||||
|
*) mkdir_umask=$umask,go-w;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# With -d, create the new directory with the user-specified mode.
|
||||||
|
# Otherwise, rely on $mkdir_umask.
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
mkdir_mode=-m$mode
|
||||||
|
else
|
||||||
|
mkdir_mode=
|
||||||
|
fi
|
||||||
|
|
||||||
|
posix_mkdir=false
|
||||||
|
case $umask in
|
||||||
|
*[123567][0-7][0-7])
|
||||||
|
# POSIX mkdir -p sets u+wx bits regardless of umask, which
|
||||||
|
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
|
||||||
|
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
|
||||||
|
|
||||||
|
if (umask $mkdir_umask &&
|
||||||
|
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
if test -z "$dir_arg" || {
|
||||||
|
# Check for POSIX incompatibilities with -m.
|
||||||
|
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
|
||||||
|
# other-writable bit of parent directory when it shouldn't.
|
||||||
|
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
|
||||||
|
ls_ld_tmpdir=`ls -ld "$tmpdir"`
|
||||||
|
case $ls_ld_tmpdir in
|
||||||
|
d????-?r-*) different_mode=700;;
|
||||||
|
d????-?--*) different_mode=755;;
|
||||||
|
*) false;;
|
||||||
|
esac &&
|
||||||
|
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
|
||||||
|
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
|
||||||
|
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
then posix_mkdir=:
|
||||||
|
fi
|
||||||
|
rmdir "$tmpdir/d" "$tmpdir"
|
||||||
|
else
|
||||||
|
# Remove any dirs left behind by ancient mkdir implementations.
|
||||||
|
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
|
||||||
|
fi
|
||||||
|
trap '' 0;;
|
||||||
|
esac;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
if
|
||||||
|
$posix_mkdir && (
|
||||||
|
umask $mkdir_umask &&
|
||||||
|
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
|
||||||
|
)
|
||||||
|
then :
|
||||||
|
else
|
||||||
|
|
||||||
|
# The umask is ridiculous, or mkdir does not conform to POSIX,
|
||||||
|
# or it failed possibly due to a race condition. Create the
|
||||||
|
# directory the slow way, step by step, checking for races as we go.
|
||||||
|
|
||||||
|
case $dstdir in
|
||||||
|
/*) prefix='/';;
|
||||||
|
[-=\(\)!]*) prefix='./';;
|
||||||
|
*) prefix='';;
|
||||||
|
esac
|
||||||
|
|
||||||
|
eval "$initialize_posix_glob"
|
||||||
|
|
||||||
|
oIFS=$IFS
|
||||||
|
IFS=/
|
||||||
|
$posix_glob set -f
|
||||||
|
set fnord $dstdir
|
||||||
|
shift
|
||||||
|
$posix_glob set +f
|
||||||
|
IFS=$oIFS
|
||||||
|
|
||||||
|
prefixes=
|
||||||
|
|
||||||
|
for d
|
||||||
|
do
|
||||||
|
test X"$d" = X && continue
|
||||||
|
|
||||||
|
prefix=$prefix$d
|
||||||
|
if test -d "$prefix"; then
|
||||||
|
prefixes=
|
||||||
|
else
|
||||||
|
if $posix_mkdir; then
  # Retry 'mkdir -p' on the whole destination now that a missing path
  # component has been found.  The restricted creation mask must be set
  # with the 'umask' builtin inside the subshell, as the other mkdir
  # invocations in this script do.  (This previously read
  # 'umask=$mkdir_umask', which only assigned a shell variable named
  # "umask" and left the process umask untouched.)
  (umask $mkdir_umask &&
   $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
  # Don't fail if two instances are running concurrently.
  test -d "$prefix" || exit 1
else
  # No usable 'mkdir -p': remember this component, single-quoted for the
  # later 'eval', escaping any single quotes embedded in the name.
  case $prefix in
    *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
    *) qprefix=$prefix;;
  esac
  prefixes="$prefixes '$qprefix'"
fi
|
||||||
|
fi
|
||||||
|
prefix=$prefix/
|
||||||
|
done
|
||||||
|
|
||||||
|
if test -n "$prefixes"; then
|
||||||
|
# Don't fail if two instances are running concurrently.
|
||||||
|
(umask $mkdir_umask &&
|
||||||
|
eval "\$doit_exec \$mkdirprog $prefixes") ||
|
||||||
|
test -d "$dstdir" || exit 1
|
||||||
|
obsolete_mkdir_used=true
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test -n "$dir_arg"; then
|
||||||
|
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
|
||||||
|
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
|
||||||
|
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
|
||||||
|
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
|
||||||
|
else
|
||||||
|
|
||||||
|
# Make a couple of temp file names in the proper directory.
|
||||||
|
dsttmp=$dstdir/_inst.$$_
|
||||||
|
rmtmp=$dstdir/_rm.$$_
|
||||||
|
|
||||||
|
# Trap to clean up those temp files at exit.
|
||||||
|
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
|
||||||
|
|
||||||
|
# Copy the file name to the temp name.
|
||||||
|
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
|
||||||
|
|
||||||
|
# and set any options; do chmod last to preserve setuid bits.
|
||||||
|
#
|
||||||
|
# If any of these fail, we abort the whole thing. If we want to
|
||||||
|
# ignore errors from any of these, just make sure not to ignore
|
||||||
|
# errors from the above "$doit $cpprog $src $dsttmp" command.
|
||||||
|
#
|
||||||
|
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
|
||||||
|
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
|
||||||
|
|
||||||
|
# If -C, don't bother to copy if it wouldn't change the file.
|
||||||
|
if $copy_on_change &&
|
||||||
|
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
|
||||||
|
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
|
||||||
|
|
||||||
|
eval "$initialize_posix_glob" &&
|
||||||
|
$posix_glob set -f &&
|
||||||
|
set X $old && old=:$2:$4:$5:$6 &&
|
||||||
|
set X $new && new=:$2:$4:$5:$6 &&
|
||||||
|
$posix_glob set +f &&
|
||||||
|
|
||||||
|
test "$old" = "$new" &&
|
||||||
|
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
rm -f "$dsttmp"
|
||||||
|
else
|
||||||
|
# Rename the file to the real destination.
|
||||||
|
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
|
||||||
|
|
||||||
|
# The rename failed, perhaps because mv can't rename something else
|
||||||
|
# to itself, or perhaps because mv is so ancient that it does not
|
||||||
|
# support -f.
|
||||||
|
{
|
||||||
|
# Now remove or move aside any old file at destination location.
|
||||||
|
# We try this two ways since rm can't unlink itself on some
|
||||||
|
# systems and the destination file might be busy for other
|
||||||
|
# reasons. In this case, the final cleanup might fail but the new
|
||||||
|
# file should still install successfully.
|
||||||
|
{
|
||||||
|
test ! -f "$dst" ||
|
||||||
|
$doit $rmcmd -f "$dst" 2>/dev/null ||
|
||||||
|
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
|
||||||
|
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
|
||||||
|
} ||
|
||||||
|
{ echo "$0: cannot unlink or rename $dst" >&2
|
||||||
|
(exit 1); exit 1
|
||||||
|
}
|
||||||
|
} &&
|
||||||
|
|
||||||
|
# Now rename the file to the real destination.
|
||||||
|
$doit $mvcmd "$dsttmp" "$dst"
|
||||||
|
}
|
||||||
|
fi || exit 1
|
||||||
|
|
||||||
|
trap '' 0
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Local variables:
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-time-zone: "UTC"
|
||||||
|
# time-stamp-end: "; # UTC"
|
||||||
|
# End:
|
@ -0,0 +1,33 @@
|
|||||||
|
|
||||||
|
# JNI bindings for FFTS: compiles the Java sources into ffts.jar, generates
# the JNI header with javah, and builds libffts_jni linked against libffts.
#
# TODO: the ax_prog_javah thing doesn't work so this
# requires javah in the path

if ENABLE_JNI
JAVA_SRC=$(shell find $(srcdir)/src -name '*.java')

# The generated JNI header must exist before jni/ffts_jni.c is compiled.
BUILT_SOURCES = nz_ac_waikato_ffts_FFTS.h

all: ffts.jar

# Single-target rule: the recipe produces both the 'classes' directory and
# ffts.jar, but only ffts.jar is tracked.  Listing both as targets of one
# rule lets a parallel make run the recipe twice at the same time, with
# each run starting by deleting 'classes'.
# -sourcepath uses $(srcdir) so that out-of-tree (VPATH) builds find the
# sources in the same place JAVA_SRC was collected from.
ffts.jar: $(JAVA_SRC)
	-rm -rf classes
	mkdir classes
	$(JAVAC) -d classes -sourcepath $(srcdir)/src $(JAVA_SRC)
	$(JAR) -cf ffts.jar -C classes .

# Kept so that 'make classes' still works; the directory is a by-product
# of building ffts.jar.
classes: ffts.jar

lib_LTLIBRARIES = libffts_jni.la
libffts_jni_la_SOURCES = jni/ffts_jni.c
nodist_include_HEADERS = nz_ac_waikato_ffts_FFTS.h
libffts_jni_la_LIBADD = $(top_builddir)/src/libffts.la
libffts_jni_la_CFLAGS = @JNI_CPPFLAGS@ $(AM_CFLAGS) -I$(top_srcdir)/include
libffts_jni_la_LDFLAGS = -shared

pkgdata_DATA = ffts.jar

# Generate the JNI header from the compiled classes; $< is ffts.jar, which
# javah accepts as a class path entry.
nz_ac_waikato_ffts_FFTS.h: ffts.jar
	javah -cp $< nz.ac.waikato.ffts.FFTS

CLEANFILES=ffts.jar nz_ac_waikato_ffts_FFTS.h
clean-local:
	-rm -rf classes
endif
|
@ -0,0 +1,681 @@
|
|||||||
|
# Makefile.in generated by automake 1.12.4 from Makefile.am.
|
||||||
|
# @configure_input@
|
||||||
|
|
||||||
|
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
@SET_MAKE@
|
||||||
|
|
||||||
|
# TODO: the ax_prog_javah thing doesn't work so this
|
||||||
|
# requires javah in the path
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
VPATH = @srcdir@
|
||||||
|
am__make_dryrun = \
|
||||||
|
{ \
|
||||||
|
am__dry=no; \
|
||||||
|
case $$MAKEFLAGS in \
|
||||||
|
*\\[\ \ ]*) \
|
||||||
|
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
|
||||||
|
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
|
||||||
|
*) \
|
||||||
|
for am__flg in $$MAKEFLAGS; do \
|
||||||
|
case $$am__flg in \
|
||||||
|
*=*|--*) ;; \
|
||||||
|
*n*) am__dry=yes; break;; \
|
||||||
|
esac; \
|
||||||
|
done;; \
|
||||||
|
esac; \
|
||||||
|
test $$am__dry = yes; \
|
||||||
|
}
|
||||||
|
pkgdatadir = $(datadir)/@PACKAGE@
|
||||||
|
pkgincludedir = $(includedir)/@PACKAGE@
|
||||||
|
pkglibdir = $(libdir)/@PACKAGE@
|
||||||
|
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||||
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||||
|
install_sh_DATA = $(install_sh) -c -m 644
|
||||||
|
install_sh_PROGRAM = $(install_sh) -c
|
||||||
|
install_sh_SCRIPT = $(install_sh) -c
|
||||||
|
INSTALL_HEADER = $(INSTALL_DATA)
|
||||||
|
transform = $(program_transform_name)
|
||||||
|
NORMAL_INSTALL = :
|
||||||
|
PRE_INSTALL = :
|
||||||
|
POST_INSTALL = :
|
||||||
|
NORMAL_UNINSTALL = :
|
||||||
|
PRE_UNINSTALL = :
|
||||||
|
POST_UNINSTALL = :
|
||||||
|
build_triplet = @build@
|
||||||
|
host_triplet = @host@
|
||||||
|
subdir = java
|
||||||
|
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
|
||||||
|
$(top_srcdir)/depcomp
|
||||||
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
|
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_check_java_home.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_java_options.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_jar.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
|
||||||
|
$(top_srcdir)/configure.ac
|
||||||
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
|
$(ACLOCAL_M4)
|
||||||
|
mkinstalldirs = $(install_sh) -d
|
||||||
|
CONFIG_HEADER = $(top_builddir)/config.h
|
||||||
|
CONFIG_CLEAN_FILES =
|
||||||
|
CONFIG_CLEAN_VPATH_FILES =
|
||||||
|
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||||
|
am__vpath_adj = case $$p in \
|
||||||
|
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
||||||
|
*) f=$$p;; \
|
||||||
|
esac;
|
||||||
|
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
|
||||||
|
am__install_max = 40
|
||||||
|
am__nobase_strip_setup = \
|
||||||
|
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
|
||||||
|
am__nobase_strip = \
|
||||||
|
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
|
||||||
|
am__nobase_list = $(am__nobase_strip_setup); \
|
||||||
|
for p in $$list; do echo "$$p $$p"; done | \
|
||||||
|
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
|
||||||
|
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
|
||||||
|
if (++n[$$2] == $(am__install_max)) \
|
||||||
|
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
|
||||||
|
END { for (dir in files) print dir, files[dir] }'
|
||||||
|
am__base_list = \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
|
||||||
|
am__uninstall_files_from_dir = { \
|
||||||
|
test -z "$$files" \
|
||||||
|
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|
||||||
|
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
|
||||||
|
$(am__cd) "$$dir" && rm -f $$files; }; \
|
||||||
|
}
|
||||||
|
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgdatadir)" \
|
||||||
|
"$(DESTDIR)$(includedir)"
|
||||||
|
LTLIBRARIES = $(lib_LTLIBRARIES)
|
||||||
|
@ENABLE_JNI_TRUE@libffts_jni_la_DEPENDENCIES = \
|
||||||
|
@ENABLE_JNI_TRUE@ $(top_builddir)/src/libffts.la
|
||||||
|
am__libffts_jni_la_SOURCES_DIST = jni/ffts_jni.c
|
||||||
|
@ENABLE_JNI_TRUE@am_libffts_jni_la_OBJECTS = \
|
||||||
|
@ENABLE_JNI_TRUE@ libffts_jni_la-ffts_jni.lo
|
||||||
|
libffts_jni_la_OBJECTS = $(am_libffts_jni_la_OBJECTS)
|
||||||
|
libffts_jni_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
|
||||||
|
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(libffts_jni_la_CFLAGS) \
|
||||||
|
$(CFLAGS) $(libffts_jni_la_LDFLAGS) $(LDFLAGS) -o $@
|
||||||
|
@ENABLE_JNI_TRUE@am_libffts_jni_la_rpath = -rpath $(libdir)
|
||||||
|
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||||
|
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||||
|
am__depfiles_maybe = depfiles
|
||||||
|
am__mv = mv -f
|
||||||
|
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||||
|
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
||||||
|
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
CCLD = $(CC)
|
||||||
|
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
|
||||||
|
$(LDFLAGS) -o $@
|
||||||
|
SOURCES = $(libffts_jni_la_SOURCES)
|
||||||
|
DIST_SOURCES = $(am__libffts_jni_la_SOURCES_DIST)
|
||||||
|
am__can_run_installinfo = \
|
||||||
|
case $$AM_UPDATE_INFO_DIR in \
|
||||||
|
n|no|NO) false;; \
|
||||||
|
*) (install-info --version) >/dev/null 2>&1;; \
|
||||||
|
esac
|
||||||
|
DATA = $(pkgdata_DATA)
|
||||||
|
HEADERS = $(nodist_include_HEADERS)
|
||||||
|
ETAGS = etags
|
||||||
|
CTAGS = ctags
|
||||||
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
|
ACLOCAL = @ACLOCAL@
|
||||||
|
AMTAR = @AMTAR@
|
||||||
|
AR = @AR@
|
||||||
|
AUTOCONF = @AUTOCONF@
|
||||||
|
AUTOHEADER = @AUTOHEADER@
|
||||||
|
AUTOMAKE = @AUTOMAKE@
|
||||||
|
AWK = @AWK@
|
||||||
|
CC = @CC@
|
||||||
|
CCAS = @CCAS@
|
||||||
|
CCASDEPMODE = @CCASDEPMODE@
|
||||||
|
CCASFLAGS = @CCASFLAGS@
|
||||||
|
CCDEPMODE = @CCDEPMODE@
|
||||||
|
CFLAGS = @CFLAGS@
|
||||||
|
CPP = @CPP@
|
||||||
|
CPPFLAGS = @CPPFLAGS@
|
||||||
|
CXX = @CXX@
|
||||||
|
CXXCPP = @CXXCPP@
|
||||||
|
CXXDEPMODE = @CXXDEPMODE@
|
||||||
|
CXXFLAGS = @CXXFLAGS@
|
||||||
|
CYGPATH_W = @CYGPATH_W@
|
||||||
|
DEFS = @DEFS@
|
||||||
|
DEPDIR = @DEPDIR@
|
||||||
|
DLLTOOL = @DLLTOOL@
|
||||||
|
DSYMUTIL = @DSYMUTIL@
|
||||||
|
DUMPBIN = @DUMPBIN@
|
||||||
|
ECHO_C = @ECHO_C@
|
||||||
|
ECHO_N = @ECHO_N@
|
||||||
|
ECHO_T = @ECHO_T@
|
||||||
|
EGREP = @EGREP@
|
||||||
|
EXEEXT = @EXEEXT@
|
||||||
|
FGREP = @FGREP@
|
||||||
|
GREP = @GREP@
|
||||||
|
INSTALL = @INSTALL@
|
||||||
|
INSTALL_DATA = @INSTALL_DATA@
|
||||||
|
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||||
|
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||||
|
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||||
|
JAR = @JAR@
|
||||||
|
JAVA = @JAVA@
|
||||||
|
JAVAC = @JAVAC@
|
||||||
|
JAVACFLAGS = @JAVACFLAGS@
|
||||||
|
JAVAFLAGS = @JAVAFLAGS@
|
||||||
|
JAVAPREFIX = @JAVAPREFIX@
|
||||||
|
JAVA_PATH_NAME = @JAVA_PATH_NAME@
|
||||||
|
JNI_CPPFLAGS = @JNI_CPPFLAGS@
|
||||||
|
LD = @LD@
|
||||||
|
LDFLAGS = @LDFLAGS@
|
||||||
|
LIBOBJS = @LIBOBJS@
|
||||||
|
LIBS = @LIBS@
|
||||||
|
LIBTOOL = @LIBTOOL@
|
||||||
|
LIPO = @LIPO@
|
||||||
|
LN_S = @LN_S@
|
||||||
|
LTLIBOBJS = @LTLIBOBJS@
|
||||||
|
MAKEINFO = @MAKEINFO@
|
||||||
|
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||||
|
MKDIR_P = @MKDIR_P@
|
||||||
|
NM = @NM@
|
||||||
|
NMEDIT = @NMEDIT@
|
||||||
|
OBJDUMP = @OBJDUMP@
|
||||||
|
OBJEXT = @OBJEXT@
|
||||||
|
OTOOL = @OTOOL@
|
||||||
|
OTOOL64 = @OTOOL64@
|
||||||
|
PACKAGE = @PACKAGE@
|
||||||
|
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||||
|
PACKAGE_NAME = @PACKAGE_NAME@
|
||||||
|
PACKAGE_STRING = @PACKAGE_STRING@
|
||||||
|
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||||
|
PACKAGE_URL = @PACKAGE_URL@
|
||||||
|
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||||
|
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||||
|
RANLIB = @RANLIB@
|
||||||
|
SED = @SED@
|
||||||
|
SET_MAKE = @SET_MAKE@
|
||||||
|
SHELL = @SHELL@
|
||||||
|
STRIP = @STRIP@
|
||||||
|
VERSION = @VERSION@
|
||||||
|
_ACJNI_JAVAC = @_ACJNI_JAVAC@
|
||||||
|
abs_builddir = @abs_builddir@
|
||||||
|
abs_srcdir = @abs_srcdir@
|
||||||
|
abs_top_builddir = @abs_top_builddir@
|
||||||
|
abs_top_srcdir = @abs_top_srcdir@
|
||||||
|
ac_ct_AR = @ac_ct_AR@
|
||||||
|
ac_ct_CC = @ac_ct_CC@
|
||||||
|
ac_ct_CXX = @ac_ct_CXX@
|
||||||
|
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||||
|
am__include = @am__include@
|
||||||
|
am__leading_dot = @am__leading_dot@
|
||||||
|
am__quote = @am__quote@
|
||||||
|
am__tar = @am__tar@
|
||||||
|
am__untar = @am__untar@
|
||||||
|
bindir = @bindir@
|
||||||
|
build = @build@
|
||||||
|
build_alias = @build_alias@
|
||||||
|
build_cpu = @build_cpu@
|
||||||
|
build_os = @build_os@
|
||||||
|
build_vendor = @build_vendor@
|
||||||
|
builddir = @builddir@
|
||||||
|
datadir = @datadir@
|
||||||
|
datarootdir = @datarootdir@
|
||||||
|
docdir = @docdir@
|
||||||
|
dvidir = @dvidir@
|
||||||
|
exec_prefix = @exec_prefix@
|
||||||
|
host = @host@
|
||||||
|
host_alias = @host_alias@
|
||||||
|
host_cpu = @host_cpu@
|
||||||
|
host_os = @host_os@
|
||||||
|
host_vendor = @host_vendor@
|
||||||
|
htmldir = @htmldir@
|
||||||
|
includedir = @includedir@
|
||||||
|
infodir = @infodir@
|
||||||
|
install_sh = @install_sh@
|
||||||
|
libdir = @libdir@
|
||||||
|
libexecdir = @libexecdir@
|
||||||
|
localedir = @localedir@
|
||||||
|
localstatedir = @localstatedir@
|
||||||
|
mandir = @mandir@
|
||||||
|
mkdir_p = @mkdir_p@
|
||||||
|
oldincludedir = @oldincludedir@
|
||||||
|
pdfdir = @pdfdir@
|
||||||
|
prefix = @prefix@
|
||||||
|
program_transform_name = @program_transform_name@
|
||||||
|
psdir = @psdir@
|
||||||
|
sbindir = @sbindir@
|
||||||
|
sharedstatedir = @sharedstatedir@
|
||||||
|
srcdir = @srcdir@
|
||||||
|
sysconfdir = @sysconfdir@
|
||||||
|
target_alias = @target_alias@
|
||||||
|
top_build_prefix = @top_build_prefix@
|
||||||
|
top_builddir = @top_builddir@
|
||||||
|
top_srcdir = @top_srcdir@
|
||||||
|
@ENABLE_JNI_TRUE@JAVA_SRC = $(shell find $(srcdir)/src -name '*.java')
|
||||||
|
@ENABLE_JNI_TRUE@BUILT_SOURCES = nz_ac_waikato_ffts_FFTS.h
|
||||||
|
@ENABLE_JNI_TRUE@lib_LTLIBRARIES = libffts_jni.la
|
||||||
|
@ENABLE_JNI_TRUE@libffts_jni_la_SOURCES = jni/ffts_jni.c
|
||||||
|
@ENABLE_JNI_TRUE@nodist_include_HEADERS = nz_ac_waikato_ffts_FFTS.h
|
||||||
|
@ENABLE_JNI_TRUE@libffts_jni_la_LIBADD = $(top_builddir)/src/libffts.la
|
||||||
|
@ENABLE_JNI_TRUE@libffts_jni_la_CFLAGS = @JNI_CPPFLAGS@ $(AM_CFLAGS) -I$(top_srcdir)/include
|
||||||
|
@ENABLE_JNI_TRUE@libffts_jni_la_LDFLAGS = -shared
|
||||||
|
@ENABLE_JNI_TRUE@pkgdata_DATA = ffts.jar
|
||||||
|
@ENABLE_JNI_TRUE@CLEANFILES = ffts.jar nz_ac_waikato_ffts_FFTS.h
|
||||||
|
all: $(BUILT_SOURCES)
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) all-am
|
||||||
|
|
||||||
|
.SUFFIXES:
|
||||||
|
.SUFFIXES: .c .lo .o .obj
|
||||||
|
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
||||||
|
@for dep in $?; do \
|
||||||
|
case '$(am__configure_deps)' in \
|
||||||
|
*$$dep*) \
|
||||||
|
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||||
|
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||||
|
exit 1;; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu java/Makefile'; \
|
||||||
|
$(am__cd) $(top_srcdir) && \
|
||||||
|
$(AUTOMAKE) --gnu java/Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
|
@case '$?' in \
|
||||||
|
*config.status*) \
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||||
|
*) \
|
||||||
|
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
|
||||||
|
esac;
|
||||||
|
|
||||||
|
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
|
||||||
|
$(top_srcdir)/configure: $(am__configure_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(am__aclocal_m4_deps):
|
||||||
|
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
||||||
|
list2=; for p in $$list; do \
|
||||||
|
if test -f $$p; then \
|
||||||
|
list2="$$list2 $$p"; \
|
||||||
|
else :; fi; \
|
||||||
|
done; \
|
||||||
|
test -z "$$list2" || { \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
|
||||||
|
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
|
||||||
|
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
|
||||||
|
}
|
||||||
|
|
||||||
|
uninstall-libLTLIBRARIES:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
||||||
|
for p in $$list; do \
|
||||||
|
$(am__strip_dir) \
|
||||||
|
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
|
||||||
|
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
|
||||||
|
done
|
||||||
|
|
||||||
|
clean-libLTLIBRARIES:
|
||||||
|
-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; \
|
||||||
|
locs=`for p in $$list; do echo $$p; done | \
|
||||||
|
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
|
||||||
|
sort -u`; \
|
||||||
|
test -z "$$locs" || { \
|
||||||
|
echo rm -f $${locs}; \
|
||||||
|
rm -f $${locs}; \
|
||||||
|
}
|
||||||
|
libffts_jni.la: $(libffts_jni_la_OBJECTS) $(libffts_jni_la_DEPENDENCIES) $(EXTRA_libffts_jni_la_DEPENDENCIES)
|
||||||
|
$(libffts_jni_la_LINK) $(am_libffts_jni_la_rpath) $(libffts_jni_la_OBJECTS) $(libffts_jni_la_LIBADD) $(LIBS)
|
||||||
|
|
||||||
|
mostlyclean-compile:
|
||||||
|
-rm -f *.$(OBJEXT)
|
||||||
|
|
||||||
|
distclean-compile:
|
||||||
|
-rm -f *.tab.c
|
||||||
|
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libffts_jni_la-ffts_jni.Plo@am__quote@
|
||||||
|
|
||||||
|
.c.o:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
|
||||||
|
|
||||||
|
.c.obj:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
|
||||||
|
|
||||||
|
.c.lo:
|
||||||
|
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
|
||||||
|
|
||||||
|
libffts_jni_la-ffts_jni.lo: jni/ffts_jni.c
|
||||||
|
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -MT libffts_jni_la-ffts_jni.lo -MD -MP -MF $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo $(DEPDIR)/libffts_jni_la-ffts_jni.Plo
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='jni/ffts_jni.c' object='libffts_jni_la-ffts_jni.lo' libtool=yes @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
|
||||||
|
|
||||||
|
mostlyclean-libtool:
|
||||||
|
-rm -f *.lo
|
||||||
|
|
||||||
|
clean-libtool:
|
||||||
|
-rm -rf .libs _libs
|
||||||
|
install-pkgdataDATA: $(pkgdata_DATA)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \
|
||||||
|
if test -n "$$list"; then \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \
|
||||||
|
fi; \
|
||||||
|
for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
echo "$$d$$p"; \
|
||||||
|
done | $(am__base_list) | \
|
||||||
|
while read files; do \
|
||||||
|
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \
|
||||||
|
$(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-pkgdataDATA:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \
|
||||||
|
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||||
|
dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir)
|
||||||
|
install-nodist_includeHEADERS: $(nodist_include_HEADERS)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
|
||||||
|
if test -n "$$list"; then \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
|
||||||
|
fi; \
|
||||||
|
for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
echo "$$d$$p"; \
|
||||||
|
done | $(am__base_list) | \
|
||||||
|
while read files; do \
|
||||||
|
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
|
||||||
|
$(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-nodist_includeHEADERS:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
|
||||||
|
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||||
|
dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
|
||||||
|
|
||||||
|
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
mkid -fID $$unique
|
||||||
|
tags: TAGS
|
||||||
|
|
||||||
|
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
set x; \
|
||||||
|
here=`pwd`; \
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
shift; \
|
||||||
|
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||||
|
test -n "$$unique" || unique=$$empty_fix; \
|
||||||
|
if test $$# -gt 0; then \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
"$$@" $$unique; \
|
||||||
|
else \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
$$unique; \
|
||||||
|
fi; \
|
||||||
|
fi
|
||||||
|
ctags: CTAGS
|
||||||
|
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
test -z "$(CTAGS_ARGS)$$unique" \
|
||||||
|
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||||
|
$$unique
|
||||||
|
|
||||||
|
GTAGS:
|
||||||
|
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||||
|
&& $(am__cd) $(top_srcdir) \
|
||||||
|
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||||
|
|
||||||
|
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP)'; \
|
||||||
|
case "$(srcdir)" in \
|
||||||
|
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||||
|
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||||
|
esac; \
|
||||||
|
for i in $$list; do \
|
||||||
|
if test -f "$$i"; then \
|
||||||
|
echo "$(subdir)/$$i"; \
|
||||||
|
else \
|
||||||
|
echo "$$sdir/$$i"; \
|
||||||
|
fi; \
|
||||||
|
done >> $(top_builddir)/cscope.files
|
||||||
|
|
||||||
|
distclean-tags:
|
||||||
|
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||||
|
|
||||||
|
distdir: $(DISTFILES)
|
||||||
|
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
list='$(DISTFILES)'; \
|
||||||
|
dist_files=`for file in $$list; do echo $$file; done | \
|
||||||
|
sed -e "s|^$$srcdirstrip/||;t" \
|
||||||
|
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||||
|
case $$dist_files in \
|
||||||
|
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||||
|
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||||
|
sort -u` ;; \
|
||||||
|
esac; \
|
||||||
|
for file in $$dist_files; do \
|
||||||
|
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||||
|
if test -d $$d/$$file; then \
|
||||||
|
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||||
|
if test -d "$(distdir)/$$file"; then \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||||
|
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
else \
|
||||||
|
test -f "$(distdir)/$$file" \
|
||||||
|
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
check-am: all-am
|
||||||
|
check: $(BUILT_SOURCES)
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) check-am
|
||||||
|
all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS)
|
||||||
|
installdirs:
|
||||||
|
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgdatadir)" "$(DESTDIR)$(includedir)"; do \
|
||||||
|
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||||
|
done
|
||||||
|
install: $(BUILT_SOURCES)
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) install-am
|
||||||
|
install-exec: install-exec-am
|
||||||
|
install-data: install-data-am
|
||||||
|
uninstall: uninstall-am
|
||||||
|
|
||||||
|
install-am: all-am
|
||||||
|
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||||
|
|
||||||
|
installcheck: installcheck-am
|
||||||
|
install-strip:
|
||||||
|
if test -z '$(STRIP)'; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
install; \
|
||||||
|
else \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||||
|
fi
|
||||||
|
mostlyclean-generic:
|
||||||
|
|
||||||
|
clean-generic:
|
||||||
|
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
|
||||||
|
|
||||||
|
distclean-generic:
|
||||||
|
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||||
|
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||||
|
|
||||||
|
maintainer-clean-generic:
|
||||||
|
@echo "This command is intended for maintainers to use"
|
||||||
|
@echo "it deletes files that may require special tools to rebuild."
|
||||||
|
-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
|
||||||
|
@ENABLE_JNI_FALSE@clean-local:
|
||||||
|
clean: clean-am
|
||||||
|
|
||||||
|
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
|
||||||
|
mostlyclean-am
|
||||||
|
|
||||||
|
distclean: distclean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
distclean-am: clean-am distclean-compile distclean-generic \
|
||||||
|
distclean-tags
|
||||||
|
|
||||||
|
dvi: dvi-am
|
||||||
|
|
||||||
|
dvi-am:
|
||||||
|
|
||||||
|
html: html-am
|
||||||
|
|
||||||
|
html-am:
|
||||||
|
|
||||||
|
info: info-am
|
||||||
|
|
||||||
|
info-am:
|
||||||
|
|
||||||
|
install-data-am: install-nodist_includeHEADERS install-pkgdataDATA
|
||||||
|
|
||||||
|
install-dvi: install-dvi-am
|
||||||
|
|
||||||
|
install-dvi-am:
|
||||||
|
|
||||||
|
install-exec-am: install-libLTLIBRARIES
|
||||||
|
|
||||||
|
install-html: install-html-am
|
||||||
|
|
||||||
|
install-html-am:
|
||||||
|
|
||||||
|
install-info: install-info-am
|
||||||
|
|
||||||
|
install-info-am:
|
||||||
|
|
||||||
|
install-man:
|
||||||
|
|
||||||
|
install-pdf: install-pdf-am
|
||||||
|
|
||||||
|
install-pdf-am:
|
||||||
|
|
||||||
|
install-ps: install-ps-am
|
||||||
|
|
||||||
|
install-ps-am:
|
||||||
|
|
||||||
|
installcheck-am:
|
||||||
|
|
||||||
|
maintainer-clean: maintainer-clean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||||
|
|
||||||
|
mostlyclean: mostlyclean-am
|
||||||
|
|
||||||
|
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||||
|
mostlyclean-libtool
|
||||||
|
|
||||||
|
pdf: pdf-am
|
||||||
|
|
||||||
|
pdf-am:
|
||||||
|
|
||||||
|
ps: ps-am
|
||||||
|
|
||||||
|
ps-am:
|
||||||
|
|
||||||
|
uninstall-am: uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS \
|
||||||
|
uninstall-pkgdataDATA
|
||||||
|
|
||||||
|
.MAKE: all check install install-am install-strip
|
||||||
|
|
||||||
|
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
|
||||||
|
clean-libLTLIBRARIES clean-libtool clean-local cscopelist \
|
||||||
|
ctags distclean distclean-compile distclean-generic \
|
||||||
|
distclean-libtool distclean-tags distdir dvi dvi-am html \
|
||||||
|
html-am info info-am install install-am install-data \
|
||||||
|
install-data-am install-dvi install-dvi-am install-exec \
|
||||||
|
install-exec-am install-html install-html-am install-info \
|
||||||
|
install-info-am install-libLTLIBRARIES install-man \
|
||||||
|
install-nodist_includeHEADERS install-pdf install-pdf-am \
|
||||||
|
install-pkgdataDATA install-ps install-ps-am install-strip \
|
||||||
|
installcheck installcheck-am installdirs maintainer-clean \
|
||||||
|
maintainer-clean-generic mostlyclean mostlyclean-compile \
|
||||||
|
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
|
||||||
|
tags uninstall uninstall-am uninstall-libLTLIBRARIES \
|
||||||
|
uninstall-nodist_includeHEADERS uninstall-pkgdataDATA
|
||||||
|
|
||||||
|
|
||||||
|
@ENABLE_JNI_TRUE@all: ffts.jar
|
||||||
|
|
||||||
|
@ENABLE_JNI_TRUE@classes ffts.jar: $(JAVA_SRC)
|
||||||
|
@ENABLE_JNI_TRUE@ -rm -rf classes
|
||||||
|
@ENABLE_JNI_TRUE@ mkdir classes
|
||||||
|
@ENABLE_JNI_TRUE@ $(JAVAC) -d classes -sourcepath src $(JAVA_SRC)
|
||||||
|
@ENABLE_JNI_TRUE@ $(JAR) -cf ffts.jar -C classes .
|
||||||
|
|
||||||
|
@ENABLE_JNI_TRUE@nz_ac_waikato_ffts_FFTS.h: classes
|
||||||
|
@ENABLE_JNI_TRUE@ javah -cp $< nz.ac.waikato.ffts.FFTS
|
||||||
|
@ENABLE_JNI_TRUE@clean-local:
|
||||||
|
@ENABLE_JNI_TRUE@ -rm -rf classes
|
||||||
|
|
||||||
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
.NOEXPORT:
|
@ -0,0 +1,233 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
*
|
||||||
|
* Copyright (c) 2013, Michael Zucchi <notzed@gmail.com>
|
||||||
|
*
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* * Neither the name of the organization nor the
|
||||||
|
* names of its contributors may be used to endorse or promote products
|
||||||
|
* derived from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"

#include <ffts.h>
#include <alloca.h>
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Bit of a hack for android, as we can't build the *.h without
|
||||||
|
// the classes ... but we can't build the project without the jni.
|
||||||
|
#ifdef ANDROID
|
||||||
|
#include <jni.h>
|
||||||
|
#else
|
||||||
|
#include "nz_ac_waikato_ffts_FFTS.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// TODO: feature tests instead
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#define NEEDS_ALIGNED
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef NEEDS_ALIGNED
|
||||||
|
#define ALIGN_MASK 15
|
||||||
|
|
||||||
|
/*
 * Allocate `size` bytes whose address is a multiple of `align`, using
 * whichever aligned allocator the build configuration reports.
 *
 * Returns NULL when the allocation fails.  The callers in this file release
 * the result with free().
 */
static void *
xmemalign(size_t align, size_t size) {
	/*
	 * Test the macro's value, not just its existence.
	 * NOTE(review): presumably these come from AC_CHECK_DECLS, which
	 * defines HAVE_DECL_<name> to 0 when the declaration is missing, so a
	 * bare defined() would pick an allocator that is not there -- confirm
	 * against configure.ac.
	 */
#if defined(HAVE_DECL_POSIX_MEMALIGN) && HAVE_DECL_POSIX_MEMALIGN
	void *block = NULL;

	if (posix_memalign(&block, align, size) != 0)
		return NULL;

	return block;
#elif defined(HAVE_DECL_MEMALIGN) && HAVE_DECL_MEMALIGN
	return memalign(align, size);
#else
#error "Require an aligning malloc"
#endif
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static void
|
||||||
|
throwOutOfMemoryError(JNIEnv *env, const char *msg) {
|
||||||
|
jclass jc = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
|
||||||
|
|
||||||
|
if (jc)
|
||||||
|
(*env)->ThrowNew(env, jc, msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Create a 1-D complex plan via ffts_init_1d(N, sign).
 *
 * Returns the plan pointer packed into a jlong.  When ffts_init_1d() returns
 * NULL an OutOfMemoryError is thrown and the NULL pointer is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_11d
(JNIEnv *env, jclass jc, jint N, jint sign) {
	ffts_plan_t *handle = ffts_init_1d(N, sign);

	if (handle == NULL)
		throwOutOfMemoryError(env, NULL);

	return (jlong)handle;
}
|
||||||
|
|
||||||
|
/*
 * Create a 2-D complex plan via ffts_init_2d(N1, N2, sign).
 *
 * Returns the plan pointer packed into a jlong.  When ffts_init_2d() returns
 * NULL an OutOfMemoryError is thrown and the NULL pointer is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_12d
(JNIEnv *env, jclass jc, jint N1, jint N2, jint sign) {
	ffts_plan_t *handle = ffts_init_2d(N1, N2, sign);

	if (handle == NULL)
		throwOutOfMemoryError(env, NULL);

	return (jlong)handle;
}
|
||||||
|
|
||||||
|
/*
 * Create an n-dimensional complex plan via ffts_init_nd().
 *
 * jNs supplies one Java int per dimension; the values are copied out and
 * widened into the size_t array that ffts_init_nd() is called with.
 *
 * Returns the plan pointer packed into a jlong.  An OutOfMemoryError is
 * thrown (and the NULL pointer returned) when either the temporary buffers
 * or the plan itself cannot be allocated.
 *
 * NOTE(review): jNs is not checked for NULL and the lengths are not range
 * checked here -- presumably the Java side validates them; confirm.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_1nd
(JNIEnv *env, jclass jc, jintArray jNs, jint sign) {
	ffts_plan_t *plan = NULL;
	jsize n = (*env)->GetArrayLength(env, jNs);
	// Heap instead of alloca(): n is chosen by the caller and alloca()
	// has no way to report failure.  Always request at least one slot so
	// that an empty array is not mistaken for an allocation failure.
	size_t slots = n > 0 ? (size_t)n : 1;
	jint *dims = calloc(slots, sizeof *dims);
	size_t *Ns = calloc(slots, sizeof *Ns);
	jsize i;

	if (dims == NULL || Ns == NULL) {
		throwOutOfMemoryError(env, NULL);
		goto out;
	}

	// Get the int elements and convert to the C type
	(*env)->GetIntArrayRegion(env, jNs, 0, n, dims);
	for (i = 0; i < n; i++)
		Ns[i] = dims[i];

	plan = ffts_init_nd(n, Ns, sign);
	if (!plan)
		throwOutOfMemoryError(env, NULL);

out:
	free(Ns);
	free(dims);

	return (jlong)plan;
}
|
||||||
|
|
||||||
|
/*
 * Create a 1-D real plan via ffts_init_1d_real(N, sign).
 *
 * Returns the plan pointer packed into a jlong.  When ffts_init_1d_real()
 * returns NULL an OutOfMemoryError is thrown and the NULL pointer is
 * returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_11d
(JNIEnv *env, jclass jc, jint N, jint sign) {
	ffts_plan_t *handle = ffts_init_1d_real(N, sign);

	if (handle == NULL)
		throwOutOfMemoryError(env, NULL);

	return (jlong)handle;
}
|
||||||
|
|
||||||
|
/*
 * Create a 2-D real plan via ffts_init_2d_real(N1, N2, sign).
 *
 * Returns the plan pointer packed into a jlong.  When ffts_init_2d_real()
 * returns NULL an OutOfMemoryError is thrown and the NULL pointer is
 * returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_12d
(JNIEnv *env, jclass jc, jint N1, jint N2, jint sign) {
	ffts_plan_t *handle = ffts_init_2d_real(N1, N2, sign);

	if (handle == NULL)
		throwOutOfMemoryError(env, NULL);

	return (jlong)handle;
}
|
||||||
|
|
||||||
|
/*
 * Create an n-dimensional real plan via ffts_init_nd_real().
 *
 * jNs supplies one Java int per dimension; the values are copied out and
 * widened into the size_t array that ffts_init_nd_real() is called with.
 *
 * Returns the plan pointer packed into a jlong.  An OutOfMemoryError is
 * thrown (and the NULL pointer returned) when either the temporary buffers
 * or the plan itself cannot be allocated.
 *
 * NOTE(review): jNs is not checked for NULL and the lengths are not range
 * checked here -- presumably the Java side validates them; confirm.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_1nd
(JNIEnv *env, jclass jc, jintArray jNs, jint sign) {
	ffts_plan_t *plan = NULL;
	jsize n = (*env)->GetArrayLength(env, jNs);
	// Heap instead of alloca(): n is chosen by the caller and alloca()
	// has no way to report failure.  Always request at least one slot so
	// that an empty array is not mistaken for an allocation failure.
	size_t slots = n > 0 ? (size_t)n : 1;
	jint *dims = calloc(slots, sizeof *dims);
	size_t *Ns = calloc(slots, sizeof *Ns);
	jsize i;

	if (dims == NULL || Ns == NULL) {
		throwOutOfMemoryError(env, NULL);
		goto out;
	}

	// Get the int elements and convert to the C type
	(*env)->GetIntArrayRegion(env, jNs, 0, n, dims);
	for (i = 0; i < n; i++)
		Ns[i] = dims[i];

	plan = ffts_init_nd_real(n, Ns, sign);
	if (!plan)
		throwOutOfMemoryError(env, NULL);

out:
	free(Ns);
	free(dims);

	return (jlong)plan;
}
|
||||||
|
|
||||||
|
/*
 * Execute plan `p` on `size` floats read from jsrc starting at element soff,
 * writing `size` floats to jdst starting at element doff.
 *
 * NOTE(review): soff/doff are treated as float element indices, matching how
 * the JNI array-region calls index; the previous code applied them as byte
 * offsets to void pointers -- confirm against the Java caller.
 * NOTE(review): size is not range checked here; presumably the Java side
 * guarantees it fits the arrays -- confirm.
 *
 * With NEEDS_ALIGNED the data is staged through 64-byte aligned scratch
 * buffers; an OutOfMemoryError is thrown if they cannot be allocated.
 * Otherwise the Java arrays are accessed in place through
 * GetPrimitiveArrayCritical().  (GetFloatArrayElements() was noted as the
 * slowest option and is not used.)
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_execute__JJ_3FI_3FI
(JNIEnv *env, jclass jc, jlong p, jlong size, jfloatArray jsrc, jint soff, jfloatArray jdst, jint doff) {
	ffts_plan_t *plan = (ffts_plan_t *)p;

	// TODO: check performance on android/arm
#ifdef NEEDS_ALIGNED
	// On oracle jvm this is faster than GetFloatArrayElements()
	size_t bytes = (size_t)size * sizeof(jfloat);
	jfloat *src, *dst;

	src = xmemalign(64, bytes);
	if (!src) {
		throwOutOfMemoryError(env, NULL);
		return;
	}
	dst = xmemalign(64, bytes);
	if (!dst) {
		free(src);
		throwOutOfMemoryError(env, NULL);
		return;
	}

	// The offsets select where in the Java arrays to copy from/to; the
	// scratch buffers are always used from their (aligned) start.
	(*env)->GetFloatArrayRegion(env, jsrc, soff, (jsize)size, src);
	// A failed copy leaves an exception pending and src unfilled, so
	// skip the transform and let the exception propagate.
	if (!(*env)->ExceptionCheck(env)) {
		ffts_execute(plan, src, dst);
		(*env)->SetFloatArrayRegion(env, jdst, doff, (jsize)size, dst);
	}

	free(dst);
	free(src);
#else
	// This is the fastest with oracle jvm, but doesn't work with sse ...
	jfloat *src = (*env)->GetPrimitiveArrayCritical(env, jsrc, NULL);
	jfloat *dst = src ? (*env)->GetPrimitiveArrayCritical(env, jdst, NULL) : NULL;

	if (dst) {
		ffts_execute(plan, src + soff, dst + doff);
		(*env)->ReleasePrimitiveArrayCritical(env, jdst, dst, 0);
	}
	// The source is only read: JNI_ABORT skips the copy-back, which also
	// keeps a copied source from overwriting the result when jsrc and
	// jdst are the same array.
	if (src)
		(*env)->ReleasePrimitiveArrayCritical(env, jsrc, src, JNI_ABORT);

	// Report a failed array access unless the VM already raised something.
	if (!dst && !(*env)->ExceptionCheck(env))
		throwOutOfMemoryError(env, NULL);
#endif
}
|
||||||
|
|
||||||
|
/*
 * Execute plan `p` directly on the memory behind two buffer objects.
 *
 * The addresses are obtained with GetDirectBufferAddress() and passed to
 * ffts_execute() as-is; `size` is not used here.
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_execute__JJLjava_nio_FloatBuffer_2Ljava_nio_FloatBuffer_2
(JNIEnv *env, jclass jc, jlong p, jlong size, jobject jsrc, jobject jdst) {
	// Bounds checking etc is in java side.
	void *in = (*env)->GetDirectBufferAddress(env, jsrc);
	void *out = (*env)->GetDirectBufferAddress(env, jdst);

	ffts_execute((ffts_plan_t *)p, in, out);
}
|
||||||
|
|
||||||
|
/*
 * Release the plan whose pointer is carried in `p` by passing it to
 * ffts_free().
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_free
(JNIEnv *env, jclass jc, jlong p) {
	ffts_free((ffts_plan_t *)p);
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,60 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_check_classpath.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_CHECK_CLASSPATH
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# AX_CHECK_CLASSPATH just displays the CLASSPATH, for the edification of
|
||||||
|
# the user.
|
||||||
|
#
|
||||||
|
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||||
|
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||||
|
# on others. Unfortunately, the autoconf archive does not support the
|
||||||
|
# concept of set of macros, so I had to break it for submission. The
|
||||||
|
# general documentation, as well as the sample configure.in, is included
|
||||||
|
# in the AX_PROG_JAVA macro.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 5
|
||||||
|
|
||||||
|
AU_ALIAS([AC_CHECK_CLASSPATH], [AX_CHECK_CLASSPATH])
# AX_CHECK_CLASSPATH
# ------------------
# Report the value of CLASSPATH, or its absence, to the user.  Purely
# informational: no variable is set.
AC_DEFUN([AX_CHECK_CLASSPATH],[
# Use the standard notice macro instead of a bare echo so the message
# honours --quiet and is also written to config.log.
if test "x$CLASSPATH" = x; then
    AC_MSG_NOTICE([You have no CLASSPATH, I hope it is good])
else
    AC_MSG_NOTICE([You have CLASSPATH $CLASSPATH, hope it is correct])
fi
])
|
@ -0,0 +1,80 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_check_java_home.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_CHECK_JAVA_HOME
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Check for Sun Java (JDK / JRE) installation, where the 'java' VM is in.
|
||||||
|
# If found, set environment variable JAVA_HOME = Java installation home,
|
||||||
|
# otherwise leave JAVA_HOME untouched, which in most cases means JAVA_HOME is
|
||||||
|
# empty.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Gleen Salmon <gleensalmon@yahoo.com>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 6
|
||||||
|
|
||||||
|
AU_ALIAS([AC_CHECK_JAVA_HOME], [AX_CHECK_JAVA_HOME])

# AX_CHECK_JAVA_HOME
# ------------------
# Set JAVA_HOME unless the user already did.  Tries, in order, the output
# of /usr/libexec/java_home and the directory above the bin directory of
# the java program found on PATH (extended with the last entry listed
# under /usr/java, if any).
AC_DEFUN([AX_CHECK_JAVA_HOME],
[AC_MSG_CHECKING([for JAVA_HOME])
# We used a fake loop so that we can use "break" to exit when the result
# is found.
while true
do
  # If the user defined JAVA_HOME, don't touch it.
  test "${JAVA_HOME+set}" = set && break

  # On Mac OS X 10.5 and following, run /usr/libexec/java_home to get
  # the value of JAVA_HOME to use.
  # (http://developer.apple.com/library/mac/#qa/qa2001/qa1170.html).
  JAVA_HOME=`/usr/libexec/java_home 2>/dev/null`
  test x"$JAVA_HOME" != x && break

  # See if we can find the java executable, and compute from there.
  # The expansions below are quoted so that an installation path
  # containing white space does not break the test or the sed input.
  TRY_JAVA_HOME=`ls -dr /usr/java/* 2> /dev/null | head -n 1`
  if test "x$TRY_JAVA_HOME" != x; then
    PATH=$PATH:$TRY_JAVA_HOME/bin
  fi
  AC_PATH_PROG([JAVA_PATH_NAME], [java])
  if test "x$JAVA_PATH_NAME" != x; then
    JAVA_HOME=`echo "$JAVA_PATH_NAME" | sed "s/\(.*\)[[/]]bin[[/]]java.*/\1/"`
    break
  fi

  AC_MSG_NOTICE([Could not compute JAVA_HOME])
  break
done
AC_MSG_RESULT([$JAVA_HOME])
])
|
@ -0,0 +1,48 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_java_options.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_JAVA_OPTIONS
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# AX_JAVA_OPTIONS adds configure command line options used for Java m4
|
||||||
|
# macros. This Macro is optional.
|
||||||
|
#
|
||||||
|
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||||
|
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||||
|
# on others. Unfortunately, the autoconf archive does not support the
|
||||||
|
# concept of set of macros, so I had to break it for submission. The
|
||||||
|
# general documentation, as well as the sample configure.in, is included
|
||||||
|
# in the AX_PROG_JAVA macro.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved. This file is offered as-is, without any
|
||||||
|
# warranty.
|
||||||
|
|
||||||
|
#serial 6
|
||||||
|
|
||||||
|
AU_ALIAS([AC_JAVA_OPTIONS], [AX_JAVA_OPTIONS])
# AX_JAVA_OPTIONS
# ---------------
# Declare the --with-java-prefix, --with-javac-flags and --with-java-flags
# options, copy their values into JAVAPREFIX, JAVACFLAGS and JAVAFLAGS, and
# substitute those together with JAVA and JAVAC.
AC_DEFUN([AX_JAVA_OPTIONS],[
AC_ARG_WITH([java-prefix],
[ --with-java-prefix=PFX prefix where Java runtime is installed (optional)])
AC_ARG_WITH([javac-flags],
[ --with-javac-flags=FLAGS flags to pass to the Java compiler (optional)])
AC_ARG_WITH([java-flags],
[ --with-java-flags=FLAGS flags to pass to the Java VM (optional)])
JAVAPREFIX=$with_java_prefix
JAVACFLAGS=$with_javac_flags
JAVAFLAGS=$with_java_flags
AC_SUBST([JAVAPREFIX])dnl
AC_SUBST([JAVACFLAGS])dnl
AC_SUBST([JAVAFLAGS])dnl
AC_SUBST([JAVA])dnl
AC_SUBST([JAVAC])dnl
])
|
@ -0,0 +1,120 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_jni_include_dir.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_JNI_INCLUDE_DIR
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# AX_JNI_INCLUDE_DIR finds include directories needed for compiling
|
||||||
|
# programs using the JNI interface.
|
||||||
|
#
|
||||||
|
# JNI include directories are usually in the java distribution. This is
|
||||||
|
# deduced from the value of JAVAC. When this macro completes, a list of
|
||||||
|
# directories is left in the variable JNI_INCLUDE_DIRS.
|
||||||
|
#
|
||||||
|
# Example usage follows:
|
||||||
|
#
|
||||||
|
# AX_JNI_INCLUDE_DIR
|
||||||
|
#
|
||||||
|
# for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS
|
||||||
|
# do
|
||||||
|
# CPPFLAGS="$CPPFLAGS -I$JNI_INCLUDE_DIR"
|
||||||
|
# done
|
||||||
|
#
|
||||||
|
# If you want to force a specific compiler:
|
||||||
|
#
|
||||||
|
# - at the configure.in level, set JAVAC=yourcompiler before calling
|
||||||
|
# AX_JNI_INCLUDE_DIR
|
||||||
|
#
|
||||||
|
# - at the configure level, setenv JAVAC
|
||||||
|
#
|
||||||
|
# Note: This macro can work with the autoconf M4 macros for Java programs.
|
||||||
|
# This particular macro is not part of the original set of macros.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Don Anderson <dda@sleepycat.com>
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved. This file is offered as-is, without any
|
||||||
|
# warranty.
|
||||||
|
|
||||||
|
#serial 8
|
||||||
|
|
||||||
|
AU_ALIAS([AC_JNI_INCLUDE_DIR], [AX_JNI_INCLUDE_DIR])
AC_DEFUN([AX_JNI_INCLUDE_DIR],[

JNI_INCLUDE_DIRS=""

# The Java compiler anchors the search: stop right away when JAVAC is
# empty or cannot be located on the path.
if test "x$JAVAC" = x; then
    AC_MSG_ERROR(['\$JAVAC' undefined])
fi
AC_PATH_PROG([_ACJNI_JAVAC], [$JAVAC], [no])
if test "x$_ACJNI_JAVAC" = xno; then
    AC_MSG_ERROR([$JAVAC could not be found in path])
fi

# Resolve symlinks, squeeze repeated slashes and drop the last path
# component to get the directory that holds the compiler.
_ACJNI_FOLLOW_SYMLINKS("$_ACJNI_JAVAC")
_JTOPDIR=`echo "$_ACJNI_FOLLOWED" | sed -e 's://*:/:g' -e 's:/[[^/]]*$::'`
case "$host_os" in
    darwin*)
        _JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[[^/]]*$::'`
        _JINC="$_JTOPDIR/Headers"
        ;;
    *)
        _JINC="$_JTOPDIR/include"
        ;;
esac
_AS_ECHO_LOG([_JTOPDIR=$_JTOPDIR])
_AS_ECHO_LOG([_JINC=$_JINC])

# On Mac OS X 10.6.4, jni.h is a symlink:
# /System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers/jni.h
# -> ../../CurrentJDK/Headers/jni.h.
if test -f "$_JINC/jni.h" || test -L "$_JINC/jni.h"; then
    JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JINC"
else
    # Not there: retry one directory further up, under include.
    _JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[[^/]]*$::'`
    if test -f "$_JTOPDIR/include/jni.h"; then
        JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include"
    else
        AC_MSG_ERROR([cannot find java include files])
    fi
fi

# get the likely subdirectories for system specific java includes
case "$host_os" in
    bsdi*)    _JNI_INC_SUBDIRS="bsdos";;
    freebsd*) _JNI_INC_SUBDIRS="freebsd";;
    linux*)   _JNI_INC_SUBDIRS="linux genunix";;
    osf*)     _JNI_INC_SUBDIRS="alpha";;
    solaris*) _JNI_INC_SUBDIRS="solaris";;
    mingw*)   _JNI_INC_SUBDIRS="win32";;
    cygwin*)  _JNI_INC_SUBDIRS="win32";;
    *)        _JNI_INC_SUBDIRS="genunix";;
esac

# add any subdirectories that are present
for JINCSUBDIR in $_JNI_INC_SUBDIRS
do
    AS_IF([test -d "$_JTOPDIR/include/$JINCSUBDIR"],
          [JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include/$JINCSUBDIR"])
done
])
|
||||||
|
|
||||||
|
# _ACJNI_FOLLOW_SYMLINKS <path>
|
||||||
|
# Follows symbolic links on <path>,
|
||||||
|
# finally setting variable _ACJNI_FOLLOWED
|
||||||
|
# ----------------------------------------
|
||||||
|
AC_DEFUN([_ACJNI_FOLLOW_SYMLINKS],[
# find the include directory relative to the javac executable
_cur="$1"
# Keep going for as long as the long listing of the current path shows a
# link arrow.
while ls -ld "$_cur" 2>/dev/null | grep " -> " >/dev/null; do
    AC_MSG_CHECKING([symlink for $_cur])
    _acjni_target=`ls -ld "$_cur" | sed 's/.* -> //'`
    case "$_acjni_target" in
        /*)
            # Absolute target: it replaces the current path.
            _cur="$_acjni_target"
            ;;
        *)
            # Relative target: resolve it against the directory part of
            # the current path.
            # 'X' avoids triggering unwanted echo options.
            _acjni_dir=`echo "X$_cur" | sed -e 's/^X//' -e 's:[[^/]]*$::'`
            _cur="$_acjni_dir$_acjni_target"
            ;;
    esac
    AC_MSG_RESULT([$_cur])
done
_ACJNI_FOLLOWED="$_cur"
])# _ACJNI
|
@ -0,0 +1,52 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_prog_jar.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_PROG_JAR
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# AX_PROG_JAR tests for an existing jar program. It uses the environment
|
||||||
|
# variable JAR then tests in sequence various common jar programs.
|
||||||
|
#
|
||||||
|
# If you want to force a specific jar program:
|
||||||
|
#
|
||||||
|
# - at the configure.in level, set JAR=yourcompiler before calling
|
||||||
|
# AX_PROG_JAR
|
||||||
|
#
|
||||||
|
# - at the configure level, setenv JAR
|
||||||
|
#
|
||||||
|
# You can use the JAR variable in your Makefile.in, with @JAR@.
|
||||||
|
#
|
||||||
|
# Note: This macro depends on the autoconf M4 macros for Java programs. It
|
||||||
|
# is VERY IMPORTANT that you download that whole set, some macros depend
|
||||||
|
# on others. Unfortunately, the autoconf archive does not support the
|
||||||
|
# concept of set of macros, so I had to break it for submission.
|
||||||
|
#
|
||||||
|
# The general documentation of those macros, as well as the sample
|
||||||
|
# configure.in, is included in the AX_PROG_JAVA macro.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Egon Willighagen <e.willighagen@science.ru.nl>
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved. This file is offered as-is, without any
|
||||||
|
# warranty.
|
||||||
|
|
||||||
|
#serial 6
|
||||||
|
|
||||||
|
AU_ALIAS([AC_PROG_JAR], [AX_PROG_JAR])
|
||||||
|
AC_DEFUN([AX_PROG_JAR],[
dnl Locate a jar archiver and publish it through the JAR variable.
dnl A JAR value that is already set is kept and no search is performed.
dnl Configure aborts with an error when JAR is still empty afterwards.
AC_REQUIRE([AC_EXEEXT])dnl
if test "x$JAVAPREFIX" = x; then
  test "x$JAR" = x && AC_CHECK_PROGS([JAR], [jar$EXEEXT])
else
  dnl The third argument of AC_CHECK_PROGS is the value assigned when no
  dnl program is found, not a search path.  Passing JAVAPREFIX there made a
  dnl failed lookup store the prefix directory in JAR and defeated the error
  dnl check below.  Search the prefix directories first and then fall back
  dnl on PATH, so lookups that succeeded before keep succeeding.
  test "x$JAR" = x && AC_CHECK_PROGS([JAR], [jar], [],
    [$JAVAPREFIX/bin$PATH_SEPARATOR$JAVAPREFIX$PATH_SEPARATOR$PATH])
fi
test "x$JAR" = x && AC_MSG_ERROR([no acceptable jar program found in \$PATH])
AC_PROVIDE([$0])dnl
])
|
@ -0,0 +1,79 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_prog_javac.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_PROG_JAVAC
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# AX_PROG_JAVAC tests an existing Java compiler. It uses the environment
|
||||||
|
# variable JAVAC then tests in sequence various common Java compilers. For
|
||||||
|
# political reasons, it starts with the free ones.
|
||||||
|
#
|
||||||
|
# If you want to force a specific compiler:
|
||||||
|
#
|
||||||
|
# - at the configure.in level, set JAVAC=yourcompiler before calling
|
||||||
|
# AX_PROG_JAVAC
|
||||||
|
#
|
||||||
|
# - at the configure level, setenv JAVAC
|
||||||
|
#
|
||||||
|
# You can use the JAVAC variable in your Makefile.in, with @JAVAC@.
|
||||||
|
#
|
||||||
|
# *Warning*: its success or failure can depend on a proper setting of the
|
||||||
|
# CLASSPATH env. variable.
|
||||||
|
#
|
||||||
|
# TODO: allow to exclude compilers (rationale: most Java programs cannot
|
||||||
|
# compile with some compilers like guavac).
|
||||||
|
#
|
||||||
|
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||||
|
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||||
|
# on others. Unfortunately, the autoconf archive does not support the
|
||||||
|
# concept of set of macros, so I had to break it for submission. The
|
||||||
|
# general documentation, as well as the sample configure.in, is included
|
||||||
|
# in the AX_PROG_JAVA macro.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 6
|
||||||
|
|
||||||
|
AU_ALIAS([AC_PROG_JAVAC], [AX_PROG_JAVAC])
|
||||||
|
AC_DEFUN([AX_PROG_JAVAC],[
dnl Locate a Java compiler and publish it through the JAVAC variable.
dnl A JAVAC value that is already set is kept and no search is performed.
dnl Candidates are tried in the order gcj -C, guavac, jikes, javac, and the
dnl selected compiler is then validated by AX_PROG_JAVAC_WORKS.
if test "x$JAVAPREFIX" = x; then
  test "x$JAVAC" = x && AC_CHECK_PROGS([JAVAC], ["gcj -C" guavac jikes javac])
else
  dnl The third argument of AC_CHECK_PROGS is the value assigned when no
  dnl program is found, not a search path.  Passing JAVAPREFIX there made a
  dnl failed lookup store the prefix directory in JAVAC and defeated the
  dnl error check below.  Search the prefix directories first and then fall
  dnl back on PATH, so lookups that succeeded before keep succeeding.
  test "x$JAVAC" = x && AC_CHECK_PROGS([JAVAC], ["gcj -C" guavac jikes javac], [],
    [$JAVAPREFIX/bin$PATH_SEPARATOR$JAVAPREFIX$PATH_SEPARATOR$PATH])
fi
test "x$JAVAC" = x && AC_MSG_ERROR([no acceptable Java compiler found in \$PATH])
AX_PROG_JAVAC_WORKS
AC_PROVIDE([$0])dnl
])
|
@ -0,0 +1,72 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.gnu.org/software/autoconf-archive/ax_prog_javac_works.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_PROG_JAVAC_WORKS
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Internal use ONLY.
|
||||||
|
#
|
||||||
|
# Note: This is part of the set of autoconf M4 macros for Java programs.
|
||||||
|
# It is VERY IMPORTANT that you download the whole set, some macros depend
|
||||||
|
# on others. Unfortunately, the autoconf archive does not support the
|
||||||
|
# concept of set of macros, so I had to break it for submission. The
|
||||||
|
# general documentation, as well as the sample configure.in, is included
|
||||||
|
# in the AX_PROG_JAVA macro.
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 2 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
#
|
||||||
|
# As a special exception, the respective Autoconf Macro's copyright owner
|
||||||
|
# gives unlimited permission to copy, distribute and modify the configure
|
||||||
|
# scripts that are the output of Autoconf when processing the Macro. You
|
||||||
|
# need not follow the terms of the GNU General Public License when using
|
||||||
|
# or distributing such scripts, even though portions of the text of the
|
||||||
|
# Macro appear in them. The GNU General Public License (GPL) does govern
|
||||||
|
# all other use of the material that constitutes the Autoconf Macro.
|
||||||
|
#
|
||||||
|
# This special exception to the GPL applies to versions of the Autoconf
|
||||||
|
# Macro released by the Autoconf Archive. When you make and distribute a
|
||||||
|
# modified version of the Autoconf Macro, you may extend this special
|
||||||
|
# exception to the GPL to apply to your modified version as well.
|
||||||
|
|
||||||
|
#serial 6
|
||||||
|
|
||||||
|
AU_ALIAS([AC_PROG_JAVAC_WORKS], [AX_PROG_JAVAC_WORKS])
|
||||||
|
AC_DEFUN([AX_PROG_JAVAC_WORKS],[
dnl Check that the compiler named by JAVAC can compile a trivial class.
dnl The result is cached in ac_cv_prog_javac_works.  On failure the test
dnl program is copied to the configure log and configure aborts.
AC_CACHE_CHECK([if $JAVAC works], ac_cv_prog_javac_works, [
JAVA_TEST=Test.java
CLASS_TEST=Test.class
cat << \EOF > $JAVA_TEST
/* [#]line __oline__ "configure" */
public class Test {
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $JAVA_TEST) >/dev/null 2>&1; then
  ac_cv_prog_javac_works=yes
else
  dnl AC_MSG_ERROR terminates configure, so the failed program has to be
  dnl logged and the scratch files removed before it is invoked.
  echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
  cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
  rm -f $JAVA_TEST $CLASS_TEST
  AC_MSG_ERROR([The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?)])
fi
rm -f $JAVA_TEST $CLASS_TEST
])
AC_PROVIDE([$0])dnl
])
|
@ -0,0 +1,330 @@
|
|||||||
|
#! /bin/sh
|
||||||
|
# Common stub for a few missing GNU programs while installing.
|
||||||
|
|
||||||
|
scriptversion=2012-01-06.18; # UTC
|
||||||
|
|
||||||
|
# Copyright (C) 1996-2012 Free Software Foundation, Inc.
|
||||||
|
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
|
||||||
|
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation; either version 2, or (at your option)
|
||||||
|
# any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# As a special exception to the GNU General Public License, if you
|
||||||
|
# distribute this file as part of a program that contains a
|
||||||
|
# configuration script generated by Autoconf, you may include it under
|
||||||
|
# the same distribution terms that you use for the rest of that program.
|
||||||
|
|
||||||
|
if test $# -eq 0; then
|
||||||
|
echo 1>&2 "Try '$0 --help' for more information"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
run=:
|
||||||
|
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
|
||||||
|
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'
|
||||||
|
|
||||||
|
# In the cases where this matters, 'missing' is being run in the
|
||||||
|
# srcdir already.
|
||||||
|
if test -f configure.ac; then
|
||||||
|
configure_ac=configure.ac
|
||||||
|
else
|
||||||
|
configure_ac=configure.in
|
||||||
|
fi
|
||||||
|
|
||||||
|
msg="missing on your system"
|
||||||
|
|
||||||
|
case $1 in
|
||||||
|
--run)
|
||||||
|
# Try to run requested program, and just exit if it succeeds.
|
||||||
|
run=
|
||||||
|
shift
|
||||||
|
"$@" && exit 0
|
||||||
|
# Exit code 63 means version mismatch. This often happens
|
||||||
|
# when the user tries to use an ancient version of a tool on
|
||||||
|
# a file that requires a minimum version. In this case we
|
||||||
|
# should proceed as if the program had been absent, or
|
||||||
|
# if --run hadn't been passed.
|
||||||
|
if test $? = 63; then
|
||||||
|
run=:
|
||||||
|
msg="probably too old"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
-h|--h|--he|--hel|--help)
|
||||||
|
echo "\
|
||||||
|
$0 [OPTION]... PROGRAM [ARGUMENT]...
|
||||||
|
|
||||||
|
Handle 'PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
|
||||||
|
error status if there is no known handling for PROGRAM.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-h, --help display this help and exit
|
||||||
|
-v, --version output version information and exit
|
||||||
|
--run try to run the given command, and emulate it if it fails
|
||||||
|
|
||||||
|
Supported PROGRAM values:
|
||||||
|
aclocal touch file 'aclocal.m4'
|
||||||
|
autoconf touch file 'configure'
|
||||||
|
autoheader touch file 'config.h.in'
|
||||||
|
autom4te touch the output file, or create a stub one
|
||||||
|
automake touch all 'Makefile.in' files
|
||||||
|
bison create 'y.tab.[ch]', if possible, from existing .[ch]
|
||||||
|
flex create 'lex.yy.c', if possible, from existing .c
|
||||||
|
help2man touch the output file
|
||||||
|
lex create 'lex.yy.c', if possible, from existing .c
|
||||||
|
makeinfo touch the output file
|
||||||
|
yacc create 'y.tab.[ch]', if possible, from existing .[ch]
|
||||||
|
|
||||||
|
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
|
||||||
|
'g' are ignored when checking the name.
|
||||||
|
|
||||||
|
Send bug reports to <bug-automake@gnu.org>."
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
|
||||||
|
echo "missing $scriptversion (GNU Automake)"
|
||||||
|
exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
-*)
|
||||||
|
echo 1>&2 "$0: Unknown '$1' option"
|
||||||
|
echo 1>&2 "Try '$0 --help' for more information"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
|
||||||
|
esac
|
||||||
|
|
||||||
|
# normalize program name to check for.
|
||||||
|
program=`echo "$1" | sed '
|
||||||
|
s/^gnu-//; t
|
||||||
|
s/^gnu//; t
|
||||||
|
s/^g//; t'`
|
||||||
|
|
||||||
|
# Now exit if we have it, but it failed. Also exit now if we
|
||||||
|
# don't have it and --version was passed (most likely to detect
|
||||||
|
# the program). This is about non-GNU programs, so use $1 not
|
||||||
|
# $program.
|
||||||
|
case $1 in
|
||||||
|
lex*|yacc*)
|
||||||
|
# Not GNU programs, they don't have --version.
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
|
||||||
|
# We have it, but it failed.
|
||||||
|
exit 1
|
||||||
|
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
|
||||||
|
# Could not run --version or --help. This is probably someone
|
||||||
|
# running '$TOOL --version' or '$TOOL --help' to check whether
|
||||||
|
# $TOOL exists and not knowing $TOOL uses missing.
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# If it does not exist, or fails to run (possibly an outdated version),
|
||||||
|
# try to emulate it.
|
||||||
|
case $program in
|
||||||
|
aclocal*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified 'acinclude.m4' or '${configure_ac}'. You might want
|
||||||
|
to install the Automake and Perl packages. Grab them from
|
||||||
|
any GNU archive site."
|
||||||
|
touch aclocal.m4
|
||||||
|
;;
|
||||||
|
|
||||||
|
autoconf*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified '${configure_ac}'. You might want to install the
|
||||||
|
Autoconf and GNU m4 packages. Grab them from any GNU
|
||||||
|
archive site."
|
||||||
|
touch configure
|
||||||
|
;;
|
||||||
|
|
||||||
|
autoheader*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified 'acconfig.h' or '${configure_ac}'. You might want
|
||||||
|
to install the Autoconf and GNU m4 packages. Grab them
|
||||||
|
from any GNU archive site."
|
||||||
|
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
|
||||||
|
test -z "$files" && files="config.h"
|
||||||
|
touch_files=
|
||||||
|
for f in $files; do
|
||||||
|
case $f in
|
||||||
|
*:*) touch_files="$touch_files "`echo "$f" |
|
||||||
|
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
|
||||||
|
*) touch_files="$touch_files $f.in";;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
touch $touch_files
|
||||||
|
;;
|
||||||
|
|
||||||
|
automake*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified 'Makefile.am', 'acinclude.m4' or '${configure_ac}'.
|
||||||
|
You might want to install the Automake and Perl packages.
|
||||||
|
Grab them from any GNU archive site."
|
||||||
|
find . -type f -name Makefile.am -print |
|
||||||
|
sed 's/\.am$/.in/' |
|
||||||
|
while read f; do touch "$f"; done
|
||||||
|
;;
|
||||||
|
|
||||||
|
autom4te*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is needed, but is $msg.
|
||||||
|
You might have modified some files without having the
|
||||||
|
proper tools for further handling them.
|
||||||
|
You can get '$1' as part of Autoconf from any GNU
|
||||||
|
archive site."
|
||||||
|
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -f "$file"; then
|
||||||
|
touch $file
|
||||||
|
else
|
||||||
|
test -z "$file" || exec >$file
|
||||||
|
echo "#! /bin/sh"
|
||||||
|
echo "# Created by GNU Automake missing as a replacement of"
|
||||||
|
echo "# $ $@"
|
||||||
|
echo "exit 0"
|
||||||
|
chmod +x $file
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
bison*|yacc*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' $msg. You should only need it if
|
||||||
|
you modified a '.y' file. You may need the Bison package
|
||||||
|
in order for those modifications to take effect. You can get
|
||||||
|
Bison from any GNU archive site."
|
||||||
|
rm -f y.tab.c y.tab.h
|
||||||
|
if test $# -ne 1; then
|
||||||
|
eval LASTARG=\${$#}
|
||||||
|
case $LASTARG in
|
||||||
|
*.y)
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" y.tab.c
|
||||||
|
fi
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" y.tab.h
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
if test ! -f y.tab.h; then
|
||||||
|
echo >y.tab.h
|
||||||
|
fi
|
||||||
|
if test ! -f y.tab.c; then
|
||||||
|
echo 'main() { return 0; }' >y.tab.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
lex*|flex*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified a '.l' file. You may need the Flex package
|
||||||
|
in order for those modifications to take effect. You can get
|
||||||
|
Flex from any GNU archive site."
|
||||||
|
rm -f lex.yy.c
|
||||||
|
if test $# -ne 1; then
|
||||||
|
eval LASTARG=\${$#}
|
||||||
|
case $LASTARG in
|
||||||
|
*.l)
|
||||||
|
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
|
||||||
|
if test -f "$SRCFILE"; then
|
||||||
|
cp "$SRCFILE" lex.yy.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
if test ! -f lex.yy.c; then
|
||||||
|
echo 'main() { return 0; }' >lex.yy.c
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
help2man*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified a dependency of a manual page. You may need the
|
||||||
|
Help2man package in order for those modifications to take
|
||||||
|
effect. You can get Help2man from any GNU archive site."
|
||||||
|
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -f "$file"; then
|
||||||
|
touch $file
|
||||||
|
else
|
||||||
|
test -z "$file" || exec >$file
|
||||||
|
echo ".ab help2man is required to generate this page"
|
||||||
|
exit $?
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
|
makeinfo*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is $msg. You should only need it if
|
||||||
|
you modified a '.texi' or '.texinfo' file, or any other file
|
||||||
|
indirectly affecting the aspect of the manual. The spurious
|
||||||
|
call might also be the consequence of using a buggy 'make' (AIX,
|
||||||
|
DU, IRIX). You might want to install the Texinfo package or
|
||||||
|
the GNU make package. Grab either from any GNU archive site."
|
||||||
|
# The file to touch is that specified with -o ...
|
||||||
|
file=`echo "$*" | sed -n "$sed_output"`
|
||||||
|
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
|
||||||
|
if test -z "$file"; then
|
||||||
|
# ... or it is the one specified with @setfilename ...
|
||||||
|
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
|
||||||
|
file=`sed -n '
|
||||||
|
/^@setfilename/{
|
||||||
|
s/.* \([^ ]*\) *$/\1/
|
||||||
|
p
|
||||||
|
q
|
||||||
|
}' $infile`
|
||||||
|
# ... or it is derived from the source name (dir/f.texi becomes f.info)
|
||||||
|
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
|
||||||
|
fi
|
||||||
|
# If the file does not exist, the user really needs makeinfo;
|
||||||
|
# let's fail without touching anything.
|
||||||
|
test -f $file || exit 1
|
||||||
|
touch $file
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
echo 1>&2 "\
|
||||||
|
WARNING: '$1' is needed, and is $msg.
|
||||||
|
You might have modified some files without having the
|
||||||
|
proper tools for further handling them. Check the 'README' file,
|
||||||
|
it often tells you about the needed prerequisites for installing
|
||||||
|
this package. You may also peek at any GNU archive site, in case
|
||||||
|
some other package would contain this missing '$1' program."
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
# Local variables:
|
||||||
|
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
||||||
|
# time-stamp-start: "scriptversion="
|
||||||
|
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
||||||
|
# time-stamp-time-zone: "UTC"
|
||||||
|
# time-stamp-end: "; # UTC"
|
||||||
|
# End:
|
@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
|
||||||
|
## The single libtool library produced by this directory.
lib_LTLIBRARIES = libffts.la

## C sources compiled in every configuration, followed by the headers
## that are listed alongside them.
libffts_la_SOURCES = \
  ffts.c ffts_small.c ffts_nd.c ffts_real.c ffts_real_nd.c patterns.c \
  codegen.h codegen_arm.h codegen_sse.h \
  ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h ffts_small.h ffts_static.h \
  macros-alpha.h macros-altivec.h macros-neon.h macros-sse.h macros.h \
  neon.h neon_float.h patterns.h types.h vfp.h

## ffts_static.c is used when DYNAMIC_DISABLED is set, codegen.c otherwise.
if DYNAMIC_DISABLED
  libffts_la_SOURCES += ffts_static.c
else
  libffts_la_SOURCES += codegen.c
endif

## The public header is installed into $(includedir)/ffts.
libffts_includedir = $(includedir)/ffts
libffts_include_HEADERS = ../include/ffts.h

## Assembly sources: HAVE_VFP is tested first, then HAVE_NEON, then
## HAVE_SSE; only the first conditional that holds contributes files.
if HAVE_VFP
  libffts_la_SOURCES += vfp.s
else
if HAVE_NEON
## NEON has separate sources for the static and the dynamic build.
if DYNAMIC_DISABLED
  libffts_la_SOURCES += neon_static_f.s neon_static_i.s
else
  libffts_la_SOURCES += neon.s
endif
else
if HAVE_SSE
  libffts_la_SOURCES += sse.s
endif
endif
endif
|
@ -0,0 +1,666 @@
|
|||||||
|
# Makefile.in generated by automake 1.12.4 from Makefile.am.
|
||||||
|
# @configure_input@
|
||||||
|
|
||||||
|
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
@SET_MAKE@
|
||||||
|
|
||||||
|
|
||||||
|
VPATH = @srcdir@
|
||||||
|
am__make_dryrun = \
|
||||||
|
{ \
|
||||||
|
am__dry=no; \
|
||||||
|
case $$MAKEFLAGS in \
|
||||||
|
*\\[\ \ ]*) \
|
||||||
|
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
|
||||||
|
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
|
||||||
|
*) \
|
||||||
|
for am__flg in $$MAKEFLAGS; do \
|
||||||
|
case $$am__flg in \
|
||||||
|
*=*|--*) ;; \
|
||||||
|
*n*) am__dry=yes; break;; \
|
||||||
|
esac; \
|
||||||
|
done;; \
|
||||||
|
esac; \
|
||||||
|
test $$am__dry = yes; \
|
||||||
|
}
|
||||||
|
pkgdatadir = $(datadir)/@PACKAGE@
|
||||||
|
pkgincludedir = $(includedir)/@PACKAGE@
|
||||||
|
pkglibdir = $(libdir)/@PACKAGE@
|
||||||
|
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||||
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||||
|
install_sh_DATA = $(install_sh) -c -m 644
|
||||||
|
install_sh_PROGRAM = $(install_sh) -c
|
||||||
|
install_sh_SCRIPT = $(install_sh) -c
|
||||||
|
INSTALL_HEADER = $(INSTALL_DATA)
|
||||||
|
transform = $(program_transform_name)
|
||||||
|
NORMAL_INSTALL = :
|
||||||
|
PRE_INSTALL = :
|
||||||
|
POST_INSTALL = :
|
||||||
|
NORMAL_UNINSTALL = :
|
||||||
|
PRE_UNINSTALL = :
|
||||||
|
POST_UNINSTALL = :
|
||||||
|
build_triplet = @build@
|
||||||
|
host_triplet = @host@
|
||||||
|
@DYNAMIC_DISABLED_TRUE@am__append_1 = ffts_static.c
|
||||||
|
@DYNAMIC_DISABLED_FALSE@am__append_2 = codegen.c
|
||||||
|
@HAVE_VFP_TRUE@am__append_3 = vfp.s
|
||||||
|
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_4 = neon_static_f.s neon_static_i.s
|
||||||
|
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_5 = neon.s
|
||||||
|
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__append_6 = sse.s
|
||||||
|
subdir = src
|
||||||
|
DIST_COMMON = $(libffts_include_HEADERS) $(srcdir)/Makefile.am \
|
||||||
|
$(srcdir)/Makefile.in $(top_srcdir)/depcomp
|
||||||
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
|
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_check_java_home.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_java_options.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_jar.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
|
||||||
|
$(top_srcdir)/configure.ac
|
||||||
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
|
$(ACLOCAL_M4)
|
||||||
|
mkinstalldirs = $(install_sh) -d
|
||||||
|
CONFIG_HEADER = $(top_builddir)/config.h
|
||||||
|
CONFIG_CLEAN_FILES =
|
||||||
|
CONFIG_CLEAN_VPATH_FILES =
|
||||||
|
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
|
||||||
|
am__vpath_adj = case $$p in \
|
||||||
|
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
|
||||||
|
*) f=$$p;; \
|
||||||
|
esac;
|
||||||
|
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
|
||||||
|
am__install_max = 40
|
||||||
|
am__nobase_strip_setup = \
|
||||||
|
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
|
||||||
|
am__nobase_strip = \
|
||||||
|
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
|
||||||
|
am__nobase_list = $(am__nobase_strip_setup); \
|
||||||
|
for p in $$list; do echo "$$p $$p"; done | \
|
||||||
|
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
|
||||||
|
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
|
||||||
|
if (++n[$$2] == $(am__install_max)) \
|
||||||
|
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
|
||||||
|
END { for (dir in files) print dir, files[dir] }'
|
||||||
|
am__base_list = \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
|
||||||
|
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
|
||||||
|
am__uninstall_files_from_dir = { \
|
||||||
|
test -z "$$files" \
|
||||||
|
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|
||||||
|
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
|
||||||
|
$(am__cd) "$$dir" && rm -f $$files; }; \
|
||||||
|
}
|
||||||
|
am__installdirs = "$(DESTDIR)$(libdir)" \
|
||||||
|
"$(DESTDIR)$(libffts_includedir)"
|
||||||
|
LTLIBRARIES = $(lib_LTLIBRARIES)
|
||||||
|
libffts_la_LIBADD =
|
||||||
|
am__libffts_la_SOURCES_DIST = ffts.c ffts_small.c ffts_nd.c \
|
||||||
|
ffts_real.c ffts_real_nd.c patterns.c codegen.h codegen_arm.h \
|
||||||
|
codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h \
|
||||||
|
ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h \
|
||||||
|
macros-neon.h macros-sse.h macros.h neon.h neon_float.h \
|
||||||
|
patterns.h types.h vfp.h ffts_static.c codegen.c vfp.s \
|
||||||
|
neon_static_f.s neon_static_i.s neon.s sse.s
|
||||||
|
@DYNAMIC_DISABLED_TRUE@am__objects_1 = ffts_static.lo
|
||||||
|
@DYNAMIC_DISABLED_FALSE@am__objects_2 = codegen.lo
|
||||||
|
@HAVE_VFP_TRUE@am__objects_3 = vfp.lo
|
||||||
|
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_4 = neon_static_f.lo \
|
||||||
|
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@ neon_static_i.lo
|
||||||
|
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_5 = neon.lo
|
||||||
|
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__objects_6 = \
|
||||||
|
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@ sse.lo
|
||||||
|
am_libffts_la_OBJECTS = ffts.lo ffts_small.lo ffts_nd.lo ffts_real.lo \
|
||||||
|
ffts_real_nd.lo patterns.lo $(am__objects_1) $(am__objects_2) \
|
||||||
|
$(am__objects_3) $(am__objects_4) $(am__objects_5) \
|
||||||
|
$(am__objects_6)
|
||||||
|
libffts_la_OBJECTS = $(am_libffts_la_OBJECTS)
|
||||||
|
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||||
|
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||||
|
am__depfiles_maybe = depfiles
|
||||||
|
am__mv = mv -f
|
||||||
|
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||||
|
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
||||||
|
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
CCLD = $(CC)
|
||||||
|
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
|
||||||
|
$(LDFLAGS) -o $@
|
||||||
|
CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
|
||||||
|
LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
|
||||||
|
SOURCES = $(libffts_la_SOURCES)
|
||||||
|
DIST_SOURCES = $(am__libffts_la_SOURCES_DIST)
|
||||||
|
am__can_run_installinfo = \
|
||||||
|
case $$AM_UPDATE_INFO_DIR in \
|
||||||
|
n|no|NO) false;; \
|
||||||
|
*) (install-info --version) >/dev/null 2>&1;; \
|
||||||
|
esac
|
||||||
|
HEADERS = $(libffts_include_HEADERS)
|
||||||
|
ETAGS = etags
|
||||||
|
CTAGS = ctags
|
||||||
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
|
ACLOCAL = @ACLOCAL@
|
||||||
|
AMTAR = @AMTAR@
|
||||||
|
AR = @AR@
|
||||||
|
AUTOCONF = @AUTOCONF@
|
||||||
|
AUTOHEADER = @AUTOHEADER@
|
||||||
|
AUTOMAKE = @AUTOMAKE@
|
||||||
|
AWK = @AWK@
|
||||||
|
CC = @CC@
|
||||||
|
CCAS = @CCAS@
|
||||||
|
CCASDEPMODE = @CCASDEPMODE@
|
||||||
|
CCASFLAGS = @CCASFLAGS@
|
||||||
|
CCDEPMODE = @CCDEPMODE@
|
||||||
|
CFLAGS = @CFLAGS@
|
||||||
|
CPP = @CPP@
|
||||||
|
CPPFLAGS = @CPPFLAGS@
|
||||||
|
CXX = @CXX@
|
||||||
|
CXXCPP = @CXXCPP@
|
||||||
|
CXXDEPMODE = @CXXDEPMODE@
|
||||||
|
CXXFLAGS = @CXXFLAGS@
|
||||||
|
CYGPATH_W = @CYGPATH_W@
|
||||||
|
DEFS = @DEFS@
|
||||||
|
DEPDIR = @DEPDIR@
|
||||||
|
DLLTOOL = @DLLTOOL@
|
||||||
|
DSYMUTIL = @DSYMUTIL@
|
||||||
|
DUMPBIN = @DUMPBIN@
|
||||||
|
ECHO_C = @ECHO_C@
|
||||||
|
ECHO_N = @ECHO_N@
|
||||||
|
ECHO_T = @ECHO_T@
|
||||||
|
EGREP = @EGREP@
|
||||||
|
EXEEXT = @EXEEXT@
|
||||||
|
FGREP = @FGREP@
|
||||||
|
GREP = @GREP@
|
||||||
|
INSTALL = @INSTALL@
|
||||||
|
INSTALL_DATA = @INSTALL_DATA@
|
||||||
|
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||||
|
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||||
|
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||||
|
JAR = @JAR@
|
||||||
|
JAVA = @JAVA@
|
||||||
|
JAVAC = @JAVAC@
|
||||||
|
JAVACFLAGS = @JAVACFLAGS@
|
||||||
|
JAVAFLAGS = @JAVAFLAGS@
|
||||||
|
JAVAPREFIX = @JAVAPREFIX@
|
||||||
|
JAVA_PATH_NAME = @JAVA_PATH_NAME@
|
||||||
|
JNI_CPPFLAGS = @JNI_CPPFLAGS@
|
||||||
|
LD = @LD@
|
||||||
|
LDFLAGS = @LDFLAGS@
|
||||||
|
LIBOBJS = @LIBOBJS@
|
||||||
|
LIBS = @LIBS@
|
||||||
|
LIBTOOL = @LIBTOOL@
|
||||||
|
LIPO = @LIPO@
|
||||||
|
LN_S = @LN_S@
|
||||||
|
LTLIBOBJS = @LTLIBOBJS@
|
||||||
|
MAKEINFO = @MAKEINFO@
|
||||||
|
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||||
|
MKDIR_P = @MKDIR_P@
|
||||||
|
NM = @NM@
|
||||||
|
NMEDIT = @NMEDIT@
|
||||||
|
OBJDUMP = @OBJDUMP@
|
||||||
|
OBJEXT = @OBJEXT@
|
||||||
|
OTOOL = @OTOOL@
|
||||||
|
OTOOL64 = @OTOOL64@
|
||||||
|
PACKAGE = @PACKAGE@
|
||||||
|
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||||
|
PACKAGE_NAME = @PACKAGE_NAME@
|
||||||
|
PACKAGE_STRING = @PACKAGE_STRING@
|
||||||
|
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||||
|
PACKAGE_URL = @PACKAGE_URL@
|
||||||
|
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||||
|
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||||
|
RANLIB = @RANLIB@
|
||||||
|
SED = @SED@
|
||||||
|
SET_MAKE = @SET_MAKE@
|
||||||
|
SHELL = @SHELL@
|
||||||
|
STRIP = @STRIP@
|
||||||
|
VERSION = @VERSION@
|
||||||
|
_ACJNI_JAVAC = @_ACJNI_JAVAC@
|
||||||
|
abs_builddir = @abs_builddir@
|
||||||
|
abs_srcdir = @abs_srcdir@
|
||||||
|
abs_top_builddir = @abs_top_builddir@
|
||||||
|
abs_top_srcdir = @abs_top_srcdir@
|
||||||
|
ac_ct_AR = @ac_ct_AR@
|
||||||
|
ac_ct_CC = @ac_ct_CC@
|
||||||
|
ac_ct_CXX = @ac_ct_CXX@
|
||||||
|
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||||
|
am__include = @am__include@
|
||||||
|
am__leading_dot = @am__leading_dot@
|
||||||
|
am__quote = @am__quote@
|
||||||
|
am__tar = @am__tar@
|
||||||
|
am__untar = @am__untar@
|
||||||
|
bindir = @bindir@
|
||||||
|
build = @build@
|
||||||
|
build_alias = @build_alias@
|
||||||
|
build_cpu = @build_cpu@
|
||||||
|
build_os = @build_os@
|
||||||
|
build_vendor = @build_vendor@
|
||||||
|
builddir = @builddir@
|
||||||
|
datadir = @datadir@
|
||||||
|
datarootdir = @datarootdir@
|
||||||
|
docdir = @docdir@
|
||||||
|
dvidir = @dvidir@
|
||||||
|
exec_prefix = @exec_prefix@
|
||||||
|
host = @host@
|
||||||
|
host_alias = @host_alias@
|
||||||
|
host_cpu = @host_cpu@
|
||||||
|
host_os = @host_os@
|
||||||
|
host_vendor = @host_vendor@
|
||||||
|
htmldir = @htmldir@
|
||||||
|
includedir = @includedir@
|
||||||
|
infodir = @infodir@
|
||||||
|
install_sh = @install_sh@
|
||||||
|
libdir = @libdir@
|
||||||
|
libexecdir = @libexecdir@
|
||||||
|
localedir = @localedir@
|
||||||
|
localstatedir = @localstatedir@
|
||||||
|
mandir = @mandir@
|
||||||
|
mkdir_p = @mkdir_p@
|
||||||
|
oldincludedir = @oldincludedir@
|
||||||
|
pdfdir = @pdfdir@
|
||||||
|
prefix = @prefix@
|
||||||
|
program_transform_name = @program_transform_name@
|
||||||
|
psdir = @psdir@
|
||||||
|
sbindir = @sbindir@
|
||||||
|
sharedstatedir = @sharedstatedir@
|
||||||
|
srcdir = @srcdir@
|
||||||
|
sysconfdir = @sysconfdir@
|
||||||
|
target_alias = @target_alias@
|
||||||
|
top_build_prefix = @top_build_prefix@
|
||||||
|
top_builddir = @top_builddir@
|
||||||
|
top_srcdir = @top_srcdir@
|
||||||
|
lib_LTLIBRARIES = libffts.la
|
||||||
|
libffts_la_SOURCES = ffts.c ffts_small.c ffts_nd.c ffts_real.c \
|
||||||
|
ffts_real_nd.c patterns.c codegen.h codegen_arm.h \
|
||||||
|
codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h \
|
||||||
|
ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h \
|
||||||
|
macros-neon.h macros-sse.h macros.h neon.h neon_float.h \
|
||||||
|
patterns.h types.h vfp.h $(am__append_1) $(am__append_2) \
|
||||||
|
$(am__append_3) $(am__append_4) $(am__append_5) \
|
||||||
|
$(am__append_6)
|
||||||
|
libffts_includedir = $(includedir)/ffts
|
||||||
|
libffts_include_HEADERS = ../include/ffts.h
|
||||||
|
all: all-am
|
||||||
|
|
||||||
|
.SUFFIXES:
|
||||||
|
.SUFFIXES: .c .lo .o .obj .s
|
||||||
|
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
||||||
|
@for dep in $?; do \
|
||||||
|
case '$(am__configure_deps)' in \
|
||||||
|
*$$dep*) \
|
||||||
|
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||||
|
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||||
|
exit 1;; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
|
||||||
|
$(am__cd) $(top_srcdir) && \
|
||||||
|
$(AUTOMAKE) --gnu src/Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
|
@case '$?' in \
|
||||||
|
*config.status*) \
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||||
|
*) \
|
||||||
|
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
|
||||||
|
esac;
|
||||||
|
|
||||||
|
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
|
||||||
|
$(top_srcdir)/configure: $(am__configure_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(am__aclocal_m4_deps):
|
||||||
|
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
||||||
|
list2=; for p in $$list; do \
|
||||||
|
if test -f $$p; then \
|
||||||
|
list2="$$list2 $$p"; \
|
||||||
|
else :; fi; \
|
||||||
|
done; \
|
||||||
|
test -z "$$list2" || { \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
|
||||||
|
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
|
||||||
|
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
|
||||||
|
}
|
||||||
|
|
||||||
|
uninstall-libLTLIBRARIES:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
|
||||||
|
for p in $$list; do \
|
||||||
|
$(am__strip_dir) \
|
||||||
|
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
|
||||||
|
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
|
||||||
|
done
|
||||||
|
|
||||||
|
clean-libLTLIBRARIES:
|
||||||
|
-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
|
||||||
|
@list='$(lib_LTLIBRARIES)'; \
|
||||||
|
locs=`for p in $$list; do echo $$p; done | \
|
||||||
|
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
|
||||||
|
sort -u`; \
|
||||||
|
test -z "$$locs" || { \
|
||||||
|
echo rm -f $${locs}; \
|
||||||
|
rm -f $${locs}; \
|
||||||
|
}
|
||||||
|
libffts.la: $(libffts_la_OBJECTS) $(libffts_la_DEPENDENCIES) $(EXTRA_libffts_la_DEPENDENCIES)
|
||||||
|
$(LINK) -rpath $(libdir) $(libffts_la_OBJECTS) $(libffts_la_LIBADD) $(LIBS)
|
||||||
|
|
||||||
|
mostlyclean-compile:
|
||||||
|
-rm -f *.$(OBJEXT)
|
||||||
|
|
||||||
|
distclean-compile:
|
||||||
|
-rm -f *.tab.c
|
||||||
|
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codegen.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_nd.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_real.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_real_nd.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_small.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_static.Plo@am__quote@
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/patterns.Plo@am__quote@
|
||||||
|
|
||||||
|
.c.o:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
|
||||||
|
|
||||||
|
.c.obj:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
|
||||||
|
|
||||||
|
.c.lo:
|
||||||
|
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
|
||||||
|
|
||||||
|
.s.o:
|
||||||
|
$(CCASCOMPILE) -c -o $@ $<
|
||||||
|
|
||||||
|
.s.obj:
|
||||||
|
$(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
|
||||||
|
|
||||||
|
.s.lo:
|
||||||
|
$(LTCCASCOMPILE) -c -o $@ $<
|
||||||
|
|
||||||
|
mostlyclean-libtool:
|
||||||
|
-rm -f *.lo
|
||||||
|
|
||||||
|
clean-libtool:
|
||||||
|
-rm -rf .libs _libs
|
||||||
|
install-libffts_includeHEADERS: $(libffts_include_HEADERS)
|
||||||
|
@$(NORMAL_INSTALL)
|
||||||
|
@list='$(libffts_include_HEADERS)'; test -n "$(libffts_includedir)" || list=; \
|
||||||
|
if test -n "$$list"; then \
|
||||||
|
echo " $(MKDIR_P) '$(DESTDIR)$(libffts_includedir)'"; \
|
||||||
|
$(MKDIR_P) "$(DESTDIR)$(libffts_includedir)" || exit 1; \
|
||||||
|
fi; \
|
||||||
|
for p in $$list; do \
|
||||||
|
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
|
||||||
|
echo "$$d$$p"; \
|
||||||
|
done | $(am__base_list) | \
|
||||||
|
while read files; do \
|
||||||
|
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libffts_includedir)'"; \
|
||||||
|
$(INSTALL_HEADER) $$files "$(DESTDIR)$(libffts_includedir)" || exit $$?; \
|
||||||
|
done
|
||||||
|
|
||||||
|
uninstall-libffts_includeHEADERS:
|
||||||
|
@$(NORMAL_UNINSTALL)
|
||||||
|
@list='$(libffts_include_HEADERS)'; test -n "$(libffts_includedir)" || list=; \
|
||||||
|
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
|
||||||
|
dir='$(DESTDIR)$(libffts_includedir)'; $(am__uninstall_files_from_dir)
|
||||||
|
|
||||||
|
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
mkid -fID $$unique
|
||||||
|
tags: TAGS
|
||||||
|
|
||||||
|
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
set x; \
|
||||||
|
here=`pwd`; \
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
shift; \
|
||||||
|
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||||
|
test -n "$$unique" || unique=$$empty_fix; \
|
||||||
|
if test $$# -gt 0; then \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
"$$@" $$unique; \
|
||||||
|
else \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
$$unique; \
|
||||||
|
fi; \
|
||||||
|
fi
|
||||||
|
ctags: CTAGS
|
||||||
|
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
test -z "$(CTAGS_ARGS)$$unique" \
|
||||||
|
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||||
|
$$unique
|
||||||
|
|
||||||
|
GTAGS:
|
||||||
|
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||||
|
&& $(am__cd) $(top_srcdir) \
|
||||||
|
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||||
|
|
||||||
|
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP)'; \
|
||||||
|
case "$(srcdir)" in \
|
||||||
|
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||||
|
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||||
|
esac; \
|
||||||
|
for i in $$list; do \
|
||||||
|
if test -f "$$i"; then \
|
||||||
|
echo "$(subdir)/$$i"; \
|
||||||
|
else \
|
||||||
|
echo "$$sdir/$$i"; \
|
||||||
|
fi; \
|
||||||
|
done >> $(top_builddir)/cscope.files
|
||||||
|
|
||||||
|
distclean-tags:
|
||||||
|
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||||
|
|
||||||
|
distdir: $(DISTFILES)
|
||||||
|
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
list='$(DISTFILES)'; \
|
||||||
|
dist_files=`for file in $$list; do echo $$file; done | \
|
||||||
|
sed -e "s|^$$srcdirstrip/||;t" \
|
||||||
|
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||||
|
case $$dist_files in \
|
||||||
|
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||||
|
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||||
|
sort -u` ;; \
|
||||||
|
esac; \
|
||||||
|
for file in $$dist_files; do \
|
||||||
|
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||||
|
if test -d $$d/$$file; then \
|
||||||
|
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||||
|
if test -d "$(distdir)/$$file"; then \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||||
|
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
else \
|
||||||
|
test -f "$(distdir)/$$file" \
|
||||||
|
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
check-am: all-am
|
||||||
|
check: check-am
|
||||||
|
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
|
||||||
|
installdirs:
|
||||||
|
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libffts_includedir)"; do \
|
||||||
|
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
|
||||||
|
done
|
||||||
|
install: install-am
|
||||||
|
install-exec: install-exec-am
|
||||||
|
install-data: install-data-am
|
||||||
|
uninstall: uninstall-am
|
||||||
|
|
||||||
|
install-am: all-am
|
||||||
|
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||||
|
|
||||||
|
installcheck: installcheck-am
|
||||||
|
install-strip:
|
||||||
|
if test -z '$(STRIP)'; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
install; \
|
||||||
|
else \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||||
|
fi
|
||||||
|
mostlyclean-generic:
|
||||||
|
|
||||||
|
clean-generic:
|
||||||
|
|
||||||
|
distclean-generic:
|
||||||
|
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||||
|
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||||
|
|
||||||
|
maintainer-clean-generic:
|
||||||
|
@echo "This command is intended for maintainers to use"
|
||||||
|
@echo "it deletes files that may require special tools to rebuild."
|
||||||
|
clean: clean-am
|
||||||
|
|
||||||
|
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
|
||||||
|
mostlyclean-am
|
||||||
|
|
||||||
|
distclean: distclean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
distclean-am: clean-am distclean-compile distclean-generic \
|
||||||
|
distclean-tags
|
||||||
|
|
||||||
|
dvi: dvi-am
|
||||||
|
|
||||||
|
dvi-am:
|
||||||
|
|
||||||
|
html: html-am
|
||||||
|
|
||||||
|
html-am:
|
||||||
|
|
||||||
|
info: info-am
|
||||||
|
|
||||||
|
info-am:
|
||||||
|
|
||||||
|
install-data-am: install-libffts_includeHEADERS
|
||||||
|
|
||||||
|
install-dvi: install-dvi-am
|
||||||
|
|
||||||
|
install-dvi-am:
|
||||||
|
|
||||||
|
install-exec-am: install-libLTLIBRARIES
|
||||||
|
|
||||||
|
install-html: install-html-am
|
||||||
|
|
||||||
|
install-html-am:
|
||||||
|
|
||||||
|
install-info: install-info-am
|
||||||
|
|
||||||
|
install-info-am:
|
||||||
|
|
||||||
|
install-man:
|
||||||
|
|
||||||
|
install-pdf: install-pdf-am
|
||||||
|
|
||||||
|
install-pdf-am:
|
||||||
|
|
||||||
|
install-ps: install-ps-am
|
||||||
|
|
||||||
|
install-ps-am:
|
||||||
|
|
||||||
|
installcheck-am:
|
||||||
|
|
||||||
|
maintainer-clean: maintainer-clean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||||
|
|
||||||
|
mostlyclean: mostlyclean-am
|
||||||
|
|
||||||
|
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||||
|
mostlyclean-libtool
|
||||||
|
|
||||||
|
pdf: pdf-am
|
||||||
|
|
||||||
|
pdf-am:
|
||||||
|
|
||||||
|
ps: ps-am
|
||||||
|
|
||||||
|
ps-am:
|
||||||
|
|
||||||
|
uninstall-am: uninstall-libLTLIBRARIES \
|
||||||
|
uninstall-libffts_includeHEADERS
|
||||||
|
|
||||||
|
.MAKE: install-am install-strip
|
||||||
|
|
||||||
|
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
|
||||||
|
clean-libLTLIBRARIES clean-libtool cscopelist ctags distclean \
|
||||||
|
distclean-compile distclean-generic distclean-libtool \
|
||||||
|
distclean-tags distdir dvi dvi-am html html-am info info-am \
|
||||||
|
install install-am install-data install-data-am install-dvi \
|
||||||
|
install-dvi-am install-exec install-exec-am install-html \
|
||||||
|
install-html-am install-info install-info-am \
|
||||||
|
install-libLTLIBRARIES install-libffts_includeHEADERS \
|
||||||
|
install-man install-pdf install-pdf-am install-ps \
|
||||||
|
install-ps-am install-strip installcheck installcheck-am \
|
||||||
|
installdirs maintainer-clean maintainer-clean-generic \
|
||||||
|
mostlyclean mostlyclean-compile mostlyclean-generic \
|
||||||
|
mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
|
||||||
|
uninstall-am uninstall-libLTLIBRARIES \
|
||||||
|
uninstall-libffts_includeHEADERS
|
||||||
|
|
||||||
|
|
||||||
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
.NOEXPORT:
|
@ -0,0 +1,731 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "codegen.h"
|
||||||
|
#include "macros.h"
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#include <libkern/OSCacheControl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include "codegen_arm.h"
|
||||||
|
#include "neon.h"
|
||||||
|
#elif HAVE_VFP
|
||||||
|
#include "codegen_arm.h"
|
||||||
|
#include "vfp.h"
|
||||||
|
#else
|
||||||
|
#include "codegen_sse.h"
|
||||||
|
#include "macros-sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __ANDROID__
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Count the internal nodes of the recursive decomposition tree for a
 * transform of size N.
 *
 * A node of size N has five children of sizes N/4, N/8, N/8, N/4 and N/4;
 * a (sub-)transform that is no larger than leafN is a leaf and contributes
 * nothing.  `offset` is only forwarded to the children and has no influence
 * on the returned value.
 *
 * Returns 0 if N <= leafN, otherwise 1 plus the counts of the five subtrees.
 */
int tree_count(int N, int leafN, int offset) {
	int nodes;

	if (N > leafN) {
		/* this node itself ... */
		nodes = 1;
		/* ... plus everything below it, visited in layout order */
		nodes += tree_count(N/4, leafN, offset);
		nodes += tree_count(N/8, leafN, offset + N/4);
		nodes += tree_count(N/8, leafN, offset + N/4 + N/8);
		nodes += tree_count(N/4, leafN, offset + N/2);
		nodes += tree_count(N/4, leafN, offset + 3*N/4);
	} else {
		nodes = 0;
	}

	return nodes;
}
|
||||||
|
|
||||||
|
/*
 * Walk the same five-way decomposition tree as tree_count() and append one
 * record per internal node to the array that *p points into.
 *
 * Children are visited first (sizes N/4, N/8, N/8, N/4, N/4), so records are
 * emitted in post-order.  Each record is two size_t values: the node size N
 * followed by twice its offset.  On return *p has been advanced past every
 * record written.  Nodes with N <= leafN write nothing.
 *
 * The caller must provide room for two size_t per internal node.
 */
void elaborate_tree(size_t **p, int N, int leafN, int offset) {
	size_t *record;

	if (N <= leafN)
		return;

	elaborate_tree(p, N/4, leafN, offset);
	elaborate_tree(p, N/8, leafN, offset + N/4);
	elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
	elaborate_tree(p, N/4, leafN, offset + N/2);
	elaborate_tree(p, N/4, leafN, offset + 3*N/4);

	/* the children have already moved *p; fetch the current write position */
	record = *p;
	record[0] = N;
	record[1] = offset*2;
	*p = record + 2;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t LUT_offset(size_t N, size_t leafN) {
|
||||||
|
int i;
|
||||||
|
size_t p_lut_size = 0;
|
||||||
|
size_t lut_size = 0;
|
||||||
|
int hardcoded = 0;
|
||||||
|
size_t n_luts = __builtin_ctzl(N/leafN);
|
||||||
|
int n = leafN*2;
|
||||||
|
//if(N <= 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
|
||||||
|
|
||||||
|
for(i=0;i<n_luts-1;i++) {
|
||||||
|
p_lut_size = lut_size;
|
||||||
|
if(!i || hardcoded) {
|
||||||
|
#ifdef __arm__
|
||||||
|
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||||
|
else lut_size += n/4 * sizeof(cdata_t);
|
||||||
|
#else
|
||||||
|
lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||||
|
#endif
|
||||||
|
// n *= 2;
|
||||||
|
} else {
|
||||||
|
#ifdef __arm__
|
||||||
|
lut_size += n/8 * 3 * sizeof(cdata_t);
|
||||||
|
#else
|
||||||
|
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
n *= 2;
|
||||||
|
}
|
||||||
|
return lut_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * insns_t is the element type used for pointers into the generated-code
 * buffer: a 32-bit word when __arm__ is defined, a single byte otherwise.
 */
#ifdef __arm__
|
||||||
|
typedef uint32_t insns_t;
|
||||||
|
#else
|
||||||
|
typedef uint8_t insns_t;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Store one byte at the write cursor *p and advance the cursor. */
#define P(x) (*(*p)++ = x)

/*
 * Write `count` bytes of x86 NOP padding at *p and advance *p past them.
 *
 * Runs of 1..9 bytes are emitted as a single sequence taken from the table
 * below; longer runs are built from as many 9-byte sequences as fit, followed
 * by one sequence for the remainder.  A count of 0 writes nothing.
 */
void insert_nops(uint8_t **p, uint32_t count) {
	/* nop_seq[k] is the k-byte sequence; row 0 is a placeholder */
	static const uint8_t nop_seq[10][9] = {
		{0x00},
		{0x90},
		{0x66, 0x90},
		{0x0F, 0x1F, 0x00},
		{0x0F, 0x1F, 0x40, 0x00},
		{0x0F, 0x1F, 0x44, 0x00, 0x00},
		{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
		{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
		{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
		{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
	};
	uint32_t k;

	/* peel off full 9-byte sequences while more than 9 bytes remain */
	while (count > 9) {
		for (k = 0; k < 9; k++)
			P(nop_seq[9][k]);
		count -= 9;
	}

	/* remainder is 0..9 bytes: one sequence of exactly that length */
	for (k = 0; k < count; k++)
		P(nop_seq[count][k]);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Pad the output at *p with NOPs so that the address (*p + offset) becomes a
 * multiple of 16, i.e. whatever lies `offset` bytes after the new cursor is
 * 16-byte aligned.  Between 0 and 15 padding bytes are emitted through
 * insert_nops(), which also advances *p.
 *
 * Only active when compiled for x86-64; on every other target the function
 * does nothing.
 */
void align_mem16(uint8_t **p, uint32_t offset) {
#ifdef __x86_64__
	/* uintptr_t keeps the full pointer value; only the low 4 bits matter */
	const uintptr_t target = (uintptr_t)(*p) + offset;
	const uint32_t pad = (uint32_t)((16u - (target & 0xfu)) & 0xfu);

	insert_nops(p, pad);
#else
	(void)p;
	(void)offset;
#endif
}
|
||||||
|
|
||||||
|
void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
|
||||||
|
int count = tree_count(N, leafN, 0) + 1;
|
||||||
|
size_t *ps = malloc(count * 2 * sizeof(size_t));
|
||||||
|
size_t *pps = ps;
|
||||||
|
|
||||||
|
#ifdef __x86_64__
|
||||||
|
if(sign < 0) p->constants = sse_constants;
|
||||||
|
else p->constants = sse_constants_inv;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
elaborate_tree(&pps, N, leafN, 0);
|
||||||
|
pps[0] = 0;
|
||||||
|
pps[1] = 0;
|
||||||
|
|
||||||
|
pps = ps;
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
if(N < 8192) p->transform_size = 8192;
|
||||||
|
else p->transform_size = N;
|
||||||
|
#else
|
||||||
|
if(N < 2048) p->transform_size = 16384;
|
||||||
|
else p->transform_size = 16384 + 2*N/8 * __builtin_ctzl(N);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
p->transform_base = mmap(NULL, p->transform_size, PROT_WRITE | PROT_READ, MAP_ANON | MAP_SHARED, -1, 0);
|
||||||
|
#else
|
||||||
|
#define MAP_ANONYMOUS 0x20
|
||||||
|
p->transform_base = mmap(NULL, p->transform_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
if(p->transform_base == MAP_FAILED) {
|
||||||
|
fprintf(stderr, "MAP FAILED\n");
|
||||||
|
exit(1);
|
||||||
|
}*/
|
||||||
|
insns_t *func = p->transform_base;//valloc(8192);
|
||||||
|
insns_t *fp = func;
|
||||||
|
|
||||||
|
//fprintf(stderr, "Allocating %d bytes \n", p->transform_size);
|
||||||
|
//fprintf(stderr, "Base address = %016p\n", func);
|
||||||
|
|
||||||
|
if(!func) {
|
||||||
|
fprintf(stderr, "NOMEM\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
insns_t *x_8_addr = fp;
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
memcpy(fp, neon_x8, neon_x8_t - neon_x8);
|
||||||
|
/*
|
||||||
|
* Changes adds to subtracts and vice versa to allow the computation
|
||||||
|
* of both the IFFT and FFT
|
||||||
|
*/
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
|
||||||
|
fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[70] ^= 0x00200000; fp[74] ^= 0x00200000;
|
||||||
|
fp[97] ^= 0x00200000; fp[98] ^= 0x00200000; fp[102] ^= 0x00200000; fp[104] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_x8_t - neon_x8) / 4;
|
||||||
|
#else
|
||||||
|
memcpy(fp, vfp_x8, vfp_end - vfp_x8);
|
||||||
|
if(sign > 0) {
|
||||||
|
fp[65] ^= 0x00000040;
|
||||||
|
fp[66] ^= 0x00000040;
|
||||||
|
fp[68] ^= 0x00000040;
|
||||||
|
fp[70] ^= 0x00000040;
|
||||||
|
fp[103] ^= 0x00000040;
|
||||||
|
fp[104] ^= 0x00000040;
|
||||||
|
fp[105] ^= 0x00000040;
|
||||||
|
fp[108] ^= 0x00000040;
|
||||||
|
fp[113] ^= 0x00000040;
|
||||||
|
fp[114] ^= 0x00000040;
|
||||||
|
fp[117] ^= 0x00000040;
|
||||||
|
fp[118] ^= 0x00000040;
|
||||||
|
}
|
||||||
|
fp += (vfp_end - vfp_x8) / 4;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
align_mem16(&fp, 0);
|
||||||
|
x_8_addr = fp;
|
||||||
|
align_mem16(&fp, 5);
|
||||||
|
memcpy(fp, x8_soft, x8_hard - x8_soft);
|
||||||
|
fp += (x8_hard - x8_soft);
|
||||||
|
//fprintf(stderr, "X8 start address = %016p\n", x_8_addr);
|
||||||
|
#endif
|
||||||
|
//uint32_t *x_8_t_addr = fp;
|
||||||
|
//memcpy(fp, neon_x8_t, neon_end - neon_x8_t);
|
||||||
|
//fp += (neon_end - neon_x8_t) / 4;
|
||||||
|
insns_t *x_4_addr = fp;
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
memcpy(fp, neon_x4, neon_x8 - neon_x4);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[26] ^= 0x00200000; fp[28] ^= 0x00200000; fp[31] ^= 0x00200000; fp[32] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_x8 - neon_x4) / 4;
|
||||||
|
#else
|
||||||
|
memcpy(fp, vfp_x4, vfp_x8 - vfp_x4);
|
||||||
|
if(sign > 0) {
|
||||||
|
fp[36] ^= 0x00000040;
|
||||||
|
fp[38] ^= 0x00000040;
|
||||||
|
fp[43] ^= 0x00000040;
|
||||||
|
fp[44] ^= 0x00000040;
|
||||||
|
}
|
||||||
|
fp += (vfp_x8 - vfp_x4) / 4;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
align_mem16(&fp, 0);
|
||||||
|
x_4_addr = fp;
|
||||||
|
memcpy(fp, x4, x8_soft - x4);
|
||||||
|
fp += (x8_soft - x4);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
insns_t *start = fp;
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
*fp = PUSH_LR(); fp++;
|
||||||
|
*fp = 0xed2d8b10; fp++;
|
||||||
|
|
||||||
|
ADDI(&fp, 3, 1, 0);
|
||||||
|
ADDI(&fp, 7, 1, N);
|
||||||
|
ADDI(&fp, 5, 1, 2*N);
|
||||||
|
ADDI(&fp, 10, 7, 2*N);
|
||||||
|
ADDI(&fp, 4, 5, 2*N);
|
||||||
|
ADDI(&fp, 8, 10, 2*N);
|
||||||
|
ADDI(&fp, 6, 4, 2*N);
|
||||||
|
ADDI(&fp, 9, 8, 2*N);
|
||||||
|
|
||||||
|
*fp = LDRI(12, 0, ((uint32_t)&p->offsets) - ((uint32_t)p)); fp++; // load offsets into r12
|
||||||
|
// *fp++ = LDRI(1, 0, 4); // load ws into r1
|
||||||
|
ADDI(&fp, 1, 0, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 0, 2, 0), // mov out into r0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
MOVI(&fp, 11, p->i0);
|
||||||
|
#else
|
||||||
|
MOVI(&fp, 11, p->i0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
align_mem16(&fp, 0);
|
||||||
|
start = fp;
|
||||||
|
|
||||||
|
*fp++ = 0x4c;
|
||||||
|
*fp++ = 0x8b;
|
||||||
|
*fp++ = 0x07;
|
||||||
|
uint32_t lp_cnt = p->i0 * 4;
|
||||||
|
MOVI(&fp, RCX, lp_cnt);
|
||||||
|
|
||||||
|
//LEA(&fp, R8, RDI, ((uint32_t)&p->offsets) - ((uint32_t)p));
|
||||||
|
#endif
|
||||||
|
//fp++;
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
memcpy(fp, neon_ee, neon_oo - neon_ee);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000;
|
||||||
|
fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000;
|
||||||
|
fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_oo - neon_ee) / 4;
|
||||||
|
#else
|
||||||
|
memcpy(fp, vfp_e, vfp_o - vfp_e);
|
||||||
|
if(sign > 0) {
|
||||||
|
fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040;
|
||||||
|
fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040;
|
||||||
|
fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040;
|
||||||
|
}
|
||||||
|
fp += (vfp_o - vfp_e) / 4;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
//fprintf(stderr, "Body start address = %016p\n", start);
|
||||||
|
|
||||||
|
PUSH(&fp, RBP);
|
||||||
|
PUSH(&fp, RBX);
|
||||||
|
PUSH(&fp, R10);
|
||||||
|
PUSH(&fp, R11);
|
||||||
|
PUSH(&fp, R12);
|
||||||
|
PUSH(&fp, R13);
|
||||||
|
PUSH(&fp, R14);
|
||||||
|
PUSH(&fp, R15);
|
||||||
|
|
||||||
|
int i;
|
||||||
|
memcpy(fp, leaf_ee_init, leaf_ee - leaf_ee_init);
|
||||||
|
|
||||||
|
//fprintf(stderr, "Leaf ee init address = %016p\n", leaf_ee_init);
|
||||||
|
//fprintf(stderr, "Constants address = %016p\n", sse_constants);
|
||||||
|
//fprintf(stderr, "Constants address = %016p\n", p->constants);
|
||||||
|
|
||||||
|
//int32_t val = READ_IMM32(fp + 3);
|
||||||
|
//fprintf(stderr, "diff = 0x%x\n", ((uint32_t)&p->constants) - ((uint32_t)p));
|
||||||
|
|
||||||
|
//int64_t v2 = val + (int64_t)((void *)leaf_ee_init - (void *)fp );
|
||||||
|
//fprintf(stderr, "IMM = 0x%llx\n", v2);
|
||||||
|
|
||||||
|
//IMM32_NI(fp + 3, ((int64_t) READ_IMM32(fp + 3)) + ((void *)leaf_ee_init - (void *)fp ));
|
||||||
|
fp += (leaf_ee - leaf_ee_init);
|
||||||
|
|
||||||
|
//fprintf(stderr, "Leaf start address = %016p\n", fp);
|
||||||
|
align_mem16(&fp, 9);
|
||||||
|
memcpy(fp, leaf_ee, leaf_oo - leaf_ee);
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t offsets[8] = {0, N, N/2, 3*N/2, N/4, 5*N/4, 7*N/4, 3*N/4};
|
||||||
|
uint32_t offsets_o[8] = {0, N, N/2, 3*N/2, 7*N/4, 3*N/4, N/4, 5*N/4};
|
||||||
|
uint32_t offsets_oe[8] = {7*N/4, 3*N/4, N/4, 5*N/4, 0, N, 3*N/2, N/2};
|
||||||
|
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_ee_offsets[i], offsets[i]*4);
|
||||||
|
|
||||||
|
fp += (leaf_oo - leaf_ee);
|
||||||
|
|
||||||
|
if(__builtin_ctzl(N) & 1){
|
||||||
|
|
||||||
|
if(p->i1) {
|
||||||
|
lp_cnt += p->i1 * 4;
|
||||||
|
MOVI(&fp, RCX, lp_cnt);
|
||||||
|
align_mem16(&fp, 4);
|
||||||
|
memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
|
||||||
|
fp += (leaf_eo - leaf_oo);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
memcpy(fp, leaf_oe, leaf_end - leaf_oe);
|
||||||
|
lp_cnt += 4;
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oe_offsets[i], offsets_o[i]*4);
|
||||||
|
fp += (leaf_end - leaf_oe);
|
||||||
|
|
||||||
|
}else{
|
||||||
|
|
||||||
|
|
||||||
|
memcpy(fp, leaf_eo, leaf_oe - leaf_eo);
|
||||||
|
lp_cnt += 4;
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_eo_offsets[i], offsets[i]*4);
|
||||||
|
fp += (leaf_oe - leaf_eo);
|
||||||
|
|
||||||
|
if(p->i1) {
|
||||||
|
lp_cnt += p->i1 * 4;
|
||||||
|
MOVI(&fp, RCX, lp_cnt);
|
||||||
|
align_mem16(&fp, 4);
|
||||||
|
memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
|
||||||
|
fp += (leaf_eo - leaf_oo);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
if(p->i1) {
|
||||||
|
lp_cnt += p->i1 * 4;
|
||||||
|
MOVI(&fp, RCX, lp_cnt);
|
||||||
|
align_mem16(&fp, 9);
|
||||||
|
memcpy(fp, leaf_ee, leaf_oo - leaf_ee);
|
||||||
|
for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_ee_offsets[i], offsets_oe[i]*4);
|
||||||
|
fp += (leaf_oo - leaf_ee);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//fprintf(stderr, "Body start address = %016p\n", fp);
|
||||||
|
//LEA(&fp, R8, RDI, ((uint32_t)&p->ws) - ((uint32_t)p));
|
||||||
|
memcpy(fp, x_init, x4 - x_init);
|
||||||
|
//IMM32_NI(fp + 3, ((int64_t)READ_IMM32(fp + 3)) + ((void *)x_init - (void *)fp ));
|
||||||
|
fp += (x4 - x_init);
|
||||||
|
|
||||||
|
int32_t pAddr = 0;
|
||||||
|
int32_t pN = 0;
|
||||||
|
int32_t pLUT = 0;
|
||||||
|
count = 2;
|
||||||
|
while(pps[0]) {
|
||||||
|
|
||||||
|
if(!pN) {
|
||||||
|
MOVI(&fp, RCX, pps[0] / 4);
|
||||||
|
}else{
|
||||||
|
if((pps[1]*4)-pAddr) ADDI(&fp, RDX, (pps[1] * 4)- pAddr);
|
||||||
|
if(pps[0] > leafN && pps[0] - pN) {
|
||||||
|
|
||||||
|
int diff = __builtin_ctzl(pps[0]) - __builtin_ctzl(pN);
|
||||||
|
*fp++ = 0xc1;
|
||||||
|
|
||||||
|
if(diff > 0) {
|
||||||
|
*fp++ = 0xe1;
|
||||||
|
*fp++ = (diff & 0xff);
|
||||||
|
}else{
|
||||||
|
*fp++ = 0xe9;
|
||||||
|
*fp++ = ((-diff) & 0xff);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT)
|
||||||
|
ADDI(&fp, R8, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
|
||||||
|
|
||||||
|
|
||||||
|
if(pps[0] == 2*leafN) {
|
||||||
|
CALL(&fp, x_4_addr);
|
||||||
|
// }else if(!pps[2]){
|
||||||
|
// //uint32_t *x_8_t_addr = fp;
|
||||||
|
// memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
|
||||||
|
// fp += (neon_ee - neon_x8_t) / 4;
|
||||||
|
// //*fp++ = BL(fp+2, x_8_t_addr);
|
||||||
|
}else{
|
||||||
|
CALL(&fp, x_8_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
pAddr = pps[1] * 4;
|
||||||
|
if(pps[0] > leafN)
|
||||||
|
pN = pps[0];
|
||||||
|
pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;//LUT_offset(pps[0], leafN);
|
||||||
|
// fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
|
||||||
|
count += 4;
|
||||||
|
pps += 2;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
if(__builtin_ctzl(N) & 1){
|
||||||
|
ADDI(&fp, 2, 7, 0);
|
||||||
|
ADDI(&fp, 7, 9, 0);
|
||||||
|
ADDI(&fp, 9, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 8, 0);
|
||||||
|
ADDI(&fp, 8, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
if(p->i1) {
|
||||||
|
MOVI(&fp, 11, p->i1);
|
||||||
|
memcpy(fp, neon_oo, neon_eo - neon_oo);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000;
|
||||||
|
fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000;
|
||||||
|
fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_eo - neon_oo) / 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
*fp = LDRI(11, 1, ((uint32_t)&p->oe_ws) - ((uint32_t)p)); fp++;
|
||||||
|
|
||||||
|
memcpy(fp, neon_oe, neon_end - neon_oe);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[19] ^= 0x00200000; fp[20] ^= 0x00200000; fp[22] ^= 0x00200000; fp[23] ^= 0x00200000;
|
||||||
|
fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[40] ^= 0x00200000; fp[41] ^= 0x00200000;
|
||||||
|
fp[64] ^= 0x00200000; fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[67] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_end - neon_oe) / 4;
|
||||||
|
|
||||||
|
}else{
|
||||||
|
|
||||||
|
*fp = LDRI(11, 1, ((uint32_t)&p->eo_ws) - ((uint32_t)p)); fp++;
|
||||||
|
|
||||||
|
memcpy(fp, neon_eo, neon_oe - neon_eo);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[10] ^= 0x00200000; fp[11] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000;
|
||||||
|
fp[31] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000; fp[35] ^= 0x00200000;
|
||||||
|
fp[59] ^= 0x00200000; fp[60] ^= 0x00200000; fp[61] ^= 0x00200000; fp[62] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_oe - neon_eo) / 4;
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 7, 0);
|
||||||
|
ADDI(&fp, 7, 9, 0);
|
||||||
|
ADDI(&fp, 9, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 8, 0);
|
||||||
|
ADDI(&fp, 8, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
if(p->i1) {
|
||||||
|
MOVI(&fp, 11, p->i1);
|
||||||
|
memcpy(fp, neon_oo, neon_eo - neon_oo);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000;
|
||||||
|
fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000;
|
||||||
|
fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_eo - neon_oo) / 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if(p->i1) {
|
||||||
|
ADDI(&fp, 2, 3, 0);
|
||||||
|
ADDI(&fp, 3, 7, 0);
|
||||||
|
ADDI(&fp, 7, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 4, 0);
|
||||||
|
ADDI(&fp, 4, 8, 0);
|
||||||
|
ADDI(&fp, 8, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 5, 0);
|
||||||
|
ADDI(&fp, 5, 9, 0);
|
||||||
|
ADDI(&fp, 9, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 6, 0);
|
||||||
|
ADDI(&fp, 6, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 9, 0);
|
||||||
|
ADDI(&fp, 9, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
|
||||||
|
MOVI(&fp, 11, p->i1);
|
||||||
|
memcpy(fp, neon_ee, neon_oo - neon_ee);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000;
|
||||||
|
fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000;
|
||||||
|
fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_oo - neon_ee) / 4;
|
||||||
|
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ADDI(&fp, 2, 7, 0);
|
||||||
|
ADDI(&fp, 7, 9, 0);
|
||||||
|
ADDI(&fp, 9, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 8, 0);
|
||||||
|
ADDI(&fp, 8, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
MOVI(&fp, 11, (p->i1>0) ? p->i1 : 1);
|
||||||
|
memcpy(fp, vfp_o, vfp_x4 - vfp_o);
|
||||||
|
if(sign > 0) {
|
||||||
|
fp[22] ^= 0x00000040; fp[24] ^= 0x00000040; fp[25] ^= 0x00000040; fp[26] ^= 0x00000040;
|
||||||
|
fp[62] ^= 0x00000040; fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[66] ^= 0x00000040;
|
||||||
|
}
|
||||||
|
fp += (vfp_x4 - vfp_o) / 4;
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 3, 0);
|
||||||
|
ADDI(&fp, 3, 7, 0);
|
||||||
|
ADDI(&fp, 7, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 4, 0);
|
||||||
|
ADDI(&fp, 4, 8, 0);
|
||||||
|
ADDI(&fp, 8, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 5, 0);
|
||||||
|
ADDI(&fp, 5, 9, 0);
|
||||||
|
ADDI(&fp, 9, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 6, 0);
|
||||||
|
ADDI(&fp, 6, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
ADDI(&fp, 2, 9, 0);
|
||||||
|
ADDI(&fp, 9, 10, 0);
|
||||||
|
ADDI(&fp, 10, 2, 0);
|
||||||
|
|
||||||
|
*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
|
||||||
|
MOVI(&fp, 11, (p->i2>0) ? p->i2 : 1);
|
||||||
|
memcpy(fp, vfp_e, vfp_o - vfp_e);
|
||||||
|
if(sign > 0) {
|
||||||
|
fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040;
|
||||||
|
fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040;
|
||||||
|
fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040;
|
||||||
|
}
|
||||||
|
fp += (vfp_o - vfp_e) / 4;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
*fp = LDRI(2, 1, ((uint32_t)&p->ws) - ((uint32_t)p)); fp++; // load offsets into r12
|
||||||
|
//ADDI(&fp, 2, 1, 0);
|
||||||
|
MOVI(&fp, 1, 0);
|
||||||
|
|
||||||
|
// args: r0 - out
|
||||||
|
// r1 - N
|
||||||
|
// r2 - ws
|
||||||
|
// ADDI(&fp, 3, 1, 0); // put N into r3 for counter
|
||||||
|
|
||||||
|
int32_t pAddr = 0;
|
||||||
|
int32_t pN = 0;
|
||||||
|
int32_t pLUT = 0;
|
||||||
|
count = 2;
|
||||||
|
while(pps[0]) {
|
||||||
|
|
||||||
|
// fprintf(stderr, "size %zu at %zu - diff %zu\n", pps[0], pps[1]*4, (pps[1]*4) - pAddr);
|
||||||
|
if(!pN) {
|
||||||
|
MOVI(&fp, 1, pps[0]);
|
||||||
|
}else{
|
||||||
|
if((pps[1]*4)-pAddr) ADDI(&fp, 0, 0, (pps[1] * 4)- pAddr);
|
||||||
|
if(pps[0] - pN) ADDI(&fp, 1, 1, pps[0] - pN);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT)
|
||||||
|
ADDI(&fp, 2, 2, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
|
||||||
|
|
||||||
|
|
||||||
|
if(pps[0] == 2*leafN) {
|
||||||
|
*fp = BL(fp+2, x_4_addr); fp++;
|
||||||
|
}else if(!pps[2]){
|
||||||
|
//uint32_t *x_8_t_addr = fp;
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
|
||||||
|
if(sign < 0) {
|
||||||
|
fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
|
||||||
|
fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[70] ^= 0x00200000; fp[74] ^= 0x00200000;
|
||||||
|
fp[97] ^= 0x00200000; fp[98] ^= 0x00200000; fp[102] ^= 0x00200000; fp[104] ^= 0x00200000;
|
||||||
|
}
|
||||||
|
fp += (neon_ee - neon_x8_t) / 4;
|
||||||
|
//*fp++ = BL(fp+2, x_8_t_addr);
|
||||||
|
|
||||||
|
#else
|
||||||
|
*fp = BL(fp+2, x_8_addr); fp++;
|
||||||
|
#endif
|
||||||
|
}else{
|
||||||
|
*fp = BL(fp+2, x_8_addr); fp++;
|
||||||
|
}
|
||||||
|
|
||||||
|
pAddr = pps[1] * 4;
|
||||||
|
pN = pps[0];
|
||||||
|
pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;//LUT_offset(pps[0], leafN);
|
||||||
|
// fprintf(stderr, "LUT offset for %d is %d\n", pN, pLUT);
|
||||||
|
count += 4;
|
||||||
|
pps += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
*fp++ = 0xecbd8b10;
|
||||||
|
*fp++ = POP_LR(); count++;
|
||||||
|
#else
|
||||||
|
POP(&fp, R15);
|
||||||
|
POP(&fp, R14);
|
||||||
|
POP(&fp, R13);
|
||||||
|
POP(&fp, R12);
|
||||||
|
POP(&fp, R11);
|
||||||
|
POP(&fp, R10);
|
||||||
|
POP(&fp, RBX);
|
||||||
|
POP(&fp, RBP);
|
||||||
|
RET(&fp);
|
||||||
|
|
||||||
|
|
||||||
|
//uint8_t *pp = func;
|
||||||
|
//int counter = 0;
|
||||||
|
//do{
|
||||||
|
// printf("%02x ", *pp);
|
||||||
|
// if(counter++ % 16 == 15) printf("\n");
|
||||||
|
//} while(++pp < fp);
|
||||||
|
|
||||||
|
//printf("\n");
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// *fp++ = B(14); count++;
|
||||||
|
|
||||||
|
//for(int i=0;i<(neon_x8 - neon_x4)/4;i++)
|
||||||
|
// fprintf(stderr, "%08x\n", x_4_addr[i]);
|
||||||
|
//fprintf(stderr, "\n");
|
||||||
|
//for(int i=0;i<count;i++)
|
||||||
|
|
||||||
|
free(ps);
|
||||||
|
|
||||||
|
if (mprotect(func, p->transform_size, PROT_READ | PROT_EXEC)) {
|
||||||
|
perror("Couldn't mprotect");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
#ifdef __APPLE__
|
||||||
|
sys_icache_invalidate(func, p->transform_size);
|
||||||
|
#elif __ANDROID__
|
||||||
|
cacheflush((long)(func), (long)(func) + p->transform_size, 0);
|
||||||
|
#elif __linux__
|
||||||
|
#ifdef __GNUC__
|
||||||
|
__clear_cache((long)(func), (long)(func) + p->transform_size);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//fprintf(stderr, "size of transform %zu = %d\n", N, (fp-func)*4);
|
||||||
|
|
||||||
|
p->transform = (void *) (start);
|
||||||
|
}
|
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __CODEGEN_H__
|
||||||
|
#define __CODEGEN_H__
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h> /* for PAGESIZE */
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
void ffts_generate_func_code(ffts_plan_t *, size_t N, size_t leafN, int sign);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,101 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __CODEGEN_ARM_H__
|
||||||
|
#define __CODEGEN_ARM_H__
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Encode an ARM "BL" (branch with link, condition AL) instruction.
 *
 * pos    - address the 24-bit word offset is measured from. Callers in the
 *          code generator pass the instruction address plus two words.
 * target - address of the routine to call.
 *
 * Returns the 32-bit instruction word: 0xeb000000 with the signed word
 * distance (target - pos) / 4 in the low 24 bits.
 */
uint32_t BL(void *pos, void *target) {
  /* Subtract as byte pointers: arithmetic on void * is a GNU extension. */
  const int32_t words =
      (int32_t)(((const uint8_t *)target - (const uint8_t *)pos) / 4);

  return 0xeb000000u | ((uint32_t)words & 0x00ffffffu);
}
|
||||||
|
|
||||||
|
/*
 * Build the instruction word 0xe12fff10 | r (the ARM "BX r" encoding).
 * The register number is OR-ed in unmasked, so r is expected to be 0..15.
 */
uint32_t B(uint8_t r) {
  const uint32_t bx_template = 0xe12fff10;
  uint32_t insn = bx_template;

  insn |= r;
  return insn;
}
|
||||||
|
|
||||||
|
/*
 * Encode the ARM register move "MOV dst, src".
 * Only the low four bits of each register number are used.
 */
uint32_t MOV(uint8_t dst, uint8_t src) {
  uint32_t insn = 0xe1a00000;

  insn |= (src & 0xf);        /* source register: bits 0..3   */
  insn |= (dst & 0xf) << 12;  /* destination:     bits 12..15 */
  return insn;
}
|
||||||
|
|
||||||
|
/*
 * Emit ARM instructions computing dst = src + imm.
 *
 * p   - cursor into the instruction buffer; advanced past every emitted word.
 * dst - destination register (low four bits used).
 * src - source register (low four bits used).
 * imm - signed constant to add; negative values are emitted as SUB.
 *
 * An ARM data-processing immediate is an 8-bit value rotated right by an even
 * amount, so the magnitude is peeled off in 8-bit chunks starting at its
 * lowest set bit (rounded down to an even position). The first instruction
 * reads src; every following instruction accumulates into dst.
 *
 * Fixes relative to the previous implementation:
 *  - imm == 0 no longer reaches __builtin_ctzl(0), which is undefined;
 *  - continuation instructions read dst instead of src, so the partial sum
 *    is kept when dst != src;
 *  - the shift is capped at 24 (not 23), so the chunk is never empty for a
 *    non-zero magnitude and the recursion always terminates.
 */
void ADDI(uint32_t **p, uint8_t dst, uint8_t src, int32_t imm) {
  const int negative = (imm < 0);
  /* 0xe2400000 = SUB Rd, Rn, #imm ; 0xe2800000 = ADD Rd, Rn, #imm */
  const uint32_t opcode = negative ? 0xe2400000u : 0xe2800000u;
  /* Unsigned negation keeps INT32_MIN well defined. */
  const uint32_t mag = negative ? (0u - (uint32_t)imm) : (uint32_t)imm;

  /* Even bit position at which the 8-bit chunk starts. */
  uint32_t shift = mag ? (uint32_t)__builtin_ctzl(mag) : 24u;
  if (shift > 24u) shift = 24u;
  shift &= ~1u;

  const uint32_t chunk = (mag >> shift) & 0xffu;
  /* Rotate field: the chunk is rotated right by 2*rot, i.e. left by shift. */
  const uint32_t rot = ((32u - shift) / 2u) & 0xfu;

  *(*p)++ = opcode | ((src & 0xfu) << 16) | ((dst & 0xfu) << 12)
          | (rot << 8) | chunk;

  /* Whatever did not fit into this chunk is added on top of dst. */
  const uint32_t rest = mag - (chunk << shift);
  if (rest) {
    ADDI(p, dst, dst, negative ? -(int32_t)rest : (int32_t)rest);
  }
}
|
||||||
|
|
||||||
|
/*
 * Encode the ARM load "LDR dst, [base, #offset]".
 * Register numbers are masked to four bits and the offset to twelve bits.
 */
uint32_t LDRI(uint8_t dst, uint8_t base, uint32_t offset) {
  uint32_t insn = 0xe5900000;

  insn |= (base & 0xf) << 16;  /* base register:        bits 16..19 */
  insn |= (dst & 0xf) << 12;   /* destination register: bits 12..15 */
  insn |= offset & 0xfff;      /* 12-bit offset:        bits 0..11  */
  return insn;
}
|
||||||
|
|
||||||
|
/*
 * Emit ARM instructions loading the constant imm into register dst.
 *
 * p   - cursor into the instruction buffer; advanced past every emitted word.
 * dst - destination register (low four bits used).
 * imm - 32-bit constant.
 *
 * The first instruction is "MOV dst, #chunk" where chunk is the lowest eight
 * significant bits of imm at an even bit position. If bits remain above that
 * chunk, they are added with ADDI(p, dst, dst, remainder).
 *
 * imm == 0 is handled explicitly because __builtin_ctzl(0) is undefined.
 * The shift is capped at 24 so a non-zero constant always yields a non-empty
 * chunk.
 */
void MOVI(uint32_t **p, uint8_t dst, uint32_t imm) {
  /* Even bit position at which the 8-bit chunk starts. */
  uint32_t shift = imm ? (uint32_t)__builtin_ctzl(imm) : 24u;
  if (shift > 24u) shift = 24u;
  shift &= ~1u;

  const uint32_t chunk = (imm >> shift) & 0xffu;
  /* Rotate field: the chunk is rotated right by 2*rot, i.e. left by shift. */
  const uint32_t rot = ((32u - shift) / 2u) & 0xfu;

  /* 0xe3a00000 = MOV Rd, #imm */
  *(*p)++ = 0xe3a00000u | ((dst & 0xfu) << 12) | (rot << 8) | chunk;

  const uint32_t rest = imm - (chunk << shift);
  if (rest) ADDI(p, dst, dst, (int32_t)rest);
}
|
||||||
|
|
||||||
|
/* Prologue instruction word: "STMDB sp!, {r4-r11, lr}". */
uint32_t PUSH_LR() {
  const uint32_t push_r4_r11_lr = 0xe92d4ff0;
  return push_r4_r11_lr;
}
|
||||||
|
/* Epilogue instruction word: "LDMIA sp!, {r4-r11, pc}" (restores and returns). */
uint32_t POP_LR() {
  const uint32_t pop_r4_r11_pc = 0xe8bd8ff0;
  return pop_r4_r11_pc;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,195 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __CODEGEN_SSE_H__
|
||||||
|
#define __CODEGEN_SSE_H__
|
||||||
|
|
||||||
|
void neon_x4(float *, size_t, float *);
|
||||||
|
void neon_x8(float *, size_t, float *);
|
||||||
|
void neon_x8_t(float *, size_t, float *);
|
||||||
|
void leaf_ee_init();
|
||||||
|
void leaf_ee();
|
||||||
|
void leaf_oo();
|
||||||
|
void leaf_eo();
|
||||||
|
void leaf_oe();
|
||||||
|
void leaf_end();
|
||||||
|
void x_init();
|
||||||
|
void x4();
|
||||||
|
void x8_soft();
|
||||||
|
void x8_hard();
|
||||||
|
|
||||||
|
void sse_constants();
|
||||||
|
void sse_constants_inv();
|
||||||
|
|
||||||
|
// typedef uint8_t insns_t;
|
||||||
|
|
||||||
|
extern const uint32_t sse_leaf_ee_offsets[8];
|
||||||
|
extern const uint32_t sse_leaf_oo_offsets[8];
|
||||||
|
extern const uint32_t sse_leaf_eo_offsets[8];
|
||||||
|
extern const uint32_t sse_leaf_oe_offsets[8];
|
||||||
|
|
||||||
|
/*
 * x86 register numbers as used by the emitters in this header.
 * Values 8..15 need an extra prefix byte, which the emitters add themselves;
 * only the low three bits go into the opcode / ModRM register fields.
 */

/* 32-bit names */
#define EAX 0
#define ECX 1
#define EDX 2
#define EBX 3
#define EBP 5
#define ESI 6
#define EDI 7

/* 64-bit names */
#define RAX 0
#define RCX 1
#define RDX 2
#define RBX 3
#define RBP 5
#define RSI 6
#define RDI 7
#define R8  8
#define R9  9
#define R10 10
#define R11 11
#define R12 12
#define R13 13
#define R14 14
#define R15 15
|
||||||
|
|
||||||
|
/* Append the low byte of imm at *p and advance the cursor by one. */
void IMM8(uint8_t **p, int32_t imm) {
  uint8_t *out = *p;

  out[0] = (imm & 0xff);
  *p = out + 1;
}
|
||||||
|
|
||||||
|
/*
 * Append the low 16 bits of imm at *p in little-endian byte order and
 * advance the cursor by two.
 */
void IMM16(uint8_t **p, int32_t imm) {
  uint8_t *out = *p;

  out[0] = (imm & 0x00ff);
  out[1] = (imm & 0xff00) >> 8;
  *p = out + 2;
}
|
||||||
|
/*
 * Append imm at *p as four little-endian bytes and advance the cursor by
 * four.
 *
 * The value is split as an unsigned quantity: the previous mask
 * (0xff << 24) overflowed a signed int, which is undefined behaviour.
 */
void IMM32(uint8_t **p, int32_t imm) {
  const uint32_t bits = (uint32_t)imm;
  int i;

  for (i = 0; i < 4; i++) {
    *(*p)++ = (uint8_t)((bits >> (i * 8)) & 0xffu);
  }
}
|
||||||
|
/*
 * Store imm at p[0..3] as four little-endian bytes without moving any
 * cursor ("NI" = no increment); used to patch immediates in copied code.
 *
 * The value is split as an unsigned quantity: the previous mask
 * (0xff << 24) overflowed a signed int, which is undefined behaviour.
 */
void IMM32_NI(uint8_t *p, int32_t imm) {
  const uint32_t bits = (uint32_t)imm;
  int i;

  for (i = 0; i < 4; i++) {
    p[i] = (uint8_t)((bits >> (i * 8)) & 0xffu);
  }
}
|
||||||
|
|
||||||
|
/*
 * Read four little-endian bytes at p[0..3] and return them as a signed
 * 32-bit value.
 *
 * The bytes are accumulated in an unsigned variable: shifting a promoted
 * byte >= 0x80 left by 24 as a signed int is undefined behaviour.
 */
int32_t READ_IMM32(uint8_t *p) {
  uint32_t bits = 0;
  int i;

  for (i = 0; i < 4; i++) {
    bits |= (uint32_t)p[i] << (i * 8);
  }
  return (int32_t)bits;
}
|
||||||
|
|
||||||
|
/*
 * Emit x86 "MOV r32, imm32" loading imm into register dst.
 *
 * p   - cursor into the code buffer; advanced past the emitted bytes.
 * dst - register number; 8..15 get the 0x41 prefix byte.
 * imm - constant, always emitted as a full 32-bit immediate.
 */
void MOVI(uint8_t **p, uint8_t dst, uint32_t imm) {
  uint8_t *out = *p;

  if (dst >= 8) {
    *out++ = 0x41;
  }
  *out++ = 0xb8 | (dst & 0x7);  /* opcode B8+r */
  *p = out;

  IMM32(p, imm);
}
|
||||||
|
|
||||||
|
/*
 * Emit the ModRM byte, plus displacement, for a [rm + disp] memory operand.
 *
 * p    - cursor into the code buffer; advanced past the emitted bytes.
 * reg  - register operand (low three bits go into ModRM.reg).
 * rm   - base register (low three bits go into ModRM.rm).
 * disp - displacement: 0 emits no displacement, -128..127 emits one byte,
 *        anything else emits four bytes.
 *
 * The 8-bit range test previously read "disp <= 127 || disp >= -128", which
 * is always true, so large displacements were truncated to a single byte.
 *
 * NOTE(review): base registers whose low three bits are 4 (needs a SIB byte)
 * or 5 with disp == 0 (selects a different addressing form) are not
 * special-cased here - confirm callers never pass them.
 */
void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) {
  const uint8_t regrm = (uint8_t)((rm & 7) | ((reg & 7) << 3));

  if (disp == 0) {
    *(*p)++ = regrm;             /* mod = 00: no displacement */
  } else if (disp >= -128 && disp <= 127) {
    *(*p)++ = 0x40 | regrm;      /* mod = 01: disp8 */
    IMM8(p, disp);
  } else {
    *(*p)++ = 0x80 | regrm;      /* mod = 10: disp32 */
    IMM32(p, disp);
  }
}
|
||||||
|
|
||||||
|
/*
 * Emit a 64-bit "LEA dst, [base + disp]".
 *
 * p    - cursor into the code buffer; advanced past the emitted bytes.
 * dst  - destination register number (0..15).
 * base - base register number (0..15).
 * disp - displacement, encoded by ADDRMODE.
 */
void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) {
  uint8_t prefix = 0x48;            /* 64-bit operand size */

  if (base & 0x8) prefix |= 0x01;   /* base is r8..r15 */
  if (dst & 0x8)  prefix |= 0x04;   /* dst is r8..r15  */

  *(*p)++ = prefix;
  *(*p)++ = 0x8d;                   /* LEA opcode */
  ADDRMODE(p, dst, base, disp);
}
|
||||||
|
|
||||||
|
/* Emit the one-byte x86 near return (0xc3) and advance the cursor. */
void RET(uint8_t **p) {
  uint8_t *out = *p;

  out[0] = 0xc3;
  *p = out + 1;
}
|
||||||
|
|
||||||
|
/*
 * Emit a 64-bit "ADD dst, imm".
 *
 * p   - cursor into the code buffer; advanced past the emitted bytes.
 * dst - register number; 8..15 select the 0x49 prefix, others 0x48.
 * imm - constant; values in -127..127 use the one-byte immediate form
 *       (opcode 0x83), everything else the four-byte form (opcode 0x81).
 */
void ADDI(uint8_t **p, uint8_t dst, int32_t imm) {
  const int wide = (imm > 127 || imm <= -128);
  uint8_t *out = *p;

  *out++ = (dst >= 8) ? 0x49 : 0x48;
  *out++ = wide ? 0x81 : 0x83;
  *out++ = 0xc0 | (dst & 0x7);   /* register-direct operand, /0 = ADD */
  *p = out;

  if (wide) {
    IMM32(p, imm);
  } else {
    IMM8(p, imm);
  }
}
|
||||||
|
|
||||||
|
/*
 * Emit an x86 near relative call (opcode 0xe8) to func.
 *
 * p    - cursor into the code buffer; advanced past the five emitted bytes.
 * func - address of the call target.
 *
 * The 32-bit displacement is relative to the end of the instruction, which
 * lies four bytes after the position following the opcode byte.
 */
void CALL(uint8_t **p, uint8_t *func) {
  *(*p)++ = 0xe8;

  const uint8_t *after_opcode = *p;
  IMM32(p, func - after_opcode - 4);
}
|
||||||
|
|
||||||
|
/*
 * Emit x86 "PUSH reg" (opcode 0x50 + low three register bits).
 * Registers 8..15 are preceded by the 0x41 prefix byte.
 */
void PUSH(uint8_t **p, uint8_t reg) {
  uint8_t *out = *p;

  if (reg >= 8) {
    *out++ = 0x41;
  }
  *out++ = 0x50 | (reg & 7);
  *p = out;
}
|
||||||
|
/*
 * Emit x86 "POP reg" (opcode 0x58 + low three register bits).
 * Registers 8..15 are preceded by the 0x41 prefix byte.
 */
void POP(uint8_t **p, uint8_t reg) {
  uint8_t *out = *p;

  if (reg >= 8) {
    *out++ = 0x41;
  }
  *out++ = 0x58 | (reg & 7);
  *p = out;
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,398 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "ffts.h"
|
||||||
|
#include "macros.h"
|
||||||
|
//#include "mini_macros.h"
|
||||||
|
#include "patterns.h"
|
||||||
|
#include "ffts_small.h"
|
||||||
|
|
||||||
|
#ifdef DYNAMIC_DISABLED
|
||||||
|
#include "ffts_static.h"
|
||||||
|
#else
|
||||||
|
#include "codegen.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <limits.h> /* for PAGESIZE */
|
||||||
|
|
||||||
|
#if __APPLE__
|
||||||
|
#include <libkern/OSCacheControl.h>
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Run the transform stored in plan p.
 *
 * in  - input buffer, handed to the plan's transform as const float *.
 * out - output buffer, handed to the plan's transform as float *.
 */
void ffts_execute(ffts_plan_t *p, const void * in, void * out) {
  const float *src = (const float *)in;
  float *dst = (float *)out;

  p->transform(p, src, dst);
}
|
||||||
|
|
||||||
|
/* Release plan p by calling the destroy callback stored in the plan. */
void ffts_free(ffts_plan_t *p) {
  (*p->destroy)(p);
}
|
||||||
|
|
||||||
|
/*
 * Destroy a 1-D plan: release its lookup tables, index arrays and transform
 * list, unmap the generated code region if there is one, then free the plan.
 *
 * If the code region cannot be made writable again, the error is reported
 * with perror() and the process exits with the errno value of the failed
 * mprotect() call.
 */
void ffts_free_1d(ffts_plan_t *p) {

  /* p->ws is released with FFTS_FREE rather than free(); keep the guard
     because FFTS_FREE's behaviour on NULL is not visible here. */
  if(p->ws) {
    FFTS_FREE(p->ws);
  }

  /* free(NULL) is a no-op, so these need no guards. */
  free(p->is);
  free(p->ws_is);
  free(p->offsets);
  free(p->transforms);

  if(p->transform_base) {
    if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) {
      /* perror() may overwrite errno, so capture it first. */
      const int saved_errno = errno;
      perror("Couldn't mprotect");
      exit(saved_errno);
    }
    munmap(p->transform_base, p->transform_size);
  }

  free(p);
}
|
||||||
|
|
||||||
|
ffts_plan_t *ffts_init_1d(size_t N, int sign) {
|
||||||
|
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||||
|
size_t leafN = 8;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
//#ifdef HAVE_NEON
|
||||||
|
V MULI_SIGN;
|
||||||
|
|
||||||
|
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||||
|
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
|
||||||
|
//#endif
|
||||||
|
#else
|
||||||
|
V MULI_SIGN;
|
||||||
|
|
||||||
|
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
|
||||||
|
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
p->transform = NULL;
|
||||||
|
p->transform_base = NULL;
|
||||||
|
p->transforms = NULL;
|
||||||
|
p->is = NULL;
|
||||||
|
p->ws_is = NULL;
|
||||||
|
p->ws = NULL;
|
||||||
|
p->offsets = NULL;
|
||||||
|
p->destroy = ffts_free_1d;
|
||||||
|
|
||||||
|
if(N >= 32) {
|
||||||
|
ffts_init_offsets(p, N, leafN);
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
ffts_init_is(p, N, leafN, 1);
|
||||||
|
#else
|
||||||
|
ffts_init_is(p, N, leafN, 1);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
ffts_init_is(p, N, leafN, 1);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
p->i0 = N/leafN/3+1;
|
||||||
|
p->i1 = N/leafN/3;
|
||||||
|
if((N/leafN) % 3 > 1) p->i1++;
|
||||||
|
p->i2 = N/leafN/3;
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
p->i0/=2;
|
||||||
|
p->i1/=2;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
p->i0/=2;
|
||||||
|
p->i1/=2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}else{
|
||||||
|
p->transforms = malloc(2 * sizeof(transform_index_t));
|
||||||
|
p->transforms[0] = 0;
|
||||||
|
p->transforms[1] = 1;
|
||||||
|
if(N == 2) p->transform = &firstpass_2;
|
||||||
|
else if(N == 4 && sign == -1) p->transform = &firstpass_4_f;
|
||||||
|
else if(N == 4 && sign == 1) p->transform = &firstpass_4_b;
|
||||||
|
else if(N == 8 && sign == -1) p->transform = &firstpass_8_f;
|
||||||
|
else if(N == 8 && sign == 1) p->transform = &firstpass_8_b;
|
||||||
|
else if(N == 16 && sign == -1) p->transform = &firstpass_16_f;
|
||||||
|
else if(N == 16 && sign == 1) p->transform = &firstpass_16_b;
|
||||||
|
|
||||||
|
p->is = NULL;
|
||||||
|
p->offsets = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int hardcoded = 0;
|
||||||
|
|
||||||
|
/* LUTS */
|
||||||
|
size_t n_luts = __builtin_ctzl(N/leafN);
|
||||||
|
if(N < 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
|
||||||
|
|
||||||
|
if(n_luts >= 32) n_luts = 0;
|
||||||
|
|
||||||
|
// fprintf(stderr, "n_luts = %zu\n", n_luts);
|
||||||
|
|
||||||
|
cdata_t *w;
|
||||||
|
|
||||||
|
int n = leafN*2;
|
||||||
|
if(hardcoded) n = 8;
|
||||||
|
|
||||||
|
size_t lut_size = 0;
|
||||||
|
|
||||||
|
for(i=0;i<n_luts;i++) {
|
||||||
|
if(!i || hardcoded) {
|
||||||
|
#ifdef __arm__
|
||||||
|
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||||
|
else lut_size += n/4 * sizeof(cdata_t);
|
||||||
|
#else
|
||||||
|
lut_size += n/4 * 2 * sizeof(cdata_t);
|
||||||
|
#endif
|
||||||
|
n *= 2;
|
||||||
|
} else {
|
||||||
|
#ifdef __arm__
|
||||||
|
lut_size += n/8 * 3 * sizeof(cdata_t);
|
||||||
|
#else
|
||||||
|
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
n *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// lut_size *= 16;
|
||||||
|
|
||||||
|
// fprintf(stderr, "lut size = %zu\n", lut_size);
|
||||||
|
if(n_luts) {
|
||||||
|
p->ws = FFTS_MALLOC(lut_size,32);
|
||||||
|
p->ws_is = malloc(n_luts * sizeof(size_t));
|
||||||
|
}else{
|
||||||
|
p->ws = NULL;
|
||||||
|
p->ws_is = NULL;
|
||||||
|
}
|
||||||
|
w = p->ws;
|
||||||
|
|
||||||
|
n = leafN*2;
|
||||||
|
if(hardcoded) n = 8;
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for(i=0;i<n_luts;i++) {
|
||||||
|
p->ws_is[i] = w - (cdata_t *)p->ws;
|
||||||
|
//fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
|
||||||
|
|
||||||
|
if(!i || hardcoded) {
|
||||||
|
cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
|
||||||
|
|
||||||
|
size_t j;
|
||||||
|
for(j=0;j<n/4;j++) {
|
||||||
|
w0[j][0] = W_re(n,j);
|
||||||
|
w0[j][1] = W_im(n,j);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float *fw0 = (float *)w0;
|
||||||
|
#ifdef __arm__
|
||||||
|
if(N < 32) {
|
||||||
|
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
|
||||||
|
float *fw = (float *)w;
|
||||||
|
V temp0, temp1, temp2;
|
||||||
|
for(j=0;j<n/4;j+=2) {
|
||||||
|
// #ifdef HAVE_NEON
|
||||||
|
temp0 = VLD(fw0 + j*2);
|
||||||
|
V re, im;
|
||||||
|
re = VDUPRE(temp0);
|
||||||
|
im = VDUPIM(temp0);
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
im = VXOR(im, MULI_SIGN);
|
||||||
|
//im = IMULI(sign>0, im);
|
||||||
|
#else
|
||||||
|
im = MULI(sign>0, im);
|
||||||
|
#endif
|
||||||
|
VST(fw + j*4 , re);
|
||||||
|
VST(fw + j*4+4, im);
|
||||||
|
// #endif
|
||||||
|
}
|
||||||
|
w += n/4 * 2;
|
||||||
|
}else{
|
||||||
|
//w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
|
||||||
|
float *fw = (float *)w;
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
VS temp0, temp1, temp2;
|
||||||
|
for(j=0;j<n/4;j+=4) {
|
||||||
|
temp0 = VLD2(fw0 + j*2);
|
||||||
|
temp0.val[1] = VXOR(temp0.val[1], neg);
|
||||||
|
STORESPR(fw + j*2, temp0);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for(j=0;j<n/4;j+=1) {
|
||||||
|
fw[j*2] = fw0[j*2];
|
||||||
|
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
w += n/4;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
|
||||||
|
float *fw = (float *)w;
|
||||||
|
V temp0, temp1, temp2;
|
||||||
|
for(j=0;j<n/4;j+=2) {
|
||||||
|
temp0 = VLD(fw0 + j*2);
|
||||||
|
V re, im;
|
||||||
|
re = VDUPRE(temp0);
|
||||||
|
im = VDUPIM(temp0);
|
||||||
|
im = VXOR(im, MULI_SIGN);
|
||||||
|
VST(fw + j*4 , re);
|
||||||
|
VST(fw + j*4+4, im);
|
||||||
|
}
|
||||||
|
w += n/4 * 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FFTS_FREE(w0);
|
||||||
|
}else{
|
||||||
|
|
||||||
|
cdata_t *w0 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||||
|
cdata_t *w1 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||||
|
cdata_t *w2 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
|
||||||
|
|
||||||
|
size_t j;
|
||||||
|
for(j=0;j<n/8;j++) {
|
||||||
|
w0[j][0] = W_re(n,j*2);
|
||||||
|
w0[j][1] = W_im(n,j*2);
|
||||||
|
w1[j][0] = W_re(n,j);
|
||||||
|
w1[j][1] = W_im(n,j);
|
||||||
|
w2[j][0] = W_re(n,j + (n/8));
|
||||||
|
w2[j][1] = W_im(n,j + (n/8));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
float *fw0 = (float *)w0;
|
||||||
|
float *fw1 = (float *)w1;
|
||||||
|
float *fw2 = (float *)w2;
|
||||||
|
#ifdef __arm__
|
||||||
|
//w = FFTS_MALLOC(n/8 * 3 * sizeof(cdata_t), 32);
|
||||||
|
float *fw = (float *)w;
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
VS temp0, temp1, temp2;
|
||||||
|
for(j=0;j<n/8;j+=4) {
|
||||||
|
temp0 = VLD2(fw0 + j*2);
|
||||||
|
temp0.val[1] = VXOR(temp0.val[1], neg);
|
||||||
|
STORESPR(fw + j*2*3, temp0);
|
||||||
|
temp1 = VLD2(fw1 + j*2);
|
||||||
|
temp1.val[1] = VXOR(temp1.val[1], neg);
|
||||||
|
STORESPR(fw + j*2*3 + 8, temp1);
|
||||||
|
temp2 = VLD2(fw2 + j*2);
|
||||||
|
temp2.val[1] = VXOR(temp2.val[1], neg);
|
||||||
|
STORESPR(fw + j*2*3 + 16, temp2);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
for(j=0;j<n/8;j+=1) {
|
||||||
|
fw[j*6] = fw0[j*2];
|
||||||
|
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
|
||||||
|
fw[j*6+2] = fw1[j*2+0];
|
||||||
|
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
|
||||||
|
fw[j*6+4] = fw2[j*2+0];
|
||||||
|
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
w += n/8 * 3;
|
||||||
|
#else
|
||||||
|
//w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32);
|
||||||
|
float *fw = (float *)w;
|
||||||
|
V temp0, temp1, temp2, re, im;
|
||||||
|
for(j=0;j<n/8;j+=2) {
|
||||||
|
temp0 = VLD(fw0 + j*2);
|
||||||
|
re = VDUPRE(temp0);
|
||||||
|
im = VDUPIM(temp0);
|
||||||
|
im = VXOR(im, MULI_SIGN);
|
||||||
|
VST(fw + j*2*6 , re);
|
||||||
|
VST(fw + j*2*6+4, im);
|
||||||
|
|
||||||
|
temp1 = VLD(fw1 + j*2);
|
||||||
|
re = VDUPRE(temp1);
|
||||||
|
im = VDUPIM(temp1);
|
||||||
|
im = VXOR(im, MULI_SIGN);
|
||||||
|
VST(fw + j*2*6+8 , re);
|
||||||
|
VST(fw + j*2*6+12, im);
|
||||||
|
|
||||||
|
temp2 = VLD(fw2 + j*2);
|
||||||
|
re = VDUPRE(temp2);
|
||||||
|
im = VDUPIM(temp2);
|
||||||
|
im = VXOR(im, MULI_SIGN);
|
||||||
|
VST(fw + j*2*6+16, re);
|
||||||
|
VST(fw + j*2*6+20, im);
|
||||||
|
}
|
||||||
|
w += n/8 * 3 * 2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FFTS_FREE(w0);
|
||||||
|
FFTS_FREE(w1);
|
||||||
|
FFTS_FREE(w2);
|
||||||
|
}
|
||||||
|
///p->ws[i] = w;
|
||||||
|
|
||||||
|
n *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
float *tmp = (float *)p->ws;
|
||||||
|
|
||||||
|
if(sign < 0) {
|
||||||
|
p->oe_ws = (void *)(&w_data[4]);
|
||||||
|
p->ee_ws = (void *)(w_data);
|
||||||
|
p->eo_ws = (void *)(&w_data[4]);
|
||||||
|
}else{
|
||||||
|
p->oe_ws = (void *)(w_data + 12);
|
||||||
|
p->ee_ws = (void *)(w_data + 8);
|
||||||
|
p->eo_ws = (void *)(w_data + 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
p->N = N;
|
||||||
|
p->lastlut = w;
|
||||||
|
p->n_luts = n_luts;
|
||||||
|
#ifdef DYNAMIC_DISABLED
|
||||||
|
if(sign < 0) {
|
||||||
|
if(N >= 32) p->transform = ffts_static_transform_f;
|
||||||
|
}else{
|
||||||
|
if(N >= 32) p->transform = ffts_static_transform_i;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
if(N>=32) ffts_generate_func_code(p, N, leafN, sign);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,177 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef __CP_SSE_H__
|
||||||
|
#define __CP_SSE_H__
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
//#include <stdalign.h>
|
||||||
|
|
||||||
|
//#include "codegen.h"
|
||||||
|
#include "types.h"
|
||||||
|
|
||||||
|
#define PI 3.1415926535897932384626433832795028841971693993751058209
|
||||||
|
|
||||||
|
/*
 * Constant table that ffts_init_1d() points the plan's ee_ws / oe_ws / eo_ws
 * members at:
 *
 *   floats 0..7   used when sign < 0  (ee_ws = w_data,     oe_ws = eo_ws = &w_data[4])
 *   floats 8..15  used otherwise      (ee_ws = w_data + 8, oe_ws = eo_ws = w_data + 12)
 *
 * The second half holds the same magnitudes as the first half with every
 * entry non-negative.  The literals are intentionally left exactly as
 * written (a mix of double and float constants).
 */
static const __attribute__ ((aligned(64))) float w_data[16] = {
    /* sign < 0: ee_ws */
    0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
    -0.70710678118654757273731092936941, -0.70710678118654746171500846685376,
    /* sign < 0: oe_ws / eo_ws */
    1.0f, 0.70710678118654757273731092936941f,
    -0.0f, -0.70710678118654746171500846685376,
    /* sign >= 0: ee_ws */
    0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
    0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
    /* sign >= 0: oe_ws / eo_ws */
    1.0f, 0.70710678118654757273731092936941f,
    0.0f, 0.70710678118654746171500846685376
};
|
||||||
|
|
||||||
|
/*
 * Real part of the twiddle factor exp(-2*pi*i*k/N), i.e. cos(-2*pi*k/N).
 * The angle is evaluated in double precision (PI is a double constant) and
 * the result is narrowed to float on return.
 */
__INLINE float W_re(float N, float k) {
    const double angle = -2.0f * PI * k / N;

    return (float)cos(angle);
}
|
||||||
|
/*
 * Imaginary part of the twiddle factor exp(-2*pi*i*k/N), i.e. sin(-2*pi*k/N).
 * The angle is evaluated in double precision (PI is a double constant) and
 * the result is narrowed to float on return.
 */
__INLINE float W_im(float N, float k) {
    const double angle = -2.0f * PI * k / N;

    return (float)sin(angle);
}
|
||||||
|
|
||||||
|
/* Element type of the plan's `transforms` array. */
typedef size_t transform_index_t;

/* Signature of a stand-alone transform routine taking data, a size and a LUT. */
typedef void (*transform_func_t)(float *data, size_t N, float *LUT);

typedef struct _ffts_plan_t ffts_plan_t;

/**
 * Contains all the information needed to perform an FFT.
 *
 * DO NOT CHANGE THE ORDER OF MEMBERS:
 * ASSEMBLY CODE USES HARD-CODED OFFSETS TO REFERENCE
 * SOME OF THESE VARIABLES!
 */
struct _ffts_plan_t {

    /** (undocumented in the original source) */
    ptrdiff_t *offsets;

#ifdef DYNAMIC_DISABLED
    /** Twiddle factors. */
    void *ws;

    /**
     * ee - 2 size x size8
     * oo - 2 x size4 in parallel
     * oe -
     */
    void *oe_ws;
    void *eo_ws;
    void *ee_ws;
#else
    /*
     * Same members as above, each forced to 32-byte alignment.  The
     * multi-declarator line is kept as one declaration on purpose so the
     * layout is exactly what the compiler produced for the original.
     */
    void __attribute__((aligned(32))) *ws;
    void __attribute__((aligned(32))) *oe_ws, *eo_ws, *ee_ws;
#endif

    /** Pointer into an array of precomputed indexes for the input data array. */
    ptrdiff_t *is;

    /**
     * Twiddle factor indexes: ffts_init_1d() stores, for each LUT, its
     * offset from the start of `ws`.
     */
    size_t *ws_is;

    /** Sizes of the loops for the base cases (i0, i1) and the number of LUTs. */
    size_t i0;
    size_t i1;
    size_t n_luts;

    /** Size of the transform. */
    size_t N;

    /** Set by ffts_init_1d() to the position just past the last LUT written. */
    void *lastlut;

    /** Used in multidimensional code ?? (unclear in the original source). */
    transform_index_t *transforms;

    /**
     * Pointer to the function that executes the FFT for this plan
     * (dynamically generated code, or one of the C entry points).
     */
    void (*transform)(ffts_plan_t * , const void * , void * );

    /** Base memory address of the generated transform function. */
    void *transform_base;

    /** Size of the memory block containing the generated code. */
    size_t transform_size;

    /** Points to the constant variables used by the assembly code. */
    void *constants;

    /* Multi-dimensional (and real-transform) bookkeeping: */
    struct _ffts_plan_t **plans;  /* sub-plans */
    int rank;                     /* number of dimensions */
    size_t *Ns;                   /* per-dimension sizes */
    size_t *Ms;                   /* per-dimension volume / Ns[i] */
    void *buf;                    /* intermediate data buffer */

    void *transpose_buf;          /* scratch tile for ffts_transpose() */

    /**
     * Pointer to the destroy function used to clean up the plan after use
     * (differs for real and multi-dimensional transforms).
     */
    void (*destroy)(ffts_plan_t *);

    /** Coefficients for the real-valued transforms. */
    float *A;
    float *B;

    size_t i2;
};
|
||||||
|
|
||||||
|
|
||||||
|
/* Release a plan.  NOTE(review): definition not visible here; ffts_free_nd()
 * and ffts_free_1d_real() call it on their 1-D sub-plans. */
void ffts_free(ffts_plan_t *p);

/* Create a 1-D plan of size N.  sign < 0 and sign >= 0 select the two
 * transform directions (presumably forward / inverse -- confirm). */
ffts_plan_t *ffts_init_1d(size_t N, int sign);

/* Execute plan p on `in`, writing to `out`.  NOTE(review): definition not
 * visible here. */
void ffts_execute(ffts_plan_t *p, const void *in, void *out);
|
||||||
|
#endif
|
@ -0,0 +1,282 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ffts_nd.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include "neon.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Destroy a plan created by ffts_init_nd().
 *
 * ffts_init_nd() reuses one sub-plan for every dimension that has the same
 * Ms[] value, so a sub-plan is released only at the first dimension that
 * uses it; later dimensions with an equal Ms[] entry are skipped to avoid a
 * double free.
 */
void ffts_free_nd(ffts_plan_t *p) {
    int dim;

    for (dim = 0; dim < p->rank; dim++) {
        int earlier;
        int shared = 0;

        for (earlier = 0; earlier < dim; earlier++) {
            if (p->Ms[dim] == p->Ms[earlier]) {
                shared = 1;
                break;
            }
        }

        if (!shared && p->plans[dim]) {
            ffts_free(p->plans[dim]);
        }
    }

    free(p->Ns);
    free(p->Ms);
    free(p->plans);
    free(p->buf);
    free(p->transpose_buf);
    free(p);
}
|
||||||
|
#define TSIZE 8
|
||||||
|
#include <string.h>
|
||||||
|
/*
 * Transpose a row-major matrix of 64-bit elements.
 *
 *   in   source matrix, h rows of w elements
 *   out  destination matrix, w rows of h elements:
 *            out[i*h + j] = in[j*w + i]
 *   w    number of columns of `in`
 *   h    number of rows of `in`
 *   buf  scratch space for one 8x8 tile; only the NEON path uses it
 *
 * The NEON and SSE paths work on 8x8 tiles and use aligned vector
 * loads/stores, so there w and h are expected to be multiples of 8 and the
 * buffers suitably aligned (the SSE path simply ignores a partial tile, the
 * NEON path always processes a full one).  When neither HAVE_NEON nor
 * HAVE_SSE is defined a plain scalar transpose is used, which has no such
 * restrictions.
 */
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {

#if defined(HAVE_NEON)
    size_t i, j;

    for (j = 0; j < (size_t)h; j += 8) {
        for (i = 0; i < (size_t)w; i += 8) {
            neon_transpose_to_buf(in + j*w + i, buf, w);

            uint64_t *p = out + i*h + j;
            uint64_t *pbuf = buf;
            /* Initialised because it is passed to the asm as a read-write operand. */
            uint64_t *ptemp = p;

            /*
             * Copy the 8 rows of the tile in buf (64 bytes each) to 8
             * consecutive rows of out.  Rows of out are h elements long, so
             * the destination advances by h*8 bytes per row.
             */
            __asm__ __volatile__(
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"

                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"

                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"

                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[h], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"

                : [p] "+r" (p), [pbuf] "+r" (pbuf), [ptemp] "+r" (ptemp)
                : [h] "r" (h)
                /* every q register written above must be listed */
                : "memory", "q8", "q9", "q10", "q11",
                  "q12", "q13", "q14", "q15"
            );
        }
    }

#elif defined(HAVE_SSE)
    uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64)));
    int tx, ty;
    int y;
    int tw = w / TSIZE;
    int th = h / TSIZE;

    (void)buf;  /* the tile scratch lives on the stack in this path */

    for (ty = 0; ty < th; ty++) {
        for (tx = 0; tx < tw; tx++) {
            uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
            uint64_t *op0 = tmp;

            /* Transpose the tile into tmp, two source columns per pass. */
            for (y = 0; y < TSIZE; y += 2) {
                __m128d q0 = _mm_load_pd((double *)(ip0 + 0*w));
                __m128d q1 = _mm_load_pd((double *)(ip0 + 1*w));
                __m128d q2 = _mm_load_pd((double *)(ip0 + 2*w));
                __m128d q3 = _mm_load_pd((double *)(ip0 + 3*w));
                __m128d q4 = _mm_load_pd((double *)(ip0 + 4*w));
                __m128d q5 = _mm_load_pd((double *)(ip0 + 5*w));
                __m128d q6 = _mm_load_pd((double *)(ip0 + 6*w));
                __m128d q7 = _mm_load_pd((double *)(ip0 + 7*w));
                ip0 += 2;

                __m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
                __m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
                __m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
                __m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
                __m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
                __m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
                __m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
                __m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));

                _mm_store_pd((double *)(op0 + 0), t0);
                _mm_store_pd((double *)(op0 + 0 + TSIZE), t1);
                _mm_store_pd((double *)(op0 + 2 ), t2);
                _mm_store_pd((double *)(op0 + 2 + TSIZE), t3);
                _mm_store_pd((double *)(op0 + 4 ), t4);
                _mm_store_pd((double *)(op0 + 4 + TSIZE), t5);
                _mm_store_pd((double *)(op0 + 6 ), t6);
                _mm_store_pd((double *)(op0 + 6 + TSIZE), t7);
                op0 += 2*TSIZE;
            }

            /* Copy the transposed tile from tmp to its place in out. */
            op0 = out + h*tx*TSIZE + ty*TSIZE;
            ip0 = tmp;
            for (y = 0; y < TSIZE; y += 1) {
                __m128d q0 = _mm_load_pd((double *)(ip0 + 0));
                __m128d q1 = _mm_load_pd((double *)(ip0 + 2));
                __m128d q2 = _mm_load_pd((double *)(ip0 + 4));
                __m128d q3 = _mm_load_pd((double *)(ip0 + 6));
                _mm_store_pd((double *)(op0 + 0), q0);
                _mm_store_pd((double *)(op0 + 2), q1);
                _mm_store_pd((double *)(op0 + 4), q2);
                _mm_store_pd((double *)(op0 + 6), q3);

                op0 += h;
                ip0 += TSIZE;
            }
        }
    }

#else
    /* Portable fallback: without it the function would silently do nothing. */
    int i, j;

    (void)buf;

    for (j = 0; j < h; j++) {
        for (i = 0; i < w; i++) {
            out[(size_t)i*h + j] = in[(size_t)j*w + i];
        }
    }
#endif

}
|
||||||
|
|
||||||
|
/*
 * Execute a multi-dimensional plan built by ffts_init_nd().
 *
 * For every dimension d the data is treated as Ns[d] rows of Ms[d] 64-bit
 * elements: each row is run through the dimension's 1-D sub-plan into the
 * plan's intermediate buffer, which is then transposed into `out`.  The
 * first dimension reads from `in`; every later dimension reads the result of
 * the previous transpose from `out`.  Dimension 0 is always processed, as in
 * the original code.
 */
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) {

    uint64_t *scratch = p->buf;
    uint64_t *result = (uint64_t *)out;
    uint64_t *src = (uint64_t *)in;
    size_t dim = 0;

    do {
        ffts_plan_t *sub = p->plans[dim];
        size_t row;

        for (row = 0; row < p->Ns[dim]; row++) {
            sub->transform(sub, src + (row * p->Ms[dim]), scratch + (row * p->Ms[dim]));
        }
        ffts_transpose(scratch, result, p->Ms[dim], p->Ns[dim], p->transpose_buf);

        src = result;   /* later dimensions consume the transposed output */
        dim++;
    } while (dim < (size_t)p->rank);
}
|
||||||
|
|
||||||
|
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign) {
|
||||||
|
size_t vol = 1;
|
||||||
|
|
||||||
|
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||||
|
|
||||||
|
p->transform = &ffts_execute_nd;
|
||||||
|
p->destroy = &ffts_free_nd;
|
||||||
|
|
||||||
|
p->rank = rank;
|
||||||
|
p->Ns = malloc(sizeof(size_t) * rank);
|
||||||
|
p->Ms = malloc(sizeof(size_t) * rank);
|
||||||
|
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
|
||||||
|
int i;
|
||||||
|
for(i=0;i<rank;i++) {
|
||||||
|
p->Ns[i] = Ns[i];
|
||||||
|
vol *= Ns[i];
|
||||||
|
}
|
||||||
|
p->buf = valloc(sizeof(float) * 2 * vol);
|
||||||
|
|
||||||
|
for(i=0;i<rank;i++) {
|
||||||
|
p->Ms[i] = vol / p->Ns[i];
|
||||||
|
|
||||||
|
p->plans[i] = NULL;
|
||||||
|
int k;
|
||||||
|
for(k=0;k<i;k++) {
|
||||||
|
if(p->Ms[k] == p->Ms[i])
|
||||||
|
p->plans[i] = p->plans[k];
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
|
||||||
|
}
|
||||||
|
|
||||||
|
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Convenience wrapper: create a 2-dimensional plan with dimension sizes
 * N1 and N2 by forwarding to ffts_init_nd() with rank 2.
 */
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign) {
    size_t dims[2] = { N1, N2 };

    return ffts_init_nd(2, dims, sign);
}
|
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FFTS_ND_H__
|
||||||
|
#define __FFTS_ND_H__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Destroy a plan created by ffts_init_nd() / ffts_init_2d(), including its
 * sub-plans and buffers. */
void ffts_free_nd(ffts_plan_t *p);

/* Transpose `in` (h rows of w 64-bit elements) into `out`
 * (out[i*h + j] = in[j*w + i]); `buf` is scratch for one 8x8 tile. */
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf);

/* Execute a multi-dimensional plan on `in`, writing the result to `out`. */
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out);

/* Create a plan for a `rank`-dimensional transform with dimension sizes
 * Ns[0..rank-1]; `sign` is forwarded to ffts_init_1d(). */
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);

/* Create a 2-dimensional plan with dimension sizes N1 and N2. */
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,226 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ffts_real.h"
|
||||||
|
|
||||||
|
/*
 * Destroy a plan created by ffts_init_1d_real(): the complex sub-plan, the
 * A/B coefficient tables, the sub-plan array, the work buffer and finally
 * the plan itself.
 */
void ffts_free_1d_real(ffts_plan_t *p) {
    ffts_plan_t *sub = p->plans[0];

    ffts_free(sub);

    free(p->A);
    free(p->B);
    free(p->plans);
    free(p->buf);

    free(p);
}
|
||||||
|
|
||||||
|
/*
 * Execute a real-input plan (installed by ffts_init_1d_real() when sign < 0).
 *
 * The input is first run through the plan's complex sub-plan into the work
 * buffer; the first complex value is then duplicated at float index N so the
 * combination step can read buf[N - 2*i] for i = 0.  Each output pair is
 * formed from buf[2*i..], buf[N - 2*i..] and the A/B coefficient tables.
 * out[N] and out[N+1] are written last, so `out` receives N + 2 floats.
 */
void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) {
    float *out = (float *)vout;
    float *buf = (float *)p->buf;
    float *A = p->A;
    float *B = p->B;
    size_t N;
    size_t i;

    p->plans[0]->transform(p->plans[0], vin, buf);

    N = p->N;
    buf[N] = buf[0];
    buf[N+1] = buf[1];

#ifdef __ARM_NEON__
    {
        float *p_buf0 = buf;
        float *p_buf1 = buf + N - 2;
        float *p_out = out;

        /* NEON version of the loop below; each pass stores four floats
         * (two output pairs), hence the step of 2. */
        for (i = 0; i < N/2; i += 2) {
            __asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
                "vld1.32 {q9}, [%[pb], :128]!\n\t"
                "vld1.32 {q10}, [%[buf0], :128]!\n\t"
                "vld1.32 {q11}, [%[buf1], :64]\n\t"
                "sub %[buf1], %[buf1], #16\n\t"

                "vdup.32 d26, d16[1]\n\t"
                "vdup.32 d27, d17[1]\n\t"
                "vdup.32 d24, d16[0]\n\t"
                "vdup.32 d25, d17[0]\n\t"

                "vdup.32 d30, d23[1]\n\t"
                "vdup.32 d31, d22[1]\n\t"
                "vdup.32 d28, d23[0]\n\t"
                "vdup.32 d29, d22[0]\n\t"

                "vmul.f32 q13, q13, q10\n\t"
                "vmul.f32 q15, q15, q9\n\t"
                "vmul.f32 q12, q12, q10\n\t"
                "vmul.f32 q14, q14, q9\n\t"
                "vrev64.f32 q13, q13\n\t"
                "vrev64.f32 q15, q15\n\t"

                "vtrn.32 d26, d27\n\t"
                "vtrn.32 d30, d31\n\t"
                "vneg.f32 d26, d26\n\t"
                "vneg.f32 d31, d31\n\t"
                "vtrn.32 d26, d27\n\t"
                "vtrn.32 d30, d31\n\t"

                "vadd.f32 q12, q12, q14\n\t"
                "vadd.f32 q13, q13, q15\n\t"
                "vadd.f32 q12, q12, q13\n\t"
                "vst1.32 {q12}, [%[pout], :128]!\n\t"
                : [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
                  [pout] "+r" (p_out)
                :
                : "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
            );
        }
    }
#else
    for (i = 0; i < N/2; i++) {
        const float xr = buf[2*i];
        const float xi = buf[2*i+1];
        const float yr = buf[N-2*i];
        const float yi = buf[N-2*i+1];
        const float ar = A[2*i];
        const float ai = A[2*i+1];
        const float br = B[2*i];
        const float bi = B[2*i+1];

        out[2*i] = xr*ar - xi*ai + yr*br + yi*bi;
        out[2*i+1] = xi*ar + xr*ai + yr*bi - yi*br;
    }
#endif

    out[N] = buf[0] - buf[1];
    out[N+1] = 0.0f;
}
|
||||||
|
|
||||||
|
/*
 * Execute the counterpart of ffts_execute_1d_real() (installed by
 * ffts_init_1d_real() when sign >= 0).
 *
 * The input floats in[2*i..] and in[N - 2*i..] are first combined with the
 * A/B coefficient tables into the plan's work buffer; the buffer is then run
 * through the complex sub-plan, which writes the final result to `vout`.
 * The input is read up to float index N + 1.
 */
void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout) {
    float *out = (float *)vout;
    float *in = (float *)vin;
    float *buf = (float *)p->buf;
    float *A = p->A;
    float *B = p->B;
    size_t N = p->N;
    size_t i;

#ifdef __ARM_NEON__
    {
        float *p_buf0 = in;
        float *p_buf1 = in + N - 2;
        float *p_out = buf;

        /* NEON version of the loop below; each pass stores four floats
         * (two buffer pairs), hence the step of 2. */
        for (i = 0; i < N/2; i += 2) {
            __asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
                "vld1.32 {q9}, [%[pb], :128]!\n\t"
                "vld1.32 {q10}, [%[buf0], :128]!\n\t"
                "vld1.32 {q11}, [%[buf1], :64]\n\t"
                "sub %[buf1], %[buf1], #16\n\t"

                "vdup.32 d26, d16[1]\n\t"
                "vdup.32 d27, d17[1]\n\t"
                "vdup.32 d24, d16[0]\n\t"
                "vdup.32 d25, d17[0]\n\t"

                "vdup.32 d30, d23[1]\n\t"
                "vdup.32 d31, d22[1]\n\t"
                "vdup.32 d28, d23[0]\n\t"
                "vdup.32 d29, d22[0]\n\t"

                "vmul.f32 q13, q13, q10\n\t"
                "vmul.f32 q15, q15, q9\n\t"
                "vmul.f32 q12, q12, q10\n\t"
                "vmul.f32 q14, q14, q9\n\t"
                "vrev64.f32 q13, q13\n\t"
                "vrev64.f32 q15, q15\n\t"

                "vtrn.32 d26, d27\n\t"
                "vtrn.32 d28, d29\n\t"
                "vneg.f32 d27, d27\n\t"
                "vneg.f32 d29, d29\n\t"
                "vtrn.32 d26, d27\n\t"
                "vtrn.32 d28, d29\n\t"

                "vadd.f32 q12, q12, q14\n\t"
                "vsub.f32 q13, q13, q15\n\t"
                "vadd.f32 q12, q12, q13\n\t"
                "vst1.32 {q12}, [%[pout], :128]!\n\t"
                : [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
                  [pout] "+r" (p_out)
                :
                : "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
            );
        }
    }
#else
    for (i = 0; i < N/2; i++) {
        const float xr = in[2*i];
        const float xi = in[2*i+1];
        const float yr = in[N-2*i];
        const float yi = in[N-2*i+1];
        const float ar = A[2*i];
        const float ai = A[2*i+1];
        const float br = B[2*i];
        const float bi = B[2*i+1];

        buf[2*i] = xr*ar + xi*ai + yr*br - yi*bi;
        buf[2*i+1] = xi*ar - xr*ai - yr*bi - yi*br;
    }
#endif

    p->plans[0]->transform(p->plans[0], buf, out);
}
|
||||||
|
|
||||||
|
ffts_plan_t *ffts_init_1d_real(size_t N, int sign) {
|
||||||
|
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||||
|
|
||||||
|
if(sign < 0) p->transform = &ffts_execute_1d_real;
|
||||||
|
else p->transform = &ffts_execute_1d_real_inv;
|
||||||
|
|
||||||
|
p->destroy = &ffts_free_1d_real;
|
||||||
|
p->N = N;
|
||||||
|
p->rank = 1;
|
||||||
|
p->plans = malloc(sizeof(ffts_plan_t **) * 1);
|
||||||
|
|
||||||
|
p->plans[0] = ffts_init_1d(N/2, sign);
|
||||||
|
|
||||||
|
p->buf = valloc(sizeof(float) * 2 * ((N/2) + 1));
|
||||||
|
|
||||||
|
p->A = valloc(sizeof(float) * N);
|
||||||
|
p->B = valloc(sizeof(float) * N);
|
||||||
|
|
||||||
|
if(sign < 0) {
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < N/2; i++) {
|
||||||
|
p->A[2 * i] = 0.5 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->A[2 * i + 1] = 0.5 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->B[2 * i] = 0.5 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->B[2 * i + 1] = 0.5 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < N/2; i++) {
|
||||||
|
p->A[2 * i] = 1.0 * (1.0 - sin (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->A[2 * i + 1] = 1.0 * (-1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->B[2 * i] = 1.0 * (1.0 + sin (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
p->B[2 * i + 1] = 1.0 * (1.0 * cos (2.0f * PI / (double) (N) * (double) i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FFTS_REAL_H__
|
||||||
|
#define __FFTS_REAL_H__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,177 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ffts_real_nd.h"
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
#include "neon.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Release an n-dimensional real plan and everything it owns.
 *
 * Several entries of p->plans may point at the same sub-plan, so before a
 * sub-plan is freed every later reference to it is cleared; each distinct
 * sub-plan is therefore passed to ffts_free exactly once.
 */
void ffts_free_nd_real(ffts_plan_t *p) {
	int dim;

	for (dim = 0; dim < p->rank; dim++) {
		ffts_plan_t *sub = p->plans[dim];
		int later;

		/* Slot already cleared because an earlier slot shared this plan. */
		if (!sub) continue;

		for (later = dim + 1; later < p->rank; later++) {
			if (p->plans[later] == sub) p->plans[later] = NULL;
		}

		ffts_free(sub);
	}

	free(p->Ns);
	free(p->Ms);
	free(p->plans);
	free(p->buf);
	free(p->transpose_buf);
	free(p);
}
|
||||||
|
|
||||||
|
/*
 * Out-of-place transpose of a matrix of 64-bit elements.
 *
 * in is read as h rows of w elements; out receives w rows of h elements,
 * i.e. out[i*h + j] = in[j*w + i]. The buf argument is accepted but not
 * used by this implementation.
 */
void ffts_scalar_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
	const size_t cols = w;
	const size_t rows = h;
	uint64_t *dst = out;
	size_t c, r;

	(void)buf;

	/* out is filled sequentially; the matching input element is fetched
	 * with a stride of one input row. */
	for (c = 0; c < cols; c++) {
		for (r = 0; r < rows; r++) {
			*dst++ = in[r*cols + c];
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Execute the transform installed for sign < 0 by ffts_init_nd_real.
 *
 * Pass 0 runs the sub-plan p->plans[0] on each of the p->Ns[0] rows of the
 * input, reading Ms[0] 32-bit elements per row and writing Ms[0]/2 + 1
 * 64-bit elements per row into p->buf; the result is transposed into out.
 * Every further dimension d runs p->plans[d] on Ns[d] rows of Ms[d] 64-bit
 * elements from out into p->buf and transposes back into out.
 */
void ffts_execute_nd_real(ffts_plan_t *p, const void * in, void * out) {
	uint32_t *src = (uint32_t *)in;
	uint64_t *work = p->buf;
	uint64_t *dst = (uint64_t *)out;
	size_t row, dim;

	for (row = 0; row < p->Ns[0]; row++) {
		uint32_t *row_in = src + (row * p->Ms[0]);
		uint64_t *row_out = work + (row * (p->Ms[0] / 2 + 1));

		p->plans[0]->transform(p->plans[0], row_in, row_out);
	}
	ffts_scalar_transpose(work, dst, p->Ms[0] / 2 + 1, p->Ns[0], p->transpose_buf);

	for (dim = 1; dim < p->rank; dim++) {
		for (row = 0; row < p->Ns[dim]; row++) {
			const size_t offset = row * p->Ms[dim];

			p->plans[dim]->transform(p->plans[dim], dst + offset, work + offset);
		}
		ffts_scalar_transpose(work, dst, p->Ms[dim], p->Ns[dim], p->transpose_buf);
	}
}
|
||||||
|
|
||||||
|
/*
 * Execute the transform installed for sign >= 0 by ffts_init_nd_real.
 *
 * Steps:
 *   1. transpose the input into p->buf,
 *   2. run p->plans[0] on Ms[0] rows of Ns[0] 64-bit elements, buf -> out,
 *   3. transpose out back into p->buf,
 *   4. run p->plans[1] on Ms[1] rows, reading from buf at a stride of
 *      Ms[0] 64-bit elements and writing Ns[1] floats per row to out.
 *
 * NOTE: only plans[0] and plans[1] are ever used here, independent of
 * p->rank, so this routine is written for two-dimensional plans.
 */
void ffts_execute_nd_real_inv(ffts_plan_t *p, const void * in, void * out) {
	uint64_t *din = (uint64_t *)in;
	uint64_t *buf = p->buf;
	uint64_t *dout = (uint64_t *)out;
	float *doutr = (float *)out;	/* same memory as dout, viewed as floats */
	size_t i, j;

	ffts_scalar_transpose(din, buf, p->Ms[0], p->Ns[0], p->transpose_buf);

	for (i = 0; i < p->Ms[0]; i++) {
		p->plans[0]->transform(p->plans[0], buf + (i * p->Ns[0]), dout + (i * p->Ns[0]));
	}

	ffts_scalar_transpose(dout, buf, p->Ns[0], p->Ms[0], p->transpose_buf);

	for (j = 0; j < p->Ms[1]; j++) {
		p->plans[1]->transform(p->plans[1], buf + (j * (p->Ms[0])), &doutr[j * p->Ns[1]]);
	}
}
|
||||||
|
|
||||||
|
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign) {
|
||||||
|
size_t vol = 1;
|
||||||
|
|
||||||
|
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
|
||||||
|
|
||||||
|
if(sign < 0) p->transform = &ffts_execute_nd_real;
|
||||||
|
else p->transform = &ffts_execute_nd_real_inv;
|
||||||
|
|
||||||
|
p->destroy = &ffts_free_nd_real;
|
||||||
|
|
||||||
|
p->rank = rank;
|
||||||
|
p->Ns = malloc(sizeof(size_t) * rank);
|
||||||
|
p->Ms = malloc(sizeof(size_t) * rank);
|
||||||
|
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
|
||||||
|
int i;
|
||||||
|
for(i=0;i<rank;i++) {
|
||||||
|
p->Ns[i] = Ns[i];
|
||||||
|
vol *= Ns[i];
|
||||||
|
}
|
||||||
|
p->buf = valloc(sizeof(float) * 2 * vol);
|
||||||
|
|
||||||
|
for(i=0;i<rank;i++) {
|
||||||
|
p->Ms[i] = vol / p->Ns[i];
|
||||||
|
|
||||||
|
p->plans[i] = NULL;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
if(sign < 0) {
|
||||||
|
for(k=1;k<i;k++) {
|
||||||
|
if(p->Ms[k] == p->Ms[i]) p->plans[i] = p->plans[k];
|
||||||
|
}
|
||||||
|
if(!i) p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
|
||||||
|
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
|
||||||
|
}else{
|
||||||
|
for(k=0;k<i;k++) {
|
||||||
|
if(p->Ns[k] == p->Ns[i]) p->plans[i] = p->plans[k];
|
||||||
|
}
|
||||||
|
if(i==rank-1) p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
|
||||||
|
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ns[i], sign);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(sign < 0) {
|
||||||
|
for(i=1;i<rank;i++) {
|
||||||
|
p->Ns[i] = p->Ns[i] / 2 + 1;
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
for(i=0;i<rank-1;i++) {
|
||||||
|
p->Ms[i] = p->Ms[i] / 2 + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Convenience wrapper: create a two-dimensional real plan with extents
 * N1 and N2 by forwarding to ffts_init_nd_real with rank 2.
 */
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign) {
	size_t extents[2] = { N1, N2 };

	return ffts_init_nd_real(2, extents, sign);
}
|
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FFTS_REAL_ND_H__
|
||||||
|
#define __FFTS_REAL_ND_H__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "ffts_nd.h"
|
||||||
|
#include "ffts_real.h"
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,156 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||||
|
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
#include "macros.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define DEBUG(x)
|
||||||
|
|
||||||
|
#include "ffts_small.h"
|
||||||
|
|
||||||
|
/*
 * Size-16 kernel, "_f" variant: every L_* / K_N helper is called with a
 * direction flag of 0 (firstpass_16_b is the same sequence with 1).
 * Twiddle data is read from the start of p->ws.
 */
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	float *LUT8 = p->ws;
	V r0_1, r2_3, r4_5, r6_7;
	V r8_9, r10_11, r12_13, r14_15;

	/* Load the input into the eight vector registers. */
	L_4_4(0, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
	L_2_4(0, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);

	K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);

	/* First group of four registers: combine, then store. */
	K_N(0, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
	S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);

	/* Second group of four registers: combine, then store. */
	K_N(0, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
	S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
}
|
||||||
|
|
||||||
|
/*
 * Size-16 kernel, "_b" variant: every L_* / K_N helper is called with a
 * direction flag of 1 (firstpass_16_f is the same sequence with 0).
 * Twiddle data is read from the start of p->ws.
 */
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	float *LUT8 = p->ws;
	V r0_1, r2_3, r4_5, r6_7;
	V r8_9, r10_11, r12_13, r14_15;

	/* Load the input into the eight vector registers. */
	L_4_4(1, din+0, din+16, din+8, din+24, &r0_1, &r2_3, &r8_9, &r10_11);
	L_2_4(1, din+4, din+20, din+28, din+12, &r4_5, &r6_7, &r14_15, &r12_13);

	K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);

	/* First group of four registers: combine, then store. */
	K_N(1, VLD(LUT8+8), VLD(LUT8+12), &r0_1, &r4_5, &r8_9, &r12_13);
	S_4(r0_1, r4_5, r8_9, r12_13, dout+0, dout+8, dout+16, dout+24);

	/* Second group of four registers: combine, then store. */
	K_N(1, VLD(LUT8+16), VLD(LUT8+20), &r2_3, &r6_7, &r10_11, &r14_15);
	S_4(r2_3, r6_7, r10_11, r14_15, dout+4, dout+12, dout+20, dout+28);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Size-8 kernel, "_f" variant: the L_4_2 / K_N helpers are called with a
 * direction flag of 0. Twiddle data is read from p->ws at the offset
 * given by p->ws_is[0].
 */
void firstpass_8_f(ffts_plan_t *p, const void *in, void *out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	float *LUT8 = p->ws + p->ws_is[0];
	V r0_1;
	V r2_3;
	V r4_5;
	V r6_7;

	/* load -> combine with twiddles -> store */
	L_4_2(0, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
	K_N(0, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
	S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
}
|
||||||
|
|
||||||
|
/*
 * Size-8 kernel, "_b" variant: the L_4_2 / K_N helpers are called with a
 * direction flag of 1. Twiddle data is read from p->ws at the offset
 * given by p->ws_is[0].
 */
void firstpass_8_b(ffts_plan_t *p, const void *in, void *out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	float *LUT8 = p->ws + p->ws_is[0];
	V r0_1;
	V r2_3;
	V r4_5;
	V r6_7;

	/* load -> combine with twiddles -> store */
	L_4_2(1, din, din+8, din+4, din+12, &r0_1, &r2_3, &r4_5, &r6_7);
	K_N(1, VLD(LUT8), VLD(LUT8+4), &r0_1, &r2_3, &r4_5, &r6_7);
	S_4(r0_1, r2_3, r4_5, r6_7, dout+0, dout+4, dout+8, dout+12);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Size-4 kernel, "_f" variant, written out in scalar code.
 *
 * Reads eight values from in (four interleaved pairs) and writes eight
 * values to out. The plan argument is not used. All input is read before
 * any output is written. The "_b" variant differs only in the signs of
 * the cross terms written to dout[2], dout[3], dout[6] and dout[7].
 */
void firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
	int c;	/* component within a pair: 0 or 1 */

	/* Gather the four input pairs (pairs 0 and 2, then 1 and 3). */
	for (c = 0; c < 2; c++) {
		t0[c] = din[0 + c];
		t1[c] = din[4 + c];
		t2[c] = din[2 + c];
		t3[c] = din[6 + c];
	}

	/* Sums and differences of each gathered couple. */
	for (c = 0; c < 2; c++) {
		t4[c] = t0[c] + t1[c];
		t5[c] = t0[c] - t1[c];
		t6[c] = t2[c] + t3[c];
		t7[c] = t2[c] - t3[c];
	}

	for (c = 0; c < 2; c++) {
		dout[0 + c] = t4[c] + t6[c];
		dout[4 + c] = t4[c] - t6[c];
	}

	/* Cross terms: the components of t7 are swapped. */
	dout[2] = t5[0] + t7[1];
	dout[3] = t5[1] - t7[0];
	dout[6] = t5[0] - t7[1];
	dout[7] = t5[1] + t7[0];
}
|
||||||
|
|
||||||
|
/*
 * Size-4 kernel, "_b" variant, written out in scalar code.
 *
 * Reads eight values from in (four interleaved pairs) and writes eight
 * values to out. The plan argument is not used. All input is read before
 * any output is written. The "_f" variant differs only in the signs of
 * the cross terms written to dout[2], dout[3], dout[6] and dout[7].
 */
void firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	cdata_t t0, t1, t2, t3, t4, t5, t6, t7;
	int c;	/* component within a pair: 0 or 1 */

	/* Gather the four input pairs (pairs 0 and 2, then 1 and 3). */
	for (c = 0; c < 2; c++) {
		t0[c] = din[0 + c];
		t1[c] = din[4 + c];
		t2[c] = din[2 + c];
		t3[c] = din[6 + c];
	}

	/* Sums and differences of each gathered couple. */
	for (c = 0; c < 2; c++) {
		t4[c] = t0[c] + t1[c];
		t5[c] = t0[c] - t1[c];
		t6[c] = t2[c] + t3[c];
		t7[c] = t2[c] - t3[c];
	}

	for (c = 0; c < 2; c++) {
		dout[0 + c] = t4[c] + t6[c];
		dout[4 + c] = t4[c] - t6[c];
	}

	/* Cross terms: the components of t7 are swapped. */
	dout[2] = t5[0] - t7[1];
	dout[3] = t5[1] + t7[0];
	dout[6] = t5[0] + t7[1];
	dout[7] = t5[1] - t7[0];
}
|
||||||
|
|
||||||
|
/*
 * Size-2 kernel: reads two interleaved pairs from in and writes their
 * component-wise sum to dout[0..1] and difference to dout[2..3].
 * The plan argument is not used.
 */
void firstpass_2(ffts_plan_t *p, const void *in, void *out)
{
	const data_t *din = (const data_t *)in;
	data_t *dout = (data_t *)out;
	cdata_t t0, t1, r0, r1;
	int c;	/* component within a pair: 0 or 1 */

	for (c = 0; c < 2; c++) {
		t0[c] = din[0 + c];
		t1[c] = din[2 + c];
	}

	for (c = 0; c < 2; c++) {
		r0[c] = t0[c] + t1[c];
		r1[c] = t0[c] - t1[c];
	}

	for (c = 0; c < 2; c++) {
		dout[0 + c] = r0[c];
		dout[2 + c] = r1[c];
	}
}
|
@ -0,0 +1,13 @@
|
|||||||
|
#ifndef __FFTS_SMALL_H__
|
||||||
|
#define __FFTS_SMALL_H__
|
||||||
|
|
||||||
|
|
||||||
|
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_8_f(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_8_b(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_4_f(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_4_b(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
void firstpass_2(ffts_plan_t * p, const void * in, void * out);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,101 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#include "ffts_static.h"
|
||||||
|
|
||||||
|
/*
 * Recursive driver for the "_i" static NEON kernels.
 *
 * N == 16 is handled directly by neon_static_x4_i; smaller N does nothing.
 * For larger N the routine recurses into five sub-ranges of data (sizes
 * N/4, N/8, N/8, N/4, N/4 at offsets 0, N/2, 3N/4, N and 3N/2 floats) and
 * then applies an x8 kernel: the "_t" form at the top level (N == p->N),
 * the plain form otherwise.
 */
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) {
	size_t half, quarter, eighth;
	float *ws;

	if (N == 16) {
		neon_static_x4_i(data, N, p->ws);
		return;
	}
	if (N < 16) return;

	half = N >> 1;
	quarter = N >> 2;
	eighth = N >> 3;
	/* Twiddle table for this level, selected by log2(N) - 4. */
	ws = ((float *)(p->ws)) + (p->ws_is[__builtin_ctzl(N)-4] << 1);

	ffts_static_rec_i(p, data, quarter);
	ffts_static_rec_i(p, data + half, eighth);
	ffts_static_rec_i(p, data + half + quarter, eighth);
	ffts_static_rec_i(p, data + N, quarter);
	ffts_static_rec_i(p, data + N + half, quarter);

	if (N != p->N) {
		neon_static_x8_i(data, N, ws);
	} else {
		neon_static_x8_t_i(data, N, ws);
	}
}
|
||||||
|
/*
 * Recursive driver for the "_f" static NEON kernels.
 *
 * N == 16 is handled directly by neon_static_x4_f; smaller N does nothing.
 * For larger N the routine recurses into five sub-ranges of data (sizes
 * N/4, N/8, N/8, N/4, N/4 at offsets 0, N/2, 3N/4, N and 3N/2 floats) and
 * then applies an x8 kernel: the "_t" form at the top level (N == p->N),
 * the plain form otherwise.
 */
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) {
	size_t half, quarter, eighth;
	float *ws;

	if (N == 16) {
		neon_static_x4_f(data, N, p->ws);
		return;
	}
	if (N < 16) return;

	half = N >> 1;
	quarter = N >> 2;
	eighth = N >> 3;
	/* Twiddle table for this level, selected by log2(N) - 4. */
	ws = ((float *)(p->ws)) + (p->ws_is[__builtin_ctzl(N)-4] << 1);

	ffts_static_rec_f(p, data, quarter);
	ffts_static_rec_f(p, data + half, eighth);
	ffts_static_rec_f(p, data + half + quarter, eighth);
	ffts_static_rec_f(p, data + N, quarter);
	ffts_static_rec_f(p, data + N + half, quarter);

	if (N != p->N) {
		neon_static_x8_f(data, N, ws);
	} else {
		neon_static_x8_t_f(data, N, ws);
	}
}
|
||||||
|
|
||||||
|
/*
 * Entry point for the "_f" static transform.
 *
 * Picks the first-pass kernel by the parity of the number of trailing
 * zero bits in p->N (odd -> neon_static_o_f, even -> neon_static_e_f),
 * then runs the recursive passes in place on out.
 */
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out) {
	const int odd_log2 = __builtin_ctzl(p->N) & 1;

	if (!odd_log2) {
		neon_static_e_f(p, in, out);
	} else {
		neon_static_o_f(p, in, out);
	}

	ffts_static_rec_f(p, out, p->N);
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Entry point for the "_i" static transform.
 *
 * Picks the first-pass kernel by the parity of the number of trailing
 * zero bits in p->N (odd -> neon_static_o_i, even -> neon_static_e_i),
 * then runs the recursive passes in place on out.
 */
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out) {
	const int odd_log2 = __builtin_ctzl(p->N) & 1;

	if (!odd_log2) {
		neon_static_e_i(p, in, out);
	} else {
		neon_static_o_i(p, in, out);
	}

	ffts_static_rec_i(p, out, p->N);
}
|
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __FFTS_STATIC_H__
|
||||||
|
#define __FFTS_STATIC_H__
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
#include "neon.h"
|
||||||
|
|
||||||
|
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) ;
|
||||||
|
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out);
|
||||||
|
|
||||||
|
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) ;
|
||||||
|
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,206 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||||
|
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __MACROS_ALPHA_H__
|
||||||
|
#define __MACROS_ALPHA_H__
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#ifdef __alpha__
|
||||||
|
#define restrict
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Plain-C vector type: four floats arranged as two (r, i) pairs.
 * The helpers in this header treat rN / iN as the two components of
 * pair N (see VDUPRE / VDUPIM).
 */
typedef struct {
	float r1, i1;	/* first pair */
	float r2, i2;	/* second pair */
} V;

/* Allocation wrappers; the second FFTS_MALLOC argument is ignored here. */
#define FFTS_MALLOC(d,a) malloc(d)
#define FFTS_FREE(d) free(d)

/* Build a V from four floats given highest lane first: f0 lands in r1. */
#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3})
|
||||||
|
|
||||||
|
/* Lane-wise sum: each of the four floats of x plus the matching float of y. */
static inline V VADD(V x, V y)
{
	V sum = {
		x.r1 + y.r1,
		x.i1 + y.i1,
		x.r2 + y.r2,
		x.i2 + y.i2
	};

	return sum;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Lane-wise difference: each float of x minus the matching float of y. */
static inline V VSUB(V x, V y)
{
	V diff = {
		x.r1 - y.r1,
		x.i1 - y.i1,
		x.r2 - y.r2,
		x.i2 - y.i2
	};

	return diff;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Lane-wise product: each float of x times the matching float of y.
 * This is four independent real multiplications, not a complex multiply.
 */
static inline V VMUL(V x, V y)
{
	V prod = {
		x.r1 * y.r1,
		x.i1 * y.i1,
		x.r2 * y.r2,
		x.i2 * y.i2
	};

	return prod;
}
|
||||||
|
|
||||||
|
/*
 * XOR the 32-bit representations of two floats.
 * A union is used to reinterpret the bits; assumes float is 32 bits wide.
 */
static inline float VXOR_lane(float a, float b)
{
	union { float f; uint32_t u; } ua, ub;

	ua.f = a;
	ub.f = b;
	ua.u ^= ub.u;
	return ua.f;
}

/*
 * Lane-wise bitwise XOR of x and y.
 *
 * The previous implementation cast each float *value* to uint32_t before
 * XOR-ing, which truncates the value, is undefined for negative inputs and
 * discards the sign bit, so it could not be used with a sign mask such as
 * -0.0f. This version operates on the bit patterns instead: XOR-ing a lane
 * with -0.0f flips its sign, XOR-ing with 0.0f leaves it unchanged.
 */
static inline V VXOR(V x, V y)
{
	V r;

	r.r1 = VXOR_lane(x.r1, y.r1);
	r.i1 = VXOR_lane(x.i1, y.i1);
	r.r2 = VXOR_lane(x.r2, y.r2);
	r.i2 = VXOR_lane(x.i2, y.i2);
	return r;
}
|
||||||
|
|
||||||
|
/* Exchange the two components inside each pair: (r, i) becomes (i, r). */
static inline V VSWAPPAIRS(V x)
{
	V swapped = { x.i1, x.r1, x.i2, x.r2 };

	return swapped;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Takes the first pair (r1, i1) from x and the second pair (r2, i2) from y. */
static inline V VBLEND(V x, V y)
{
    V mixed = x;        /* keeps x.r1 and x.i1 */

    mixed.r2 = y.r2;
    mixed.i2 = y.i2;
    return mixed;
}
|
||||||
|
|
||||||
|
/* Combines the second pairs of both inputs: the result is
   (x.r2, x.i2, y.r2, y.i2). */
static inline V VUNPACKHI(V x, V y)
{
    V hi = y;           /* keeps y.r2 and y.i2 in the second pair */

    hi.r1 = x.r2;
    hi.i1 = x.i2;
    return hi;
}
|
||||||
|
|
||||||
|
/* Combines the first pairs of both inputs: the result is
   (x.r1, x.i1, y.r1, y.i1). */
static inline V VUNPACKLO(V x, V y)
{
    V lo = x;           /* keeps x.r1 and x.i1 in the first pair */

    lo.r2 = y.r1;
    lo.i2 = y.i1;
    return lo;
}
|
||||||
|
|
||||||
|
/* Duplicates the r member of each pair over its i member: the result is
   (r1, r1, r2, r2). */
static inline V VDUPRE(V x)
{
    V dup = x;          /* r1 and r2 are already in place */

    dup.i1 = x.r1;
    dup.i2 = x.r2;
    return dup;
}
|
||||||
|
|
||||||
|
/* Duplicates the i member of each pair over its r member: the result is
   (i1, i1, i2, i2). */
static inline V VDUPIM(V x)
{
    V dup = x;          /* i1 and i2 are already in place */

    dup.r1 = x.i1;
    dup.r2 = x.i2;
    return dup;
}
|
||||||
|
|
||||||
|
/* Returns re*d - im*swap(d), where the products are member-wise and swap()
   exchanges the two members of each pair of d. */
static inline V IMUL(V d, V re, V im)
{
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VSUB(straight, crossed);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns re*d + im*swap(d), where the products are member-wise and swap()
   exchanges the two members of each pair of d. */
static inline V IMULJ(V d, V re, V im)
{
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VADD(straight, crossed);
}
|
||||||
|
|
||||||
|
/* Negates one member of each pair and copies the other:
   inv != 0 negates r1 and r2; inv == 0 negates i1 and i2. */
static inline V MULI(int inv, V x)
{
    V z;

    z.r1 = inv ? -x.r1 : x.r1;
    z.i1 = inv ? x.i1 : -x.i1;
    z.r2 = inv ? -x.r2 : x.r2;
    z.i2 = inv ? x.i2 : -x.i2;
    return z;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Applies MULI(inv, x) and then exchanges the two members of each pair.
   For a pair (r, i) this yields (i, -r) when inv != 0 and (-i, r) otherwise. */
static inline V IMULI(int inv, V x)
{
    const V negated = MULI(inv, x);

    return VSWAPPAIRS(negated);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Reads one V (four floats) from the memory s points to and returns it
   by value. */
static inline V VLD(const void *s)
{
    return *(const V *)s;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Writes the vector s (four floats) to the memory d points to. */
static inline void VST(void *d, V s)
{
    *(V *)d = s;
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,137 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||||
|
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __MACROS_ALTIVEC_H__
|
||||||
|
#define __MACROS_ALTIVEC_H__
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include <altivec.h>
|
||||||
|
|
||||||
|
#define restrict
|
||||||
|
|
||||||
|
typedef vector float V;
|
||||||
|
typedef vector unsigned char VUC;
|
||||||
|
|
||||||
|
/* Allocation macros for vector data. The alignment argument `a` is ignored:
   the Apple branch relies on vec_malloc(), the other branch always asks
   memalign() for 16-byte alignment.

   Apple toolchains predefine __APPLE__ (upper case), so test that spelling;
   the lower-case __apple__ is still honoured in case a build defines it
   by hand. */
#if defined(__APPLE__) || defined(__apple__)
#define FFTS_MALLOC(d,a) vec_malloc(d)
#define FFTS_FREE(d) vec_free(d)
#else
/* It appears vec_malloc() and friends are not implemented on Linux */
#include <malloc.h>
#define FFTS_MALLOC(d,a) memalign(16,d)
#define FFTS_FREE(d) free(d)
#endif
|
||||||
|
|
||||||
|
#define VLIT4(f0,f1,f2,f3) ((V){f0, f1, f2, f3})
|
||||||
|
|
||||||
|
#define VADD(x,y) vec_add(x,y)
|
||||||
|
#define VSUB(x,y) vec_sub(x,y)
|
||||||
|
#define VMUL(x,y) vec_madd(x,y,(V){0})
|
||||||
|
#define VMULADD(x,y,z) vec_madd(x,y,z)
|
||||||
|
#define VNMULSUB(x,y,z) vec_nmsub(x,y,z)
|
||||||
|
#define VXOR(x,y) vec_xor((x),(y))
|
||||||
|
#define VSWAPPAIRS(x) \
|
||||||
|
vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03, \
|
||||||
|
0x0c,0x0d,0x0e,0x0f,0x08,0x09,0x0a,0x0b})
|
||||||
|
|
||||||
|
#define VBLEND(x,y) \
|
||||||
|
vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
|
||||||
|
0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
|
||||||
|
|
||||||
|
#define VUNPACKHI(x,y) \
|
||||||
|
vec_perm(x,y,(VUC){0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, \
|
||||||
|
0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
|
||||||
|
|
||||||
|
#define VUNPACKLO(x,y) \
|
||||||
|
vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
|
||||||
|
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17})
|
||||||
|
|
||||||
|
#define VDUPRE(x) \
|
||||||
|
vec_perm(x,x,(VUC){0x00,0x01,0x02,0x03,0x00,0x01,0x02,0x03, \
|
||||||
|
0x18,0x19,0x1a,0x1b,0x18,0x19,0x1a,0x1b})
|
||||||
|
|
||||||
|
#define VDUPIM(x) \
|
||||||
|
vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x04,0x05,0x06,0x07, \
|
||||||
|
0x1c,0x1d,0x1e,0x1f,0x1c,0x1d,0x1e,0x1f})
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns re*d - im*swap(d), where the products are lane-wise and swap()
   is VSWAPPAIRS (exchanges the two floats of each pair of d). */
static inline V IMUL(V d, V re, V im)
{
    V crossed, straight;

    crossed = VMUL(im, VSWAPPAIRS(d));
    straight = VMUL(re, d);
    return VSUB(straight, crossed);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns re*d + im*swap(d). The final multiply and add are issued as a
   single VMULADD (vec_madd) with the crossed product as the addend. */
static inline V IMULJ(V d, V re, V im)
{
    V crossed;

    crossed = VMUL(im, VSWAPPAIRS(d));
    return VMULADD(re, d, crossed);
}
|
||||||
|
|
||||||
|
/* MULI(inv, x): flips the sign of two of the four lanes of x by XORing with
   a mask of signed zeros. With this header's VLIT4 (arguments listed lane 0
   first), inv != 0 flips lanes 0 and 2 and inv == 0 flips lanes 1 and 3.
   Two textual variants exist only because of the compiler problem noted
   below; both compute the same value. */
#ifndef __GNUC__
|
||||||
|
/* gcc (4.6 and 4.7) ICEs on this code! */
|
||||||
|
static inline V MULI(int inv, V x)
|
||||||
|
{
|
||||||
|
return VXOR(x, inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* but compiles this fine... */
/* Same computation with the mask held in a temporary; keep the separate
   declaration and assignment, since the nested form above is the one
   reported to crash gcc. */
|
||||||
|
static inline V MULI(int inv, V x)
|
||||||
|
{
|
||||||
|
V t;
|
||||||
|
t = inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f);
|
||||||
|
return VXOR(x, t);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Applies MULI(inv, x) (sign flip of two lanes) and then exchanges the two
   floats of each pair with VSWAPPAIRS. */
static inline V IMULI(int inv, V x)
{
    V flipped;

    flipped = MULI(inv, x);
    return VSWAPPAIRS(flipped);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Reads one vector from the memory s points to by dereferencing it as a V.
   NOTE(review): presumably s must be 16-byte aligned for a vector load;
   confirm against the callers. */
static inline V VLD(const void *s)
{
    return *(const V *)s;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Writes the vector s to the memory d points to by dereferencing it as a V.
   NOTE(review): presumably d must be 16-byte aligned for a vector store;
   confirm against the callers. */
static inline void VST(void *d, V s)
{
    *(V *)d = s;
}
|
||||||
|
#endif
|
@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
#ifndef __MACROS_NEON_H__
|
||||||
|
#define __MACROS_NEON_H__
|
||||||
|
|
||||||
|
#include "neon.h"
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
typedef float32x4_t V;
|
||||||
|
|
||||||
|
typedef float32x4x2_t VS;
|
||||||
|
|
||||||
|
#define ADD vaddq_f32
|
||||||
|
#define SUB vsubq_f32
|
||||||
|
#define MUL vmulq_f32
|
||||||
|
#define VADD vaddq_f32
|
||||||
|
#define VSUB vsubq_f32
|
||||||
|
#define VMUL vmulq_f32
|
||||||
|
#define VXOR(x,y) (vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))
|
||||||
|
#define VST vst1q_f32
|
||||||
|
#define VLD vld1q_f32
|
||||||
|
#define VST2 vst2q_f32
|
||||||
|
#define VLD2 vld2q_f32
|
||||||
|
|
||||||
|
#define VSWAPPAIRS(x) (vrev64q_f32(x))
|
||||||
|
|
||||||
|
#define VUNPACKHI(a,b) (vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
|
||||||
|
#define VUNPACKLO(a,b) (vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
|
||||||
|
|
||||||
|
#define VBLEND(x,y) (vcombine_f32(vget_low_f32(x), vget_high_f32(y)))
|
||||||
|
|
||||||
|
/* Builds a vector from four scalars. The arguments are listed highest
   element first: f0 becomes element 0 of the loaded array and f3 element 3.
   The values go through a 16-byte aligned temporary that VLD then loads. */
__INLINE V VLIT4(data_t f3, data_t f2, data_t f1, data_t f0) {
    data_t __attribute__ ((aligned(16))) lanes[4] = {
        [0] = f0, [1] = f1, [2] = f2, [3] = f3
    };

    return VLD(lanes);
}
|
||||||
|
|
||||||
|
#define VDUPRE(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
|
||||||
|
#define VDUPIM(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))
|
||||||
|
|
||||||
|
#define FFTS_MALLOC(d,a) (valloc(d))
|
||||||
|
#define FFTS_FREE(d) (free(d))
|
||||||
|
|
||||||
|
/* Stores the two vectors of p back to back: p.val[0] at addr[0..3] and
   p.val[1] at addr[4..7]. */
__INLINE void STORESPR(data_t * addr, VS p) {
    data_t *upper = addr + 4;

    vst1q_f32(addr, p.val[0]);
    vst1q_f32(upper, p.val[1]);
}
|
||||||
|
|
||||||
|
/* Flips the sign of two lanes of a (by XOR with a signed-zero mask) and then
   exchanges the two floats of each pair.
   VLIT4 lists lane 3 first, so inv != 0 flips lanes 0 and 2, while
   inv == 0 flips lanes 1 and 3. */
__INLINE V IMULI(int inv, V a) {
    V sign_mask;

    if (inv) {
        sign_mask = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
    } else {
        sign_mask = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
    }
    return VSWAPPAIRS(VXOR(a, sign_mask));
}
|
||||||
|
|
||||||
|
/* Returns re*d - im*swap(d), where the products are lane-wise and swap()
   is VSWAPPAIRS (exchanges the two floats of each pair of d). */
__INLINE V IMUL(V d, V re, V im) {
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VSUB(straight, crossed);
}
|
||||||
|
|
||||||
|
/* Returns re*d + im*swap(d), where the products are lane-wise and swap()
   is VSWAPPAIRS (exchanges the two floats of each pair of d). */
__INLINE V IMULJ(V d, V re, V im) {
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VADD(straight, crossed);
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,84 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __SSE_FLOAT_H__
|
||||||
|
#define __SSE_FLOAT_H__
|
||||||
|
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
|
//#define VL 4
|
||||||
|
|
||||||
|
typedef __m128 V;
|
||||||
|
|
||||||
|
#define VADD _mm_add_ps
|
||||||
|
#define VSUB _mm_sub_ps
|
||||||
|
#define VMUL _mm_mul_ps
|
||||||
|
//#define VLIT4 _mm_set_ps
|
||||||
|
#define VXOR _mm_xor_ps
|
||||||
|
#define VST _mm_store_ps
|
||||||
|
#define VLD _mm_load_ps
|
||||||
|
|
||||||
|
#define VSWAPPAIRS(x) (_mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1)))
|
||||||
|
|
||||||
|
#define VUNPACKHI(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,3,2)))
|
||||||
|
#define VUNPACKLO(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(1,0,1,0)))
|
||||||
|
|
||||||
|
#define VBLEND(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,1,0)))
|
||||||
|
|
||||||
|
#define VLIT4 _mm_set_ps
|
||||||
|
|
||||||
|
#define VDUPRE(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(2,2,0,0)))
|
||||||
|
#define VDUPIM(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(3,3,1,1)))
|
||||||
|
|
||||||
|
#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
|
||||||
|
#define FFTS_FREE(d) (_mm_free(d))
|
||||||
|
|
||||||
|
/* Flips the sign of two lanes of a (by XOR with a signed-zero mask) and then
   exchanges the two floats of each pair.
   VLIT4 is _mm_set_ps, which lists lane 3 first, so inv != 0 flips
   lanes 0 and 2, while inv == 0 flips lanes 1 and 3. */
__INLINE V IMULI(int inv, V a) {
    V sign_mask;

    if (inv) {
        sign_mask = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
    } else {
        sign_mask = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
    }
    return VSWAPPAIRS(VXOR(a, sign_mask));
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Returns re*d - im*swap(d), where the products are lane-wise and swap()
   is VSWAPPAIRS (exchanges the two floats of each pair of d). */
__INLINE V IMUL(V d, V re, V im) {
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VSUB(straight, crossed);
}
|
||||||
|
|
||||||
|
/* Returns re*d + im*swap(d), where the products are lane-wise and swap()
   is VSWAPPAIRS (exchanges the two floats of each pair of d). */
__INLINE V IMULJ(V d, V re, V im) {
    const V straight = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));

    return VADD(straight, crossed);
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,161 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
|
||||||
|
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __MACROS_H__
|
||||||
|
#define __MACROS_H__
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
#include "macros-neon.h"
|
||||||
|
#else
|
||||||
|
#ifdef __alpha__
|
||||||
|
#include "macros-alpha.h"
|
||||||
|
#else
|
||||||
|
#ifdef __powerpc__
|
||||||
|
#include "macros-altivec.h"
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_VFP
|
||||||
|
#include "macros-alpha.h"
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#include "macros-sse.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* In-place 2x2 transpose of the pairs held in *a and *b: afterwards *a holds
   the first pairs of the two inputs (VUNPACKLO) and *b holds their second
   pairs (VUNPACKHI). Both results are computed before either is stored. */
static inline void TX2(V *a, V *b)
{
    const V first_pairs = VUNPACKLO(*a, *b);
    const V second_pairs = VUNPACKHI(*a, *b);

    *a = first_pairs;
    *b = second_pairs;
}
|
||||||
|
|
||||||
|
/* Combines four vectors in place using the factors re and im.
 *
 *   zp = IMUL(*r2, re, im)      zn = IMULJ(*r3, re, im)
 *   zs = zp + zn                zd = IMULI(inv, zp - zn)
 *
 *   *r0 = *r0 + zs    *r2 = *r0 - zs     (old value of *r0)
 *   *r1 = *r1 - zd    *r3 = *r1 + zd     (old value of *r1)
 *
 * All four inputs are read before any output is written.
 * NOTE(review): this looks like a twiddled butterfly stage of the FFT;
 * confirm against the callers.
 */
static inline void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
{
    const V u0 = *r0;
    const V u1 = *r1;
    const V z_pos = IMUL(*r2, re, im);
    const V z_neg = IMULJ(*r3, re, im);
    const V z_sum = VADD(z_pos, z_neg);
    const V z_rot = IMULI(inv, VSUB(z_pos, z_neg));

    *r2 = VSUB(u0, z_sum);
    *r0 = VADD(u0, z_sum);
    *r3 = VADD(u1, z_rot);
    *r1 = VSUB(u1, z_rot);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Stores four vectors: r0 to o0, r1 to o1, r2 to o2 and r3 to o3, in that
   order. */
static inline void S_4(V r0, V r1, V r2, V r3,
                       data_t * restrict o0, data_t * restrict o1,
                       data_t * restrict o2, data_t * restrict o3)
{
    VST(o0, r0);
    VST(o1, r1);
    VST(o2, r2);
    VST(o3, r3);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Loads one vector from each of i0..i3 and writes four result vectors.
 *
 * With s01 = in0 + in1, d01 = in0 - in1, s23 = in2 + in3, d23 = in2 - in3:
 *   *r0 = first pairs of  (s01, d01)
 *   *r1 = first pairs of  (s23, d23)
 *   *r3 = second pairs of (s23 + s01, d23 - IMULI(inv, d01))
 *   *r2 = second pairs of (s23 - s01, d23 + IMULI(inv, d01))
 *
 * All loads happen before the first store.
 */
static inline void L_2_4(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    V in0 = VLD(i0);
    V in1 = VLD(i1);
    V in2 = VLD(i2);
    V in3 = VLD(i3);
    V sum01 = VADD(in0, in1);
    V dif01 = VSUB(in0, in1);
    V sum23 = VADD(in2, in3);
    V dif23 = VSUB(in2, in3);
    V rot01, y0, y1, y2, y3;

    *r0 = VUNPACKLO(sum01, dif01);
    *r1 = VUNPACKLO(sum23, dif23);

    rot01 = IMULI(inv, dif01);
    y0 = VADD(sum23, sum01);
    y2 = VSUB(sum23, sum01);
    y1 = VSUB(dif23, rot01);
    y3 = VADD(dif23, rot01);

    *r3 = VUNPACKHI(y0, y1);
    *r2 = VUNPACKHI(y2, y3);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Loads one vector from each of i0..i3 and writes four result vectors.
 *
 * With s01 = in0 + in1, d01 = in0 - in1, s23 = in2 + in3 and
 * rot = IMULI(inv, in2 - in3):
 *   y0 = s01 + s23    y1 = d01 - rot
 *   y2 = s01 - s23    y3 = d01 + rot
 * (y0, y1) and (y2, y3) are each passed through TX2, then stored as
 *   *r0 = y0, *r2 = y1, *r1 = y2, *r3 = y3.
 */
static inline void L_4_4(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    V in0 = VLD(i0);
    V in1 = VLD(i1);
    V in2 = VLD(i2);
    V in3 = VLD(i3);
    V sum01 = VADD(in0, in1);
    V dif01 = VSUB(in0, in1);
    V sum23 = VADD(in2, in3);
    V rot23 = IMULI(inv, VSUB(in2, in3));
    V y0 = VADD(sum01, sum23);
    V y2 = VSUB(sum01, sum23);
    V y1 = VSUB(dif01, rot23);
    V y3 = VADD(dif01, rot23);

    TX2(&y0, &y1);
    TX2(&y2, &y3);

    *r0 = y0;
    *r2 = y1;
    *r1 = y2;
    *r3 = y3;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Loads one vector from each of i0..i3 and writes four result vectors.
 *
 * The vectors from i2 and i3 are first recombined with VBLEND:
 *   m23 = VBLEND(in2, in3), m32 = VBLEND(in3, in2).
 * With s01 = in0 + in1, d01 = in0 - in1, s23 = m23 + m32, d23 = m23 - m32:
 *   *r2 = second pairs of (s01, d01)
 *   *r3 = second pairs of (s23, d23)
 *   *r0 = first pairs of  (s01 + s23, d01 - IMULI(inv, d23))
 *   *r1 = first pairs of  (s01 - s23, d01 + IMULI(inv, d23))
 *
 * All loads happen before the first store.
 */
static inline void L_4_2(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    V in0 = VLD(i0);
    V in1 = VLD(i1);
    V raw2 = VLD(i2);
    V raw3 = VLD(i3);
    V mix23 = VBLEND(raw2, raw3);
    V mix32 = VBLEND(raw3, raw2);
    V sum01 = VADD(in0, in1);
    V dif01 = VSUB(in0, in1);
    V sum23 = VADD(mix23, mix32);
    V dif23 = VSUB(mix23, mix32);
    V rot23, y0, y1, y2, y3;

    *r2 = VUNPACKHI(sum01, dif01);
    *r3 = VUNPACKHI(sum23, dif23);

    rot23 = IMULI(inv, dif23);
    y0 = VADD(sum01, sum23);
    y2 = VSUB(sum01, sum23);
    y1 = VSUB(dif01, rot23);
    y3 = VADD(dif01, rot23);

    *r0 = VUNPACKLO(y0, y1);
    *r1 = VUNPACKLO(y2, y3);
}
|
||||||
|
#endif
|
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __NEON_H__
|
||||||
|
#define __NEON_H__
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
void neon_x4(float *, size_t, float *);
|
||||||
|
void neon_x8(float *, size_t, float *);
|
||||||
|
void neon_x8_t(float *, size_t, float *);
|
||||||
|
void neon_ee();
|
||||||
|
void neon_oo();
|
||||||
|
void neon_eo();
|
||||||
|
void neon_oe();
|
||||||
|
void neon_end();
|
||||||
|
|
||||||
|
void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
|
||||||
|
void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
|
||||||
|
|
||||||
|
//typedef struct _ffts_plan_t ffts_plan_t;
|
||||||
|
|
||||||
|
void neon_static_e_f(ffts_plan_t * , const void * , void * );
|
||||||
|
void neon_static_o_f(ffts_plan_t * , const void * , void * );
|
||||||
|
void neon_static_x4_f(float *, size_t, float *);
|
||||||
|
void neon_static_x8_f(float *, size_t, float *);
|
||||||
|
void neon_static_x8_t_f(float *, size_t, float *);
|
||||||
|
|
||||||
|
void neon_static_e_i(ffts_plan_t * , const void * , void * );
|
||||||
|
void neon_static_o_i(ffts_plan_t * , const void * , void * );
|
||||||
|
void neon_static_x4_i(float *, size_t, float *);
|
||||||
|
void neon_static_x8_i(float *, size_t, float *);
|
||||||
|
void neon_static_x8_t_i(float *, size_t, float *);
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,738 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
@ neon_x4: entry point matching the C prototype
@   void neon_x4(float *, size_t, float *)
@ with r0 = data pointer, r1 = byte offset between groups, r2 = table pointer
@ (first three argument registers).
@
@ Loads four 32-byte groups (two q registers each) from r0, r0 + 2*r1,
@ r0 + 4*r1 and r0 + 6*r1, plus two q registers from r2. The last two groups
@ are multiplied by the table values, the products are combined with
@ adds/subtracts, and the four results are stored back to the same four
@ addresses. All loads and stores use the :128 alignment qualifier.
@
@ NOTE(review): r4-r6 and q4-q7 are overwritten without being saved here;
@ presumably callers do not rely on them being preserved -- confirm.
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_x4
|
||||||
|
_neon_x4:
|
||||||
|
#else
|
||||||
|
.globl neon_x4
|
||||||
|
neon_x4:
|
||||||
|
#endif
|
||||||
|
@ add r3, r0, #0
|
||||||
|
|
||||||
|
vld1.32 {q8,q9}, [r0, :128]
|
||||||
|
add r4, r0, r1, lsl #1
|
||||||
|
vld1.32 {q10,q11}, [r4, :128]
|
||||||
|
add r5, r0, r1, lsl #2
|
||||||
|
vld1.32 {q12,q13}, [r5, :128]
|
||||||
|
add r6, r4, r1, lsl #2
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q2,q3}, [r2, :128]
|
||||||
|
|
||||||
|
vmul.f32 q0, q13, q3
|
||||||
|
vmul.f32 q5, q12, q2
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q4, q14, q3
|
||||||
|
vmul.f32 q14, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vmul.f32 q12, q15, q3
|
||||||
|
vmul.f32 q2, q15, q2
|
||||||
|
vsub.f32 q0, q5, q0
|
||||||
|
vadd.f32 q13, q13, q14
|
||||||
|
vadd.f32 q12, q12, q1
|
||||||
|
vsub.f32 q1, q2, q4
|
||||||
|
vadd.f32 q15, q0, q12
|
||||||
|
vsub.f32 q12, q0, q12
|
||||||
|
vadd.f32 q14, q13, q1
|
||||||
|
vsub.f32 q13, q13, q1
|
||||||
|
vadd.f32 q0, q8, q15
|
||||||
|
vadd.f32 q1, q9, q14
|
||||||
|
vsub.f32 q2, q10, q13 @
|
||||||
|
vsub.f32 q4, q8, q15
|
||||||
|
vadd.f32 q3, q11, q12 @
|
||||||
|
vst1.32 {q0,q1}, [r0, :128]
|
||||||
|
vsub.f32 q5, q9, q14
|
||||||
|
vadd.f32 q6, q10, q13 @
|
||||||
|
vsub.f32 q7, q11, q12 @
|
||||||
|
vst1.32 {q2,q3}, [r4, :128]
|
||||||
|
vst1.32 {q4,q5}, [r5, :128]
|
||||||
|
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
@ neon_x8: entry point matching the C prototype
@   void neon_x8(float *, size_t, float *)
@ with r0 = data pointer, r1 = byte offset between streams, r2 = table
@ pointer (first three argument registers).
@
@ r3..r10 point at eight data streams located at r0 + k*r1 (k = 0..7) and
@ r12 walks the table. Each loop iteration reads three 32-byte table entries
@ (post-incremented loads through r12), processes 32 bytes of every stream
@ and advances each stream pointer by 32 bytes via the write-back stores.
@ The streams behind r4 and r6 are stored once without write-back, reloaded
@ later in the same iteration, and stored again with write-back.
@
@ r11 starts at -(r1 >> 5) and is incremented once per iteration; the flags
@ from that "adds" are what the "bne" at the bottom of the loop tests (no
@ instruction in between sets flags).
@ NOTE(review): if (r1 >> 5) is zero the counter starts at 0 and will not
@ reach zero again until it wraps -- presumably callers always pass
@ r1 >= 32; confirm.
@ NOTE(review): r4-r11 and q4-q7 are overwritten without being saved here;
@ presumably callers do not rely on them being preserved -- confirm.
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_x8
|
||||||
|
_neon_x8:
|
||||||
|
#else
|
||||||
|
.globl neon_x8
|
||||||
|
neon_x8:
|
||||||
|
#endif
|
||||||
|
mov r11, #0
|
||||||
|
add r3, r0, #0 @ data0
|
||||||
|
add r5, r0, r1, lsl #1 @ data2
|
||||||
|
add r4, r0, r1 @ data1
|
||||||
|
add r7, r5, r1, lsl #1 @ data4
|
||||||
|
add r6, r5, r1 @ data3
|
||||||
|
add r9, r7, r1, lsl #1 @ data6
|
||||||
|
add r8, r7, r1 @ data5
|
||||||
|
add r10, r9, r1 @ data7
|
||||||
|
add r12, r2, #0 @ LUT
|
||||||
|
|
||||||
|
sub r11, r11, r1, lsr #5
|
||||||
|
neon_x8_loop:
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q10,q11}, [r5, :128]
|
||||||
|
adds r11, r11, #1
|
||||||
|
vmul.f32 q12, q15, q2
|
||||||
|
vmul.f32 q8, q14, q3
|
||||||
|
vmul.f32 q13, q14, q2
|
||||||
|
vmul.f32 q9, q10, q3
|
||||||
|
vmul.f32 q1, q10, q2
|
||||||
|
vmul.f32 q0, q11, q2
|
||||||
|
vmul.f32 q14, q11, q3
|
||||||
|
vmul.f32 q15, q15, q3
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q10, q12, q8
|
||||||
|
vadd.f32 q11, q0, q9
|
||||||
|
vadd.f32 q8, q15, q13
|
||||||
|
vld1.32 {q12,q13}, [r4, :128]
|
||||||
|
vsub.f32 q9, q1, q14
|
||||||
|
vsub.f32 q15, q11, q10
|
||||||
|
vsub.f32 q14, q9, q8
|
||||||
|
vsub.f32 q4, q12, q15 @
|
||||||
|
vadd.f32 q6, q12, q15 @
|
||||||
|
vadd.f32 q5, q13, q14 @
|
||||||
|
vsub.f32 q7, q13, q14 @
|
||||||
|
vld1.32 {q14,q15}, [r9, :128]
|
||||||
|
vld1.32 {q12,q13}, [r7, :128]
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q0, q14, q3
|
||||||
|
vst1.32 {q4,q5}, [r4, :128]
|
||||||
|
vmul.f32 q14, q15, q3
|
||||||
|
vmul.f32 q4, q15, q2
|
||||||
|
vadd.f32 q15, q9, q8
|
||||||
|
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
vmul.f32 q8, q12, q3
|
||||||
|
vmul.f32 q5, q13, q3
|
||||||
|
vmul.f32 q12, q12, q2
|
||||||
|
vmul.f32 q9, q13, q2
|
||||||
|
vadd.f32 q14, q14, q1
|
||||||
|
vsub.f32 q13, q4, q0
|
||||||
|
vadd.f32 q0, q9, q8
|
||||||
|
vld1.32 {q8,q9}, [r3, :128]
|
||||||
|
vadd.f32 q1, q11, q10
|
||||||
|
vsub.f32 q12, q12, q5
|
||||||
|
vadd.f32 q11, q8, q15
|
||||||
|
vsub.f32 q8, q8, q15
|
||||||
|
vadd.f32 q2, q12, q14
|
||||||
|
vsub.f32 q10, q0, q13
|
||||||
|
vadd.f32 q15, q0, q13
|
||||||
|
vadd.f32 q13, q9, q1
|
||||||
|
vsub.f32 q9, q9, q1
|
||||||
|
vsub.f32 q12, q12, q14
|
||||||
|
vadd.f32 q0, q11, q2
|
||||||
|
vadd.f32 q1, q13, q15
|
||||||
|
vsub.f32 q4, q11, q2
|
||||||
|
vsub.f32 q2, q8, q10 @
|
||||||
|
vadd.f32 q3, q9, q12 @
|
||||||
|
vst1.32 {q0,q1}, [r3, :128]!
|
||||||
|
vsub.f32 q5, q13, q15
|
||||||
|
vld1.32 {q14,q15}, [r10, :128]
|
||||||
|
vsub.f32 q7, q9, q12 @
|
||||||
|
vld1.32 {q12,q13}, [r8, :128]
|
||||||
|
vst1.32 {q2,q3}, [r5, :128]!
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vadd.f32 q6, q8, q10 @
|
||||||
|
vmul.f32 q8, q14, q2
|
||||||
|
vst1.32 {q4,q5}, [r7, :128]!
|
||||||
|
vmul.f32 q10, q15, q3
|
||||||
|
vmul.f32 q9, q13, q3
|
||||||
|
vmul.f32 q11, q12, q2
|
||||||
|
vmul.f32 q14, q14, q3
|
||||||
|
vst1.32 {q6,q7}, [r9, :128]!
|
||||||
|
vmul.f32 q15, q15, q2
|
||||||
|
vmul.f32 q12, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vadd.f32 q10, q10, q8
|
||||||
|
vsub.f32 q11, q11, q9
|
||||||
|
vld1.32 {q8,q9}, [r4, :128]
|
||||||
|
vsub.f32 q14, q15, q14
|
||||||
|
vadd.f32 q15, q13, q12
|
||||||
|
vadd.f32 q13, q11, q10
|
||||||
|
vadd.f32 q12, q15, q14
|
||||||
|
vsub.f32 q15, q15, q14
|
||||||
|
vsub.f32 q14, q11, q10
|
||||||
|
vld1.32 {q10,q11}, [r6, :128]
|
||||||
|
vadd.f32 q0, q8, q13
|
||||||
|
vadd.f32 q1, q9, q12
|
||||||
|
vsub.f32 q2, q10, q15 @
|
||||||
|
vadd.f32 q3, q11, q14 @
|
||||||
|
vsub.f32 q4, q8, q13
|
||||||
|
vst1.32 {q0,q1}, [r4, :128]!
|
||||||
|
vsub.f32 q5, q9, q12
|
||||||
|
vadd.f32 q6, q10, q15 @
|
||||||
|
vst1.32 {q2,q3}, [r6, :128]!
|
||||||
|
vsub.f32 q7, q11, q14 @
|
||||||
|
vst1.32 {q4,q5}, [r8, :128]!
|
||||||
|
vst1.32 {q6,q7}, [r10, :128]!
|
||||||
|
bne neon_x8_loop
|
||||||
|
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
/*
 * neon_x8_t: loop over eight equally spaced data pointers, combining them
 * with coefficients streamed from a lookup table.
 *
 * Register use, as established by the code below:
 *   r0      base of the data (copied to r3, annotated "data0")
 *   r1      byte distance between consecutive data pointers; r4..r10 are
 *           set to r0 + 1*r1 .. r0 + 7*r1 (annotated data1..data7)
 *   r2      lookup table pointer (copied to r12, annotated "LUT")
 *   r11     negative loop counter, initialised to -(r1 >> 5) and counted
 *           up to zero
 *
 * Each iteration reads 32 bytes from each data pointer, reads three
 * 32-byte groups from the table, and writes 32 bytes back to each data
 * pointer with post-increment. The final stores use vst2.32, which
 * interleaves the two q registers of each pair on the way out.
 *
 * The code writes r3-r12 and q0-q15 and does not save or restore any of
 * them. There is no return instruction at the end (the "bx lr" is
 * commented out), so control continues into whatever follows.
 * NOTE(review): this looks like a code fragment that is entered with the
 * callee-saved registers already preserved by the caller -- confirm.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_x8_t
|
||||||
|
_neon_x8_t:
|
||||||
|
#else
|
||||||
|
.globl neon_x8_t
|
||||||
|
neon_x8_t:
|
||||||
|
#endif
|
||||||
|
mov r11, #0
|
||||||
|
add r3, r0, #0 @ data0
|
||||||
|
add r5, r0, r1, lsl #1 @ data2
|
||||||
|
add r4, r0, r1 @ data1
|
||||||
|
add r7, r5, r1, lsl #1 @ data4
|
||||||
|
add r6, r5, r1 @ data3
|
||||||
|
add r9, r7, r1, lsl #1 @ data6
|
||||||
|
add r8, r7, r1 @ data5
|
||||||
|
add r10, r9, r1 @ data7
|
||||||
|
add r12, r2, #0 @ LUT
|
||||||
|
|
||||||
|
/* r11 = 0 - (r1 >> 5): one iteration per 32 bytes of the pointer spacing. */
sub r11, r11, r1, lsr #5
|
||||||
|
neon_x8_t_loop:
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q10,q11}, [r5, :128]
|
||||||
|
/* Sets the flags tested by the bne at the bottom; no later instruction in the loop is flag-setting. */
adds r11, r11, #1
|
||||||
|
vmul.f32 q12, q15, q2
|
||||||
|
vmul.f32 q8, q14, q3
|
||||||
|
vmul.f32 q13, q14, q2
|
||||||
|
vmul.f32 q9, q10, q3
|
||||||
|
vmul.f32 q1, q10, q2
|
||||||
|
vmul.f32 q0, q11, q2
|
||||||
|
vmul.f32 q14, q11, q3
|
||||||
|
vmul.f32 q15, q15, q3
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q10, q12, q8
|
||||||
|
vadd.f32 q11, q0, q9
|
||||||
|
vadd.f32 q8, q15, q13
|
||||||
|
vld1.32 {q12,q13}, [r4, :128]
|
||||||
|
vsub.f32 q9, q1, q14
|
||||||
|
vsub.f32 q15, q11, q10
|
||||||
|
vsub.f32 q14, q9, q8
|
||||||
|
vsub.f32 q4, q12, q15 @
|
||||||
|
vadd.f32 q6, q12, q15 @
|
||||||
|
vadd.f32 q5, q13, q14 @
|
||||||
|
vsub.f32 q7, q13, q14 @
|
||||||
|
vld1.32 {q14,q15}, [r9, :128]
|
||||||
|
vld1.32 {q12,q13}, [r7, :128]
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q0, q14, q3
|
||||||
|
/* Intermediate result parked at [r4] without write-back; it is reloaded further down. */
vst1.32 {q4,q5}, [r4, :128]
|
||||||
|
vmul.f32 q14, q15, q3
|
||||||
|
vmul.f32 q4, q15, q2
|
||||||
|
vadd.f32 q15, q9, q8
|
||||||
|
/* Intermediate result parked at [r6] without write-back; it is reloaded further down. */
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
vmul.f32 q8, q12, q3
|
||||||
|
vmul.f32 q5, q13, q3
|
||||||
|
vmul.f32 q12, q12, q2
|
||||||
|
vmul.f32 q9, q13, q2
|
||||||
|
vadd.f32 q14, q14, q1
|
||||||
|
vsub.f32 q13, q4, q0
|
||||||
|
vadd.f32 q0, q9, q8
|
||||||
|
vld1.32 {q8,q9}, [r3, :128]
|
||||||
|
vadd.f32 q1, q11, q10
|
||||||
|
vsub.f32 q12, q12, q5
|
||||||
|
vadd.f32 q11, q8, q15
|
||||||
|
vsub.f32 q8, q8, q15
|
||||||
|
vadd.f32 q2, q12, q14
|
||||||
|
vsub.f32 q10, q0, q13
|
||||||
|
vadd.f32 q15, q0, q13
|
||||||
|
vadd.f32 q13, q9, q1
|
||||||
|
vsub.f32 q9, q9, q1
|
||||||
|
vsub.f32 q12, q12, q14
|
||||||
|
vadd.f32 q0, q11, q2
|
||||||
|
vadd.f32 q1, q13, q15
|
||||||
|
vsub.f32 q4, q11, q2
|
||||||
|
vsub.f32 q2, q8, q10 @
|
||||||
|
vadd.f32 q3, q9, q12 @
|
||||||
|
vst2.32 {q0,q1}, [r3, :128]!
|
||||||
|
vsub.f32 q5, q13, q15
|
||||||
|
vld1.32 {q14,q15}, [r10, :128]
|
||||||
|
vsub.f32 q7, q9, q12 @
|
||||||
|
vld1.32 {q12,q13}, [r8, :128]
|
||||||
|
vst2.32 {q2,q3}, [r5, :128]!
|
||||||
|
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vadd.f32 q6, q8, q10 @
|
||||||
|
vmul.f32 q8, q14, q2
|
||||||
|
vst2.32 {q4,q5}, [r7, :128]!
|
||||||
|
vmul.f32 q10, q15, q3
|
||||||
|
vmul.f32 q9, q13, q3
|
||||||
|
vmul.f32 q11, q12, q2
|
||||||
|
vmul.f32 q14, q14, q3
|
||||||
|
vst2.32 {q6,q7}, [r9, :128]!
|
||||||
|
vmul.f32 q15, q15, q2
|
||||||
|
vmul.f32 q12, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vadd.f32 q10, q10, q8
|
||||||
|
vsub.f32 q11, q11, q9
|
||||||
|
/* Reload of the intermediate values stored to [r4] earlier in this iteration. */
vld1.32 {q8,q9}, [r4, :128]
|
||||||
|
vsub.f32 q14, q15, q14
|
||||||
|
vadd.f32 q15, q13, q12
|
||||||
|
vadd.f32 q13, q11, q10
|
||||||
|
vadd.f32 q12, q15, q14
|
||||||
|
vsub.f32 q15, q15, q14
|
||||||
|
vsub.f32 q14, q11, q10
|
||||||
|
/* Reload of the intermediate values stored to [r6] earlier in this iteration. */
vld1.32 {q10,q11}, [r6, :128]
|
||||||
|
vadd.f32 q0, q8, q13
|
||||||
|
vadd.f32 q1, q9, q12
|
||||||
|
vsub.f32 q2, q10, q15 @
|
||||||
|
vadd.f32 q3, q11, q14 @
|
||||||
|
vsub.f32 q4, q8, q13
|
||||||
|
vst2.32 {q0,q1}, [r4, :128]!
|
||||||
|
vsub.f32 q5, q9, q12
|
||||||
|
vadd.f32 q6, q10, q15 @
|
||||||
|
vst2.32 {q2,q3}, [r6, :128]!
|
||||||
|
vsub.f32 q7, q11, q14 @
|
||||||
|
vst2.32 {q4,q5}, [r8, :128]!
|
||||||
|
vst2.32 {q6,q7}, [r10, :128]!
|
||||||
|
bne neon_x8_t_loop
|
||||||
|
|
||||||
|
@bx lr
|
||||||
|
|
||||||
|
/*
 * neon_ee: counted loop that reads 16 bytes from each of the eight input
 * pointers r3-r10 per iteration and writes 64 bytes to each of two output
 * locations.
 *
 *   r2       on entry: address of 16 bytes of constants, loaded once into
 *            d16/d17 before the loop; inside the loop r2 is reused as an
 *            output pointer
 *   r11      iteration count, decremented to zero
 *   r12      pointer to a table of 32-bit offsets; two entries are
 *            consumed per iteration, each scaled by 4 and added to r0
 *   r3-r10   input pointers, each post-incremented by 16 bytes
 *
 * Every load is a vld2.32 of one q register, which places the
 * even-indexed 32-bit words in the low d half and the odd-indexed words in
 * the high d half. NOTE(review): presumably these are the real and
 * imaginary parts of two complex floats -- confirm against the callers.
 *
 * The loop writes q0-q15 without saving d8-d15, and there is no return
 * instruction after the loop; control continues into whatever follows.
 */
@ assumes r0 = out
|
||||||
|
@ r1 = in ?
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = loop iterations
|
||||||
|
@ r2 & lr = temps
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_ee
|
||||||
|
_neon_ee:
|
||||||
|
#else
|
||||||
|
.globl neon_ee
|
||||||
|
neon_ee:
|
||||||
|
#endif
|
||||||
|
/* Constants are loaded once, outside the loop, because r2 is overwritten inside it. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
_neon_ee_loop:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
/* Sets the flags tested by the bne at the bottom of the loop. */
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vadd.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vsub.f32 d31, d5, d2 @
|
||||||
|
vsub.f32 d28, d4, d3 @
|
||||||
|
vadd.f32 d30, d4, d3 @
|
||||||
|
vadd.f32 d5, d19, d14 @-
|
||||||
|
vadd.f32 d7, d31, d26 @-
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vsub.f32 d6, d30, d27 @-
|
||||||
|
vsub.f32 d4, d18, d15 @-
|
||||||
|
vsub.f32 d13, d19, d14 @-
|
||||||
|
vadd.f32 d12, d18, d15 @-
|
||||||
|
vsub.f32 d15, d31, d26 @-
|
||||||
|
/* Fetch the next two output offsets; they become r0 + 4*offset below. */
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vadd.f32 d14, d30, d27 @-
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_loop
|
||||||
|
|
||||||
|
/*
 * neon_oo: counted loop that reads 16 bytes from each of the eight input
 * pointers r3-r10 per iteration (vld2.32 of a single q register,
 * post-incremented) and writes 64 bytes to each of two output locations.
 * Only additions and subtractions are performed; no constants are loaded.
 *
 *   r11   iteration count, decremented to zero
 *   r12   pointer to a table of 32-bit offsets; two entries are consumed
 *         per iteration, each scaled by 4 and added to r0 to form the
 *         output pointers held in r2 and lr
 *
 * The loop writes q0-q13 without saving d8-d15, and there is no return
 * instruction after the loop; control continues into whatever follows.
 */
@ assumes r0 = out
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = loop iterations
|
||||||
|
@ r2 & lr = temps
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_oo
|
||||||
|
_neon_oo:
|
||||||
|
#else
|
||||||
|
.globl neon_oo
|
||||||
|
neon_oo:
|
||||||
|
#endif
|
||||||
|
_neon_oo_loop:
|
||||||
|
vld2.32 {q8}, [r6, :128]!
|
||||||
|
vld2.32 {q9}, [r5, :128]!
|
||||||
|
vld2.32 {q10}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vadd.f32 q11, q9, q8
|
||||||
|
vsub.f32 q8, q9, q8
|
||||||
|
vsub.f32 q9, q13, q10
|
||||||
|
vadd.f32 q12, q13, q10
|
||||||
|
/* Sets the flags tested by the bne at the bottom of the loop. */
subs r11, r11, #1
|
||||||
|
vld2.32 {q10}, [r7, :128]!
|
||||||
|
vld2.32 {q13}, [r9, :128]!
|
||||||
|
vsub.f32 q2, q12, q11
|
||||||
|
vsub.f32 d7, d19, d16 @
|
||||||
|
vadd.f32 d3, d19, d16 @
|
||||||
|
vadd.f32 d6, d18, d17 @
|
||||||
|
vsub.f32 d2, d18, d17 @
|
||||||
|
vld2.32 {q9}, [r8, :128]!
|
||||||
|
vld2.32 {q8}, [r10, :128]!
|
||||||
|
vadd.f32 q0, q12, q11
|
||||||
|
vadd.f32 q11, q13, q8
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 q8, q13, q8
|
||||||
|
vsub.f32 q9, q10, q9
|
||||||
|
vsub.f32 q6, q12, q11
|
||||||
|
vadd.f32 q4, q12, q11
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
/* Fetch the next two output offsets; they become r0 + 4*offset below. */
ldr r2, [r12], #4
|
||||||
|
vsub.f32 d15, d19, d16 @
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vadd.f32 d11, d19, d16 @
|
||||||
|
vadd.f32 d14, d18, d17 @
|
||||||
|
vsub.f32 d10, d18, d17 @
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_oo_loop
|
||||||
|
|
||||||
|
/*
 * neon_eo: straight-line (non-looping) sequence that reads 16 bytes from
 * each of the eight input pointers r3-r10 (vld2.32 of a single q register,
 * post-incremented) and writes 64 bytes to each of two output locations.
 *
 *   r11   address of 16 bytes of constants, loaded into d20/d21
 *   r12   pointer to a table of 32-bit offsets; two entries are consumed,
 *         each scaled by 4 and added to r0 to form r2 and lr
 *
 * Output at lr is written by two 32-byte vst1.32 stores; output at r2 is
 * written by a single vstmia of q0-q3.
 *
 * The sequence writes q0-q15 without saving d8-d15 and has no return
 * instruction; control continues into whatever follows.
 */
@ assumes r0 = out
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = addr of twiddle
|
||||||
|
@ r2 & lr = temps
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_eo
|
||||||
|
_neon_eo:
|
||||||
|
#else
|
||||||
|
.globl neon_eo
|
||||||
|
neon_eo:
|
||||||
|
#endif
|
||||||
|
vld2.32 {q9}, [r5, :128]! @tag2
|
||||||
|
vld2.32 {q13}, [r3, :128]! @tag0
|
||||||
|
vld2.32 {q12}, [r4, :128]! @tag1
|
||||||
|
vld2.32 {q0}, [r7, :128]! @tag4
|
||||||
|
vsub.f32 q11, q13, q12
|
||||||
|
vld2.32 {q8}, [r6, :128]! @tag3
|
||||||
|
vadd.f32 q12, q13, q12
|
||||||
|
vsub.f32 q10, q9, q8
|
||||||
|
vadd.f32 q8, q9, q8
|
||||||
|
vadd.f32 q9, q12, q8
|
||||||
|
vadd.f32 d9, d23, d20 @
|
||||||
|
vsub.f32 d11, d23, d20 @
|
||||||
|
vsub.f32 q8, q12, q8
|
||||||
|
vsub.f32 d8, d22, d21 @
|
||||||
|
vadd.f32 d10, d22, d21 @
|
||||||
|
/* Fetch the two output offsets; they become r0 + 4*offset below. */
ldr r2, [r12], #4
|
||||||
|
/* d20/d21 are free again at this point and now receive the constants addressed by r11. */
vld1.32 {d20, d21}, [r11, :128]
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q9, q4
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q8, q5
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vswp d9,d10
|
||||||
|
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
|
||||||
|
vld2.32 {q13}, [r10, :128]! @tag7
|
||||||
|
vld2.32 {q15}, [r9, :128]! @tag6
|
||||||
|
vld2.32 {q11}, [r8, :128]! @tag5
|
||||||
|
vsub.f32 q14, q15, q13
|
||||||
|
vsub.f32 q12, q0, q11
|
||||||
|
vadd.f32 q11, q0, q11
|
||||||
|
vadd.f32 q13, q15, q13
|
||||||
|
vadd.f32 d13, d29, d24 @
|
||||||
|
vadd.f32 q15, q13, q11
|
||||||
|
vsub.f32 d12, d28, d25 @
|
||||||
|
vsub.f32 d15, d29, d24 @
|
||||||
|
vadd.f32 d14, d28, d25 @
|
||||||
|
vtrn.32 q15, q6
|
||||||
|
vsub.f32 q15, q13, q11
|
||||||
|
vtrn.32 q15, q7
|
||||||
|
vswp d13, d14
|
||||||
|
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
|
||||||
|
vtrn.32 q13, q14
|
||||||
|
vtrn.32 q11, q12
|
||||||
|
vmul.f32 d24, d26, d21
|
||||||
|
vmul.f32 d28, d27, d20
|
||||||
|
vmul.f32 d25, d26, d20
|
||||||
|
vmul.f32 d26, d27, d21
|
||||||
|
vmul.f32 d27, d22, d21
|
||||||
|
vmul.f32 d30, d23, d20
|
||||||
|
vmul.f32 d29, d23, d21
|
||||||
|
vmul.f32 d22, d22, d20
|
||||||
|
vsub.f32 d21, d28, d24
|
||||||
|
vadd.f32 d20, d26, d25
|
||||||
|
vadd.f32 d25, d30, d27
|
||||||
|
vsub.f32 d24, d22, d29
|
||||||
|
vadd.f32 q11, q12, q10
|
||||||
|
vsub.f32 q10, q12, q10
|
||||||
|
vadd.f32 q0, q9, q11
|
||||||
|
vsub.f32 q2, q9, q11
|
||||||
|
vadd.f32 d3, d17, d20 @
|
||||||
|
vsub.f32 d7, d17, d20 @
|
||||||
|
vsub.f32 d2, d16, d21 @
|
||||||
|
vadd.f32 d6, d16, d21 @
|
||||||
|
vswp d1, d2
|
||||||
|
vswp d5, d6
|
||||||
|
vstmia r2!, {q0-q3}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * neon_oe: straight-line (non-looping) sequence that reads 16 bytes from
 * each of the eight input pointers r3-r10 (post-incremented) and writes
 * 64 bytes to each of two output locations.
 *
 * r5 and r6 are read with vld1.32 (words kept in memory order) and then
 * rearranged with vorr/vtrn; the other six pointers are read with vld2.32,
 * which separates even- and odd-indexed 32-bit words.
 *
 *   r11   address of 16 bytes of constants, loaded into d24/d25
 *   r12   pointer to a table of 32-bit offsets; two entries are consumed,
 *         each scaled by 4 and added to r0 to form r2 and lr
 *
 * Output at r2 is written by two 32-byte vst1.32 stores; output at lr is
 * written by a single vstmia of q4-q7.
 *
 * The sequence writes q0-q15 without saving d8-d15 and has no return
 * instruction; control continues into whatever follows.
 */
@ assumes r0 = out
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = addr of twiddle
|
||||||
|
@ r2 & lr = temps
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_oe
|
||||||
|
_neon_oe:
|
||||||
|
#else
|
||||||
|
.globl neon_oe
|
||||||
|
neon_oe:
|
||||||
|
#endif
|
||||||
|
vld1.32 {q8}, [r5, :128]!
|
||||||
|
vld1.32 {q10}, [r6, :128]!
|
||||||
|
vld2.32 {q11}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
/* vorr dX, dY, dY is a plain register copy (dX = dY). */
vorr d25, d17, d17
|
||||||
|
vorr d24, d20, d20
|
||||||
|
vorr d20, d16, d16
|
||||||
|
vsub.f32 q9, q13, q11
|
||||||
|
vadd.f32 q11, q13, q11
|
||||||
|
/* Fetch the two output offsets; they become r0 + 4*offset below. */
ldr r2, [r12], #4
|
||||||
|
vtrn.32 d24, d25
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 d20, d21
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q8, q10, q12
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vadd.f32 q10, q10, q12
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vadd.f32 d25, d19, d16 @
|
||||||
|
vsub.f32 d27, d19, d16 @
|
||||||
|
vsub.f32 q1, q11, q10
|
||||||
|
vsub.f32 d24, d18, d17 @
|
||||||
|
vadd.f32 d26, d18, d17 @
|
||||||
|
vtrn.32 q0, q12
|
||||||
|
vtrn.32 q1, q13
|
||||||
|
/* d24/d25 are overwritten here with the constants addressed by r11. */
vld1.32 {d24, d25}, [r11, :128]
|
||||||
|
vswp d1, d2
|
||||||
|
vst1.32 {q0, q1}, [r2, :128]!
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
vadd.f32 q1, q0, q15
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vsub.f32 q15, q0, q15
|
||||||
|
vsub.f32 q0, q14, q13
|
||||||
|
vadd.f32 q3, q14, q13
|
||||||
|
vadd.f32 q2, q3, q1
|
||||||
|
vadd.f32 d29, d1, d30 @
|
||||||
|
vsub.f32 d27, d1, d30 @
|
||||||
|
vsub.f32 q3, q3, q1
|
||||||
|
vsub.f32 d28, d0, d31 @
|
||||||
|
vadd.f32 d26, d0, d31 @
|
||||||
|
vtrn.32 q2, q14
|
||||||
|
vtrn.32 q3, q13
|
||||||
|
vswp d5, d6
|
||||||
|
vst1.32 {q2, q3}, [r2, :128]!
|
||||||
|
vtrn.32 q11, q9
|
||||||
|
vtrn.32 q10, q8
|
||||||
|
vmul.f32 d20, d18, d25
|
||||||
|
vmul.f32 d22, d19, d24
|
||||||
|
vmul.f32 d21, d19, d25
|
||||||
|
vmul.f32 d18, d18, d24
|
||||||
|
vmul.f32 d19, d16, d25
|
||||||
|
vmul.f32 d30, d17, d24
|
||||||
|
vmul.f32 d23, d16, d24
|
||||||
|
vmul.f32 d24, d17, d25
|
||||||
|
vadd.f32 d17, d22, d20
|
||||||
|
vsub.f32 d16, d18, d21
|
||||||
|
vsub.f32 d21, d30, d19
|
||||||
|
vadd.f32 d20, d24, d23
|
||||||
|
vadd.f32 q9, q8, q10
|
||||||
|
vsub.f32 q8, q8, q10
|
||||||
|
vadd.f32 q4, q14, q9
|
||||||
|
vsub.f32 q6, q14, q9
|
||||||
|
vadd.f32 d11, d27, d16 @
|
||||||
|
vsub.f32 d15, d27, d16 @
|
||||||
|
vsub.f32 d10, d26, d17 @
|
||||||
|
vadd.f32 d14, d26, d17 @
|
||||||
|
vswp d9, d10
|
||||||
|
vswp d13, d14
|
||||||
|
vstmia lr!, {q4-q7}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * neon_end: exported label followed by a single "bx lr" (return to the
 * address in lr). It is also where execution arrives when the preceding
 * sequence, which has no return of its own, runs off its end.
 * NOTE(review): the name suggests it also serves as an end-of-code marker
 * for the sequences above -- confirm against the code that references it.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_end
|
||||||
|
_neon_end:
|
||||||
|
#else
|
||||||
|
.globl neon_end
|
||||||
|
neon_end:
|
||||||
|
#endif
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * neon_transpose: out-of-place transpose of a matrix of 64-bit elements,
 * processed as 2x2 tiles.
 *
 *   r0   source matrix, advanced row pair by row pair
 *   r1   destination matrix
 *   r2   source row length in elements (r2 * 8 bytes per source row);
 *        the inner loop consumes 4 elements per pass and terminates only
 *        when its counter reaches exactly zero, so r2 must be a non-zero
 *        multiple of 4
 *   r3   destination row length in elements (r3 * 8 bytes per destination
 *        row); the outer loop consumes 2 source rows per pass, so r3 must
 *        be a non-zero multiple of 2
 *
 * All vector accesses carry a :128 alignment hint, so both matrices and
 * both row pitches must keep every accessed address 16-byte aligned.
 *
 * Saves and restores r4-r8. Uses q8, q9, q12 and q13 as scratch.
 *
 * Changes from the earlier version: the two vswp instructions that
 * operated on d21/d28 and d23/d30 were removed because the loads that fed
 * those registers had been commented out, so they only shuffled stale
 * scratch values; the disabled load/store lines were dropped; and each
 * "sub + cmp #0" pair was folded into a single subs.
 */
.align 4
#ifdef __APPLE__
	.globl _neon_transpose
_neon_transpose:
#else
	.globl neon_transpose
neon_transpose:
#endif
	push {r4-r8}
	mov r5, r3                      @ r5 = source rows still to process
_neon_transpose_col:
	mov r7, r1                      @ r7 = destination row k
	add r8, r1, r3, lsl #3          @ r8 = destination row k+1
	mov r4, r2                      @ r4 = elements left in this row pair
	add r6, r0, r2, lsl #3          @ r6 = second source row of the pair
_neon_transpose_row:
	vld1.32 {q8,q9}, [r0, :128]!    @ 4 elements of the first source row
	vld1.32 {q12,q13}, [r6, :128]!  @ 4 elements of the second source row
	subs r4, r4, #4                 @ flags consumed by the bne below
	vswp d17,d24                    @ q8 = {a0,b0}, q12 = {a1,b1}
	vswp d19,d26                    @ q9 = {a2,b2}, q13 = {a3,b3}
	vst1.32 {q8}, [r7, :128]
	vst1.32 {q12}, [r8, :128]
	add r7, r7, r3, lsl #4          @ step two destination rows down
	add r8, r8, r3, lsl #4
	vst1.32 {q9}, [r7, :128]
	vst1.32 {q13}, [r8, :128]
	add r7, r7, r3, lsl #4
	add r8, r8, r3, lsl #4
	bne _neon_transpose_row
	subs r5, r5, #2                 @ flags consumed by the bne below
	add r0, r0, r2, lsl #3          @ skip the second source row just read via r6
	add r1, r1, #16                 @ move two elements right in the destination
	bne _neon_transpose_col
	pop {r4-r8}
	bx lr
|
||||||
|
|
||||||
|
/*
 * neon_transpose_to_buf: transpose an 8x8 tile of 64-bit elements from a
 * strided source into a contiguous 512-byte buffer.
 *
 *   r0   source tile; consecutive source rows are r2 * 8 bytes apart
 *   r1   destination buffer; destination rows are 64 bytes apart
 *   r2   source row pitch in elements
 *
 * The outer loop runs four times (two source rows per pass); the inner
 * loop runs twice (four elements of each row per pass).
 * Saves and restores r4-r10. Uses q8, q9, q12 and q13 as scratch.
 */
.align 4
#ifdef __APPLE__
	.globl _neon_transpose_to_buf
_neon_transpose_to_buf:
#else
	.globl neon_transpose_to_buf
neon_transpose_to_buf:
#endif
	push {r4-r10}
	mov r5, #8                      @ source rows still to process
_neon_transpose_to_buf_col:
	mov r7, r1                      @ destination rows k .. k+3
	add r8, r1, #64
	add r9, r1, #128
	add r10, r1, #192
	add r6, r0, r2, lsl #3          @ second source row of the pair
	mov r4, #8                      @ elements left in this row pair
_neon_transpose_to_buf_row:
	vld1.32 {q8,q9}, [r0, :128]!
	vld1.32 {q12,q13}, [r6, :128]!
	vswp d17,d24                    @ q8 = {a0,b0}, q12 = {a1,b1}
	vswp d19,d26                    @ q9 = {a2,b2}, q13 = {a3,b3}
	vst1.32 {q8}, [r7, :128]
	add r7, r7, #256                @ four destination rows further on
	vst1.32 {q12}, [r8, :128]
	add r8, r8, #256
	vst1.32 {q9}, [r9, :128]
	add r9, r9, #256
	vst1.32 {q13}, [r10, :128]
	add r10, r10, #256
	subs r4, r4, #4
	bne _neon_transpose_to_buf_row
	sub r0, r0, #64                 @ rewind the 64 bytes read from this row
	add r0, r0, r2, lsl #4          @ then advance two source rows
	add r1, r1, #16                 @ move two elements right in the destination
	subs r5, r5, #2
	bne _neon_transpose_to_buf_col
	pop {r4-r10}
	bx lr
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,956 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * neon_static_e_f: complete routine (saves and restores registers, returns
 * through "pop {..., pc}") that runs four processing stages back to back
 * over eight input pointers and scatters the results into an output
 * buffer.
 *
 * Arguments, as used by the code:
 *   r0   pointer to a plan structure (kept in r1 after the prologue).
 *        Fields read: offset 0 (loaded into r12 and then walked as a table
 *        of 32-bit output offsets), offset 12, offset 16 (annotated
 *        p->ee_ws), offset 28 (annotated p->i0), offset 32 (annotated
 *        p->i1) and offset 40 (annotated p->N).
 *   r1   input buffer. Eight input pointers are formed from it, spaced by
 *        the value loaded from offset 40, which is used as a byte distance.
 *   r2   output buffer (kept in r0 after the prologue). Every output
 *        address is r0 + 4 * (entry read from the offset table).
 *
 * Stages:
 *   1. _neon_ee_loop   runs [plan+28] times. It is not guarded against a
 *      zero count. NOTE(review): presumably the count is always at least
 *      one -- confirm.
 *   2. a single straight-line pass using 16 bytes of constants at the
 *      address read from [plan+12]
 *   3. _neon_oo_loop   runs [plan+32] times, skipped when that is zero
 *   4. _neon_ee_loop2  runs [plan+32] times, skipped when that is zero
 * The input pointer registers are exchanged between stages.
 *
 * Preserves r4-r11 and d8-d15.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_e_f
|
||||||
|
_neon_static_e_f:
|
||||||
|
#else
|
||||||
|
.globl neon_static_e_f
|
||||||
|
neon_static_e_f:
|
||||||
|
#endif
|
||||||
|
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
ldr lr, [r0, #40] @ this is p->N
|
||||||
|
/* Eight input pointers at r1 + k*lr; register order for k = 0..7 is r3, r7, r5, r10, r4, r8, r6, r9. */
add r3, r1, #0
|
||||||
|
add r7, r1, lr
|
||||||
|
add r5, r7, lr
|
||||||
|
add r10, r5, lr
|
||||||
|
add r4, r10, lr
|
||||||
|
add r8, r4, lr
|
||||||
|
add r6, r8, lr
|
||||||
|
add r9, r6, lr
|
||||||
|
ldr r12, [r0]
|
||||||
|
/* From here on r1 = plan pointer and r0 = output buffer. */
add r1, r0, #0
|
||||||
|
add r0, r2, #0
|
||||||
|
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||||
|
ldr r11, [r1, #28] @ this is p->i0
|
||||||
|
|
||||||
|
/* Stage 1: constants loaded once into d16/d17; r2 is reused as an output pointer inside the loop. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
_neon_ee_loop:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vsub.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 d31, d5, d2 @
|
||||||
|
vadd.f32 d28, d4, d3 @
|
||||||
|
vsub.f32 d30, d4, d3 @
|
||||||
|
vsub.f32 d5, d19, d14 @
|
||||||
|
vsub.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vadd.f32 d6, d30, d27 @
|
||||||
|
vadd.f32 d4, d18, d15 @
|
||||||
|
vadd.f32 d13, d19, d14 @
|
||||||
|
vsub.f32 d12, d18, d15 @
|
||||||
|
vadd.f32 d15, d31, d26 @
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vsub.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_loop
|
||||||
|
|
||||||
|
/* Stage 2: single pass. r11 = word at plan offset 12, used below as the address of 16 bytes of constants. */
ldr r11, [r1, #12]
|
||||||
|
vld2.32 {q9}, [r5, :128]! @tag2
|
||||||
|
vld2.32 {q13}, [r3, :128]! @tag0
|
||||||
|
vld2.32 {q12}, [r4, :128]! @tag1
|
||||||
|
vld2.32 {q0}, [r7, :128]! @tag4
|
||||||
|
vsub.f32 q11, q13, q12
|
||||||
|
vld2.32 {q8}, [r6, :128]! @tag3
|
||||||
|
vadd.f32 q12, q13, q12
|
||||||
|
vsub.f32 q10, q9, q8
|
||||||
|
vadd.f32 q8, q9, q8
|
||||||
|
vadd.f32 q9, q12, q8
|
||||||
|
vsub.f32 d9, d23, d20 @
|
||||||
|
vadd.f32 d11, d23, d20 @
|
||||||
|
vsub.f32 q8, q12, q8
|
||||||
|
vadd.f32 d8, d22, d21 @
|
||||||
|
vsub.f32 d10, d22, d21 @
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vld1.32 {d20, d21}, [r11, :128]
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q9, q4
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q8, q5
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vswp d9,d10
|
||||||
|
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
|
||||||
|
vld2.32 {q13}, [r10, :128]! @tag7
|
||||||
|
vld2.32 {q15}, [r9, :128]! @tag6
|
||||||
|
vld2.32 {q11}, [r8, :128]! @tag5
|
||||||
|
vsub.f32 q14, q15, q13
|
||||||
|
vsub.f32 q12, q0, q11
|
||||||
|
vadd.f32 q11, q0, q11
|
||||||
|
vadd.f32 q13, q15, q13
|
||||||
|
vsub.f32 d13, d29, d24 @
|
||||||
|
vadd.f32 q15, q13, q11
|
||||||
|
vadd.f32 d12, d28, d25 @
|
||||||
|
vadd.f32 d15, d29, d24 @
|
||||||
|
vsub.f32 d14, d28, d25 @
|
||||||
|
vtrn.32 q15, q6
|
||||||
|
vsub.f32 q15, q13, q11
|
||||||
|
vtrn.32 q15, q7
|
||||||
|
vswp d13, d14
|
||||||
|
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
|
||||||
|
vtrn.32 q13, q14
|
||||||
|
vtrn.32 q11, q12
|
||||||
|
vmul.f32 d24, d26, d21
|
||||||
|
vmul.f32 d28, d27, d20
|
||||||
|
vmul.f32 d25, d26, d20
|
||||||
|
vmul.f32 d26, d27, d21
|
||||||
|
vmul.f32 d27, d22, d21
|
||||||
|
vmul.f32 d30, d23, d20
|
||||||
|
vmul.f32 d29, d23, d21
|
||||||
|
vmul.f32 d22, d22, d20
|
||||||
|
vsub.f32 d21, d28, d24
|
||||||
|
vadd.f32 d20, d26, d25
|
||||||
|
vadd.f32 d25, d30, d27
|
||||||
|
vsub.f32 d24, d22, d29
|
||||||
|
vadd.f32 q11, q12, q10
|
||||||
|
vsub.f32 q10, q12, q10
|
||||||
|
vadd.f32 q0, q9, q11
|
||||||
|
vsub.f32 q2, q9, q11
|
||||||
|
vsub.f32 d3, d17, d20 @
|
||||||
|
vadd.f32 d7, d17, d20 @
|
||||||
|
vadd.f32 d2, d16, d21 @
|
||||||
|
vsub.f32 d6, d16, d21 @
|
||||||
|
vswp d1, d2
|
||||||
|
vswp d5, d6
|
||||||
|
vstmia r2!, {q0-q3}
|
||||||
|
|
||||||
|
/* Exchange r7 with r9 and r8 with r10, using r2 as the temporary. */
add r2, r7, #0
|
||||||
|
add r7, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r8, #0
|
||||||
|
add r8, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
/* Stage 3 is skipped entirely when the count is zero. */
cmp r11, #0
|
||||||
|
beq _neon_oo_loop_exit
|
||||||
|
_neon_oo_loop:
|
||||||
|
vld2.32 {q8}, [r6, :128]!
|
||||||
|
vld2.32 {q9}, [r5, :128]!
|
||||||
|
vld2.32 {q10}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vadd.f32 q11, q9, q8
|
||||||
|
vsub.f32 q8, q9, q8
|
||||||
|
vsub.f32 q9, q13, q10
|
||||||
|
vadd.f32 q12, q13, q10
|
||||||
|
subs r11, r11, #1
|
||||||
|
vld2.32 {q10}, [r7, :128]!
|
||||||
|
vld2.32 {q13}, [r9, :128]!
|
||||||
|
vsub.f32 q2, q12, q11
|
||||||
|
vadd.f32 d7, d19, d16 @
|
||||||
|
vsub.f32 d3, d19, d16 @
|
||||||
|
vsub.f32 d6, d18, d17 @
|
||||||
|
vadd.f32 d2, d18, d17 @
|
||||||
|
vld2.32 {q9}, [r8, :128]!
|
||||||
|
vld2.32 {q8}, [r10, :128]!
|
||||||
|
vadd.f32 q0, q12, q11
|
||||||
|
vadd.f32 q11, q13, q8
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 q8, q13, q8
|
||||||
|
vsub.f32 q9, q10, q9
|
||||||
|
vsub.f32 q6, q12, q11
|
||||||
|
vadd.f32 q4, q12, q11
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vadd.f32 d15, d19, d16 @
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vsub.f32 d11, d19, d16 @
|
||||||
|
vsub.f32 d14, d18, d17 @
|
||||||
|
vadd.f32 d10, d18, d17 @
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_oo_loop
|
||||||
|
_neon_oo_loop_exit:
|
||||||
|
|
||||||
|
|
||||||
|
/* Exchange r3 with r7, r4 with r8, r5 with r9 and r6 with r10, then exchange r9 with r10; r2 is the temporary. */
add r2, r3, #0
|
||||||
|
add r3, r7, #0
|
||||||
|
add r7, r2, #0
|
||||||
|
add r2, r4, #0
|
||||||
|
add r4, r8, #0
|
||||||
|
add r8, r2, #0
|
||||||
|
add r2, r5, #0
|
||||||
|
add r5, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r6, #0
|
||||||
|
add r6, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
add r2, r9, #0
|
||||||
|
add r9, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
ldr r2, [r1, #16]
|
||||||
|
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
/* Stage 4 is skipped entirely when the count is zero. */
cmp r11, #0
|
||||||
|
beq _neon_ee_loop2_exit
|
||||||
|
|
||||||
|
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
_neon_ee_loop2:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vsub.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 d31, d5, d2 @
|
||||||
|
vadd.f32 d28, d4, d3 @
|
||||||
|
vsub.f32 d30, d4, d3 @
|
||||||
|
vsub.f32 d5, d19, d14 @
|
||||||
|
vsub.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vadd.f32 d6, d30, d27 @
|
||||||
|
vadd.f32 d4, d18, d15 @
|
||||||
|
vadd.f32 d13, d19, d14 @
|
||||||
|
vsub.f32 d12, d18, d15 @
|
||||||
|
vadd.f32 d15, d31, d26 @
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vsub.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_loop2
|
||||||
|
_neon_ee_loop2_exit:
|
||||||
|
|
||||||
|
/* Epilogue: restore d8-d15, then pop the saved lr straight into pc to return. */
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * neon_static_o_f -- entry point, prologue and pointer setup.
 *
 * On entry r0 points at a plan structure (called "p" in the comments below),
 * r1 is the base address from which eight read pointers are derived, and r2
 * is copied into r0 to serve as the base address for the stores made later
 * in the routine.
 * NOTE(review): the _o_f suffix presumably selects an "odd, forward" variant;
 * confirm against the C caller.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_o_f
|
||||||
|
_neon_static_o_f:
|
||||||
|
#else
|
||||||
|
.globl neon_static_o_f
|
||||||
|
neon_static_o_f:
|
||||||
|
#endif
|
||||||
|
/* Save r4-r11 and lr, then d8-d15; the epilogue restores them and returns
   by popping the saved lr into pc. */
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
/* lr = byte step between consecutive read pointers, loaded from offset 40 of
   the plan. The eight pointers are r1 + k*lr for k = 0..7, held in the
   register order r3, r7, r5, r10, r4, r8, r6, r9. */
ldr lr, [r0, #40] @ this is p->N
|
||||||
|
add r3, r1, #0
|
||||||
|
add r7, r1, lr
|
||||||
|
add r5, r7, lr
|
||||||
|
add r10, r5, lr
|
||||||
|
add r4, r10, lr
|
||||||
|
add r8, r4, lr
|
||||||
|
add r6, r8, lr
|
||||||
|
add r9, r6, lr
|
||||||
|
/* r12 = first word of the plan; later code reads 32-bit values through it
   with post-increment. */
ldr r12, [r0]
|
||||||
|
/* From here on r1 holds the plan pointer and r0 the store base (incoming r2). */
add r1, r0, #0
|
||||||
|
add r0, r2, #0
|
||||||
|
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||||
|
ldr r11, [r1, #28] @ this is p->i0
|
||||||
|
|
||||||
|
/* Load 16 bytes from the p->ee_ws address into d16/d17; the loop that follows
   uses them as multiplier operands. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
/*
 * _neon_ee_o_loop: repeats until r11 (loaded from p->i0 before the loop)
 * counts down to zero. There is no zero check ahead of the loop, so the body
 * always executes at least once.
 * Each pass loads one q register (de-interleaved by vld2.32) from each of the
 * eight read pointers r3-r10 with post-increment, multiplies some of the
 * differences by d16/d17, combines the results with adds and subtracts, and
 * writes four q-register pairs through two store addresses.
 */
_neon_ee_o_loop:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
/* Decrement the loop counter; no instruction between here and the closing
   bne writes the condition flags. */
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vsub.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 d31, d5, d2 @
|
||||||
|
vadd.f32 d28, d4, d3 @
|
||||||
|
vsub.f32 d30, d4, d3 @
|
||||||
|
vsub.f32 d5, d19, d14 @
|
||||||
|
vsub.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vadd.f32 d6, d30, d27 @
|
||||||
|
vadd.f32 d4, d18, d15 @
|
||||||
|
vadd.f32 d13, d19, d14 @
|
||||||
|
vsub.f32 d12, d18, d15 @
|
||||||
|
vadd.f32 d15, d31, d26 @
|
||||||
|
/* Fetch the next two 32-bit offsets through r12; each is scaled by 4 and
   added to the store base r0 to form the store addresses r2 and lr. */
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vsub.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_o_loop
|
||||||
|
|
||||||
|
/* Exchange r7 with r9 and r8 with r10, using r2 as scratch. */
add r2, r7, #0
|
||||||
|
add r7, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r8, #0
|
||||||
|
add r8, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
/* r11 = loop count; the loop is skipped entirely when it is zero. */
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
cmp r11, #0
|
||||||
|
beq _neon_oo_o_loop_exit
|
||||||
|
/*
 * _neon_oo_o_loop: one pass per count in r11. Each pass loads one q register
 * (vld2.32, post-increment) from each of the eight read pointers, combines
 * them with adds and subtracts only (no multiplies), and writes four
 * q-register pairs through the two store addresses r2 and lr, which are
 * formed from the next two 32-bit offsets read through r12.
 */
_neon_oo_o_loop:
|
||||||
|
vld2.32 {q8}, [r6, :128]!
|
||||||
|
vld2.32 {q9}, [r5, :128]!
|
||||||
|
vld2.32 {q10}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vadd.f32 q11, q9, q8
|
||||||
|
vsub.f32 q8, q9, q8
|
||||||
|
vsub.f32 q9, q13, q10
|
||||||
|
vadd.f32 q12, q13, q10
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
subs r11, r11, #1
|
||||||
|
vld2.32 {q10}, [r7, :128]!
|
||||||
|
vld2.32 {q13}, [r9, :128]!
|
||||||
|
vsub.f32 q2, q12, q11
|
||||||
|
vadd.f32 d7, d19, d16 @
|
||||||
|
vsub.f32 d3, d19, d16 @
|
||||||
|
vsub.f32 d6, d18, d17 @
|
||||||
|
vadd.f32 d2, d18, d17 @
|
||||||
|
vld2.32 {q9}, [r8, :128]!
|
||||||
|
vld2.32 {q8}, [r10, :128]!
|
||||||
|
vadd.f32 q0, q12, q11
|
||||||
|
vadd.f32 q11, q13, q8
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 q8, q13, q8
|
||||||
|
vsub.f32 q9, q10, q9
|
||||||
|
vsub.f32 q6, q12, q11
|
||||||
|
vadd.f32 q4, q12, q11
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vadd.f32 d15, d19, d16 @
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vsub.f32 d11, d19, d16 @
|
||||||
|
vsub.f32 d14, d18, d17 @
|
||||||
|
vadd.f32 d10, d18, d17 @
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_oo_o_loop
|
||||||
|
/*
 * _neon_oo_o_loop_exit: straight-line section executed once (it contains no
 * branch). It reads once from each of the eight read pointers (vld1.32 from
 * r5 and r6, vld2.32 from the others, all with post-increment), stores two
 * 32-byte groups through r2 and q4-q7 (64 bytes) through lr.
 * r11 is loaded from offset 8 of the plan and used as the address of 16 bytes
 * that are loaded into d24/d25 and used as multiplier operands.
 * NOTE(review): offset 8 is not named by any comment here; presumably another
 * coefficient table -- confirm against the plan structure definition.
 */
_neon_oo_o_loop_exit:
|
||||||
|
|
||||||
|
ldr r11, [r1, #8]
|
||||||
|
vld1.32 {q8}, [r5, :128]!
|
||||||
|
vld1.32 {q10}, [r6, :128]!
|
||||||
|
vld2.32 {q11}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
/* Regroup the halves of q8 and q10 into q12 and q10 before the transposes. */
vorr d25, d17, d17
|
||||||
|
vorr d24, d20, d20
|
||||||
|
vorr d20, d16, d16
|
||||||
|
vsub.f32 q9, q13, q11
|
||||||
|
vadd.f32 q11, q13, q11
|
||||||
|
/* Next two 32-bit offsets read through r12 become the store addresses r2
   and lr (offset * 4 + r0). */
ldr r2, [r12], #4
|
||||||
|
vtrn.32 d24, d25
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 d20, d21
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q8, q10, q12
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vadd.f32 q10, q10, q12
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vsub.f32 d25, d19, d16 @
|
||||||
|
vadd.f32 d27, d19, d16 @
|
||||||
|
vsub.f32 q1, q11, q10
|
||||||
|
vadd.f32 d24, d18, d17 @
|
||||||
|
vsub.f32 d26, d18, d17 @
|
||||||
|
vtrn.32 q0, q12
|
||||||
|
vtrn.32 q1, q13
|
||||||
|
/* d24/d25 are reloaded here with the 16 bytes addressed by r11; the vmul
   group further down multiplies by them. */
vld1.32 {d24, d25}, [r11, :128]
|
||||||
|
vswp d1, d2
|
||||||
|
vst1.32 {q0, q1}, [r2, :128]!
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
vadd.f32 q1, q0, q15
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vsub.f32 q15, q0, q15
|
||||||
|
vsub.f32 q0, q14, q13
|
||||||
|
vadd.f32 q3, q14, q13
|
||||||
|
vadd.f32 q2, q3, q1
|
||||||
|
vsub.f32 d29, d1, d30 @
|
||||||
|
vadd.f32 d27, d1, d30 @
|
||||||
|
vsub.f32 q3, q3, q1
|
||||||
|
vadd.f32 d28, d0, d31 @
|
||||||
|
vsub.f32 d26, d0, d31 @
|
||||||
|
vtrn.32 q2, q14
|
||||||
|
vtrn.32 q3, q13
|
||||||
|
vswp d5, d6
|
||||||
|
vst1.32 {q2, q3}, [r2, :128]!
|
||||||
|
vtrn.32 q11, q9
|
||||||
|
vtrn.32 q10, q8
|
||||||
|
vmul.f32 d20, d18, d25
|
||||||
|
vmul.f32 d22, d19, d24
|
||||||
|
vmul.f32 d21, d19, d25
|
||||||
|
vmul.f32 d18, d18, d24
|
||||||
|
vmul.f32 d19, d16, d25
|
||||||
|
vmul.f32 d30, d17, d24
|
||||||
|
vmul.f32 d23, d16, d24
|
||||||
|
vmul.f32 d24, d17, d25
|
||||||
|
vadd.f32 d17, d22, d20
|
||||||
|
vsub.f32 d16, d18, d21
|
||||||
|
vsub.f32 d21, d30, d19
|
||||||
|
vadd.f32 d20, d24, d23
|
||||||
|
vadd.f32 q9, q8, q10
|
||||||
|
vsub.f32 q8, q8, q10
|
||||||
|
vadd.f32 q4, q14, q9
|
||||||
|
vsub.f32 q6, q14, q9
|
||||||
|
vsub.f32 d11, d27, d16 @
|
||||||
|
vadd.f32 d15, d27, d16 @
|
||||||
|
vadd.f32 d10, d26, d17 @
|
||||||
|
vsub.f32 d14, d26, d17 @
|
||||||
|
vswp d9, d10
|
||||||
|
vswp d13, d14
|
||||||
|
vstmia lr!, {q4-q7}
|
||||||
|
|
||||||
|
|
||||||
|
/* Exchange r3 with r7, r4 with r8, r5 with r9, r6 with r10, and finally r9
   with r10, each time using r2 as scratch. */
add r2, r3, #0
|
||||||
|
add r3, r7, #0
|
||||||
|
add r7, r2, #0
|
||||||
|
add r2, r4, #0
|
||||||
|
add r4, r8, #0
|
||||||
|
add r8, r2, #0
|
||||||
|
add r2, r5, #0
|
||||||
|
add r5, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r6, #0
|
||||||
|
add r6, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
add r2, r9, #0
|
||||||
|
add r9, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
/* r2 = word at offset 16 of the plan (labelled p->ee_ws at the function
   entry); r11 = loop count. A zero count skips straight to the epilogue. */
ldr r2, [r1, #16]
|
||||||
|
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
cmp r11, #0
|
||||||
|
beq _neon_ee_o_loop2_exit
|
||||||
|
|
||||||
|
/* Reload d16/d17 with the 16 bytes addressed by r2. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
/*
 * _neon_ee_o_loop2: the same instruction sequence as _neon_ee_o_loop, run
 * with the exchanged pointers and with r11 taken from p->i1.
 */
_neon_ee_o_loop2:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vsub.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 d31, d5, d2 @
|
||||||
|
vadd.f32 d28, d4, d3 @
|
||||||
|
vsub.f32 d30, d4, d3 @
|
||||||
|
vsub.f32 d5, d19, d14 @
|
||||||
|
vsub.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vadd.f32 d6, d30, d27 @
|
||||||
|
vadd.f32 d4, d18, d15 @
|
||||||
|
vadd.f32 d13, d19, d14 @
|
||||||
|
vsub.f32 d12, d18, d15 @
|
||||||
|
vadd.f32 d15, d31, d26 @
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vsub.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_o_loop2
|
||||||
|
/* Epilogue: restore d8-d15 and r4-r11; popping the saved lr into pc returns. */
_neon_ee_o_loop2_exit:
|
||||||
|
|
||||||
|
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
/*
 * neon_static_x4_f -- single-pass routine (no loop).
 *
 *   r0: base address. Four 32-byte groups are read from, and written back
 *       to, r0, r0 + 2*r1, r0 + 4*r1 and r0 + 6*r1.
 *   r1: byte step used to form those four addresses.
 *   r2: address of 32 bytes loaded into q2/q3 and used as multipliers.
 *
 * NOTE(review): the multiply/add pattern looks like a radix-4 butterfly with
 * complex twiddle multiplication on split real/imaginary vectors -- confirm
 * against the C caller before relying on that description.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_x4_f
|
||||||
|
_neon_static_x4_f:
|
||||||
|
#else
|
||||||
|
.globl neon_static_x4_f
|
||||||
|
neon_static_x4_f:
|
||||||
|
#endif
|
||||||
|
@ add r3, r0, #0
|
||||||
|
/* Save r4-r6 and lr, then d8-d15 (q4-q7 are written below). */
push {r4, r5, r6, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
|
||||||
|
/* Load the four data groups, forming r4, r5 and r6 on the way, then the
   multipliers from r2. */
vld1.32 {q8,q9}, [r0, :128]
|
||||||
|
add r4, r0, r1, lsl #1
|
||||||
|
vld1.32 {q10,q11}, [r4, :128]
|
||||||
|
add r5, r0, r1, lsl #2
|
||||||
|
vld1.32 {q12,q13}, [r5, :128]
|
||||||
|
add r6, r4, r1, lsl #2
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q2,q3}, [r2, :128]
|
||||||
|
|
||||||
|
/* Eight element-wise products of the groups loaded from r5 (q12,q13) and r6
   (q14,q15) with q2/q3; the four add/sub instructions after them combine
   the products pairwise. */
vmul.f32 q0, q13, q3
|
||||||
|
vmul.f32 q5, q12, q2
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q4, q14, q3
|
||||||
|
vmul.f32 q14, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vmul.f32 q12, q15, q3
|
||||||
|
vmul.f32 q2, q15, q2
|
||||||
|
vsub.f32 q0, q5, q0
|
||||||
|
vadd.f32 q13, q13, q14
|
||||||
|
vadd.f32 q12, q12, q1
|
||||||
|
vsub.f32 q1, q2, q4
|
||||||
|
vadd.f32 q15, q0, q12
|
||||||
|
vsub.f32 q12, q0, q12
|
||||||
|
vadd.f32 q14, q13, q1
|
||||||
|
vsub.f32 q13, q13, q1
|
||||||
|
/* Combine with the unmultiplied groups from r0 (q8,q9) and r4 (q10,q11);
   each result pair is stored back to the address its inputs came from. */
vadd.f32 q0, q8, q15
|
||||||
|
vadd.f32 q1, q9, q14
|
||||||
|
vadd.f32 q2, q10, q13 @
|
||||||
|
vsub.f32 q4, q8, q15
|
||||||
|
vsub.f32 q3, q11, q12 @
|
||||||
|
vst1.32 {q0,q1}, [r0, :128]
|
||||||
|
vsub.f32 q5, q9, q14
|
||||||
|
vsub.f32 q6, q10, q13 @
|
||||||
|
vadd.f32 q7, q11, q12 @
|
||||||
|
vst1.32 {q2,q3}, [r4, :128]
|
||||||
|
vst1.32 {q4,q5}, [r5, :128]
|
||||||
|
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
/* Restore d8-d15 and r4-r6; popping the saved lr into pc returns. */
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, pc}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * neon_static_x8_f -- in-place pass over eight equally spaced data streams.
 *
 *   r0: base address of the data.
 *   r1: byte step between streams; data0..data7 are r0 + k*r1 for k = 0..7.
 *   r2: address of a table (copied to r12, "LUT"); 96 bytes of it are
 *       consumed per loop iteration.
 *
 * The loop counter r11 starts at -(r1 >> 5) and is incremented until it
 * reaches zero, so the loop runs r1 >> 5 times and every data pointer
 * advances by 32 bytes per iteration.
 * NOTE(review): with r1 below 32 the counter would start at zero and the
 * adds/bne pair would not terminate after one pass; callers presumably never
 * pass such a value -- confirm.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_x8_f
|
||||||
|
_neon_static_x8_f:
|
||||||
|
#else
|
||||||
|
.globl neon_static_x8_f
|
||||||
|
neon_static_x8_f:
|
||||||
|
#endif
|
||||||
|
/* Save r4-r11 and lr, then d8-d15 (q4-q7 are written in the loop). */
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
mov r11, #0
|
||||||
|
add r3, r0, #0 @ data0
|
||||||
|
add r5, r0, r1, lsl #1 @ data2
|
||||||
|
add r4, r0, r1 @ data1
|
||||||
|
add r7, r5, r1, lsl #1 @ data4
|
||||||
|
add r6, r5, r1 @ data3
|
||||||
|
add r9, r7, r1, lsl #1 @ data6
|
||||||
|
add r8, r7, r1 @ data5
|
||||||
|
add r10, r9, r1 @ data7
|
||||||
|
add r12, r2, #0 @ LUT
|
||||||
|
|
||||||
|
/* r11 = 0 - (r1 >> 5): negative iteration count, counted up to zero. */
sub r11, r11, r1, lsr #5
|
||||||
|
neon_x8_loop:
|
||||||
|
/* First 32 bytes of table data for this iteration. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q10,q11}, [r5, :128]
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
adds r11, r11, #1
|
||||||
|
vmul.f32 q12, q15, q2
|
||||||
|
vmul.f32 q8, q14, q3
|
||||||
|
vmul.f32 q13, q14, q2
|
||||||
|
vmul.f32 q9, q10, q3
|
||||||
|
vmul.f32 q1, q10, q2
|
||||||
|
vmul.f32 q0, q11, q2
|
||||||
|
vmul.f32 q14, q11, q3
|
||||||
|
vmul.f32 q15, q15, q3
|
||||||
|
/* Second 32 bytes of table data. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q10, q12, q8
|
||||||
|
vadd.f32 q11, q0, q9
|
||||||
|
vadd.f32 q8, q15, q13
|
||||||
|
vld1.32 {q12,q13}, [r4, :128]
|
||||||
|
vsub.f32 q9, q1, q14
|
||||||
|
vsub.f32 q15, q11, q10
|
||||||
|
vsub.f32 q14, q9, q8
|
||||||
|
vadd.f32 q4, q12, q15 @
|
||||||
|
vsub.f32 q6, q12, q15 @
|
||||||
|
vsub.f32 q5, q13, q14 @
|
||||||
|
vadd.f32 q7, q13, q14 @
|
||||||
|
vld1.32 {q14,q15}, [r9, :128]
|
||||||
|
vld1.32 {q12,q13}, [r7, :128]
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q0, q14, q3
|
||||||
|
/* Intermediate results for data1 and data3 are written back without
   advancing r4/r6; they are read again and finalised near the end of the
   iteration. */
vst1.32 {q4,q5}, [r4, :128]
|
||||||
|
vmul.f32 q14, q15, q3
|
||||||
|
vmul.f32 q4, q15, q2
|
||||||
|
vadd.f32 q15, q9, q8
|
||||||
|
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
vmul.f32 q8, q12, q3
|
||||||
|
vmul.f32 q5, q13, q3
|
||||||
|
vmul.f32 q12, q12, q2
|
||||||
|
vmul.f32 q9, q13, q2
|
||||||
|
vadd.f32 q14, q14, q1
|
||||||
|
vsub.f32 q13, q4, q0
|
||||||
|
vadd.f32 q0, q9, q8
|
||||||
|
vld1.32 {q8,q9}, [r3, :128]
|
||||||
|
vadd.f32 q1, q11, q10
|
||||||
|
vsub.f32 q12, q12, q5
|
||||||
|
vadd.f32 q11, q8, q15
|
||||||
|
vsub.f32 q8, q8, q15
|
||||||
|
vadd.f32 q2, q12, q14
|
||||||
|
vsub.f32 q10, q0, q13
|
||||||
|
vadd.f32 q15, q0, q13
|
||||||
|
vadd.f32 q13, q9, q1
|
||||||
|
vsub.f32 q9, q9, q1
|
||||||
|
vsub.f32 q12, q12, q14
|
||||||
|
vadd.f32 q0, q11, q2
|
||||||
|
vadd.f32 q1, q13, q15
|
||||||
|
vsub.f32 q4, q11, q2
|
||||||
|
vadd.f32 q2, q8, q10 @
|
||||||
|
vsub.f32 q3, q9, q12 @
|
||||||
|
/* Final stores for data0, data2, data4 and data6 (post-increment by 32). */
vst1.32 {q0,q1}, [r3, :128]!
|
||||||
|
vsub.f32 q5, q13, q15
|
||||||
|
vld1.32 {q14,q15}, [r10, :128]
|
||||||
|
vadd.f32 q7, q9, q12 @
|
||||||
|
vld1.32 {q12,q13}, [r8, :128]
|
||||||
|
vst1.32 {q2,q3}, [r5, :128]!
|
||||||
|
/* Third 32 bytes of table data. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q6, q8, q10 @
|
||||||
|
vmul.f32 q8, q14, q2
|
||||||
|
vst1.32 {q4,q5}, [r7, :128]!
|
||||||
|
vmul.f32 q10, q15, q3
|
||||||
|
vmul.f32 q9, q13, q3
|
||||||
|
vmul.f32 q11, q12, q2
|
||||||
|
vmul.f32 q14, q14, q3
|
||||||
|
vst1.32 {q6,q7}, [r9, :128]!
|
||||||
|
vmul.f32 q15, q15, q2
|
||||||
|
vmul.f32 q12, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vadd.f32 q10, q10, q8
|
||||||
|
vsub.f32 q11, q11, q9
|
||||||
|
vld1.32 {q8,q9}, [r4, :128]
|
||||||
|
vsub.f32 q14, q15, q14
|
||||||
|
vadd.f32 q15, q13, q12
|
||||||
|
vadd.f32 q13, q11, q10
|
||||||
|
vadd.f32 q12, q15, q14
|
||||||
|
vsub.f32 q15, q15, q14
|
||||||
|
vsub.f32 q14, q11, q10
|
||||||
|
vld1.32 {q10,q11}, [r6, :128]
|
||||||
|
vadd.f32 q0, q8, q13
|
||||||
|
vadd.f32 q1, q9, q12
|
||||||
|
vadd.f32 q2, q10, q15 @
|
||||||
|
vsub.f32 q3, q11, q14 @
|
||||||
|
vsub.f32 q4, q8, q13
|
||||||
|
/* Final stores for data1, data3, data5 and data7 (post-increment by 32). */
vst1.32 {q0,q1}, [r4, :128]!
|
||||||
|
vsub.f32 q5, q9, q12
|
||||||
|
vsub.f32 q6, q10, q15 @
|
||||||
|
vst1.32 {q2,q3}, [r6, :128]!
|
||||||
|
vadd.f32 q7, q11, q14 @
|
||||||
|
vst1.32 {q4,q5}, [r8, :128]!
|
||||||
|
vst1.32 {q6,q7}, [r10, :128]!
|
||||||
|
bne neon_x8_loop
|
||||||
|
|
||||||
|
/* Restore d8-d15 and r4-r11; popping the saved lr into pc returns. */
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
/*
 * neon_static_x8_t_f -- in-place pass over eight equally spaced data streams.
 * The instruction sequence matches neon_static_x8_f except that the eight
 * final, post-incrementing stores use vst2.32 (which interleaves the two q
 * registers element by element) where that routine uses vst1.32.
 *
 *   r0: base address of the data.
 *   r1: byte step between streams; data0..data7 are r0 + k*r1 for k = 0..7.
 *   r2: address of a table (copied to r12, "LUT"); 96 bytes of it are
 *       consumed per loop iteration.
 *
 * The loop counter r11 starts at -(r1 >> 5) and is incremented until it
 * reaches zero, so the loop runs r1 >> 5 times and every data pointer
 * advances by 32 bytes per iteration.
 * NOTE(review): with r1 below 32 the counter would start at zero and the
 * adds/bne pair would not terminate after one pass; callers presumably never
 * pass such a value -- confirm.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_x8_t_f
|
||||||
|
_neon_static_x8_t_f:
|
||||||
|
#else
|
||||||
|
.globl neon_static_x8_t_f
|
||||||
|
neon_static_x8_t_f:
|
||||||
|
#endif
|
||||||
|
/* Save r4-r11 and lr, then d8-d15 (q4-q7 are written in the loop). */
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
mov r11, #0
|
||||||
|
add r3, r0, #0 @ data0
|
||||||
|
add r5, r0, r1, lsl #1 @ data2
|
||||||
|
add r4, r0, r1 @ data1
|
||||||
|
add r7, r5, r1, lsl #1 @ data4
|
||||||
|
add r6, r5, r1 @ data3
|
||||||
|
add r9, r7, r1, lsl #1 @ data6
|
||||||
|
add r8, r7, r1 @ data5
|
||||||
|
add r10, r9, r1 @ data7
|
||||||
|
add r12, r2, #0 @ LUT
|
||||||
|
|
||||||
|
/* r11 = 0 - (r1 >> 5): negative iteration count, counted up to zero. */
sub r11, r11, r1, lsr #5
|
||||||
|
neon_x8_t_loop:
|
||||||
|
/* First 32 bytes of table data for this iteration. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vld1.32 {q14,q15}, [r6, :128]
|
||||||
|
vld1.32 {q10,q11}, [r5, :128]
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
adds r11, r11, #1
|
||||||
|
vmul.f32 q12, q15, q2
|
||||||
|
vmul.f32 q8, q14, q3
|
||||||
|
vmul.f32 q13, q14, q2
|
||||||
|
vmul.f32 q9, q10, q3
|
||||||
|
vmul.f32 q1, q10, q2
|
||||||
|
vmul.f32 q0, q11, q2
|
||||||
|
vmul.f32 q14, q11, q3
|
||||||
|
vmul.f32 q15, q15, q3
|
||||||
|
/* Second 32 bytes of table data. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q10, q12, q8
|
||||||
|
vadd.f32 q11, q0, q9
|
||||||
|
vadd.f32 q8, q15, q13
|
||||||
|
vld1.32 {q12,q13}, [r4, :128]
|
||||||
|
vsub.f32 q9, q1, q14
|
||||||
|
vsub.f32 q15, q11, q10
|
||||||
|
vsub.f32 q14, q9, q8
|
||||||
|
vadd.f32 q4, q12, q15 @
|
||||||
|
vsub.f32 q6, q12, q15 @
|
||||||
|
vsub.f32 q5, q13, q14 @
|
||||||
|
vadd.f32 q7, q13, q14 @
|
||||||
|
vld1.32 {q14,q15}, [r9, :128]
|
||||||
|
vld1.32 {q12,q13}, [r7, :128]
|
||||||
|
vmul.f32 q1, q14, q2
|
||||||
|
vmul.f32 q0, q14, q3
|
||||||
|
/* Intermediate results for data1 and data3 are written back with vst1.32 and
   without advancing r4/r6; they are read again and finalised near the end
   of the iteration. */
vst1.32 {q4,q5}, [r4, :128]
|
||||||
|
vmul.f32 q14, q15, q3
|
||||||
|
vmul.f32 q4, q15, q2
|
||||||
|
vadd.f32 q15, q9, q8
|
||||||
|
vst1.32 {q6,q7}, [r6, :128]
|
||||||
|
vmul.f32 q8, q12, q3
|
||||||
|
vmul.f32 q5, q13, q3
|
||||||
|
vmul.f32 q12, q12, q2
|
||||||
|
vmul.f32 q9, q13, q2
|
||||||
|
vadd.f32 q14, q14, q1
|
||||||
|
vsub.f32 q13, q4, q0
|
||||||
|
vadd.f32 q0, q9, q8
|
||||||
|
vld1.32 {q8,q9}, [r3, :128]
|
||||||
|
vadd.f32 q1, q11, q10
|
||||||
|
vsub.f32 q12, q12, q5
|
||||||
|
vadd.f32 q11, q8, q15
|
||||||
|
vsub.f32 q8, q8, q15
|
||||||
|
vadd.f32 q2, q12, q14
|
||||||
|
vsub.f32 q10, q0, q13
|
||||||
|
vadd.f32 q15, q0, q13
|
||||||
|
vadd.f32 q13, q9, q1
|
||||||
|
vsub.f32 q9, q9, q1
|
||||||
|
vsub.f32 q12, q12, q14
|
||||||
|
vadd.f32 q0, q11, q2
|
||||||
|
vadd.f32 q1, q13, q15
|
||||||
|
vsub.f32 q4, q11, q2
|
||||||
|
vadd.f32 q2, q8, q10 @
|
||||||
|
vsub.f32 q3, q9, q12 @
|
||||||
|
/* Final interleaving stores for data0, data2, data4 and data6
   (post-increment by 32). */
vst2.32 {q0,q1}, [r3, :128]!
|
||||||
|
vsub.f32 q5, q13, q15
|
||||||
|
vld1.32 {q14,q15}, [r10, :128]
|
||||||
|
vadd.f32 q7, q9, q12 @
|
||||||
|
vld1.32 {q12,q13}, [r8, :128]
|
||||||
|
vst2.32 {q2,q3}, [r5, :128]!
|
||||||
|
/* Third 32 bytes of table data. */
vld1.32 {q2,q3}, [r12, :128]!
|
||||||
|
vsub.f32 q6, q8, q10 @
|
||||||
|
vmul.f32 q8, q14, q2
|
||||||
|
vst2.32 {q4,q5}, [r7, :128]!
|
||||||
|
vmul.f32 q10, q15, q3
|
||||||
|
vmul.f32 q9, q13, q3
|
||||||
|
vmul.f32 q11, q12, q2
|
||||||
|
vmul.f32 q14, q14, q3
|
||||||
|
vst2.32 {q6,q7}, [r9, :128]!
|
||||||
|
vmul.f32 q15, q15, q2
|
||||||
|
vmul.f32 q12, q12, q3
|
||||||
|
vmul.f32 q13, q13, q2
|
||||||
|
vadd.f32 q10, q10, q8
|
||||||
|
vsub.f32 q11, q11, q9
|
||||||
|
vld1.32 {q8,q9}, [r4, :128]
|
||||||
|
vsub.f32 q14, q15, q14
|
||||||
|
vadd.f32 q15, q13, q12
|
||||||
|
vadd.f32 q13, q11, q10
|
||||||
|
vadd.f32 q12, q15, q14
|
||||||
|
vsub.f32 q15, q15, q14
|
||||||
|
vsub.f32 q14, q11, q10
|
||||||
|
vld1.32 {q10,q11}, [r6, :128]
|
||||||
|
vadd.f32 q0, q8, q13
|
||||||
|
vadd.f32 q1, q9, q12
|
||||||
|
vadd.f32 q2, q10, q15 @
|
||||||
|
vsub.f32 q3, q11, q14 @
|
||||||
|
vsub.f32 q4, q8, q13
|
||||||
|
/* Final interleaving stores for data1, data3, data5 and data7
   (post-increment by 32). */
vst2.32 {q0,q1}, [r4, :128]!
|
||||||
|
vsub.f32 q5, q9, q12
|
||||||
|
vsub.f32 q6, q10, q15 @
|
||||||
|
vst2.32 {q2,q3}, [r6, :128]!
|
||||||
|
vadd.f32 q7, q11, q14 @
|
||||||
|
vst2.32 {q4,q5}, [r8, :128]!
|
||||||
|
vst2.32 {q6,q7}, [r10, :128]!
|
||||||
|
bne neon_x8_t_loop
|
||||||
|
|
||||||
|
/* Restore d8-d15 and r4-r11; popping the saved lr into pc returns. */
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,955 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * neon_static_e_i -- entry point, prologue and pointer setup.
 *
 * On entry r0 points at a plan structure (called "p" in the comments below),
 * r1 is the base address from which eight read pointers are derived, and r2
 * is copied into r0 to serve as the base address for the stores made later
 * in the routine.
 * NOTE(review): the _e_i suffix presumably selects an "even, inverse"
 * variant; confirm against the C caller.
 */
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _neon_static_e_i
|
||||||
|
_neon_static_e_i:
|
||||||
|
#else
|
||||||
|
.globl neon_static_e_i
|
||||||
|
neon_static_e_i:
|
||||||
|
#endif
|
||||||
|
/* Save r4-r11 and lr, then d8-d15; the epilogue restores them and returns
   by popping the saved lr into pc. */
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
|
||||||
|
vstmdb sp!, {d8-d15}
|
||||||
|
/* lr = byte step between consecutive read pointers, loaded from offset 40 of
   the plan. The eight pointers are r1 + k*lr for k = 0..7, held in the
   register order r3, r7, r5, r10, r4, r8, r6, r9. */
ldr lr, [r0, #40] @ this is p->N
|
||||||
|
add r3, r1, #0
|
||||||
|
add r7, r1, lr
|
||||||
|
add r5, r7, lr
|
||||||
|
add r10, r5, lr
|
||||||
|
add r4, r10, lr
|
||||||
|
add r8, r4, lr
|
||||||
|
add r6, r8, lr
|
||||||
|
add r9, r6, lr
|
||||||
|
/* r12 = first word of the plan; later code reads 32-bit values through it
   with post-increment. */
ldr r12, [r0]
|
||||||
|
/* From here on r1 holds the plan pointer and r0 the store base (incoming r2). */
add r1, r0, #0
|
||||||
|
add r0, r2, #0
|
||||||
|
ldr r2, [r1, #16] @ this is p->ee_ws
|
||||||
|
ldr r11, [r1, #28] @ this is p->i0
|
||||||
|
|
||||||
|
/* Load 16 bytes from the p->ee_ws address into d16/d17; the loop that follows
   uses them as multiplier operands. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
/*
 * _neon_ee_loop: repeats until r11 (loaded from p->i0 before the loop) counts
 * down to zero. There is no zero check ahead of the loop, so the body always
 * executes at least once.
 * Each pass loads one q register (de-interleaved by vld2.32) from each of the
 * eight read pointers r3-r10 with post-increment, multiplies some of the
 * differences by d16/d17, combines the results with adds and subtracts, and
 * writes four q-register pairs through two store addresses.
 */
_neon_ee_loop:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
/* Decrement the loop counter; no instruction between here and the closing
   bne writes the condition flags. */
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vadd.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vsub.f32 d31, d5, d2 @
|
||||||
|
vsub.f32 d28, d4, d3 @
|
||||||
|
vadd.f32 d30, d4, d3 @
|
||||||
|
vadd.f32 d5, d19, d14 @
|
||||||
|
vadd.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vsub.f32 d6, d30, d27 @
|
||||||
|
vsub.f32 d4, d18, d15 @
|
||||||
|
vsub.f32 d13, d19, d14 @
|
||||||
|
vadd.f32 d12, d18, d15 @
|
||||||
|
vsub.f32 d15, d31, d26 @
|
||||||
|
/* Fetch the next two 32-bit offsets through r12; each is scaled by 4 and
   added to the store base r0 to form the store addresses r2 and lr. */
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vadd.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_loop
|
||||||
|
|
||||||
|
/*
 * Straight-line section of neon_static_e_i, executed once after
 * _neon_ee_loop finishes (it contains no branch). It reads one q register
 * from each of the eight read pointers (vld2.32, post-increment), stores two
 * 32-byte groups through lr and q0-q3 (64 bytes) through r2.
 * r11 is loaded from offset 12 of the plan and used as the address of 16
 * bytes that are loaded into d20/d21 and used as multiplier operands.
 * NOTE(review): offset 12 is not named by any comment here; presumably
 * another coefficient table -- confirm against the plan structure definition.
 */
ldr r11, [r1, #12]
|
||||||
|
vld2.32 {q9}, [r5, :128]! @tag2
|
||||||
|
vld2.32 {q13}, [r3, :128]! @tag0
|
||||||
|
vld2.32 {q12}, [r4, :128]! @tag1
|
||||||
|
vld2.32 {q0}, [r7, :128]! @tag4
|
||||||
|
vsub.f32 q11, q13, q12
|
||||||
|
vld2.32 {q8}, [r6, :128]! @tag3
|
||||||
|
vadd.f32 q12, q13, q12
|
||||||
|
vsub.f32 q10, q9, q8
|
||||||
|
vadd.f32 q8, q9, q8
|
||||||
|
vadd.f32 q9, q12, q8
|
||||||
|
vadd.f32 d9, d23, d20 @
|
||||||
|
vsub.f32 d11, d23, d20 @
|
||||||
|
vsub.f32 q8, q12, q8
|
||||||
|
vsub.f32 d8, d22, d21 @
|
||||||
|
vadd.f32 d10, d22, d21 @
|
||||||
|
/* Next two 32-bit offsets read through r12 become the store addresses r2
   and lr (offset * 4 + r0); d20/d21 are loaded from the r11 address in
   between. */
ldr r2, [r12], #4
|
||||||
|
vld1.32 {d20, d21}, [r11, :128]
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q9, q4
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q8, q5
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vswp d9,d10
|
||||||
|
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
|
||||||
|
vld2.32 {q13}, [r10, :128]! @tag7
|
||||||
|
vld2.32 {q15}, [r9, :128]! @tag6
|
||||||
|
vld2.32 {q11}, [r8, :128]! @tag5
|
||||||
|
vsub.f32 q14, q15, q13
|
||||||
|
vsub.f32 q12, q0, q11
|
||||||
|
vadd.f32 q11, q0, q11
|
||||||
|
vadd.f32 q13, q15, q13
|
||||||
|
vadd.f32 d13, d29, d24 @
|
||||||
|
vadd.f32 q15, q13, q11
|
||||||
|
vsub.f32 d12, d28, d25 @
|
||||||
|
vsub.f32 d15, d29, d24 @
|
||||||
|
vadd.f32 d14, d28, d25 @
|
||||||
|
vtrn.32 q15, q6
|
||||||
|
vsub.f32 q15, q13, q11
|
||||||
|
vtrn.32 q15, q7
|
||||||
|
vswp d13, d14
|
||||||
|
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
|
||||||
|
vtrn.32 q13, q14
|
||||||
|
vtrn.32 q11, q12
|
||||||
|
/* Eight products with d20/d21, combined pairwise by the four add/sub
   instructions that follow them. */
vmul.f32 d24, d26, d21
|
||||||
|
vmul.f32 d28, d27, d20
|
||||||
|
vmul.f32 d25, d26, d20
|
||||||
|
vmul.f32 d26, d27, d21
|
||||||
|
vmul.f32 d27, d22, d21
|
||||||
|
vmul.f32 d30, d23, d20
|
||||||
|
vmul.f32 d29, d23, d21
|
||||||
|
vmul.f32 d22, d22, d20
|
||||||
|
vsub.f32 d21, d28, d24
|
||||||
|
vadd.f32 d20, d26, d25
|
||||||
|
vadd.f32 d25, d30, d27
|
||||||
|
vsub.f32 d24, d22, d29
|
||||||
|
vadd.f32 q11, q12, q10
|
||||||
|
vsub.f32 q10, q12, q10
|
||||||
|
vadd.f32 q0, q9, q11
|
||||||
|
vsub.f32 q2, q9, q11
|
||||||
|
vadd.f32 d3, d17, d20 @
|
||||||
|
vsub.f32 d7, d17, d20 @
|
||||||
|
vsub.f32 d2, d16, d21 @
|
||||||
|
vadd.f32 d6, d16, d21 @
|
||||||
|
vswp d1, d2
|
||||||
|
vswp d5, d6
|
||||||
|
vstmia r2!, {q0-q3}
|
||||||
|
|
||||||
|
/* Exchange r7 with r9 and r8 with r10, using r2 as scratch. */
add r2, r7, #0
|
||||||
|
add r7, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r8, #0
|
||||||
|
add r8, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
/* r11 = loop count; the loop is skipped entirely when it is zero. */
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
cmp r11, #0
|
||||||
|
beq _neon_oo_loop_exit
|
||||||
|
/*
 * _neon_oo_loop: one pass per count in r11. Each pass loads one q register
 * (vld2.32, post-increment) from each of the eight read pointers, combines
 * them with adds and subtracts only (no multiplies), and writes four
 * q-register pairs through the two store addresses r2 and lr, which are
 * formed from the next two 32-bit offsets read through r12.
 */
_neon_oo_loop:
|
||||||
|
vld2.32 {q8}, [r6, :128]!
|
||||||
|
vld2.32 {q9}, [r5, :128]!
|
||||||
|
vld2.32 {q10}, [r4, :128]!
|
||||||
|
vld2.32 {q13}, [r3, :128]!
|
||||||
|
vadd.f32 q11, q9, q8
|
||||||
|
vsub.f32 q8, q9, q8
|
||||||
|
vsub.f32 q9, q13, q10
|
||||||
|
vadd.f32 q12, q13, q10
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
subs r11, r11, #1
|
||||||
|
vld2.32 {q10}, [r7, :128]!
|
||||||
|
vld2.32 {q13}, [r9, :128]!
|
||||||
|
vsub.f32 q2, q12, q11
|
||||||
|
vsub.f32 d7, d19, d16 @
|
||||||
|
vadd.f32 d3, d19, d16 @
|
||||||
|
vadd.f32 d6, d18, d17 @
|
||||||
|
vsub.f32 d2, d18, d17 @
|
||||||
|
vld2.32 {q9}, [r8, :128]!
|
||||||
|
vld2.32 {q8}, [r10, :128]!
|
||||||
|
vadd.f32 q0, q12, q11
|
||||||
|
vadd.f32 q11, q13, q8
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 q8, q13, q8
|
||||||
|
vsub.f32 q9, q10, q9
|
||||||
|
vsub.f32 q6, q12, q11
|
||||||
|
vadd.f32 q4, q12, q11
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vsub.f32 d15, d19, d16 @
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vadd.f32 d11, d19, d16 @
|
||||||
|
vadd.f32 d14, d18, d17 @
|
||||||
|
vsub.f32 d10, d18, d17 @
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_oo_loop
|
||||||
|
/*
 * _neon_oo_loop_exit: final stage of neon_static_e_i. It exchanges the read
 * pointers, runs _neon_ee_loop2 p->i1 times (skipping it when the count is
 * zero), then restores the saved registers and returns.
 */
_neon_oo_loop_exit:
|
||||||
|
|
||||||
|
/* Exchange r3 with r7, r4 with r8, r5 with r9, r6 with r10, and finally r9
   with r10, each time using r2 as scratch. */
add r2, r3, #0
|
||||||
|
add r3, r7, #0
|
||||||
|
add r7, r2, #0
|
||||||
|
add r2, r4, #0
|
||||||
|
add r4, r8, #0
|
||||||
|
add r8, r2, #0
|
||||||
|
add r2, r5, #0
|
||||||
|
add r5, r9, #0
|
||||||
|
add r9, r2, #0
|
||||||
|
add r2, r6, #0
|
||||||
|
add r6, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
add r2, r9, #0
|
||||||
|
add r9, r10, #0
|
||||||
|
add r10, r2, #0
|
||||||
|
/* r2 = word at offset 16 of the plan (labelled p->ee_ws at the function
   entry); r11 = loop count. A zero count skips straight to the epilogue. */
ldr r2, [r1, #16]
|
||||||
|
ldr r11, [r1, #32] @ this is p->i1
|
||||||
|
cmp r11, #0
|
||||||
|
beq _neon_ee_loop2_exit
|
||||||
|
|
||||||
|
/* Reload d16/d17 with the 16 bytes addressed by r2. */
vld1.32 {d16, d17}, [r2, :128]
|
||||||
|
/*
 * _neon_ee_loop2: the same instruction sequence as _neon_ee_loop, run with
 * the exchanged pointers and with r11 taken from p->i1.
 */
_neon_ee_loop2:
|
||||||
|
vld2.32 {q15}, [r10, :128]!
|
||||||
|
vld2.32 {q13}, [r8, :128]!
|
||||||
|
vld2.32 {q14}, [r7, :128]!
|
||||||
|
vld2.32 {q9}, [r4, :128]!
|
||||||
|
vld2.32 {q10}, [r3, :128]!
|
||||||
|
vld2.32 {q11}, [r6, :128]!
|
||||||
|
vld2.32 {q12}, [r5, :128]!
|
||||||
|
vsub.f32 q1, q14, q13
|
||||||
|
vld2.32 {q0}, [r9, :128]!
|
||||||
|
/* Counter update; the flags it sets are consumed by the bne at the end. */
subs r11, r11, #1
|
||||||
|
vsub.f32 q2, q0, q15
|
||||||
|
vadd.f32 q0, q0, q15
|
||||||
|
vmul.f32 d10, d2, d17
|
||||||
|
vmul.f32 d11, d3, d16
|
||||||
|
vmul.f32 d12, d3, d17
|
||||||
|
vmul.f32 d6, d4, d17
|
||||||
|
vmul.f32 d7, d5, d16
|
||||||
|
vmul.f32 d8, d4, d16
|
||||||
|
vmul.f32 d9, d5, d17
|
||||||
|
vmul.f32 d13, d2, d16
|
||||||
|
vsub.f32 d7, d7, d6
|
||||||
|
vadd.f32 d11, d11, d10
|
||||||
|
vsub.f32 q1, q12, q11
|
||||||
|
vsub.f32 q2, q10, q9
|
||||||
|
vadd.f32 d6, d9, d8
|
||||||
|
vadd.f32 q4, q14, q13
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vadd.f32 q12, q10, q9
|
||||||
|
vsub.f32 d10, d13, d12
|
||||||
|
vsub.f32 q7, q4, q0
|
||||||
|
vsub.f32 q9, q12, q11
|
||||||
|
vsub.f32 q13, q5, q3
|
||||||
|
vadd.f32 d29, d5, d2 @
|
||||||
|
vadd.f32 q5, q5, q3
|
||||||
|
vadd.f32 q10, q4, q0
|
||||||
|
vadd.f32 q11, q12, q11
|
||||||
|
vsub.f32 d31, d5, d2 @
|
||||||
|
vsub.f32 d28, d4, d3 @
|
||||||
|
vadd.f32 d30, d4, d3 @
|
||||||
|
vadd.f32 d5, d19, d14 @
|
||||||
|
vadd.f32 d7, d31, d26 @
|
||||||
|
vadd.f32 q1, q14, q5
|
||||||
|
vadd.f32 q0, q11, q10
|
||||||
|
vsub.f32 d6, d30, d27 @
|
||||||
|
vsub.f32 d4, d18, d15 @
|
||||||
|
vsub.f32 d13, d19, d14 @
|
||||||
|
vadd.f32 d12, d18, d15 @
|
||||||
|
vsub.f32 d15, d31, d26 @
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
vtrn.32 q1, q3
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
vtrn.32 q0, q2
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vsub.f32 q4, q11, q10
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
vsub.f32 q5, q14, q5
|
||||||
|
vadd.f32 d14, d30, d27 @
|
||||||
|
vst2.32 {q0,q1}, [r2, :128]!
|
||||||
|
vst2.32 {q2,q3}, [lr, :128]!
|
||||||
|
vtrn.32 q4, q6
|
||||||
|
vtrn.32 q5, q7
|
||||||
|
vst2.32 {q4,q5}, [r2, :128]!
|
||||||
|
vst2.32 {q6,q7}, [lr, :128]!
|
||||||
|
bne _neon_ee_loop2
|
||||||
|
/* Epilogue: restore d8-d15 and r4-r11; popping the saved lr into pc returns. */
_neon_ee_loop2_exit:
|
||||||
|
|
||||||
|
vldmia sp!, {d8-d15}
|
||||||
|
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ neon_static_o_i
@
@ Register use on entry, as established by the code below:
@   r0  pointer to a structure (called p in the comments); words are read
@       from byte offsets 0, 8, 16, 28, 32 and 40
@   r1  base address of the input data (only ever loaded from)
@   r2  base address of the output data (only ever stored to)
@
@ The word at offset 40 (commented as p->N) is added unscaled to r1 seven
@ times, giving eight input stream pointers in r3, r7, r5, r10, r4, r8, r6
@ and r9.  The word at offset 0 is a pointer (kept in r12) to a table of
@ 32-bit entries; every block of output consumes two entries, each of which
@ is shifted left by two and added to the output base.
@
@ Phases:
@   1. _neon_ee_o_loop   runs p->i0 times (no zero check before entry)
@   2. _neon_oo_o_loop   runs p->i1 times, skipped when p->i1 is zero
@   3. one straight-line block that also uses four floats read through the
@      pointer stored at offset 8
@   4. _neon_ee_o_loop2  runs p->i1 times, skipped when p->i1 is zero
@
@ d8-d15 and r4-r11 are saved on entry and restored on exit.
@
@ NOTE(review): the trailing empty "@" markers on some vadd/vsub lines are
@ kept exactly as found; presumably they tag the instructions whose add/sub
@ sense depends on the transform direction -- confirm against the sibling
@ routines before relying on that.
.align 4
#ifdef __APPLE__
.globl _neon_static_o_i
_neon_static_o_i:
#else
.globl neon_static_o_i
neon_static_o_i:
#endif
	push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	vstmdb sp!, {d8-d15}
	ldr lr, [r0, #40] @ this is p->N
	@ eight input stream pointers, each lr bytes after the previous one
	add r3, r1, #0
	add r7, r1, lr
	add r5, r7, lr
	add r10, r5, lr
	add r4, r10, lr
	add r8, r4, lr
	add r6, r8, lr
	add r9, r6, lr
	ldr r12, [r0]	@ output offset table, read two words at a time
	add r1, r0, #0	@ r1 = p from here on
	add r0, r2, #0	@ r0 = output base from here on
	ldr r2, [r1, #16] @ this is p->ee_ws
	ldr r11, [r1, #28] @ this is p->i0

	vld1.32 {d16, d17}, [r2, :128]
_neon_ee_o_loop:
	vld2.32 {q15}, [r10, :128]!
	vld2.32 {q13}, [r8, :128]!
	vld2.32 {q14}, [r7, :128]!
	vld2.32 {q9}, [r4, :128]!
	vld2.32 {q10}, [r3, :128]!
	vld2.32 {q11}, [r6, :128]!
	vld2.32 {q12}, [r5, :128]!
	vsub.f32 q1, q14, q13
	vld2.32 {q0}, [r9, :128]!
	subs r11, r11, #1	@ flags consumed by the bne at the loop end
	vsub.f32 q2, q0, q15
	vadd.f32 q0, q0, q15
	vmul.f32 d10, d2, d17
	vmul.f32 d11, d3, d16
	vmul.f32 d12, d3, d17
	vmul.f32 d6, d4, d17
	vmul.f32 d7, d5, d16
	vmul.f32 d8, d4, d16
	vmul.f32 d9, d5, d17
	vmul.f32 d13, d2, d16
	vsub.f32 d7, d7, d6
	vadd.f32 d11, d11, d10
	vsub.f32 q1, q12, q11
	vsub.f32 q2, q10, q9
	vadd.f32 d6, d9, d8
	vadd.f32 q4, q14, q13
	vadd.f32 q11, q12, q11
	vadd.f32 q12, q10, q9
	vsub.f32 d10, d13, d12
	vsub.f32 q7, q4, q0
	vsub.f32 q9, q12, q11
	vsub.f32 q13, q5, q3
	vadd.f32 d29, d5, d2 @
	vadd.f32 q5, q5, q3
	vadd.f32 q10, q4, q0
	vadd.f32 q11, q12, q11
	vsub.f32 d31, d5, d2 @
	vsub.f32 d28, d4, d3 @
	vadd.f32 d30, d4, d3 @
	vadd.f32 d5, d19, d14 @
	vadd.f32 d7, d31, d26 @
	vadd.f32 q1, q14, q5
	vadd.f32 q0, q11, q10
	vsub.f32 d6, d30, d27 @
	vsub.f32 d4, d18, d15 @
	vsub.f32 d13, d19, d14 @
	vadd.f32 d12, d18, d15 @
	vsub.f32 d15, d31, d26 @
	ldr r2, [r12], #4
	vtrn.32 q1, q3
	ldr lr, [r12], #4
	vtrn.32 q0, q2
	add r2, r0, r2, lsl #2
	vsub.f32 q4, q11, q10
	add lr, r0, lr, lsl #2
	vsub.f32 q5, q14, q5
	vadd.f32 d14, d30, d27 @
	vst2.32 {q0,q1}, [r2, :128]!
	vst2.32 {q2,q3}, [lr, :128]!
	vtrn.32 q4, q6
	vtrn.32 q5, q7
	vst2.32 {q4,q5}, [r2, :128]!
	vst2.32 {q6,q7}, [lr, :128]!
	bne _neon_ee_o_loop

	@ exchange r7 with r9 and r8 with r10 (r2 is the temporary)
	add r2, r7, #0
	add r7, r9, #0
	add r9, r2, #0
	add r2, r8, #0
	add r8, r10, #0
	add r10, r2, #0
	ldr r11, [r1, #32] @ this is p->i1
	cmp r11, #0
	beq _neon_oo_o_loop_exit
_neon_oo_o_loop:
	vld2.32 {q8}, [r6, :128]!
	vld2.32 {q9}, [r5, :128]!
	vld2.32 {q10}, [r4, :128]!
	vld2.32 {q13}, [r3, :128]!
	vadd.f32 q11, q9, q8
	vsub.f32 q8, q9, q8
	vsub.f32 q9, q13, q10
	vadd.f32 q12, q13, q10
	subs r11, r11, #1	@ flags consumed by the bne at the loop end
	vld2.32 {q10}, [r7, :128]!
	vld2.32 {q13}, [r9, :128]!
	vsub.f32 q2, q12, q11
	vsub.f32 d7, d19, d16 @
	vadd.f32 d3, d19, d16 @
	vadd.f32 d6, d18, d17 @
	vsub.f32 d2, d18, d17 @
	vld2.32 {q9}, [r8, :128]!
	vld2.32 {q8}, [r10, :128]!
	vadd.f32 q0, q12, q11
	vadd.f32 q11, q13, q8
	vadd.f32 q12, q10, q9
	vsub.f32 q8, q13, q8
	vsub.f32 q9, q10, q9
	vsub.f32 q6, q12, q11
	vadd.f32 q4, q12, q11
	vtrn.32 q0, q2
	ldr r2, [r12], #4
	vsub.f32 d15, d19, d16 @
	ldr lr, [r12], #4
	vadd.f32 d11, d19, d16 @
	vadd.f32 d14, d18, d17 @
	vsub.f32 d10, d18, d17 @
	add r2, r0, r2, lsl #2
	vtrn.32 q1, q3
	add lr, r0, lr, lsl #2
	vst2.32 {q0,q1}, [r2, :128]!
	vst2.32 {q2,q3}, [lr, :128]!
	vtrn.32 q4, q6
	vtrn.32 q5, q7
	vst2.32 {q4,q5}, [r2, :128]!
	vst2.32 {q6,q7}, [lr, :128]!
	bne _neon_oo_o_loop
_neon_oo_o_loop_exit:

	@ single straight-line block; r11 points at four floats that are
	@ loaded into d24/d25 part-way through
	ldr r11, [r1, #8]
	vld1.32 {q8}, [r5, :128]!
	vld1.32 {q10}, [r6, :128]!
	vld2.32 {q11}, [r4, :128]!
	vld2.32 {q13}, [r3, :128]!
	vld2.32 {q15}, [r10, :128]!
	vorr d25, d17, d17
	vorr d24, d20, d20
	vorr d20, d16, d16
	vsub.f32 q9, q13, q11
	vadd.f32 q11, q13, q11
	ldr r2, [r12], #4
	vtrn.32 d24, d25
	ldr lr, [r12], #4
	vtrn.32 d20, d21
	add r2, r0, r2, lsl #2
	vsub.f32 q8, q10, q12
	add lr, r0, lr, lsl #2
	vadd.f32 q10, q10, q12
	vadd.f32 q0, q11, q10
	vadd.f32 d25, d19, d16 @
	vsub.f32 d27, d19, d16 @
	vsub.f32 q1, q11, q10
	vsub.f32 d24, d18, d17 @
	vadd.f32 d26, d18, d17 @
	vtrn.32 q0, q12
	vtrn.32 q1, q13
	vld1.32 {d24, d25}, [r11, :128]
	vswp d1, d2
	vst1.32 {q0, q1}, [r2, :128]!
	vld2.32 {q0}, [r9, :128]!
	vadd.f32 q1, q0, q15
	vld2.32 {q13}, [r8, :128]!
	vld2.32 {q14}, [r7, :128]!
	vsub.f32 q15, q0, q15
	vsub.f32 q0, q14, q13
	vadd.f32 q3, q14, q13
	vadd.f32 q2, q3, q1
	vadd.f32 d29, d1, d30 @
	vsub.f32 d27, d1, d30 @
	vsub.f32 q3, q3, q1
	vsub.f32 d28, d0, d31 @
	vadd.f32 d26, d0, d31 @
	vtrn.32 q2, q14
	vtrn.32 q3, q13
	vswp d5, d6
	vst1.32 {q2, q3}, [r2, :128]!
	vtrn.32 q11, q9
	vtrn.32 q10, q8
	vmul.f32 d20, d18, d25
	vmul.f32 d22, d19, d24
	vmul.f32 d21, d19, d25
	vmul.f32 d18, d18, d24
	vmul.f32 d19, d16, d25
	vmul.f32 d30, d17, d24
	vmul.f32 d23, d16, d24
	vmul.f32 d24, d17, d25
	vadd.f32 d17, d22, d20
	vsub.f32 d16, d18, d21
	vsub.f32 d21, d30, d19
	vadd.f32 d20, d24, d23
	vadd.f32 q9, q8, q10
	vsub.f32 q8, q8, q10
	vadd.f32 q4, q14, q9
	vsub.f32 q6, q14, q9
	vadd.f32 d11, d27, d16 @
	vsub.f32 d15, d27, d16 @
	vsub.f32 d10, d26, d17 @
	vadd.f32 d14, d26, d17 @
	vswp d9, d10
	vswp d13, d14
	vstmia lr!, {q4-q7}


	@ exchange r3/r7, r4/r8, r5/r9 and r6/r10, then r9/r10
	@ (r2 is the temporary throughout)
	add r2, r3, #0
	add r3, r7, #0
	add r7, r2, #0
	add r2, r4, #0
	add r4, r8, #0
	add r8, r2, #0
	add r2, r5, #0
	add r5, r9, #0
	add r9, r2, #0
	add r2, r6, #0
	add r6, r10, #0
	add r10, r2, #0
	add r2, r9, #0
	add r9, r10, #0
	add r10, r2, #0
	ldr r2, [r1, #16]
	ldr r11, [r1, #32] @ this is p->i1
	cmp r11, #0
	beq _neon_ee_o_loop2_exit

	vld1.32 {d16, d17}, [r2, :128]
	@ same instruction sequence as _neon_ee_o_loop, on the permuted pointers
_neon_ee_o_loop2:
	vld2.32 {q15}, [r10, :128]!
	vld2.32 {q13}, [r8, :128]!
	vld2.32 {q14}, [r7, :128]!
	vld2.32 {q9}, [r4, :128]!
	vld2.32 {q10}, [r3, :128]!
	vld2.32 {q11}, [r6, :128]!
	vld2.32 {q12}, [r5, :128]!
	vsub.f32 q1, q14, q13
	vld2.32 {q0}, [r9, :128]!
	subs r11, r11, #1	@ flags consumed by the bne at the loop end
	vsub.f32 q2, q0, q15
	vadd.f32 q0, q0, q15
	vmul.f32 d10, d2, d17
	vmul.f32 d11, d3, d16
	vmul.f32 d12, d3, d17
	vmul.f32 d6, d4, d17
	vmul.f32 d7, d5, d16
	vmul.f32 d8, d4, d16
	vmul.f32 d9, d5, d17
	vmul.f32 d13, d2, d16
	vsub.f32 d7, d7, d6
	vadd.f32 d11, d11, d10
	vsub.f32 q1, q12, q11
	vsub.f32 q2, q10, q9
	vadd.f32 d6, d9, d8
	vadd.f32 q4, q14, q13
	vadd.f32 q11, q12, q11
	vadd.f32 q12, q10, q9
	vsub.f32 d10, d13, d12
	vsub.f32 q7, q4, q0
	vsub.f32 q9, q12, q11
	vsub.f32 q13, q5, q3
	vadd.f32 d29, d5, d2 @
	vadd.f32 q5, q5, q3
	vadd.f32 q10, q4, q0
	vadd.f32 q11, q12, q11
	vsub.f32 d31, d5, d2 @
	vsub.f32 d28, d4, d3 @
	vadd.f32 d30, d4, d3 @
	vadd.f32 d5, d19, d14 @
	vadd.f32 d7, d31, d26 @
	vadd.f32 q1, q14, q5
	vadd.f32 q0, q11, q10
	vsub.f32 d6, d30, d27 @
	vsub.f32 d4, d18, d15 @
	vsub.f32 d13, d19, d14 @
	vadd.f32 d12, d18, d15 @
	vsub.f32 d15, d31, d26 @
	ldr r2, [r12], #4
	vtrn.32 q1, q3
	ldr lr, [r12], #4
	vtrn.32 q0, q2
	add r2, r0, r2, lsl #2
	vsub.f32 q4, q11, q10
	add lr, r0, lr, lsl #2
	vsub.f32 q5, q14, q5
	vadd.f32 d14, d30, d27 @
	vst2.32 {q0,q1}, [r2, :128]!
	vst2.32 {q2,q3}, [lr, :128]!
	vtrn.32 q4, q6
	vtrn.32 q5, q7
	vst2.32 {q4,q5}, [r2, :128]!
	vst2.32 {q6,q7}, [lr, :128]!
	bne _neon_ee_o_loop2
_neon_ee_o_loop2_exit:

	vldmia sp!, {d8-d15}
	pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
@ neon_static_x4_i
@
@ Register use on entry, as established by the code below:
@   r0  base address of the data, updated in place
@   r1  byte stride; the four 32-byte rows touched are at r0, r0 + 2*r1,
@       r0 + 4*r1 and r0 + 6*r1
@   r2  address of 32 bytes of multiplier constants, loaded into q2/q3
@
@ Straight-line code: four rows are loaded, the last two are multiplied by
@ the constants, the results are combined with adds and subtracts, and all
@ four rows are written back to the addresses they were read from.
@ d8-d15 and r4-r6 are saved on entry and restored on exit.
@
@ NOTE(review): the trailing empty "@" markers are kept exactly as found;
@ presumably they tag direction-dependent add/sub instructions -- confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_x4_i
_neon_static_x4_i:
#else
.globl neon_static_x4_i
neon_static_x4_i:
#endif
@ add r3, r0, #0
	push {r4, r5, r6, lr}
	vstmdb sp!, {d8-d15}

	vld1.32 {q8,q9}, [r0, :128]
	add r4, r0, r1, lsl #1
	vld1.32 {q10,q11}, [r4, :128]
	add r5, r0, r1, lsl #2
	vld1.32 {q12,q13}, [r5, :128]
	add r6, r4, r1, lsl #2
	vld1.32 {q14,q15}, [r6, :128]
	vld1.32 {q2,q3}, [r2, :128]

	vmul.f32 q0, q13, q3
	vmul.f32 q5, q12, q2
	vmul.f32 q1, q14, q2
	vmul.f32 q4, q14, q3
	vmul.f32 q14, q12, q3
	vmul.f32 q13, q13, q2
	vmul.f32 q12, q15, q3
	vmul.f32 q2, q15, q2
	vsub.f32 q0, q5, q0
	vadd.f32 q13, q13, q14
	vadd.f32 q12, q12, q1
	vsub.f32 q1, q2, q4
	vadd.f32 q15, q0, q12
	vsub.f32 q12, q0, q12
	vadd.f32 q14, q13, q1
	vsub.f32 q13, q13, q1
	vadd.f32 q0, q8, q15
	vadd.f32 q1, q9, q14
	vsub.f32 q2, q10, q13 @
	vsub.f32 q4, q8, q15
	vadd.f32 q3, q11, q12 @
	vst1.32 {q0,q1}, [r0, :128]
	vsub.f32 q5, q9, q14
	vadd.f32 q6, q10, q13 @
	vsub.f32 q7, q11, q12 @
	vst1.32 {q2,q3}, [r4, :128]
	vst1.32 {q4,q5}, [r5, :128]
	vst1.32 {q6,q7}, [r6, :128]
	vldmia sp!, {d8-d15}
	pop {r4, r5, r6, pc}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ neon_static_x8_i
@
@ Register use on entry, as established by the code below:
@   r0  base address of the data, updated in place
@   r1  byte stride between the eight rows data0..data7 (row k starts at
@       r0 + k*r1); the loop body runs r1 >> 5 times
@   r2  address of the multiplier table (LUT); 96 bytes are consumed per
@       iteration through r12
@
@ r11 starts at -(r1 >> 5) and is incremented once per iteration; the loop
@ ends when it reaches zero.  There is no check before the first iteration,
@ so r1 is assumed to be at least 32 -- TODO confirm with the callers.
@ Each iteration reads 32 bytes from every row and writes 32 bytes back to
@ every row, advancing all eight row pointers by 32.
@ d8-d15 and r4-r11 are saved on entry and restored on exit.
@
@ NOTE(review): the trailing empty "@" markers are kept exactly as found;
@ presumably they tag direction-dependent add/sub instructions -- confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_i
_neon_static_x8_i:
#else
.globl neon_static_x8_i
neon_static_x8_i:
#endif
	push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	vstmdb sp!, {d8-d15}
	mov r11, #0
	add r3, r0, #0 @ data0
	add r5, r0, r1, lsl #1 @ data2
	add r4, r0, r1 @ data1
	add r7, r5, r1, lsl #1 @ data4
	add r6, r5, r1 @ data3
	add r9, r7, r1, lsl #1 @ data6
	add r8, r7, r1 @ data5
	add r10, r9, r1 @ data7
	add r12, r2, #0 @ LUT

	sub r11, r11, r1, lsr #5
neon_x8_loop:
	vld1.32 {q2,q3}, [r12, :128]!
	vld1.32 {q14,q15}, [r6, :128]
	vld1.32 {q10,q11}, [r5, :128]
	adds r11, r11, #1	@ flags consumed by the bne at the loop end
	vmul.f32 q12, q15, q2
	vmul.f32 q8, q14, q3
	vmul.f32 q13, q14, q2
	vmul.f32 q9, q10, q3
	vmul.f32 q1, q10, q2
	vmul.f32 q0, q11, q2
	vmul.f32 q14, q11, q3
	vmul.f32 q15, q15, q3
	vld1.32 {q2,q3}, [r12, :128]!
	vsub.f32 q10, q12, q8
	vadd.f32 q11, q0, q9
	vadd.f32 q8, q15, q13
	vld1.32 {q12,q13}, [r4, :128]
	vsub.f32 q9, q1, q14
	vsub.f32 q15, q11, q10
	vsub.f32 q14, q9, q8
	vsub.f32 q4, q12, q15 @
	vadd.f32 q6, q12, q15 @
	vadd.f32 q5, q13, q14 @
	vsub.f32 q7, q13, q14 @
	vld1.32 {q14,q15}, [r9, :128]
	vld1.32 {q12,q13}, [r7, :128]
	vmul.f32 q1, q14, q2
	vmul.f32 q0, q14, q3
	@ intermediate result parked in data1; reloaded later in this iteration
	vst1.32 {q4,q5}, [r4, :128]
	vmul.f32 q14, q15, q3
	vmul.f32 q4, q15, q2
	vadd.f32 q15, q9, q8
	@ intermediate result parked in data3; reloaded later in this iteration
	vst1.32 {q6,q7}, [r6, :128]
	vmul.f32 q8, q12, q3
	vmul.f32 q5, q13, q3
	vmul.f32 q12, q12, q2
	vmul.f32 q9, q13, q2
	vadd.f32 q14, q14, q1
	vsub.f32 q13, q4, q0
	vadd.f32 q0, q9, q8
	vld1.32 {q8,q9}, [r3, :128]
	vadd.f32 q1, q11, q10
	vsub.f32 q12, q12, q5
	vadd.f32 q11, q8, q15
	vsub.f32 q8, q8, q15
	vadd.f32 q2, q12, q14
	vsub.f32 q10, q0, q13
	vadd.f32 q15, q0, q13
	vadd.f32 q13, q9, q1
	vsub.f32 q9, q9, q1
	vsub.f32 q12, q12, q14
	vadd.f32 q0, q11, q2
	vadd.f32 q1, q13, q15
	vsub.f32 q4, q11, q2
	vsub.f32 q2, q8, q10 @
	vadd.f32 q3, q9, q12 @
	vst1.32 {q0,q1}, [r3, :128]!
	vsub.f32 q5, q13, q15
	vld1.32 {q14,q15}, [r10, :128]
	vsub.f32 q7, q9, q12 @
	vld1.32 {q12,q13}, [r8, :128]
	vst1.32 {q2,q3}, [r5, :128]!
	vld1.32 {q2,q3}, [r12, :128]!
	vadd.f32 q6, q8, q10 @
	vmul.f32 q8, q14, q2
	vst1.32 {q4,q5}, [r7, :128]!
	vmul.f32 q10, q15, q3
	vmul.f32 q9, q13, q3
	vmul.f32 q11, q12, q2
	vmul.f32 q14, q14, q3
	vst1.32 {q6,q7}, [r9, :128]!
	vmul.f32 q15, q15, q2
	vmul.f32 q12, q12, q3
	vmul.f32 q13, q13, q2
	vadd.f32 q10, q10, q8
	vsub.f32 q11, q11, q9
	vld1.32 {q8,q9}, [r4, :128]
	vsub.f32 q14, q15, q14
	vadd.f32 q15, q13, q12
	vadd.f32 q13, q11, q10
	vadd.f32 q12, q15, q14
	vsub.f32 q15, q15, q14
	vsub.f32 q14, q11, q10
	vld1.32 {q10,q11}, [r6, :128]
	vadd.f32 q0, q8, q13
	vadd.f32 q1, q9, q12
	vsub.f32 q2, q10, q15 @
	vadd.f32 q3, q11, q14 @
	vsub.f32 q4, q8, q13
	vst1.32 {q0,q1}, [r4, :128]!
	vsub.f32 q5, q9, q12
	vadd.f32 q6, q10, q15 @
	vst1.32 {q2,q3}, [r6, :128]!
	vsub.f32 q7, q11, q14 @
	vst1.32 {q4,q5}, [r8, :128]!
	vst1.32 {q6,q7}, [r10, :128]!
	bne neon_x8_loop

	vldmia sp!, {d8-d15}
	pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
@ neon_static_x8_t_i
@
@ Register use on entry, as established by the code below:
@   r0  base address of the data, updated in place
@   r1  byte stride between the eight rows data0..data7 (row k starts at
@       r0 + k*r1); the loop body runs r1 >> 5 times
@   r2  address of the multiplier table (LUT); 96 bytes are consumed per
@       iteration through r12
@
@ The arithmetic is instruction-for-instruction the same as in
@ neon_static_x8_i; the difference is that the eight final row stores use
@ vst2.32 (two-way interleaving store) where that routine uses vst1.32.
@ The two intermediate stores to data1 and data3 remain vst1.32.
@
@ r11 starts at -(r1 >> 5) and is incremented once per iteration; the loop
@ ends when it reaches zero.  There is no check before the first iteration,
@ so r1 is assumed to be at least 32 -- TODO confirm with the callers.
@ d8-d15 and r4-r11 are saved on entry and restored on exit.
@
@ NOTE(review): the trailing empty "@" markers are kept exactly as found;
@ presumably they tag direction-dependent add/sub instructions -- confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_t_i
_neon_static_x8_t_i:
#else
.globl neon_static_x8_t_i
neon_static_x8_t_i:
#endif
	push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	vstmdb sp!, {d8-d15}
	mov r11, #0
	add r3, r0, #0 @ data0
	add r5, r0, r1, lsl #1 @ data2
	add r4, r0, r1 @ data1
	add r7, r5, r1, lsl #1 @ data4
	add r6, r5, r1 @ data3
	add r9, r7, r1, lsl #1 @ data6
	add r8, r7, r1 @ data5
	add r10, r9, r1 @ data7
	add r12, r2, #0 @ LUT

	sub r11, r11, r1, lsr #5
neon_x8_t_loop:
	vld1.32 {q2,q3}, [r12, :128]!
	vld1.32 {q14,q15}, [r6, :128]
	vld1.32 {q10,q11}, [r5, :128]
	adds r11, r11, #1	@ flags consumed by the bne at the loop end
	vmul.f32 q12, q15, q2
	vmul.f32 q8, q14, q3
	vmul.f32 q13, q14, q2
	vmul.f32 q9, q10, q3
	vmul.f32 q1, q10, q2
	vmul.f32 q0, q11, q2
	vmul.f32 q14, q11, q3
	vmul.f32 q15, q15, q3
	vld1.32 {q2,q3}, [r12, :128]!
	vsub.f32 q10, q12, q8
	vadd.f32 q11, q0, q9
	vadd.f32 q8, q15, q13
	vld1.32 {q12,q13}, [r4, :128]
	vsub.f32 q9, q1, q14
	vsub.f32 q15, q11, q10
	vsub.f32 q14, q9, q8
	vsub.f32 q4, q12, q15 @
	vadd.f32 q6, q12, q15 @
	vadd.f32 q5, q13, q14 @
	vsub.f32 q7, q13, q14 @
	vld1.32 {q14,q15}, [r9, :128]
	vld1.32 {q12,q13}, [r7, :128]
	vmul.f32 q1, q14, q2
	vmul.f32 q0, q14, q3
	@ intermediate result parked in data1; reloaded later in this iteration
	vst1.32 {q4,q5}, [r4, :128]
	vmul.f32 q14, q15, q3
	vmul.f32 q4, q15, q2
	vadd.f32 q15, q9, q8
	@ intermediate result parked in data3; reloaded later in this iteration
	vst1.32 {q6,q7}, [r6, :128]
	vmul.f32 q8, q12, q3
	vmul.f32 q5, q13, q3
	vmul.f32 q12, q12, q2
	vmul.f32 q9, q13, q2
	vadd.f32 q14, q14, q1
	vsub.f32 q13, q4, q0
	vadd.f32 q0, q9, q8
	vld1.32 {q8,q9}, [r3, :128]
	vadd.f32 q1, q11, q10
	vsub.f32 q12, q12, q5
	vadd.f32 q11, q8, q15
	vsub.f32 q8, q8, q15
	vadd.f32 q2, q12, q14
	vsub.f32 q10, q0, q13
	vadd.f32 q15, q0, q13
	vadd.f32 q13, q9, q1
	vsub.f32 q9, q9, q1
	vsub.f32 q12, q12, q14
	vadd.f32 q0, q11, q2
	vadd.f32 q1, q13, q15
	vsub.f32 q4, q11, q2
	vsub.f32 q2, q8, q10 @
	vadd.f32 q3, q9, q12 @
	vst2.32 {q0,q1}, [r3, :128]!
	vsub.f32 q5, q13, q15
	vld1.32 {q14,q15}, [r10, :128]
	vsub.f32 q7, q9, q12 @
	vld1.32 {q12,q13}, [r8, :128]
	vst2.32 {q2,q3}, [r5, :128]!
	vld1.32 {q2,q3}, [r12, :128]!
	vadd.f32 q6, q8, q10 @
	vmul.f32 q8, q14, q2
	vst2.32 {q4,q5}, [r7, :128]!
	vmul.f32 q10, q15, q3
	vmul.f32 q9, q13, q3
	vmul.f32 q11, q12, q2
	vmul.f32 q14, q14, q3
	vst2.32 {q6,q7}, [r9, :128]!
	vmul.f32 q15, q15, q2
	vmul.f32 q12, q12, q3
	vmul.f32 q13, q13, q2
	vadd.f32 q10, q10, q8
	vsub.f32 q11, q11, q9
	vld1.32 {q8,q9}, [r4, :128]
	vsub.f32 q14, q15, q14
	vadd.f32 q15, q13, q12
	vadd.f32 q13, q11, q10
	vadd.f32 q12, q15, q14
	vsub.f32 q15, q15, q14
	vsub.f32 q14, q11, q10
	vld1.32 {q10,q11}, [r6, :128]
	vadd.f32 q0, q8, q13
	vadd.f32 q1, q9, q12
	vsub.f32 q2, q10, q15 @
	vadd.f32 q3, q11, q14 @
	vsub.f32 q4, q8, q13
	vst2.32 {q0,q1}, [r4, :128]!
	vsub.f32 q5, q9, q12
	vadd.f32 q6, q10, q15 @
	vst2.32 {q2,q3}, [r6, :128]!
	vsub.f32 q7, q11, q14 @
	vst2.32 {q4,q5}, [r8, :128]!
	vst2.32 {q6,q7}, [r10, :128]!
	bne neon_x8_t_loop

	vldmia sp!, {d8-d15}
	pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,208 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "patterns.h"
|
||||||
|
|
||||||
|
/*
 * Fill d[0..3] with the addresses offset + (k << stride) for k = 0, 2, 1, 3
 * (in that order).  An address that comes out negative is wrapped by adding
 * N once.
 *
 * N       value added to negative addresses
 * offset  base address, may be negative
 * stride  left-shift applied to each k
 * d       output array with room for four ints
 */
void permute_addr(int N, int offset, int stride, int *d) {
	static const int order[4] = {0, 2, 1, 3};
	int i;

	for (i = 0; i < 4; i++) {
		int addr = offset + (order[i] << stride);

		if (addr < 0) {
			addr += N;
		}
		d[i] = addr;
	}
}
|
||||||
|
|
||||||
|
/*
 * Recursively append groups of four input indices to the array that *is
 * points into, advancing *is by four for every group written.
 *
 * is       cursor into the output index array; advanced as groups are added
 * bigN     value added to negative indices to wrap them
 * N        size of the current sub-problem
 * poffset  index offset of the current sub-problem, may be negative
 * offset   only forwarded to the recursive calls, never read here
 * stride   log2 of the index step of the current sub-problem
 * even     only forwarded to the recursive calls, never read here
 * VL       a group is emitted only when its first (wrapped, doubled) index
 *          is a multiple of 2*VL
 *
 * For N > 4 the function recurses on N/2 and then either recurses on the
 * two N/4 halves (when N/4 >= 4) or emits one group directly.  For N == 4
 * the group comes from permute_addr().  Smaller N emits nothing.
 */
void ffts_hardcodedleaf_is_rec(ptrdiff_t **is, int bigN, int N, int poffset, int offset, int stride, int even, int VL) {
	int i;

	if (N > 4) {
		const int step = 1 << stride;

		ffts_hardcodedleaf_is_rec(is, bigN, N/2, poffset, offset, stride + 1, even, VL);

		if (N/4 >= 4) {
			ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset + step, offset + (N/2), stride + 2, 0, VL);
			ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset - step, offset + (3*N/4), stride + 2, 0, VL);
		} else {
			/* Alignment test uses the first index after wrapping and doubling. */
			int first = poffset + step;

			if (first < 0) {
				first += bigN;
			}
			first *= 2;

			if (!(first % (VL*2))) {
				const int quad = 1 << (stride + 2);
				const int raw[4] = {
					poffset + step,
					poffset + step + quad,
					poffset - step,
					poffset - step + quad
				};

				for (i = 0; i < 4; i++) {
					ptrdiff_t idx = raw[i];

					if (idx < 0) {
						idx += bigN;
					}
					(*is)[i] = idx * 2;
				}
				*is += 4;
			}
		}
	} else if (N == 4) {
		int perm[4];

		permute_addr(bigN, poffset, stride, perm);
		if (!((perm[0]*2) % (VL*2))) {
			for (i = 0; i < 4; i++) {
				(*is)[i] = perm[i] * 2;
			}
			*is += 4;
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Allocate p->is (N/VL entries) and fill it with input indices by calling
 * ffts_hardcodedleaf_is_rec() for three consecutive runs of leaves; also
 * store the first two run lengths in p->i0 and p->i1.
 *
 * p      plan to fill in (p->is, p->i0 and p->i1 are written)
 * N      transform size
 * leafN  leaf size
 * VL     vector length; forwarded to the index generator
 *
 * If the allocation fails, p->is is left NULL and no indices are generated;
 * p->i0 and p->i1 are still set.
 */
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL) {
	const int leaves = N / leafN;
	int i0 = leaves/3 + 1;
	int i1 = leaves/3;
	int i2 = leaves/3;
	int stride = 0;
	int i, r;
	ptrdiff_t *is;

	/*
	 * Integer floor(log2(leaves)).  The floating-point form
	 * log(x)/log(2) can land just below the exact integer and truncate
	 * to one less than intended.
	 */
	for (r = leaves; r > 1; r >>= 1) {
		stride++;
	}

	if (leaves % 3 > 1) {
		i1++;
	}

	p->i0 = i0;
	p->i1 = i1;

	p->is = malloc(N/VL * sizeof(ptrdiff_t));
	if (p->is == NULL) {
		return;
	}
	is = p->is;

	for (i = 0; i < i0; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
	}
	for (i = i0; i < i0 + i1; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i, 0, stride+1, 1, VL);
		ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i-(1<<stride), 0, stride+1, 1, VL);
	}
	for (i = 0 - i2; i < 0; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
	}
}
|
||||||
|
/**
 * Recursively record (input offset, output offset) pairs, one pair per leaf.
 *
 * A sub-problem is a leaf when it is "even" and exactly leafN in size, or
 * not "even" and at most leafN in size.  For a leaf the pair is stored at
 * slot ooffset/leafN: offsets[2*slot] receives ioffset*2 and
 * offsets[2*slot+1] receives ooffset.  Otherwise, for N > 4, the function
 * recurses on the N/2 part, on the first N/4 part and, only when
 * N/4 >= leafN, on the second N/4 part.
 *
 * @param offsets  output array of pairs, indexed by ooffset/leafN
 * @param leafN    leaf size
 * @param N        size of the current sub-problem
 * @param ioffset  input offset of the current sub-problem, may be negative
 * @param ooffset  output offset of the current sub-problem
 * @param stride   log2 of the input step of the current sub-problem
 * @param even     non-zero for the "even" branch of the recursion
 */
void ffts_elaborate_offsets(ptrdiff_t *offsets, int leafN, int N, int ioffset, int ooffset, int stride, int even) {
	if ((even && N == leafN) || (!even && N <= leafN)) {
		const int slot = ooffset / leafN;

		offsets[2*slot] = ioffset * 2;
		offsets[2*slot + 1] = ooffset;
		return;
	}

	if (N <= 4) {
		return;
	}

	ffts_elaborate_offsets(offsets, leafN, N/2, ioffset, ooffset, stride+1, even);
	ffts_elaborate_offsets(offsets, leafN, N/4, ioffset+(1<<stride), ooffset+N/2, stride+2, 0);
	if (N/4 >= leafN) {
		ffts_elaborate_offsets(offsets, leafN, N/4, ioffset-(1<<stride), ooffset+3*N/4, stride+2, 0);
	}
}
|
||||||
|
|
||||||
|
/*
 * qsort() comparator ordering records by their first ptrdiff_t field.
 *
 * Returns a negative, zero or positive value when *a sorts before, equal to
 * or after *b.  The result is built from comparisons instead of a
 * subtraction: a ptrdiff_t difference narrowed to int can lose its high bits
 * (or overflow) and report the wrong sign.
 */
int compare_offsets(const void *a, const void *b) {
	const ptrdiff_t lhs = *(const ptrdiff_t *)a;
	const ptrdiff_t rhs = *(const ptrdiff_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
|
||||||
|
|
||||||
|
/*
 * Return the low n bits of a in reversed order: bit i of a becomes bit
 * n-1-i of the result.  Bits of a at position n and above are ignored.
 *
 * n must be in the range 0..32.  The shifts are done on uint32_t so that
 * touching bit 31 is well defined (shifting a signed 1 into the sign bit is
 * undefined behaviour).
 */
uint32_t reverse_bits(uint32_t a, int n) {
	uint32_t x = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (a & ((uint32_t)1 << i)) {
			x |= (uint32_t)1 << (n - i - 1);
		}
	}
	return x;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Build p->offsets, an array of N/leafN output offsets ordered by input
 * offset.
 *
 * A temporary array of (input offset, output offset) pairs is filled by
 * ffts_elaborate_offsets(), negative input offsets are wrapped by adding N,
 * the pairs are sorted by input offset, and twice each output offset is
 * copied into p->offsets.
 *
 * p      plan to fill in (p->offsets is written)
 * N      transform size
 * leafN  leaf size
 *
 * If either allocation fails, p->offsets is set to NULL and nothing else is
 * done.
 */
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN) {
	const size_t n_leaves = (size_t)(N / leafN);
	const size_t n_words = (size_t)(2 * N / leafN);
	ptrdiff_t *offsets = malloc(n_words * sizeof(ptrdiff_t));
	size_t i;

	p->offsets = malloc(n_leaves * sizeof(ptrdiff_t));
	if (offsets == NULL || p->offsets == NULL) {
		free(offsets);
		free(p->offsets);
		p->offsets = NULL;
		return;
	}

	ffts_elaborate_offsets(offsets, leafN, N, 0, 0, 1, 1);

	/* Even slots hold the input offsets; wrap the negative ones. */
	for (i = 0; i < n_words; i += 2) {
		if (offsets[i] < 0) {
			offsets[i] = N + offsets[i];
		}
	}

	/* Each record is one pair, keyed on its first field. */
	qsort(offsets, n_leaves, 2 * sizeof(ptrdiff_t), compare_offsets);

	for (i = 0; i < n_leaves; i++) {
		p->offsets[i] = offsets[i*2 + 1] * 2;
	}

	free(offsets);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
int tree_count(int N, int leafN, int offset) {
|
||||||
|
|
||||||
|
if(N <= leafN) return 0;
|
||||||
|
int count = 0;
|
||||||
|
count += tree_count(N/4, leafN, offset);
|
||||||
|
count += tree_count(N/8, leafN, offset + N/4);
|
||||||
|
count += tree_count(N/8, leafN, offset + N/4 + N/8);
|
||||||
|
count += tree_count(N/4, leafN, offset + N/2);
|
||||||
|
count += tree_count(N/4, leafN, offset + 3*N/4);
|
||||||
|
|
||||||
|
return 1 + count;
|
||||||
|
}
|
||||||
|
|
||||||
|
void elaborate_tree(transform_index_t **p, int N, int leafN, int offset) {
|
||||||
|
|
||||||
|
if(N <= leafN) return;
|
||||||
|
elaborate_tree(p, N/4, leafN, offset);
|
||||||
|
elaborate_tree(p, N/8, leafN, offset + N/4);
|
||||||
|
elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
|
||||||
|
elaborate_tree(p, N/4, leafN, offset + N/2);
|
||||||
|
elaborate_tree(p, N/4, leafN, offset + 3*N/4);
|
||||||
|
|
||||||
|
(*p)[0] = N;
|
||||||
|
(*p)[1] = offset*2;
|
||||||
|
|
||||||
|
(*p)+=2;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ffts_init_tree(ffts_plan_t *p, int N, int leafN) {
|
||||||
|
|
||||||
|
int count = tree_count(N, leafN, 0) + 1;
|
||||||
|
transform_index_t *ps = p->transforms = malloc(count * 2 * sizeof(transform_index_t));
|
||||||
|
|
||||||
|
//printf("count = %d\n", count);
|
||||||
|
|
||||||
|
elaborate_tree(&ps, N, leafN, 0);
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
ps -= 2;
|
||||||
|
#endif
|
||||||
|
ps[0] = 0;
|
||||||
|
ps[1] = 0;
|
||||||
|
//int i;
|
||||||
|
//for(i=0;i<count;i++) {
|
||||||
|
// fprintf(stderr, "%lu %lu - %d\n", p->transforms[i*2], p->transforms[i*2+1],
|
||||||
|
// __builtin_ctzl(p->transforms[i*2]) - 5);
|
||||||
|
//}
|
||||||
|
|
||||||
|
}
|
||||||
|
*/
|
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __PATTERNS_H__
#define __PATTERNS_H__

#include "ffts.h"

/*
 * Allocate p->is (N/VL entries), fill it with input indices and set p->i0
 * and p->i1.
 */
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL);

/*
 * Allocate p->offsets (N/leafN entries) and fill it with output offsets
 * ordered by input offset.
 */
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN);
//void ffts_init_tree(ffts_plan_t *p, int N, int leafN);

#endif
|
@ -0,0 +1,878 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
# Three exported labels with no instructions of their own: nothing is
# emitted between them, so each one resolves to whatever code follows.
# NOTE(review): presumably these exist only so that references to the NEON
# entry points still link when this file is built -- confirm.
.globl _neon_x4
.align 4
_neon_x4:

.globl _neon_x8
.align 4
_neon_x8:

.globl _neon_x8_t
.align 4
_neon_x8_t:
|
||||||
|
|
||||||
|
|
||||||
|
# leaf_ee_init: register setup executed before the leaf_ee code.
# Loads r9 from offset 0xe0 of the structure addressed by rdi and zeroes the
# loop counter. There is no ret or jump before the next label, so execution
# falls through into leaf_ee.
# NOTE(review): rdi presumably points at the transform plan -- confirm the
# 0xe0 field offset against the plan structure definition.
#ifdef __APPLE__
|
||||||
|
.globl _leaf_ee_init
|
||||||
|
_leaf_ee_init:
|
||||||
|
#else
|
||||||
|
.globl leaf_ee_init
|
||||||
|
leaf_ee_init:
|
||||||
|
#endif
|
||||||
|
#lea L_sse_constants(%rip), %r9
|
||||||
|
# r9 <- 64-bit value stored at rdi + 0xe0 (used below as the constants pointer)
movq 0xe0(%rdi), %r9
|
||||||
|
# A 32-bit xor zero-extends, so this clears all of rax.
xorl %eax, %eax
|
||||||
|
# eax is loop counter (init to 0)
|
||||||
|
# rcx is loop max count
|
||||||
|
# rsi is 'in' base pointer
|
||||||
|
# rdx is 'out' base pointer
|
||||||
|
# r8 is offsets pointer
|
||||||
|
# r9 is constants pointer
|
||||||
|
# scratch: rax r11 r12
|
||||||
|
# .align 4, 0x90
|
||||||
|
|
||||||
|
# leaf_ee: looped leaf kernel.
# Register use visible in this block:
#   rsi  input base; eight 16-byte vectors are loaded per iteration from
#        rsi + rax*4 + displacement
#   rdx  output base; four vectors are stored at rdx + r11*4 (+0/16/32/48)
#        and four at rdx + r12*4 (+0/16/32/48)
#   r8   table of 32-bit indices; r11 and r12 are sign-extended from
#        (r8 + rax*4) and 8(r8 + rax*4)
#   r9   constants; rows at +0, +16 and +32 are read
#   rax  counter, advanced by 4 per iteration; the loop ends when rax == rcx
# Each LEAF_EE_const_N label marks a load whose displacement is the
# placeholder 0xFECA. The sse_leaf_ee_offsets table records the byte offset
# of each displacement relative to the leaf_ee label.
# NOTE(review): the placeholders are presumably overwritten with real input
# offsets when this code is copied by the code generator -- confirm.
# There is no ret after the loop; execution continues at the next label.
# _leaf_ee + 9 needs 16 byte alignment
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _leaf_ee
|
||||||
|
_leaf_ee:
|
||||||
|
#else
|
||||||
|
.globl leaf_ee
|
||||||
|
leaf_ee:
|
||||||
|
#endif
|
||||||
|
# xmm0 and xmm8 stay loaded for the whole loop: xmm0 is a multiplier,
# xmm8 is only ever used as an xorps operand.
movaps 32(%r9), %xmm0 #83.5
|
||||||
|
movaps (%r9), %xmm8 #83.5
|
||||||
|
LEAF_EE_1:
|
||||||
|
LEAF_EE_const_0:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm7 #83.5
|
||||||
|
LEAF_EE_const_2:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm12 #83.5
|
||||||
|
movaps %xmm7, %xmm6 #83.5
|
||||||
|
LEAF_EE_const_3:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
|
||||||
|
movaps %xmm12, %xmm11 #83.5
|
||||||
|
subps %xmm10, %xmm12 #83.5
|
||||||
|
addps %xmm10, %xmm11 #83.5
|
||||||
|
xorps %xmm8, %xmm12 #83.5
|
||||||
|
LEAF_EE_const_1:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm9 #83.5
|
||||||
|
LEAF_EE_const_4:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
|
||||||
|
addps %xmm9, %xmm6 #83.5
|
||||||
|
subps %xmm9, %xmm7 #83.5
|
||||||
|
LEAF_EE_const_5:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm13 #83.5
|
||||||
|
movaps %xmm10, %xmm9 #83.5
|
||||||
|
LEAF_EE_const_6:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm3 #83.5
|
||||||
|
movaps %xmm6, %xmm5 #83.5
|
||||||
|
LEAF_EE_const_7:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm14 #83.5
|
||||||
|
movaps %xmm3, %xmm15 #83.5
|
||||||
|
# shufps with immediate 177 (0xB1) on a register with itself swaps the two
# floats inside each 64-bit half.
shufps $177, %xmm12, %xmm12 #83.5
|
||||||
|
movaps %xmm7, %xmm4 #83.5
|
||||||
|
# r11 <- first output index for this iteration
movslq (%r8, %rax, 4), %r11 #83.44
|
||||||
|
subps %xmm13, %xmm10 #83.5
|
||||||
|
subps %xmm14, %xmm3 #83.5
|
||||||
|
addps %xmm11, %xmm5 #83.5
|
||||||
|
subps %xmm11, %xmm6 #83.5
|
||||||
|
subps %xmm12, %xmm4 #83.5
|
||||||
|
addps %xmm12, %xmm7 #83.5
|
||||||
|
addps %xmm13, %xmm9 #83.5
|
||||||
|
addps %xmm14, %xmm15 #83.5
|
||||||
|
movaps 16(%r9), %xmm12 #83.5
|
||||||
|
movaps %xmm9, %xmm1 #83.5
|
||||||
|
movaps 16(%r9), %xmm11 #83.5
|
||||||
|
movaps %xmm5, %xmm2 #83.5
|
||||||
|
mulps %xmm10, %xmm12 #83.5
|
||||||
|
subps %xmm15, %xmm9 #83.5
|
||||||
|
addps %xmm15, %xmm1 #83.5
|
||||||
|
mulps %xmm3, %xmm11 #83.5
|
||||||
|
addps %xmm1, %xmm2 #83.5
|
||||||
|
subps %xmm1, %xmm5 #83.5
|
||||||
|
shufps $177, %xmm10, %xmm10 #83.5
|
||||||
|
xorps %xmm8, %xmm9 #83.5
|
||||||
|
shufps $177, %xmm3, %xmm3 #83.5
|
||||||
|
movaps %xmm6, %xmm1 #83.5
|
||||||
|
mulps %xmm0, %xmm10 #83.5
|
||||||
|
movaps %xmm4, %xmm13 #83.5
|
||||||
|
mulps %xmm0, %xmm3 #83.5
|
||||||
|
subps %xmm10, %xmm12 #83.5
|
||||||
|
addps %xmm3, %xmm11 #83.5
|
||||||
|
movaps %xmm12, %xmm3 #83.5
|
||||||
|
movaps %xmm7, %xmm14 #83.5
|
||||||
|
shufps $177, %xmm9, %xmm9 #83.5
|
||||||
|
subps %xmm11, %xmm12 #83.5
|
||||||
|
addps %xmm11, %xmm3 #83.5
|
||||||
|
subps %xmm9, %xmm1 #83.5
|
||||||
|
addps %xmm9, %xmm6 #83.5
|
||||||
|
addps %xmm3, %xmm4 #83.5
|
||||||
|
subps %xmm3, %xmm13 #83.5
|
||||||
|
xorps %xmm8, %xmm12 #83.5
|
||||||
|
movaps %xmm2, %xmm3 #83.5
|
||||||
|
shufps $177, %xmm12, %xmm12 #83.5
|
||||||
|
movaps %xmm6, %xmm9 #83.5
|
||||||
|
# r12 <- second output index; read before rax is advanced below
movslq 8(%r8, %rax, 4), %r12 #83.59
|
||||||
|
# movlhps copies the low 64 bits of the source into the high 64 bits of the
# destination; shufps with immediate 238 (0xEE) keeps the high halves of both
# operands. The pairs below pack results into the eight output vectors.
movlhps %xmm4, %xmm3 #83.5
|
||||||
|
addq $4, %rax
|
||||||
|
shufps $238, %xmm4, %xmm2 #83.5
|
||||||
|
movaps %xmm1, %xmm4 #83.5
|
||||||
|
#movntdq %xmm3, (%rdx,%r11,4) #83.5
|
||||||
|
subps %xmm12, %xmm7 #83.5
|
||||||
|
addps %xmm12, %xmm14 #83.5
|
||||||
|
movlhps %xmm7, %xmm4 #83.5
|
||||||
|
shufps $238, %xmm7, %xmm1 #83.5
|
||||||
|
movaps %xmm5, %xmm7 #83.5
|
||||||
|
movlhps %xmm13, %xmm7 #83.5
|
||||||
|
movlhps %xmm14, %xmm9 #83.5
|
||||||
|
shufps $238, %xmm13, %xmm5 #83.5
|
||||||
|
shufps $238, %xmm14, %xmm6 #83.5
|
||||||
|
movaps %xmm3, (%rdx,%r11,4) #83.5
|
||||||
|
movaps %xmm4, 16(%rdx,%r11,4) #83.5
|
||||||
|
movaps %xmm7, 32(%rdx,%r11,4) #83.5
|
||||||
|
movaps %xmm9, 48(%rdx,%r11,4) #83.5
|
||||||
|
movaps %xmm2, (%rdx,%r12,4) #83.5
|
||||||
|
movaps %xmm1, 16(%rdx,%r12,4) #83.5
|
||||||
|
movaps %xmm5, 32(%rdx,%r12,4) #83.5
|
||||||
|
movaps %xmm6, 48(%rdx,%r12,4) #83.5
|
||||||
|
cmpq %rcx, %rax
|
||||||
|
jne LEAF_EE_1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# leaf_oo: looped leaf kernel with the same register interface as the loop
# in leaf_ee (rsi input base, rdx output base, r8 index table, r9 constants,
# rax counter stepped by 4 until it equals rcx).
# Only the first constants row, (%r9), is read here; it is kept in xmm5 and
# used solely as an xorps operand. rax is not initialised in this block.
# Each LEAF_OO_const_N label marks a load with the placeholder displacement
# 0xFECA; sse_leaf_oo_offsets records where each displacement sits relative
# to the leaf_oo label.
# NOTE(review): placeholders are presumably patched by the code generator --
# confirm.
# There is no ret after the loop; execution continues at the next label.
# _leaf_oo + 4 needs to be 16 byte aligned
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _leaf_oo
|
||||||
|
_leaf_oo:
|
||||||
|
#else
|
||||||
|
.globl leaf_oo
|
||||||
|
leaf_oo:
|
||||||
|
#endif
|
||||||
|
movaps (%r9), %xmm5 #92.7
|
||||||
|
LEAF_OO_1:
|
||||||
|
LEAF_OO_const_0:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm4 #93.5
|
||||||
|
movaps %xmm4, %xmm6 #93.5
|
||||||
|
LEAF_OO_const_1:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm7 #93.5
|
||||||
|
LEAF_OO_const_2:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm10 #93.5
|
||||||
|
addps %xmm7, %xmm6 #93.5
|
||||||
|
subps %xmm7, %xmm4 #93.5
|
||||||
|
LEAF_OO_const_3:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm8 #93.5
|
||||||
|
movaps %xmm10, %xmm9 #93.5
|
||||||
|
LEAF_OO_const_4:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm1 #93.5
|
||||||
|
movaps %xmm6, %xmm3 #93.5
|
||||||
|
LEAF_OO_const_5:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm11 #93.5
|
||||||
|
movaps %xmm1, %xmm2 #93.5
|
||||||
|
LEAF_OO_const_6:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm14 #93.5
|
||||||
|
movaps %xmm4, %xmm15 #93.5
|
||||||
|
LEAF_OO_const_7:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm12 #93.5
|
||||||
|
movaps %xmm14, %xmm13 #93.5
|
||||||
|
# r11 <- first output index for this iteration
movslq (%r8, %rax, 4), %r11 #83.44
|
||||||
|
subps %xmm8, %xmm10 #93.5
|
||||||
|
addps %xmm8, %xmm9 #93.5
|
||||||
|
addps %xmm11, %xmm2 #93.5
|
||||||
|
subps %xmm12, %xmm14 #93.5
|
||||||
|
subps %xmm11, %xmm1 #93.5
|
||||||
|
addps %xmm12, %xmm13 #93.5
|
||||||
|
addps %xmm9, %xmm3 #93.5
|
||||||
|
subps %xmm9, %xmm6 #93.5
|
||||||
|
xorps %xmm5, %xmm10 #93.5
|
||||||
|
xorps %xmm5, %xmm14 #93.5
|
||||||
|
# Immediate 177 (0xB1) swaps the two floats inside each 64-bit half.
shufps $177, %xmm10, %xmm10 #93.5
|
||||||
|
movaps %xmm2, %xmm9 #93.5
|
||||||
|
shufps $177, %xmm14, %xmm14 #93.5
|
||||||
|
movaps %xmm6, %xmm7 #93.5
|
||||||
|
# r12 <- second output index; read before rax is advanced
movslq 8(%r8, %rax, 4), %r12 #83.59
|
||||||
|
addq $4, %rax #92.18
|
||||||
|
addps %xmm10, %xmm4 #93.5
|
||||||
|
addps %xmm13, %xmm9 #93.5
|
||||||
|
subps %xmm13, %xmm2 #93.5
|
||||||
|
subps %xmm10, %xmm15 #93.5
|
||||||
|
movaps %xmm1, %xmm13 #93.5
|
||||||
|
movaps %xmm2, %xmm8 #93.5
|
||||||
|
movlhps %xmm4, %xmm7 #93.5
|
||||||
|
subps %xmm14, %xmm13 #93.5
|
||||||
|
addps %xmm14, %xmm1 #93.5
|
||||||
|
shufps $238, %xmm4, %xmm6 #93.5
|
||||||
|
movaps %xmm3, %xmm14 #93.5
|
||||||
|
movaps %xmm9, %xmm4 #93.5
|
||||||
|
movlhps %xmm15, %xmm14 #93.5
|
||||||
|
movlhps %xmm13, %xmm4 #93.5
|
||||||
|
movlhps %xmm1, %xmm8 #93.5
|
||||||
|
shufps $238, %xmm15, %xmm3 #93.5
|
||||||
|
shufps $238, %xmm13, %xmm9 #93.5
|
||||||
|
shufps $238, %xmm1, %xmm2 #93.5
|
||||||
|
# Four vectors go to rdx + r11*4 and four to rdx + r12*4.
movaps %xmm14, (%rdx,%r11,4) #93.5
|
||||||
|
movaps %xmm7, 16(%rdx,%r11,4) #93.5
|
||||||
|
movaps %xmm4, 32(%rdx,%r11,4) #93.5
|
||||||
|
movaps %xmm8, 48(%rdx,%r11,4) #93.5
|
||||||
|
movaps %xmm3, (%rdx,%r12,4) #93.5
|
||||||
|
movaps %xmm6, 16(%rdx,%r12,4) #93.5
|
||||||
|
movaps %xmm9, 32(%rdx,%r12,4) #93.5
|
||||||
|
movaps %xmm2, 48(%rdx,%r12,4) #93.5
|
||||||
|
cmpq %rcx, %rax
|
||||||
|
jne LEAF_OO_1 # Prob 95% #92.14
|
||||||
|
|
||||||
|
# leaf_eo: straight-line leaf kernel (no loop and no ret in this block).
# Loads eight 16-byte vectors from rsi + rax*4 + displacement, reads the
# constants rows at (%r9), 0x30(%r9) and 0x40(%r9), and stores four vectors
# at rdx + r11*4 and four at rdx + r12*4 (offsets +0/16/32/48 each).
# r11 and r12 are sign-extended from (r8 + rax*4) and 8(r8 + rax*4).
# rax is advanced by 4 once; it is not initialised here.
# Each LEAF_EO_const_N label marks a load with the placeholder displacement
# 0xFECA; sse_leaf_eo_offsets records where each displacement sits relative
# to the leaf_eo label.
# NOTE(review): placeholders are presumably patched by the code generator --
# confirm.
#ifdef __APPLE__
|
||||||
|
.globl _leaf_eo
|
||||||
|
_leaf_eo:
|
||||||
|
#else
|
||||||
|
.globl leaf_eo
|
||||||
|
leaf_eo:
|
||||||
|
#endif
|
||||||
|
LEAF_EO_const_0:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm9 #88.5
|
||||||
|
LEAF_EO_const_2:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm7 #88.5
|
||||||
|
movaps %xmm9, %xmm11 #88.5
|
||||||
|
LEAF_EO_const_3:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm5 #88.5
|
||||||
|
movaps %xmm7, %xmm6 #88.5
|
||||||
|
LEAF_EO_const_1:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
|
||||||
|
subps %xmm5, %xmm7 #88.5
|
||||||
|
addps %xmm4, %xmm11 #88.5
|
||||||
|
subps %xmm4, %xmm9 #88.5
|
||||||
|
addps %xmm5, %xmm6 #88.5
|
||||||
|
# xmm3 <- first constants row; used as an xorps operand until it is
# overwritten with a copy of xmm2 near the end of the block.
movaps (%r9), %xmm3 #88.5
|
||||||
|
movaps %xmm11, %xmm10 #88.5
|
||||||
|
xorps %xmm3, %xmm7 #88.5
|
||||||
|
movaps %xmm9, %xmm8 #88.5
|
||||||
|
shufps $177, %xmm7, %xmm7 #88.5
|
||||||
|
addps %xmm6, %xmm10 #88.5
|
||||||
|
subps %xmm6, %xmm11 #88.5
|
||||||
|
subps %xmm7, %xmm8 #88.5
|
||||||
|
addps %xmm7, %xmm9 #88.5
|
||||||
|
movslq 8(%r8, %rax, 4), %r12 #83.59
|
||||||
|
movaps %xmm10, %xmm2 #88.5
|
||||||
|
movslq (%r8, %rax, 4), %r11 #83.44
|
||||||
|
movaps %xmm11, %xmm1 #88.5
|
||||||
|
shufps $238, %xmm8, %xmm10 #88.5
|
||||||
|
shufps $238, %xmm9, %xmm11 #88.5
|
||||||
|
# The first two r12-indexed outputs are stored before the second group of
# four inputs is loaded.
movaps %xmm10, (%rdx,%r12,4) #88.5
|
||||||
|
movaps %xmm11, 16(%rdx,%r12,4) #88.5
|
||||||
|
LEAF_EO_const_4:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm15 #88.5
|
||||||
|
LEAF_EO_const_5:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm12 #88.5
|
||||||
|
movaps %xmm15, %xmm14 #88.5
|
||||||
|
LEAF_EO_const_6:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
|
||||||
|
addps %xmm12, %xmm14 #88.5
|
||||||
|
subps %xmm12, %xmm15 #88.5
|
||||||
|
LEAF_EO_const_7:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm13 #88.5
|
||||||
|
movaps %xmm4, %xmm5 #88.5
|
||||||
|
movaps %xmm14, %xmm7 #88.5
|
||||||
|
addps %xmm13, %xmm5 #88.5
|
||||||
|
subps %xmm13, %xmm4 #88.5
|
||||||
|
movlhps %xmm8, %xmm2 #88.5
|
||||||
|
movaps %xmm5, %xmm8 #88.5
|
||||||
|
movlhps %xmm15, %xmm7 #88.5
|
||||||
|
xorps %xmm3, %xmm15 #88.5
|
||||||
|
movaps %xmm5, %xmm6 #88.5
|
||||||
|
subps %xmm14, %xmm5 #88.5
|
||||||
|
addps %xmm14, %xmm6 #88.5
|
||||||
|
movlhps %xmm9, %xmm1 #88.5
|
||||||
|
movaps %xmm4, %xmm14 #88.5
|
||||||
|
movlhps %xmm4, %xmm8 #88.5
|
||||||
|
movaps %xmm1, %xmm12 #88.5
|
||||||
|
shufps $177, %xmm15, %xmm15 #88.5
|
||||||
|
movaps 0x30(%r9), %xmm11 #88.5
|
||||||
|
# rax is advanced here; r11 and r12 were already loaded above, so the
# remaining stores are unaffected.
addq $4, %rax #90.5
|
||||||
|
subps %xmm15, %xmm14 #88.5
|
||||||
|
mulps %xmm7, %xmm11 #88.5
|
||||||
|
addps %xmm15, %xmm4 #88.5
|
||||||
|
movaps 0x30(%r9), %xmm9 #88.5
|
||||||
|
movaps 0x40(%r9), %xmm15 #88.5
|
||||||
|
shufps $177, %xmm7, %xmm7 #88.5
|
||||||
|
mulps %xmm8, %xmm9 #88.5
|
||||||
|
mulps %xmm15, %xmm7 #88.5
|
||||||
|
shufps $177, %xmm8, %xmm8 #88.5
|
||||||
|
subps %xmm7, %xmm11 #88.5
|
||||||
|
mulps %xmm15, %xmm8 #88.5
|
||||||
|
movaps %xmm11, %xmm10 #88.5
|
||||||
|
addps %xmm8, %xmm9 #88.5
|
||||||
|
shufps $238, %xmm14, %xmm6 #88.5
|
||||||
|
subps %xmm9, %xmm11 #88.5
|
||||||
|
addps %xmm9, %xmm10 #88.5
|
||||||
|
xorps %xmm3, %xmm11 #88.5
|
||||||
|
movaps %xmm2, %xmm3 #88.5
|
||||||
|
shufps $177, %xmm11, %xmm11 #88.5
|
||||||
|
subps %xmm10, %xmm3 #88.5
|
||||||
|
addps %xmm10, %xmm2 #88.5
|
||||||
|
addps %xmm11, %xmm12 #88.5
|
||||||
|
subps %xmm11, %xmm1 #88.5
|
||||||
|
shufps $238, %xmm4, %xmm5 #88.5
|
||||||
|
movaps %xmm5, 48(%rdx,%r12,4) #88.5
|
||||||
|
movaps %xmm6, 32(%rdx,%r12,4) #88.5
|
||||||
|
movaps %xmm2, (%rdx,%r11,4) #88.5
|
||||||
|
movaps %xmm1, 16(%rdx,%r11,4) #88.5
|
||||||
|
movaps %xmm3, 32(%rdx,%r11,4) #88.5
|
||||||
|
movaps %xmm12, 48(%rdx,%r11,4) #88.5
|
||||||
|
|
||||||
|
|
||||||
|
# leaf_oe: straight-line leaf kernel (no loop and no ret in this block).
# Loads eight 16-byte vectors from rsi + rax*4 + displacement, reads the
# constants rows at (%r9), 0x30(%r9) and 0x40(%r9), and stores four vectors
# at rdx + r11*4 and four at rdx + r12*4 (offsets +0/16/32/48 each).
# r11 and r12 are sign-extended from (r8 + rax*4) and 8(r8 + rax*4).
# rax is advanced by 4 once; it is not initialised here.
# Each LEAF_OE_const_N label marks a load with the placeholder displacement
# 0xFECA; sse_leaf_oe_offsets records where each displacement sits relative
# to the leaf_oe label.
# NOTE(review): placeholders are presumably patched by the code generator --
# confirm.
#ifdef __APPLE__
|
||||||
|
.globl _leaf_oe
|
||||||
|
_leaf_oe:
|
||||||
|
#else
|
||||||
|
.globl leaf_oe
|
||||||
|
leaf_oe:
|
||||||
|
#endif
|
||||||
|
# xmm0 <- first constants row; used only as an xorps operand below.
movaps (%r9), %xmm0 #59.5
|
||||||
|
#movaps 0x20(%r9), %xmm1 #59.5
|
||||||
|
LEAF_OE_const_2:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm6 #70.5
|
||||||
|
LEAF_OE_const_3:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm8 #70.5
|
||||||
|
movaps %xmm6, %xmm10 #70.5
|
||||||
|
# shufps with immediate 228 (0xE4) keeps the low 64 bits of the destination
# and takes the high 64 bits from the source.
shufps $228, %xmm8, %xmm10 #70.5
|
||||||
|
movaps %xmm10, %xmm9 #70.5
|
||||||
|
shufps $228, %xmm6, %xmm8 #70.5
|
||||||
|
LEAF_OE_const_0:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm12 #70.5
|
||||||
|
LEAF_OE_const_1:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
|
||||||
|
movaps %xmm12, %xmm14 #70.5
|
||||||
|
movslq (%r8, %rax, 4), %r11 #83.44
|
||||||
|
addps %xmm8, %xmm9 #70.5
|
||||||
|
subps %xmm8, %xmm10 #70.5
|
||||||
|
addps %xmm7, %xmm14 #70.5
|
||||||
|
subps %xmm7, %xmm12 #70.5
|
||||||
|
movaps %xmm9, %xmm4 #70.5
|
||||||
|
movaps %xmm14, %xmm13 #70.5
|
||||||
|
shufps $238, %xmm10, %xmm4 #70.5
|
||||||
|
xorps %xmm0, %xmm10 #70.5
|
||||||
|
shufps $177, %xmm10, %xmm10 #70.5
|
||||||
|
movaps %xmm12, %xmm11 #70.5
|
||||||
|
movaps %xmm14, %xmm5 #70.5
|
||||||
|
addps %xmm9, %xmm13 #70.5
|
||||||
|
subps %xmm10, %xmm11 #70.5
|
||||||
|
subps %xmm9, %xmm14 #70.5
|
||||||
|
shufps $238, %xmm12, %xmm5 #70.5
|
||||||
|
addps %xmm10, %xmm12 #70.5
|
||||||
|
movslq 8(%r8, %rax, 4), %r12 #83.59
|
||||||
|
movlhps %xmm11, %xmm13 #70.5
|
||||||
|
# The first two r11-indexed outputs are stored before the second group of
# four inputs is loaded.
movaps %xmm13, (%rdx,%r11,4) #70.5
|
||||||
|
movaps 0x30(%r9), %xmm13 #70.5
|
||||||
|
movlhps %xmm12, %xmm14 #70.5
|
||||||
|
movaps 0x40(%r9), %xmm12 #70.5
|
||||||
|
mulps %xmm5, %xmm13 #70.5
|
||||||
|
shufps $177, %xmm5, %xmm5 #70.5
|
||||||
|
mulps %xmm12, %xmm5 #70.5
|
||||||
|
movaps %xmm14, 16(%rdx,%r11,4) #70.5
|
||||||
|
subps %xmm5, %xmm13 #70.5
|
||||||
|
movaps 0x30(%r9), %xmm5 #70.5
|
||||||
|
mulps %xmm4, %xmm5 #70.5
|
||||||
|
shufps $177, %xmm4, %xmm4 #70.5
|
||||||
|
mulps %xmm12, %xmm4 #70.5
|
||||||
|
LEAF_OE_const_4:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm9 #70.5
|
||||||
|
addps %xmm4, %xmm5 #70.5
|
||||||
|
LEAF_OE_const_6:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
|
||||||
|
movaps %xmm9, %xmm3 #70.5
|
||||||
|
LEAF_OE_const_7:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm2 #70.5
|
||||||
|
movaps %xmm7, %xmm6 #70.5
|
||||||
|
LEAF_OE_const_5:
|
||||||
|
movaps 0xFECA(%rsi,%rax,4), %xmm15 #70.5
|
||||||
|
movaps %xmm13, %xmm4 #70.5
|
||||||
|
subps %xmm2, %xmm7 #70.5
|
||||||
|
addps %xmm15, %xmm3 #70.5
|
||||||
|
subps %xmm15, %xmm9 #70.5
|
||||||
|
addps %xmm2, %xmm6 #70.5
|
||||||
|
subps %xmm5, %xmm13 #70.5
|
||||||
|
addps %xmm5, %xmm4 #70.5
|
||||||
|
xorps %xmm0, %xmm7 #70.5
|
||||||
|
# rax is advanced here; all loads indexed by rax have already been issued.
addq $4, %rax #72.5
|
||||||
|
movaps %xmm3, %xmm2 #70.5
|
||||||
|
shufps $177, %xmm7, %xmm7 #70.5
|
||||||
|
movaps %xmm9, %xmm8 #70.5
|
||||||
|
xorps %xmm0, %xmm13 #70.5
|
||||||
|
addps %xmm6, %xmm2 #70.5
|
||||||
|
subps %xmm7, %xmm8 #70.5
|
||||||
|
subps %xmm6, %xmm3 #70.5
|
||||||
|
addps %xmm7, %xmm9 #70.5
|
||||||
|
movaps %xmm2, %xmm10 #70.5
|
||||||
|
movaps %xmm3, %xmm11 #70.5
|
||||||
|
shufps $238, %xmm8, %xmm2 #70.5
|
||||||
|
shufps $238, %xmm9, %xmm3 #70.5
|
||||||
|
movaps %xmm2, %xmm14 #70.5
|
||||||
|
shufps $177, %xmm13, %xmm13 #70.5
|
||||||
|
subps %xmm4, %xmm14 #70.5
|
||||||
|
addps %xmm4, %xmm2 #70.5
|
||||||
|
movaps %xmm3, %xmm4 #70.5
|
||||||
|
subps %xmm13, %xmm3 #70.5
|
||||||
|
addps %xmm13, %xmm4 #70.5
|
||||||
|
movlhps %xmm8, %xmm10 #70.5
|
||||||
|
movlhps %xmm9, %xmm11 #70.5
|
||||||
|
movaps %xmm10, 32(%rdx,%r11,4) #70.5
|
||||||
|
movaps %xmm11, 48(%rdx,%r11,4) #70.5
|
||||||
|
movaps %xmm2, (%rdx,%r12,4) #70.5
|
||||||
|
movaps %xmm3, 16(%rdx,%r12,4) #70.5
|
||||||
|
movaps %xmm14, 32(%rdx,%r12,4) #70.5
|
||||||
|
movaps %xmm4, 48(%rdx,%r12,4) #70.5
|
||||||
|
|
||||||
|
|
||||||
|
# leaf_end: exported label placed directly after the leaf_oe code.
# No instructions are emitted under it.
# NOTE(review): presumably an end marker used to compute the byte length of
# the leaf_* code when it is copied -- confirm against the code generator.
#ifdef __APPLE__
|
||||||
|
.globl _leaf_end
|
||||||
|
_leaf_end:
|
||||||
|
#else
|
||||||
|
.globl leaf_end
|
||||||
|
leaf_end:
|
||||||
|
#endif
|
||||||
|
|
||||||
|
# x_init: register setup placed directly before x4.
# Loads xmm3 with the 16 bytes at r9 and r8 with the 64-bit value at
# rdi + 0x20. There is no ret or jump before the next label, so execution
# falls through into x4.
# NOTE(review): r9 presumably still holds the constants pointer and rdi the
# plan pointer at this point -- confirm against the caller.
#ifdef __APPLE__
|
||||||
|
.globl _x_init
|
||||||
|
_x_init:
|
||||||
|
#else
|
||||||
|
.globl x_init
|
||||||
|
x_init:
|
||||||
|
#endif
|
||||||
|
#movaps L_sse_constants(%rip), %xmm3 #34.3
|
||||||
|
movaps (%r9), %xmm3 #34.3
|
||||||
|
movq 0x20(%rdi),%r8
|
||||||
|
# x4: in-place pass over the 128 bytes at rdx.
# Inputs read by this block:
#   rdx   data; the eight 16-byte vectors at offsets 0..112 are loaded and
#         every one of them is stored back
#   r8    four 16-byte constant vectors at offsets 0, 16, 32 and 48
#   xmm3  xorps mask; it is not loaded in this block and must already be set
# The first half processes the vectors at 0/32/64/96, the second half the
# vectors at 16/48/80/112. Ends with ret.
#ifdef __APPLE__
|
||||||
|
.globl _x4
|
||||||
|
_x4:
|
||||||
|
#else
|
||||||
|
.globl x4
|
||||||
|
x4:
|
||||||
|
#endif
|
||||||
|
movaps 64(%rdx), %xmm0 #34.3
|
||||||
|
movaps 96(%rdx), %xmm1 #34.3
|
||||||
|
movaps (%rdx), %xmm7 #34.3
|
||||||
|
movaps (%r8), %xmm4 #const
|
||||||
|
movaps %xmm7, %xmm9 #34.3
|
||||||
|
movaps %xmm4, %xmm6 #34.3
|
||||||
|
movaps 16(%r8), %xmm2 #const
|
||||||
|
mulps %xmm0, %xmm6 #34.3
|
||||||
|
mulps %xmm1, %xmm4 #34.3
|
||||||
|
# Immediate 177 (0xB1) swaps the two floats inside each 64-bit half.
shufps $177, %xmm0, %xmm0 #34.3
|
||||||
|
shufps $177, %xmm1, %xmm1 #34.3
|
||||||
|
mulps %xmm2, %xmm0 #34.3
|
||||||
|
mulps %xmm1, %xmm2 #34.3
|
||||||
|
subps %xmm0, %xmm6 #34.3
|
||||||
|
addps %xmm2, %xmm4 #34.3
|
||||||
|
movaps %xmm6, %xmm5 #34.3
|
||||||
|
subps %xmm4, %xmm6 #34.3
|
||||||
|
addps %xmm4, %xmm5 #34.3
|
||||||
|
movaps 32(%rdx), %xmm8 #34.3
|
||||||
|
xorps %xmm3, %xmm6 #34.3
|
||||||
|
shufps $177, %xmm6, %xmm6 #34.3
|
||||||
|
movaps %xmm8, %xmm10 #34.3
|
||||||
|
# Input for the second half, loaded early.
movaps 112(%rdx), %xmm12 #34.3
|
||||||
|
subps %xmm5, %xmm9 #34.3
|
||||||
|
addps %xmm5, %xmm7 #34.3
|
||||||
|
addps %xmm6, %xmm10 #34.3
|
||||||
|
subps %xmm6, %xmm8 #34.3
|
||||||
|
movaps %xmm7, (%rdx) #34.3
|
||||||
|
movaps %xmm8, 32(%rdx) #34.3
|
||||||
|
movaps %xmm9, 64(%rdx) #34.3
|
||||||
|
movaps %xmm10, 96(%rdx) #34.3
|
||||||
|
# Second half: same instruction pattern with the constants at 32/48(%r8).
movaps 32(%r8), %xmm14 #const #34.3
|
||||||
|
movaps 80(%rdx), %xmm11 #34.3
|
||||||
|
movaps %xmm14, %xmm0 #34.3
|
||||||
|
movaps 48(%r8), %xmm13 #const #34.3
|
||||||
|
mulps %xmm11, %xmm0 #34.3
|
||||||
|
mulps %xmm12, %xmm14 #34.3
|
||||||
|
shufps $177, %xmm11, %xmm11 #34.3
|
||||||
|
shufps $177, %xmm12, %xmm12 #34.3
|
||||||
|
mulps %xmm13, %xmm11 #34.3
|
||||||
|
mulps %xmm12, %xmm13 #34.3
|
||||||
|
subps %xmm11, %xmm0 #34.3
|
||||||
|
addps %xmm13, %xmm14 #34.3
|
||||||
|
movaps %xmm0, %xmm15 #34.3
|
||||||
|
subps %xmm14, %xmm0 #34.3
|
||||||
|
addps %xmm14, %xmm15 #34.3
|
||||||
|
xorps %xmm3, %xmm0 #34.3
|
||||||
|
movaps 16(%rdx), %xmm1 #34.3
|
||||||
|
movaps 48(%rdx), %xmm2 #34.3
|
||||||
|
movaps %xmm1, %xmm4 #34.3
|
||||||
|
shufps $177, %xmm0, %xmm0 #34.3
|
||||||
|
movaps %xmm2, %xmm5 #34.3
|
||||||
|
addps %xmm15, %xmm1 #34.3
|
||||||
|
subps %xmm0, %xmm2 #34.3
|
||||||
|
subps %xmm15, %xmm4 #34.3
|
||||||
|
addps %xmm0, %xmm5 #34.3
|
||||||
|
movaps %xmm1, 16(%rdx) #34.3
|
||||||
|
movaps %xmm2, 48(%rdx) #34.3
|
||||||
|
movaps %xmm4, 80(%rdx) #34.3
|
||||||
|
movaps %xmm5, 112(%rdx) #34.3
|
||||||
|
ret
|
||||||
|
|
||||||
|
# x8_soft: looped in-place pass over eight equally spaced data streams.
# Inputs read by this block:
#   rdx   base of stream 0 (copied to rbx)
#   rcx   loop bound, and also the stream spacing: stream k starts at
#         rdx + k*rcx*4 bytes (r9, r10, ..., r15 for k = 1..7)
#   r8    constants pointer (copied to rsi); 96 bytes are consumed per
#         iteration
#   xmm3  xorps mask; it is not loaded in this block and must already be set
# rax counts from 0 in steps of 4 until it equals rcx. Ends with ret.
# NOTE(review): rbx and r12-r15 are callee-saved under the System V AMD64
# ABI and are overwritten here without being saved; presumably the code that
# invokes this routine preserves them -- confirm.
# _x8_soft + 5 needs to be 16 byte aligned
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _x8_soft
|
||||||
|
_x8_soft:
|
||||||
|
#else
|
||||||
|
.globl x8_soft
|
||||||
|
x8_soft:
|
||||||
|
#endif
|
||||||
|
xorl %eax, %eax
|
||||||
|
movq %rdx, %rbx
|
||||||
|
movq %r8, %rsi
|
||||||
|
# Each leaq adds rcx*4 bytes to the previous stream pointer.
leaq (%rdx,%rcx,4), %r9
|
||||||
|
leaq (%r9,%rcx,4), %r10
|
||||||
|
leaq (%r10,%rcx,4), %r11
|
||||||
|
leaq (%r11,%rcx,4), %r12
|
||||||
|
leaq (%r12,%rcx,4), %r13
|
||||||
|
leaq (%r13,%rcx,4), %r14
|
||||||
|
leaq (%r14,%rcx,4), %r15
|
||||||
|
X8_soft_loop:
|
||||||
|
movaps (%rsi), %xmm9
|
||||||
|
movaps (%r10,%rax,4), %xmm6
|
||||||
|
movaps %xmm9, %xmm11
|
||||||
|
movaps (%r11,%rax,4), %xmm7
|
||||||
|
movaps 16(%rsi), %xmm8
|
||||||
|
mulps %xmm6, %xmm11
|
||||||
|
mulps %xmm7, %xmm9
|
||||||
|
shufps $177, %xmm6, %xmm6
|
||||||
|
mulps %xmm8, %xmm6
|
||||||
|
shufps $177, %xmm7, %xmm7
|
||||||
|
subps %xmm6, %xmm11
|
||||||
|
mulps %xmm7, %xmm8
|
||||||
|
movaps %xmm11, %xmm10
|
||||||
|
addps %xmm8, %xmm9
|
||||||
|
movaps 32(%rsi), %xmm15
|
||||||
|
addps %xmm9, %xmm10
|
||||||
|
subps %xmm9, %xmm11
|
||||||
|
movaps (%rbx,%rax,4), %xmm5
|
||||||
|
movaps %xmm15, %xmm6
|
||||||
|
movaps (%r12,%rax,4), %xmm12
|
||||||
|
movaps %xmm5, %xmm2
|
||||||
|
movaps (%r14,%rax,4), %xmm13
|
||||||
|
xorps %xmm3, %xmm11 #const
|
||||||
|
movaps 48(%rsi), %xmm14
|
||||||
|
subps %xmm10, %xmm2
|
||||||
|
mulps %xmm12, %xmm6
|
||||||
|
addps %xmm10, %xmm5
|
||||||
|
mulps %xmm13, %xmm15
|
||||||
|
movaps 64(%rsi), %xmm10
|
||||||
|
movaps %xmm5, %xmm0
|
||||||
|
shufps $177, %xmm12, %xmm12
|
||||||
|
shufps $177, %xmm13, %xmm13
|
||||||
|
mulps %xmm14, %xmm12
|
||||||
|
mulps %xmm13, %xmm14
|
||||||
|
subps %xmm12, %xmm6
|
||||||
|
addps %xmm14, %xmm15
|
||||||
|
movaps (%r13,%rax,4), %xmm7
|
||||||
|
movaps %xmm10, %xmm13
|
||||||
|
movaps (%r15,%rax,4), %xmm8
|
||||||
|
movaps %xmm6, %xmm12
|
||||||
|
movaps 80(%rsi), %xmm9
|
||||||
|
# All six constant vectors for this iteration have been read; step to the
# next group of 96 bytes.
addq $96, %rsi
|
||||||
|
mulps %xmm7, %xmm13
|
||||||
|
subps %xmm15, %xmm6
|
||||||
|
addps %xmm15, %xmm12
|
||||||
|
mulps %xmm8, %xmm10
|
||||||
|
subps %xmm12, %xmm0
|
||||||
|
addps %xmm12, %xmm5
|
||||||
|
shufps $177, %xmm7, %xmm7
|
||||||
|
xorps %xmm3, %xmm6 #const
|
||||||
|
shufps $177, %xmm8, %xmm8
|
||||||
|
movaps %xmm2, %xmm12
|
||||||
|
mulps %xmm9, %xmm7
|
||||||
|
mulps %xmm8, %xmm9
|
||||||
|
subps %xmm7, %xmm13
|
||||||
|
addps %xmm9, %xmm10
|
||||||
|
movaps (%r9,%rax,4), %xmm4
|
||||||
|
shufps $177, %xmm11, %xmm11
|
||||||
|
movaps %xmm4, %xmm1
|
||||||
|
shufps $177, %xmm6, %xmm6
|
||||||
|
addps %xmm11, %xmm1
|
||||||
|
subps %xmm11, %xmm4
|
||||||
|
addps %xmm6, %xmm12
|
||||||
|
subps %xmm6, %xmm2
|
||||||
|
movaps %xmm13, %xmm11
|
||||||
|
movaps %xmm4, %xmm14
|
||||||
|
movaps %xmm1, %xmm6
|
||||||
|
subps %xmm10, %xmm13
|
||||||
|
addps %xmm10, %xmm11
|
||||||
|
xorps %xmm3, %xmm13 #const
|
||||||
|
addps %xmm11, %xmm4
|
||||||
|
subps %xmm11, %xmm14
|
||||||
|
shufps $177, %xmm13, %xmm13
|
||||||
|
# Results are written back to the same eight stream positions that were read.
movaps %xmm5, (%rbx,%rax,4)
|
||||||
|
movaps %xmm4, (%r9,%rax,4)
|
||||||
|
movaps %xmm2, (%r10,%rax,4)
|
||||||
|
subps %xmm13, %xmm1
|
||||||
|
addps %xmm13, %xmm6
|
||||||
|
movaps %xmm1, (%r11,%rax,4)
|
||||||
|
movaps %xmm0, (%r12,%rax,4)
|
||||||
|
movaps %xmm14, (%r13,%rax,4)
|
||||||
|
movaps %xmm12, (%r14,%rax,4)
|
||||||
|
movaps %xmm6, (%r15,%rax,4)
|
||||||
|
addq $4, %rax
|
||||||
|
cmpq %rcx, %rax
|
||||||
|
jne X8_soft_loop
|
||||||
|
ret
|
||||||
|
|
||||||
|
# x8_hard: variant of the x8_soft loop in which the eight data streams are
# addressed as rdx + rax*4 + displacement instead of through separate
# pointer registers.
# Inputs read by this block:
#   rdx   data base
#   r8    constants pointer, advanced by 96 bytes per iteration
#   r9    the first 16 bytes are loaded into xmm5 and used as the xorps mask
#   rax   counter, stepped by 4 until it equals rcx; not initialised here
# X8_const_N labels mark the eight loads and X8_const1_N the eight stores;
# every one uses the placeholder displacement 0xFECA.
# NOTE(review): the placeholders are presumably patched per transform size by
# the code generator; no offset table for these labels is visible in this
# part of the file -- confirm how the generator locates them.
# NOTE(review): there is no ret after the loop and the next thing emitted is
# the offset table data, so this block cannot be called as an ordinary
# function -- confirm it is only ever copied as a template.
#ifdef __APPLE__
|
||||||
|
.globl _x8_hard
|
||||||
|
_x8_hard:
|
||||||
|
#else
|
||||||
|
.globl x8_hard
|
||||||
|
x8_hard:
|
||||||
|
#endif
|
||||||
|
movaps (%r9), %xmm5
|
||||||
|
X8_loop:
|
||||||
|
movaps (%r8), %xmm9
|
||||||
|
X8_const_2:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm6
|
||||||
|
movaps %xmm9, %xmm11
|
||||||
|
X8_const_3:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm7
|
||||||
|
movaps 16(%r8), %xmm8
|
||||||
|
mulps %xmm6, %xmm11
|
||||||
|
mulps %xmm7, %xmm9
|
||||||
|
shufps $177, %xmm6, %xmm6
|
||||||
|
mulps %xmm8, %xmm6
|
||||||
|
shufps $177, %xmm7, %xmm7
|
||||||
|
subps %xmm6, %xmm11
|
||||||
|
mulps %xmm7, %xmm8
|
||||||
|
movaps %xmm11, %xmm10
|
||||||
|
addps %xmm8, %xmm9
|
||||||
|
movaps 32(%r8), %xmm15
|
||||||
|
addps %xmm9, %xmm10
|
||||||
|
subps %xmm9, %xmm11
|
||||||
|
X8_const_0:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm3
|
||||||
|
movaps %xmm15, %xmm6
|
||||||
|
X8_const_4:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm12
|
||||||
|
movaps %xmm3, %xmm2
|
||||||
|
X8_const_6:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm13
|
||||||
|
xorps %xmm5, %xmm11
|
||||||
|
movaps 48(%r8), %xmm14
|
||||||
|
subps %xmm10, %xmm2
|
||||||
|
mulps %xmm12, %xmm6
|
||||||
|
addps %xmm10, %xmm3
|
||||||
|
mulps %xmm13, %xmm15
|
||||||
|
movaps 64(%r8), %xmm10
|
||||||
|
movaps %xmm3, %xmm0
|
||||||
|
shufps $177, %xmm12, %xmm12
|
||||||
|
shufps $177, %xmm13, %xmm13
|
||||||
|
mulps %xmm14, %xmm12
|
||||||
|
mulps %xmm13, %xmm14
|
||||||
|
subps %xmm12, %xmm6
|
||||||
|
addps %xmm14, %xmm15
|
||||||
|
X8_const_5:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm7
|
||||||
|
movaps %xmm10, %xmm13
|
||||||
|
X8_const_7:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm8
|
||||||
|
movaps %xmm6, %xmm12
|
||||||
|
movaps 80(%r8), %xmm9
|
||||||
|
# All six constant vectors for this iteration have been read; step to the
# next group of 96 bytes.
addq $96, %r8
|
||||||
|
mulps %xmm7, %xmm13
|
||||||
|
subps %xmm15, %xmm6
|
||||||
|
addps %xmm15, %xmm12
|
||||||
|
mulps %xmm8, %xmm10
|
||||||
|
subps %xmm12, %xmm0
|
||||||
|
addps %xmm12, %xmm3
|
||||||
|
shufps $177, %xmm7, %xmm7
|
||||||
|
xorps %xmm5, %xmm6
|
||||||
|
shufps $177, %xmm8, %xmm8
|
||||||
|
movaps %xmm2, %xmm12
|
||||||
|
mulps %xmm9, %xmm7
|
||||||
|
mulps %xmm8, %xmm9
|
||||||
|
subps %xmm7, %xmm13
|
||||||
|
addps %xmm9, %xmm10
|
||||||
|
X8_const_1:
|
||||||
|
movaps 0xFECA(%rdx,%rax,4), %xmm4
|
||||||
|
shufps $177, %xmm11, %xmm11
|
||||||
|
movaps %xmm4, %xmm1
|
||||||
|
shufps $177, %xmm6, %xmm6
|
||||||
|
addps %xmm11, %xmm1
|
||||||
|
subps %xmm11, %xmm4
|
||||||
|
addps %xmm6, %xmm12
|
||||||
|
subps %xmm6, %xmm2
|
||||||
|
movaps %xmm13, %xmm11
|
||||||
|
movaps %xmm4, %xmm14
|
||||||
|
movaps %xmm1, %xmm6
|
||||||
|
subps %xmm10, %xmm13
|
||||||
|
addps %xmm10, %xmm11
|
||||||
|
xorps %xmm5, %xmm13
|
||||||
|
addps %xmm11, %xmm4
|
||||||
|
subps %xmm11, %xmm14
|
||||||
|
shufps $177, %xmm13, %xmm13
|
||||||
|
X8_const1_0:
|
||||||
|
movaps %xmm3, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_1:
|
||||||
|
movaps %xmm4, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_2:
|
||||||
|
movaps %xmm2, 0xFECA(%rdx,%rax,4)
|
||||||
|
subps %xmm13, %xmm1
|
||||||
|
addps %xmm13, %xmm6
|
||||||
|
X8_const1_3:
|
||||||
|
movaps %xmm1, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_4:
|
||||||
|
movaps %xmm0, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_5:
|
||||||
|
movaps %xmm14, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_6:
|
||||||
|
movaps %xmm12, 0xFECA(%rdx,%rax,4)
|
||||||
|
X8_const1_7:
|
||||||
|
movaps %xmm6, 0xFECA(%rdx,%rax,4)
|
||||||
|
addq $4, %rax
|
||||||
|
cmpq %rcx, %rax
|
||||||
|
jne X8_loop
|
||||||
|
|
||||||
|
# Offset tables for the four leaf kernels (leaf_ee, leaf_oo, leaf_eo,
# leaf_oe). Each table has eight 32-bit entries, one per LEAF_xx_const_N
# label, in order N = 0..7.
# An entry is (label - kernel entry label) plus 0x4 or 0x5. The labelled
# instruction is always a movaps load with a 0xFECA displacement, and the
# added constant is 0x5 exactly when the destination register is
# xmm8-xmm15 (which needs a one-byte REX prefix) and 0x4 otherwise, so each
# entry is the byte offset of the 32-bit displacement field inside the
# kernel.
# The Apple and non-Apple halves differ only in the leading underscore on
# the symbol names.
# The tables are emitted before the switch to the data section that follows
# them.
# NOTE(review): .align 4 is a power-of-two exponent on Mach-O assemblers but
# a byte count for GNU as on x86 ELF -- confirm the intended alignment.
#ifdef __APPLE__
|
||||||
|
.globl _sse_leaf_ee_offsets
|
||||||
|
.globl _sse_leaf_oo_offsets
|
||||||
|
.globl _sse_leaf_eo_offsets
|
||||||
|
.globl _sse_leaf_oe_offsets
|
||||||
|
.align 4
|
||||||
|
_sse_leaf_ee_offsets:
|
||||||
|
.long LEAF_EE_const_0-_leaf_ee+0x4
|
||||||
|
.long LEAF_EE_const_1-_leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_2-_leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_3-_leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_4-_leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_5-_leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_6-_leaf_ee+0x4
|
||||||
|
.long LEAF_EE_const_7-_leaf_ee+0x5
|
||||||
|
_sse_leaf_oo_offsets:
|
||||||
|
.long LEAF_OO_const_0-_leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_1-_leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_2-_leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_3-_leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_4-_leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_5-_leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_6-_leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_7-_leaf_oo+0x5
|
||||||
|
_sse_leaf_eo_offsets:
|
||||||
|
.long LEAF_EO_const_0-_leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_1-_leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_2-_leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_3-_leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_4-_leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_5-_leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_6-_leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_7-_leaf_eo+0x5
|
||||||
|
_sse_leaf_oe_offsets:
|
||||||
|
.long LEAF_OE_const_0-_leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_1-_leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_2-_leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_3-_leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_4-_leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_5-_leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_6-_leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_7-_leaf_oe+0x4
|
||||||
|
#else
|
||||||
|
.globl sse_leaf_ee_offsets
|
||||||
|
.globl sse_leaf_oo_offsets
|
||||||
|
.globl sse_leaf_eo_offsets
|
||||||
|
.globl sse_leaf_oe_offsets
|
||||||
|
.align 4
|
||||||
|
sse_leaf_ee_offsets:
|
||||||
|
.long LEAF_EE_const_0-leaf_ee+0x4
|
||||||
|
.long LEAF_EE_const_1-leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_2-leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_3-leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_4-leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_5-leaf_ee+0x5
|
||||||
|
.long LEAF_EE_const_6-leaf_ee+0x4
|
||||||
|
.long LEAF_EE_const_7-leaf_ee+0x5
|
||||||
|
sse_leaf_oo_offsets:
|
||||||
|
.long LEAF_OO_const_0-leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_1-leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_2-leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_3-leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_4-leaf_oo+0x4
|
||||||
|
.long LEAF_OO_const_5-leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_6-leaf_oo+0x5
|
||||||
|
.long LEAF_OO_const_7-leaf_oo+0x5
|
||||||
|
sse_leaf_eo_offsets:
|
||||||
|
.long LEAF_EO_const_0-leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_1-leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_2-leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_3-leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_4-leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_5-leaf_eo+0x5
|
||||||
|
.long LEAF_EO_const_6-leaf_eo+0x4
|
||||||
|
.long LEAF_EO_const_7-leaf_eo+0x5
|
||||||
|
sse_leaf_oe_offsets:
|
||||||
|
.long LEAF_OE_const_0-leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_1-leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_2-leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_3-leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_4-leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_5-leaf_oe+0x5
|
||||||
|
.long LEAF_OE_const_6-leaf_oe+0x4
|
||||||
|
.long LEAF_OE_const_7-leaf_oe+0x4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
# sse_constants: five 16-byte rows of 32-bit words, placed in the data
# section on a 16-byte boundary (.p2align 4).
# Read as IEEE-754 single precision: 0x3f800000 is 1.0, 0x3f3504f3 is about
# 0.70710677 (the square root of one half), 0xbf3504f3 is its negation and
# 0x80000000 is the sign bit alone.
#   row at +0x00   sign bit set in lanes 1 and 3
#   row at +0x10   0.7071 in all four lanes
#   row at +0x20   -0.7071, +0.7071, -0.7071, +0.7071
#   row at +0x30   1.0, 1.0, 0.7071, 0.7071
#   row at +0x40   0.0, 0.0, -0.7071, +0.7071
# NOTE(review): the kernels read rows at these offsets through r9, which is
# loaded from a structure field; presumably that field points here or at
# sse_constants_inv -- confirm in the plan setup code.
#ifdef __APPLE__
|
||||||
|
.data
|
||||||
|
#else
|
||||||
|
.section .data
|
||||||
|
#endif
|
||||||
|
.p2align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _sse_constants
|
||||||
|
_sse_constants:
|
||||||
|
#else
|
||||||
|
.globl sse_constants
|
||||||
|
sse_constants:
|
||||||
|
#endif
|
||||||
|
.long 0x00000000,0x80000000,0x00000000,0x80000000
|
||||||
|
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
|
||||||
|
.long 0xbf3504f3,0x3f3504f3,0xbf3504f3,0x3f3504f3
|
||||||
|
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
|
||||||
|
.long 0x00000000,0x00000000,0xbf3504f3,0x3f3504f3
|
||||||
|
# sse_constants_inv: five 16-byte rows with the same layout as sse_constants.
# The rows at +0x10 and +0x30 are identical to sse_constants. The other three
# have their sign pattern moved to the opposite lanes:
#   row at +0x00   sign bit set in lanes 0 and 2
#   row at +0x20   +0.7071, -0.7071, +0.7071, -0.7071
#   row at +0x40   0.0, 0.0, +0.7071, -0.7071
# (0x3f3504f3 is about 0.70710677 as an IEEE-754 single; 0xbf3504f3 is its
# negation.)
# NOTE(review): presumably selected instead of sse_constants for the inverse
# transform -- confirm in the plan setup code.
#ifdef __APPLE__
|
||||||
|
.globl _sse_constants_inv
|
||||||
|
_sse_constants_inv:
|
||||||
|
#else
|
||||||
|
.globl sse_constants_inv
|
||||||
|
sse_constants_inv:
|
||||||
|
#endif
|
||||||
|
.long 0x80000000,0x00000000,0x80000000,0x00000000
|
||||||
|
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
|
||||||
|
.long 0x3f3504f3,0xbf3504f3,0x3f3504f3,0xbf3504f3
|
||||||
|
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
|
||||||
|
.long 0x00000000,0x00000000,0x3f3504f3,0xbf3504f3
|
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef __TYPES_H__
|
||||||
|
#define __TYPES_H__
|
||||||
|
|
||||||
|
/* Qualifier for small helpers that must always be inlined: `static inline`
   plus the GCC-style `always_inline` attribute. */
#define __INLINE static inline __attribute__((always_inline))
|
||||||
|
|
||||||
|
/* cdata_t is one single-precision complex sample. When the `complex` macro is
   defined (as <complex.h> does) it is the native `complex float` type;
   otherwise it falls back to a two-element float array (presumably
   {real, imaginary} -- confirm against the code that indexes it).
   NOTE: the array form cannot be assigned or returned by value, unlike the
   `complex float` form. */
#if defined(complex)
|
||||||
|
typedef complex float cdata_t;
|
||||||
|
#else
|
||||||
|
typedef float cdata_t[2];
|
||||||
|
#endif
|
||||||
|
/* data_t is the scalar (real) sample type. */
typedef float data_t;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, 2013 The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __VFP_H__
|
||||||
|
#define __VFP_H__
|
||||||
|
|
||||||
|
#include "ffts.h"
|
||||||
|
|
||||||
|
/*
 * Symbols exported (.globl) by the VFP assembly source. They are declared with
 * empty parameter lists, so the compiler does no argument checking on them.
 *
 * NOTE(review): in the assembly, vfp_e and vfp_o end in a backward loop branch
 * with no return instruction, so they look like code fragments rather than
 * directly callable functions -- confirm how they are used before calling them.
 * vfp_x4 and vfp_x8 read r0, r1 and r2 on entry and return with `bx lr`; they
 * also write integer registers above r3 without saving them -- confirm that
 * callers account for this.
 */
void vfp_e();
|
||||||
|
void vfp_o();
|
||||||
|
void vfp_x4();
|
||||||
|
void vfp_x8();
|
||||||
|
/* vfp_end is a label followed by a single `bx lr` (presumably an end marker
   for the routines above -- confirm). */
void vfp_end();
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,473 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of FFTS -- The Fastest Fourier Transform in the South
|
||||||
|
|
||||||
|
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
|
||||||
|
Copyright (c) 2012, 2013 The University of Waikato
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
@ assumes r0 = out
|
||||||
|
@ r1 = in ? (not referenced by this loop)
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = loop iterations
|
||||||
|
@ r2 = const pointer
|
||||||
|
@ lr = temp (output address: r0 + 4 * next offset read from r12)
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _vfp_e
|
||||||
|
_vfp_e:
|
||||||
|
#else
|
||||||
|
.globl vfp_e
|
||||||
|
vfp_e:
|
||||||
|
#endif
|
||||||
|
_vfp_e_loop:
|
||||||
|
vldr s15, [r2, #8]
|
||||||
|
vldr s2, [r3] @ x0
|
||||||
|
vldr s0, [r3, #4]
|
||||||
|
vldr s4, [r4] @ x1
|
||||||
|
vldr s11, [r2]
|
||||||
|
vldr s10, [r7] @ x4
|
||||||
|
vldr s3, [r7, #4]
|
||||||
|
vldr s8, [r8] @ x5
|
||||||
|
vldr s1, [r8, #4]
|
||||||
|
vldr s14, [r9] @ x6
|
||||||
|
vldr s9, [r9, #4]
|
||||||
|
vldr s6, [r10] @ x7
|
||||||
|
vldr s12, [r10, #4]
|
||||||
|
vsub.f32 s18, s3, s1
|
||||||
|
vsub.f32 s7, s10, s8
|
||||||
|
vsub.f32 s5, s14, s6
|
||||||
|
vadd.f32 s6, s14, s6
|
||||||
|
vldr s24, [r5, #4]
|
||||||
|
vsub.f32 s14, s9, s12
|
||||||
|
vldr s22, [r6, #4]
|
||||||
|
vadd.f32 s8, s10, s8
|
||||||
|
vldr s28, [r6] @ x3
|
||||||
|
vldr s17, [r5] @ x2
|
||||||
|
vadd.f32 s10, s9, s12
|
||||||
|
vmul.f32 s13, s18, s15
|
||||||
|
vmul.f32 s9, s7, s11
|
||||||
|
vmul.f32 s16, s5, s11
|
||||||
|
vmul.f32 s18, s18, s11
|
||||||
|
vmul.f32 s30, s14, s11
|
||||||
|
vldr s11, [r4, #4]
|
||||||
|
add r3, r3, #8
|
||||||
|
add r4, r4, #8
|
||||||
|
add r5, r5, #8
|
||||||
|
add r6, r6, #8
|
||||||
|
add r7, r7, #8
|
||||||
|
add r8, r8, #8
|
||||||
|
add r9, r9, #8
|
||||||
|
add r10, r10, #8
|
||||||
|
vmul.f32 s12, s5, s15
|
||||||
|
vmul.f32 s20, s14, s15
|
||||||
|
vadd.f32 s5, s2, s4
|
||||||
|
vadd.f32 s3, s3, s1
|
||||||
|
vmul.f32 s15, s7, s15
|
||||||
|
vadd.f32 s1, s24, s22
|
||||||
|
vsub.f32 s7, s24, s22
|
||||||
|
vadd.f32 s24, s17, s28
|
||||||
|
vadd.f32 s26, s0, s11
|
||||||
|
vsub.f32 s14, s9, s13
|
||||||
|
vsub.f32 s2, s2, s4
|
||||||
|
vadd.f32 s4, s16, s20
|
||||||
|
vsub.f32 s22, s0, s11
|
||||||
|
vsub.f32 s16, s17, s28
|
||||||
|
vadd.f32 s9, s5, s24
|
||||||
|
vadd.f32 s28, s18, s15
|
||||||
|
vadd.f32 s13, s8, s6
|
||||||
|
vsub.f32 s5, s5, s24
|
||||||
|
vsub.f32 s24, s8, s6
|
||||||
|
vadd.f32 s11, s26, s1
|
||||||
|
vsub.f32 s12, s30, s12
|
||||||
|
vadd.f32 s20, s3, s10
|
||||||
|
vsub.f32 s15, s3, s10
|
||||||
|
vsub.f32 s3, s26, s1
|
||||||
|
vadd.f32 s18, s9, s13
|
||||||
|
vadd.f32 s10, s14, s4
|
||||||
|
vadd.f32 s6, s2, s7 @
|
||||||
|
vsub.f32 s0, s2, s7 @
|
||||||
|
vadd.f32 s26, s11, s20
|
||||||
|
vsub.f32 s4, s14, s4
|
||||||
|
vsub.f32 s8, s22, s16 @
|
||||||
|
vadd.f32 s1, s28, s12
|
||||||
|
ldr lr, [r12], #4
|
||||||
|
add lr, r0, lr, lsl #2
|
||||||
|
subs r11, r11, #1
|
||||||
|
vstr s18, [lr]
|
||||||
|
vsub.f32 s2, s28, s12
|
||||||
|
vadd.f32 s12, s22, s16 @
|
||||||
|
vsub.f32 s16, s3, s24 @
|
||||||
|
vsub.f32 s13, s9, s13
|
||||||
|
vstr s26, [lr, #4]
|
||||||
|
vadd.f32 s28, s5, s15 @
|
||||||
|
vsub.f32 s7, s5, s15 @
|
||||||
|
vadd.f32 s14, s6, s10
|
||||||
|
vadd.f32 s5, s8, s1
|
||||||
|
vadd.f32 s9, s0, s2 @
|
||||||
|
vsub.f32 s2, s0, s2 @
|
||||||
|
vsub.f32 s11, s11, s20
|
||||||
|
vstr s28, [lr, #16]
|
||||||
|
vadd.f32 s3, s3, s24 @
|
||||||
|
vstr s16, [lr, #20]
|
||||||
|
vsub.f32 s6, s6, s10
|
||||||
|
vstr s13, [lr, #32]
|
||||||
|
vsub.f32 s13, s12, s4 @
|
||||||
|
vsub.f32 s8, s8, s1
|
||||||
|
vadd.f32 s0, s12, s4 @
|
||||||
|
vstr s11, [lr, #36]
|
||||||
|
vstr s7, [lr, #48]
|
||||||
|
vstr s3, [lr, #52]
|
||||||
|
vstr s14, [lr, #8]
|
||||||
|
vstr s5, [lr, #12]
|
||||||
|
vstr s9, [lr, #24]
|
||||||
|
vstr s13, [lr, #28]
|
||||||
|
vstr s6, [lr, #40]
|
||||||
|
vstr s8, [lr, #44]
|
||||||
|
vstr s2, [lr, #56]
|
||||||
|
vstr s0, [lr, #60]
|
||||||
|
bne _vfp_e_loop
|
||||||
|
|
||||||
|
@ assumes r0 = out
|
||||||
|
@ r1 = in ? (not referenced by this loop)
|
||||||
|
@
|
||||||
|
@ r12 = offsets
|
||||||
|
@ r3-r10 = data pointers
|
||||||
|
@ r11 = loop iterations
|
||||||
|
@ r2 = temp (output address: r0 + 4 * next offset read from r12); lr is not used
|
||||||
|
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _vfp_o
|
||||||
|
_vfp_o:
|
||||||
|
#else
|
||||||
|
.globl vfp_o
|
||||||
|
vfp_o:
|
||||||
|
#endif
|
||||||
|
_vfp_o_loop:
|
||||||
|
vldr s4, [r3] @ x0
|
||||||
|
vldr s0, [r3, #4]
|
||||||
|
vldr s6, [r4] @ x1
|
||||||
|
vldr s5, [r4, #4]
|
||||||
|
vldr s7, [r5] @ x2
|
||||||
|
vldr s1, [r5, #4]
|
||||||
|
vldr s3, [r6] @ x3
|
||||||
|
vldr s8, [r6, #4]
|
||||||
|
subs r11, r11, #1
|
||||||
|
ldr r2, [r12], #4
|
||||||
|
add r2, r0, r2, lsl #2
|
||||||
|
vadd.f32 s2, s4, s6
|
||||||
|
vadd.f32 s14, s0, s5
|
||||||
|
vadd.f32 s10, s1, s8
|
||||||
|
vsub.f32 s4, s4, s6
|
||||||
|
vsub.f32 s0, s0, s5
|
||||||
|
vadd.f32 s12, s7, s3
|
||||||
|
vsub.f32 s6, s7, s3
|
||||||
|
vsub.f32 s8, s1, s8
|
||||||
|
vadd.f32 s5, s14, s10
|
||||||
|
vsub.f32 s10, s14, s10
|
||||||
|
vadd.f32 s7, s2, s12
|
||||||
|
vsub.f32 s1, s0, s6 @
|
||||||
|
vsub.f32 s12, s2, s12
|
||||||
|
vadd.f32 s3, s4, s8 @
|
||||||
|
vsub.f32 s2, s4, s8 @
|
||||||
|
vadd.f32 s0, s0, s6 @
|
||||||
|
vstr s7, [r2]
|
||||||
|
vldr s7, [r9] @ x2
|
||||||
|
vstr s5, [r2, #4]
|
||||||
|
vstr s3, [r2, #8]
|
||||||
|
vstr s1, [r2, #12]
|
||||||
|
vstr s12, [r2, #16]
|
||||||
|
vstr s10, [r2, #20]
|
||||||
|
vstr s2, [r2, #24]
|
||||||
|
vstr s0, [r2, #28]
|
||||||
|
vldr s4, [r7] @ x0
|
||||||
|
vldr s0, [r7, #4]
|
||||||
|
vldr s6, [r8] @ x1
|
||||||
|
vldr s5, [r8, #4]
|
||||||
|
vldr s3, [r10] @ x3
|
||||||
|
vldr s8, [r10, #4]
|
||||||
|
vldr s1, [r9, #4]
|
||||||
|
add r3, r3, #8
|
||||||
|
add r4, r4, #8
|
||||||
|
add r5, r5, #8
|
||||||
|
add r6, r6, #8
|
||||||
|
add r7, r7, #8
|
||||||
|
add r8, r8, #8
|
||||||
|
add r9, r9, #8
|
||||||
|
add r10, r10, #8
|
||||||
|
vadd.f32 s2, s4, s6
|
||||||
|
vadd.f32 s14, s0, s5
|
||||||
|
vadd.f32 s10, s1, s8
|
||||||
|
vsub.f32 s4, s4, s6
|
||||||
|
vsub.f32 s0, s0, s5
|
||||||
|
vadd.f32 s12, s7, s3
|
||||||
|
vsub.f32 s6, s7, s3
|
||||||
|
vsub.f32 s8, s1, s8
|
||||||
|
vadd.f32 s5, s14, s10
|
||||||
|
vsub.f32 s10, s14, s10
|
||||||
|
vadd.f32 s7, s2, s12
|
||||||
|
vsub.f32 s1, s0, s6 @
|
||||||
|
vsub.f32 s12, s2, s12
|
||||||
|
vadd.f32 s3, s4, s8 @
|
||||||
|
vsub.f32 s2, s4, s8 @
|
||||||
|
vadd.f32 s0, s0, s6 @
|
||||||
|
vstr s7, [r2, #32]
|
||||||
|
vstr s5, [r2, #36]
|
||||||
|
vstr s3, [r2, #40]
|
||||||
|
vstr s1, [r2, #44]
|
||||||
|
vstr s12, [r2, #48]
|
||||||
|
vstr s10, [r2, #52]
|
||||||
|
vstr s2, [r2, #56]
|
||||||
|
vstr s0, [r2, #60]
|
||||||
|
bne _vfp_o_loop
|
||||||
|
|
||||||
|
@ vfp_x4: in-place pass over four data streams, returning with bx lr.
@ On entry (as read by the setup below):
@   r0 = base address of the data
@   r1 = byte offset unit; the four streams start at r0, r0+2*r1, r0+4*r1, r0+6*r1
@   r2 = address of a table of float pairs; the loop reads one pair (8 bytes)
@        per iteration through r7
@ The loop runs a fixed 4 iterations and advances each stream by 8 bytes per
@ iteration. r3-r7 and r11 are overwritten here without being saved.
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _vfp_x4
|
||||||
|
_vfp_x4:
|
||||||
|
#else
|
||||||
|
.globl vfp_x4
|
||||||
|
vfp_x4:
|
||||||
|
#endif
|
||||||
|
add r3, r0, #0 @ stream 0 = r0
|
||||||
|
add r7, r2, #0 @ table cursor = r2
|
||||||
|
add r4, r0, r1, lsl #1 @ stream 1 = r0 + 2*r1
|
||||||
|
add r5, r0, r1, lsl #2 @ stream 2 = r0 + 4*r1
|
||||||
|
add r6, r4, r1, lsl #2 @ stream 3 = r4 + 4*r1 = r0 + 6*r1
|
||||||
|
mov r11, #4 @ loop counter, decremented with subs until zero
|
||||||
|
_vfp_x4_loop:
|
||||||
|
|
||||||
|
vldr s8, [r3, #0]
|
||||||
|
vldr s9, [r3, #4]
|
||||||
|
vldr s10, [r4, #0]
|
||||||
|
vldr s11, [r4, #4]
|
||||||
|
vldr s12, [r5, #0]
|
||||||
|
vldr s13, [r5, #4]
|
||||||
|
vldr s14, [r6, #0]
|
||||||
|
vldr s15, [r6, #4]
|
||||||
|
vldr s2, [r7, #0]
|
||||||
|
vldr s3, [r7, #4]
|
||||||
|
add r7, r7, #8
|
||||||
|
subs r11, r11, #1
|
||||||
|
vmul.f32 s0, s13, s3
|
||||||
|
vmul.f32 s5, s12, s2
|
||||||
|
vmul.f32 s1, s14, s2
|
||||||
|
vmul.f32 s4, s14, s3
|
||||||
|
vmul.f32 s14, s12, s3
|
||||||
|
vmul.f32 s13, s13, s2
|
||||||
|
vmul.f32 s12, s15, s3
|
||||||
|
vmul.f32 s2, s15, s2
|
||||||
|
vsub.f32 s0, s5, s0
|
||||||
|
vadd.f32 s13, s13, s14
|
||||||
|
vadd.f32 s12, s12, s1
|
||||||
|
vsub.f32 s1, s2, s4
|
||||||
|
vadd.f32 s15, s0, s12
|
||||||
|
vsub.f32 s12, s0, s12
|
||||||
|
vadd.f32 s14, s13, s1
|
||||||
|
vsub.f32 s13, s13, s1
|
||||||
|
vadd.f32 s0, s8, s15
|
||||||
|
vadd.f32 s1, s9, s14
|
||||||
|
vadd.f32 s2, s10, s13 @
|
||||||
|
vsub.f32 s4, s8, s15
|
||||||
|
vsub.f32 s3, s11, s12 @
|
||||||
|
vstr s0, [r3, #0]
|
||||||
|
vstr s1, [r3, #4]
|
||||||
|
add r3, r3, #8
|
||||||
|
vsub.f32 s5, s9, s14
|
||||||
|
vsub.f32 s6, s10, s13 @
|
||||||
|
vadd.f32 s7, s11, s12 @
|
||||||
|
vstr s2, [r4, #0]
|
||||||
|
vstr s3, [r4, #4]
|
||||||
|
add r4, r4, #8
|
||||||
|
vstr s4, [r5, #0]
|
||||||
|
vstr s5, [r5, #4]
|
||||||
|
add r5, r5, #8
|
||||||
|
vstr s6, [r6, #0]
|
||||||
|
vstr s7, [r6, #4]
|
||||||
|
add r6, r6, #8
|
||||||
|
bne _vfp_x4_loop
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
@ vfp_x8: in-place pass over eight data streams, returning with bx lr.
@ On entry (as read by the setup below):
@   r0 = base address of the data
@   r1 = byte distance between consecutive streams (dataK = r0 + K*r1)
@   r2 = address of the lookup table; the loop consumes 24 bytes of it
@        (three float pairs) per iteration through r12
@ The loop runs r1 >> 3 times and advances every stream by 8 bytes per
@ iteration, so each stream covers r1 bytes when r1 is a multiple of 8.
@ r3-r12 are overwritten here without being saved.
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _vfp_x8
|
||||||
|
_vfp_x8:
|
||||||
|
#else
|
||||||
|
.globl vfp_x8
|
||||||
|
vfp_x8:
|
||||||
|
#endif
|
||||||
|
mov r11, #0 @ loop counter starts at 0; made negative below
|
||||||
|
add r3, r0, #0 @ data0
|
||||||
|
add r5, r0, r1, lsl #1 @ data2
|
||||||
|
add r4, r0, r1 @ data1
|
||||||
|
add r7, r5, r1, lsl #1 @ data4
|
||||||
|
add r6, r5, r1 @ data3
|
||||||
|
add r9, r7, r1, lsl #1 @ data6
|
||||||
|
add r8, r7, r1 @ data5
|
||||||
|
add r10, r9, r1 @ data7
|
||||||
|
add r12, r2, #0 @ LUT
|
||||||
|
|
||||||
|
@ r11 = -(r1 >> 3); the loop counts it up with adds and exits when it reaches zero
sub r11, r11, r1, lsr #3
|
||||||
|
_vfp_x8_loop:
|
||||||
|
vldr s10, [r3, #0] @ x0-re
|
||||||
|
vldr s8, [r3, #4] @ x0-im
|
||||||
|
vldr s2, [r4, #0] @ x1-re
|
||||||
|
vldr s0, [r4, #4] @ x1-im
|
||||||
|
vldr s6, [r5, #0] @ x2-re
|
||||||
|
vldr s4, [r5, #4] @ x2-im
|
||||||
|
vldr s13, [r6, #0] @ x3-re
|
||||||
|
vldr s15, [r6, #4] @ x3-im
|
||||||
|
vldr s7, [r12]
|
||||||
|
vldr s11, [r12, #4]
|
||||||
|
vldr s5, [r7, #0] @ x4-re
|
||||||
|
vldr s1, [r7, #4] @ x4-im
|
||||||
|
vldr s28, [r9, #0] @ x6-re
|
||||||
|
vldr s18, [r9, #4] @ x6-im
|
||||||
|
adds r11, r11, #1
|
||||||
|
vmul.f32 s14, s15, s7
|
||||||
|
vldr s24, [r12, #12]
|
||||||
|
vmul.f32 s12, s13, s11
|
||||||
|
vmul.f32 s26, s13, s7
|
||||||
|
vldr s13, [r12, #8]
|
||||||
|
vmul.f32 s3, s4, s11
|
||||||
|
vmul.f32 s15, s15, s11
|
||||||
|
vmul.f32 s16, s4, s7
|
||||||
|
vmul.f32 s9, s6, s7
|
||||||
|
vmul.f32 s11, s6, s11
|
||||||
|
vmul.f32 s7, s18, s24
|
||||||
|
vmul.f32 s20, s1, s24
|
||||||
|
vmul.f32 s30, s5, s13
|
||||||
|
vadd.f32 s4, s26, s15
|
||||||
|
vsub.f32 s12, s14, s12
|
||||||
|
vsub.f32 s6, s9, s3
|
||||||
|
vadd.f32 s14, s16, s11
|
||||||
|
vmul.f32 s22, s28, s13
|
||||||
|
vmul.f32 s26, s28, s24
|
||||||
|
vmul.f32 s18, s18, s13
|
||||||
|
vmul.f32 s5, s5, s24
|
||||||
|
vmul.f32 s1, s1, s13
|
||||||
|
vsub.f32 s9, s30, s20
|
||||||
|
vadd.f32 s16, s14, s12
|
||||||
|
vadd.f32 s3, s22, s7
|
||||||
|
vadd.f32 s15, s6, s4
|
||||||
|
vsub.f32 s11, s18, s26
|
||||||
|
vadd.f32 s18, s1, s5
|
||||||
|
vadd.f32 s13, s8, s16
|
||||||
|
vadd.f32 s1, s9, s3
|
||||||
|
vadd.f32 s7, s10, s15
|
||||||
|
vsub.f32 s15, s10, s15
|
||||||
|
vsub.f32 s10, s9, s3
|
||||||
|
vadd.f32 s5, s18, s11
|
||||||
|
vsub.f32 s11, s18, s11
|
||||||
|
vsub.f32 s8, s8, s16
|
||||||
|
vadd.f32 s20, s7, s1
|
||||||
|
vsub.f32 s7, s7, s1
|
||||||
|
vadd.f32 s18, s13, s5
|
||||||
|
vadd.f32 s16, s15, s11 @
|
||||||
|
vsub.f32 s9, s8, s10 @
|
||||||
|
vsub.f32 s3, s13, s5
|
||||||
|
vsub.f32 s1, s15, s11 @
|
||||||
|
vstr s20, [r3]
|
||||||
|
vadd.f32 s8, s8, s10 @
|
||||||
|
vstr s18, [r3, #4]
|
||||||
|
add r3, r3, #8
|
||||||
|
vstr s16, [r5]
|
||||||
|
vstr s9, [r5, #4]
|
||||||
|
add r5, r5, #8
|
||||||
|
vstr s7, [r7]
|
||||||
|
vstr s3, [r7, #4]
|
||||||
|
add r7, r7, #8
|
||||||
|
vstr s1, [r9]
|
||||||
|
vstr s8, [r9, #4]
|
||||||
|
add r9, r9, #8
|
||||||
|
vldr s10, [r8, #0] @ x5-re
|
||||||
|
vldr s8, [r8, #4] @ x5-im
|
||||||
|
vldr s5, [r10, #0] @ x7-re
|
||||||
|
vldr s11, [r10, #4] @ x7-im
|
||||||
|
vldr s1, [r12, #16]
|
||||||
|
vldr s15, [r12, #20]
|
||||||
|
add r12, r12, #24
|
||||||
|
vmul.f32 s9, s5, s1
|
||||||
|
vmul.f32 s3, s11, s15
|
||||||
|
vmul.f32 s13, s10, s1
|
||||||
|
vmul.f32 s7, s8, s15
|
||||||
|
vmul.f32 s5, s5, s15
|
||||||
|
vmul.f32 s11, s11, s1
|
||||||
|
vmul.f32 s10, s10, s15
|
||||||
|
vmul.f32 s15, s8, s1
|
||||||
|
vsub.f32 s1, s14, s12
|
||||||
|
vadd.f32 s8, s9, s3
|
||||||
|
vsub.f32 s3, s6, s4
|
||||||
|
vsub.f32 s12, s13, s7
|
||||||
|
vsub.f32 s5, s11, s5
|
||||||
|
vadd.f32 s7, s15, s10
|
||||||
|
vadd.f32 s4, s2, s1 @
|
||||||
|
vsub.f32 s2, s2, s1 @
|
||||||
|
vsub.f32 s6, s0, s3 @
|
||||||
|
vadd.f32 s10, s12, s8
|
||||||
|
vsub.f32 s9, s12, s8
|
||||||
|
vadd.f32 s0, s0, s3 @
|
||||||
|
vsub.f32 s1, s7, s5
|
||||||
|
vadd.f32 s14, s7, s5
|
||||||
|
vadd.f32 s7, s4, s10
|
||||||
|
vsub.f32 s8, s4, s10
|
||||||
|
vsub.f32 s12, s0, s9 @
|
||||||
|
vadd.f32 s3, s2, s1 @
|
||||||
|
vadd.f32 s5, s6, s14
|
||||||
|
vsub.f32 s4, s6, s14
|
||||||
|
vsub.f32 s2, s2, s1 @
|
||||||
|
vadd.f32 s0, s0, s9 @
|
||||||
|
vstr s7, [r4]
|
||||||
|
vstr s5, [r4, #4]
|
||||||
|
add r4, r4, #8
|
||||||
|
vstr s3, [r6]
|
||||||
|
vstr s12, [r6, #4]
|
||||||
|
add r6, r6, #8
|
||||||
|
vstr s8, [r8]
|
||||||
|
vstr s4, [r8, #4]
|
||||||
|
add r8, r8, #8
|
||||||
|
vstr s2, [r10]
|
||||||
|
vstr s0, [r10, #4]
|
||||||
|
add r10, r10, #8
|
||||||
|
bne _vfp_x8_loop
|
||||||
|
bx lr
|
||||||
|
|
||||||
|
|
||||||
|
@ vfp_end: exported label followed by a single return (bx lr).
@ NOTE(review): presumably marks the end of the VFP routines above -- confirm
@ against the code that references this symbol.
.align 4
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.globl _vfp_end
|
||||||
|
_vfp_end:
|
||||||
|
#else
|
||||||
|
.globl vfp_end
|
||||||
|
vfp_end:
|
||||||
|
#endif
|
||||||
|
bx lr
|
@ -0,0 +1,4 @@
|
|||||||
|
|
||||||
|
# Test program: built as part of the default target but never installed
# (noinst_), linked against the libffts libtool library from src/.
noinst_PROGRAMS = test
|
||||||
|
test_SOURCES = test.c
|
||||||
|
test_LDADD = $(top_builddir)/src/libffts.la
|
@ -0,0 +1,532 @@
|
|||||||
|
# Makefile.in generated by automake 1.12.4 from Makefile.am.
|
||||||
|
# @configure_input@
|
||||||
|
|
||||||
|
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
# This Makefile.in is free software; the Free Software Foundation
|
||||||
|
# gives unlimited permission to copy and/or distribute it,
|
||||||
|
# with or without modifications, as long as this notice is preserved.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
|
||||||
|
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
||||||
|
# PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
@SET_MAKE@
|
||||||
|
|
||||||
|
VPATH = @srcdir@
|
||||||
|
am__make_dryrun = \
|
||||||
|
{ \
|
||||||
|
am__dry=no; \
|
||||||
|
case $$MAKEFLAGS in \
|
||||||
|
*\\[\ \ ]*) \
|
||||||
|
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
|
||||||
|
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
|
||||||
|
*) \
|
||||||
|
for am__flg in $$MAKEFLAGS; do \
|
||||||
|
case $$am__flg in \
|
||||||
|
*=*|--*) ;; \
|
||||||
|
*n*) am__dry=yes; break;; \
|
||||||
|
esac; \
|
||||||
|
done;; \
|
||||||
|
esac; \
|
||||||
|
test $$am__dry = yes; \
|
||||||
|
}
|
||||||
|
pkgdatadir = $(datadir)/@PACKAGE@
|
||||||
|
pkgincludedir = $(includedir)/@PACKAGE@
|
||||||
|
pkglibdir = $(libdir)/@PACKAGE@
|
||||||
|
pkglibexecdir = $(libexecdir)/@PACKAGE@
|
||||||
|
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
|
||||||
|
install_sh_DATA = $(install_sh) -c -m 644
|
||||||
|
install_sh_PROGRAM = $(install_sh) -c
|
||||||
|
install_sh_SCRIPT = $(install_sh) -c
|
||||||
|
INSTALL_HEADER = $(INSTALL_DATA)
|
||||||
|
transform = $(program_transform_name)
|
||||||
|
NORMAL_INSTALL = :
|
||||||
|
PRE_INSTALL = :
|
||||||
|
POST_INSTALL = :
|
||||||
|
NORMAL_UNINSTALL = :
|
||||||
|
PRE_UNINSTALL = :
|
||||||
|
POST_UNINSTALL = :
|
||||||
|
build_triplet = @build@
|
||||||
|
host_triplet = @host@
|
||||||
|
noinst_PROGRAMS = test$(EXEEXT)
|
||||||
|
subdir = tests
|
||||||
|
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
|
||||||
|
$(top_srcdir)/depcomp
|
||||||
|
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||||
|
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_check_java_home.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_java_options.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_jar.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac.m4 \
|
||||||
|
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
|
||||||
|
$(top_srcdir)/configure.ac
|
||||||
|
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
|
||||||
|
$(ACLOCAL_M4)
|
||||||
|
mkinstalldirs = $(install_sh) -d
|
||||||
|
CONFIG_HEADER = $(top_builddir)/config.h
|
||||||
|
CONFIG_CLEAN_FILES =
|
||||||
|
CONFIG_CLEAN_VPATH_FILES =
|
||||||
|
PROGRAMS = $(noinst_PROGRAMS)
|
||||||
|
am_test_OBJECTS = test.$(OBJEXT)
|
||||||
|
test_OBJECTS = $(am_test_OBJECTS)
|
||||||
|
test_DEPENDENCIES = $(top_builddir)/src/libffts.la
|
||||||
|
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
|
||||||
|
depcomp = $(SHELL) $(top_srcdir)/depcomp
|
||||||
|
am__depfiles_maybe = depfiles
|
||||||
|
am__mv = mv -f
|
||||||
|
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
|
||||||
|
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
|
||||||
|
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
|
||||||
|
CCLD = $(CC)
|
||||||
|
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
|
||||||
|
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
|
||||||
|
$(LDFLAGS) -o $@
|
||||||
|
SOURCES = $(test_SOURCES)
|
||||||
|
DIST_SOURCES = $(test_SOURCES)
|
||||||
|
am__can_run_installinfo = \
|
||||||
|
case $$AM_UPDATE_INFO_DIR in \
|
||||||
|
n|no|NO) false;; \
|
||||||
|
*) (install-info --version) >/dev/null 2>&1;; \
|
||||||
|
esac
|
||||||
|
ETAGS = etags
|
||||||
|
CTAGS = ctags
|
||||||
|
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
|
||||||
|
ACLOCAL = @ACLOCAL@
|
||||||
|
AMTAR = @AMTAR@
|
||||||
|
AR = @AR@
|
||||||
|
AUTOCONF = @AUTOCONF@
|
||||||
|
AUTOHEADER = @AUTOHEADER@
|
||||||
|
AUTOMAKE = @AUTOMAKE@
|
||||||
|
AWK = @AWK@
|
||||||
|
CC = @CC@
|
||||||
|
CCAS = @CCAS@
|
||||||
|
CCASDEPMODE = @CCASDEPMODE@
|
||||||
|
CCASFLAGS = @CCASFLAGS@
|
||||||
|
CCDEPMODE = @CCDEPMODE@
|
||||||
|
CFLAGS = @CFLAGS@
|
||||||
|
CPP = @CPP@
|
||||||
|
CPPFLAGS = @CPPFLAGS@
|
||||||
|
CXX = @CXX@
|
||||||
|
CXXCPP = @CXXCPP@
|
||||||
|
CXXDEPMODE = @CXXDEPMODE@
|
||||||
|
CXXFLAGS = @CXXFLAGS@
|
||||||
|
CYGPATH_W = @CYGPATH_W@
|
||||||
|
DEFS = @DEFS@
|
||||||
|
DEPDIR = @DEPDIR@
|
||||||
|
DLLTOOL = @DLLTOOL@
|
||||||
|
DSYMUTIL = @DSYMUTIL@
|
||||||
|
DUMPBIN = @DUMPBIN@
|
||||||
|
ECHO_C = @ECHO_C@
|
||||||
|
ECHO_N = @ECHO_N@
|
||||||
|
ECHO_T = @ECHO_T@
|
||||||
|
EGREP = @EGREP@
|
||||||
|
EXEEXT = @EXEEXT@
|
||||||
|
FGREP = @FGREP@
|
||||||
|
GREP = @GREP@
|
||||||
|
INSTALL = @INSTALL@
|
||||||
|
INSTALL_DATA = @INSTALL_DATA@
|
||||||
|
INSTALL_PROGRAM = @INSTALL_PROGRAM@
|
||||||
|
INSTALL_SCRIPT = @INSTALL_SCRIPT@
|
||||||
|
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
|
||||||
|
JAR = @JAR@
|
||||||
|
JAVA = @JAVA@
|
||||||
|
JAVAC = @JAVAC@
|
||||||
|
JAVACFLAGS = @JAVACFLAGS@
|
||||||
|
JAVAFLAGS = @JAVAFLAGS@
|
||||||
|
JAVAPREFIX = @JAVAPREFIX@
|
||||||
|
JAVA_PATH_NAME = @JAVA_PATH_NAME@
|
||||||
|
JNI_CPPFLAGS = @JNI_CPPFLAGS@
|
||||||
|
LD = @LD@
|
||||||
|
LDFLAGS = @LDFLAGS@
|
||||||
|
LIBOBJS = @LIBOBJS@
|
||||||
|
LIBS = @LIBS@
|
||||||
|
LIBTOOL = @LIBTOOL@
|
||||||
|
LIPO = @LIPO@
|
||||||
|
LN_S = @LN_S@
|
||||||
|
LTLIBOBJS = @LTLIBOBJS@
|
||||||
|
MAKEINFO = @MAKEINFO@
|
||||||
|
MANIFEST_TOOL = @MANIFEST_TOOL@
|
||||||
|
MKDIR_P = @MKDIR_P@
|
||||||
|
NM = @NM@
|
||||||
|
NMEDIT = @NMEDIT@
|
||||||
|
OBJDUMP = @OBJDUMP@
|
||||||
|
OBJEXT = @OBJEXT@
|
||||||
|
OTOOL = @OTOOL@
|
||||||
|
OTOOL64 = @OTOOL64@
|
||||||
|
PACKAGE = @PACKAGE@
|
||||||
|
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
|
||||||
|
PACKAGE_NAME = @PACKAGE_NAME@
|
||||||
|
PACKAGE_STRING = @PACKAGE_STRING@
|
||||||
|
PACKAGE_TARNAME = @PACKAGE_TARNAME@
|
||||||
|
PACKAGE_URL = @PACKAGE_URL@
|
||||||
|
PACKAGE_VERSION = @PACKAGE_VERSION@
|
||||||
|
PATH_SEPARATOR = @PATH_SEPARATOR@
|
||||||
|
RANLIB = @RANLIB@
|
||||||
|
SED = @SED@
|
||||||
|
SET_MAKE = @SET_MAKE@
|
||||||
|
SHELL = @SHELL@
|
||||||
|
STRIP = @STRIP@
|
||||||
|
VERSION = @VERSION@
|
||||||
|
_ACJNI_JAVAC = @_ACJNI_JAVAC@
|
||||||
|
abs_builddir = @abs_builddir@
|
||||||
|
abs_srcdir = @abs_srcdir@
|
||||||
|
abs_top_builddir = @abs_top_builddir@
|
||||||
|
abs_top_srcdir = @abs_top_srcdir@
|
||||||
|
ac_ct_AR = @ac_ct_AR@
|
||||||
|
ac_ct_CC = @ac_ct_CC@
|
||||||
|
ac_ct_CXX = @ac_ct_CXX@
|
||||||
|
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||||
|
am__include = @am__include@
|
||||||
|
am__leading_dot = @am__leading_dot@
|
||||||
|
am__quote = @am__quote@
|
||||||
|
am__tar = @am__tar@
|
||||||
|
am__untar = @am__untar@
|
||||||
|
bindir = @bindir@
|
||||||
|
build = @build@
|
||||||
|
build_alias = @build_alias@
|
||||||
|
build_cpu = @build_cpu@
|
||||||
|
build_os = @build_os@
|
||||||
|
build_vendor = @build_vendor@
|
||||||
|
builddir = @builddir@
|
||||||
|
datadir = @datadir@
|
||||||
|
datarootdir = @datarootdir@
|
||||||
|
docdir = @docdir@
|
||||||
|
dvidir = @dvidir@
|
||||||
|
exec_prefix = @exec_prefix@
|
||||||
|
host = @host@
|
||||||
|
host_alias = @host_alias@
|
||||||
|
host_cpu = @host_cpu@
|
||||||
|
host_os = @host_os@
|
||||||
|
host_vendor = @host_vendor@
|
||||||
|
htmldir = @htmldir@
|
||||||
|
includedir = @includedir@
|
||||||
|
infodir = @infodir@
|
||||||
|
install_sh = @install_sh@
|
||||||
|
libdir = @libdir@
|
||||||
|
libexecdir = @libexecdir@
|
||||||
|
localedir = @localedir@
|
||||||
|
localstatedir = @localstatedir@
|
||||||
|
mandir = @mandir@
|
||||||
|
mkdir_p = @mkdir_p@
|
||||||
|
oldincludedir = @oldincludedir@
|
||||||
|
pdfdir = @pdfdir@
|
||||||
|
prefix = @prefix@
|
||||||
|
program_transform_name = @program_transform_name@
|
||||||
|
psdir = @psdir@
|
||||||
|
sbindir = @sbindir@
|
||||||
|
sharedstatedir = @sharedstatedir@
|
||||||
|
srcdir = @srcdir@
|
||||||
|
sysconfdir = @sysconfdir@
|
||||||
|
target_alias = @target_alias@
|
||||||
|
top_build_prefix = @top_build_prefix@
|
||||||
|
top_builddir = @top_builddir@
|
||||||
|
top_srcdir = @top_srcdir@
|
||||||
|
test_SOURCES = test.c
|
||||||
|
test_LDADD = $(top_builddir)/src/libffts.la
|
||||||
|
all: all-am
|
||||||
|
|
||||||
|
.SUFFIXES:
|
||||||
|
.SUFFIXES: .c .lo .o .obj
|
||||||
|
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
|
||||||
|
@for dep in $?; do \
|
||||||
|
case '$(am__configure_deps)' in \
|
||||||
|
*$$dep*) \
|
||||||
|
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
|
||||||
|
&& { if test -f $@; then exit 0; else break; fi; }; \
|
||||||
|
exit 1;; \
|
||||||
|
esac; \
|
||||||
|
done; \
|
||||||
|
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/Makefile'; \
|
||||||
|
$(am__cd) $(top_srcdir) && \
|
||||||
|
$(AUTOMAKE) --gnu tests/Makefile
|
||||||
|
.PRECIOUS: Makefile
|
||||||
|
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||||
|
@case '$?' in \
|
||||||
|
*config.status*) \
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
|
||||||
|
*) \
|
||||||
|
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
|
||||||
|
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
|
||||||
|
esac;
|
||||||
|
|
||||||
|
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
|
||||||
|
$(top_srcdir)/configure: $(am__configure_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
|
||||||
|
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
|
||||||
|
$(am__aclocal_m4_deps):
|
||||||
|
|
||||||
|
clean-noinstPROGRAMS:
|
||||||
|
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
|
||||||
|
echo " rm -f" $$list; \
|
||||||
|
rm -f $$list || exit $$?; \
|
||||||
|
test -n "$(EXEEXT)" || exit 0; \
|
||||||
|
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
|
||||||
|
echo " rm -f" $$list; \
|
||||||
|
rm -f $$list
|
||||||
|
test$(EXEEXT): $(test_OBJECTS) $(test_DEPENDENCIES) $(EXTRA_test_DEPENDENCIES)
|
||||||
|
@rm -f test$(EXEEXT)
|
||||||
|
$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS)
|
||||||
|
|
||||||
|
mostlyclean-compile:
|
||||||
|
-rm -f *.$(OBJEXT)
|
||||||
|
|
||||||
|
distclean-compile:
|
||||||
|
-rm -f *.tab.c
|
||||||
|
|
||||||
|
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test.Po@am__quote@
|
||||||
|
|
||||||
|
.c.o:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
|
||||||
|
|
||||||
|
.c.obj:
|
||||||
|
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
|
||||||
|
|
||||||
|
.c.lo:
|
||||||
|
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
|
||||||
|
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
|
||||||
|
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
|
||||||
|
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
|
||||||
|
|
||||||
|
mostlyclean-libtool:
|
||||||
|
-rm -f *.lo
|
||||||
|
|
||||||
|
clean-libtool:
|
||||||
|
-rm -rf .libs _libs
|
||||||
|
|
||||||
|
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
mkid -fID $$unique
|
||||||
|
tags: TAGS
|
||||||
|
|
||||||
|
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
set x; \
|
||||||
|
here=`pwd`; \
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
shift; \
|
||||||
|
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
|
||||||
|
test -n "$$unique" || unique=$$empty_fix; \
|
||||||
|
if test $$# -gt 0; then \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
"$$@" $$unique; \
|
||||||
|
else \
|
||||||
|
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
|
||||||
|
$$unique; \
|
||||||
|
fi; \
|
||||||
|
fi
|
||||||
|
ctags: CTAGS
|
||||||
|
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
|
||||||
|
$(TAGS_FILES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
|
||||||
|
unique=`for i in $$list; do \
|
||||||
|
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
|
||||||
|
done | \
|
||||||
|
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
|
||||||
|
END { if (nonempty) { for (i in files) print i; }; }'`; \
|
||||||
|
test -z "$(CTAGS_ARGS)$$unique" \
|
||||||
|
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
|
||||||
|
$$unique
|
||||||
|
|
||||||
|
GTAGS:
|
||||||
|
here=`$(am__cd) $(top_builddir) && pwd` \
|
||||||
|
&& $(am__cd) $(top_srcdir) \
|
||||||
|
&& gtags -i $(GTAGS_ARGS) "$$here"
|
||||||
|
|
||||||
|
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
|
||||||
|
list='$(SOURCES) $(HEADERS) $(LISP)'; \
|
||||||
|
case "$(srcdir)" in \
|
||||||
|
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
|
||||||
|
*) sdir=$(subdir)/$(srcdir) ;; \
|
||||||
|
esac; \
|
||||||
|
for i in $$list; do \
|
||||||
|
if test -f "$$i"; then \
|
||||||
|
echo "$(subdir)/$$i"; \
|
||||||
|
else \
|
||||||
|
echo "$$sdir/$$i"; \
|
||||||
|
fi; \
|
||||||
|
done >> $(top_builddir)/cscope.files
|
||||||
|
|
||||||
|
distclean-tags:
|
||||||
|
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
|
||||||
|
|
||||||
|
distdir: $(DISTFILES)
|
||||||
|
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
|
||||||
|
list='$(DISTFILES)'; \
|
||||||
|
dist_files=`for file in $$list; do echo $$file; done | \
|
||||||
|
sed -e "s|^$$srcdirstrip/||;t" \
|
||||||
|
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
|
||||||
|
case $$dist_files in \
|
||||||
|
*/*) $(MKDIR_P) `echo "$$dist_files" | \
|
||||||
|
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
|
||||||
|
sort -u` ;; \
|
||||||
|
esac; \
|
||||||
|
for file in $$dist_files; do \
|
||||||
|
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
|
||||||
|
if test -d $$d/$$file; then \
|
||||||
|
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
|
||||||
|
if test -d "$(distdir)/$$file"; then \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
|
||||||
|
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
|
||||||
|
fi; \
|
||||||
|
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
|
||||||
|
else \
|
||||||
|
test -f "$(distdir)/$$file" \
|
||||||
|
|| cp -p $$d/$$file "$(distdir)/$$file" \
|
||||||
|
|| exit 1; \
|
||||||
|
fi; \
|
||||||
|
done
|
||||||
|
check-am: all-am
|
||||||
|
check: check-am
|
||||||
|
all-am: Makefile $(PROGRAMS)
|
||||||
|
installdirs:
|
||||||
|
install: install-am
|
||||||
|
install-exec: install-exec-am
|
||||||
|
install-data: install-data-am
|
||||||
|
uninstall: uninstall-am
|
||||||
|
|
||||||
|
install-am: all-am
|
||||||
|
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
|
||||||
|
|
||||||
|
installcheck: installcheck-am
|
||||||
|
install-strip:
|
||||||
|
if test -z '$(STRIP)'; then \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
install; \
|
||||||
|
else \
|
||||||
|
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
|
||||||
|
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
|
||||||
|
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
|
||||||
|
fi
|
||||||
|
mostlyclean-generic:
|
||||||
|
|
||||||
|
clean-generic:
|
||||||
|
|
||||||
|
distclean-generic:
|
||||||
|
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
|
||||||
|
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
|
||||||
|
|
||||||
|
maintainer-clean-generic:
|
||||||
|
@echo "This command is intended for maintainers to use"
|
||||||
|
@echo "it deletes files that may require special tools to rebuild."
|
||||||
|
clean: clean-am
|
||||||
|
|
||||||
|
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
|
||||||
|
mostlyclean-am
|
||||||
|
|
||||||
|
distclean: distclean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
distclean-am: clean-am distclean-compile distclean-generic \
|
||||||
|
distclean-tags
|
||||||
|
|
||||||
|
dvi: dvi-am
|
||||||
|
|
||||||
|
dvi-am:
|
||||||
|
|
||||||
|
html: html-am
|
||||||
|
|
||||||
|
html-am:
|
||||||
|
|
||||||
|
info: info-am
|
||||||
|
|
||||||
|
info-am:
|
||||||
|
|
||||||
|
install-data-am:
|
||||||
|
|
||||||
|
install-dvi: install-dvi-am
|
||||||
|
|
||||||
|
install-dvi-am:
|
||||||
|
|
||||||
|
install-exec-am:
|
||||||
|
|
||||||
|
install-html: install-html-am
|
||||||
|
|
||||||
|
install-html-am:
|
||||||
|
|
||||||
|
install-info: install-info-am
|
||||||
|
|
||||||
|
install-info-am:
|
||||||
|
|
||||||
|
install-man:
|
||||||
|
|
||||||
|
install-pdf: install-pdf-am
|
||||||
|
|
||||||
|
install-pdf-am:
|
||||||
|
|
||||||
|
install-ps: install-ps-am
|
||||||
|
|
||||||
|
install-ps-am:
|
||||||
|
|
||||||
|
installcheck-am:
|
||||||
|
|
||||||
|
maintainer-clean: maintainer-clean-am
|
||||||
|
-rm -rf ./$(DEPDIR)
|
||||||
|
-rm -f Makefile
|
||||||
|
maintainer-clean-am: distclean-am maintainer-clean-generic
|
||||||
|
|
||||||
|
mostlyclean: mostlyclean-am
|
||||||
|
|
||||||
|
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
|
||||||
|
mostlyclean-libtool
|
||||||
|
|
||||||
|
pdf: pdf-am
|
||||||
|
|
||||||
|
pdf-am:
|
||||||
|
|
||||||
|
ps: ps-am
|
||||||
|
|
||||||
|
ps-am:
|
||||||
|
|
||||||
|
uninstall-am:
|
||||||
|
|
||||||
|
.MAKE: install-am install-strip
|
||||||
|
|
||||||
|
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
|
||||||
|
clean-libtool clean-noinstPROGRAMS cscopelist ctags distclean \
|
||||||
|
distclean-compile distclean-generic distclean-libtool \
|
||||||
|
distclean-tags distdir dvi dvi-am html html-am info info-am \
|
||||||
|
install install-am install-data install-data-am install-dvi \
|
||||||
|
install-dvi-am install-exec install-exec-am install-html \
|
||||||
|
install-html-am install-info install-info-am install-man \
|
||||||
|
install-pdf install-pdf-am install-ps install-ps-am \
|
||||||
|
install-strip installcheck installcheck-am installdirs \
|
||||||
|
maintainer-clean maintainer-clean-generic mostlyclean \
|
||||||
|
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
|
||||||
|
pdf pdf-am ps ps-am tags uninstall uninstall-am
|
||||||
|
|
||||||
|
|
||||||
|
# Tell versions [3.59,3.63) of GNU make to not export all variables.
|
||||||
|
# Otherwise a system limit (for SysV at least) may be exceeded.
|
||||||
|
.NOEXPORT:
|
@ -0,0 +1,176 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
This file is part of SFFT.
|
||||||
|
|
||||||
|
Copyright (c) 2012, Anthony M. Blake
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the organization nor the
|
||||||
|
names of its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||||
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
|
||||||
|
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||||
|
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __ARM_NEON__
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "../include/ffts.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define PI 3.1415926535897932384626433832795028841971693993751058209
|
||||||
|
|
||||||
|
float impulse_error(int N, int sign, float *data) {
|
||||||
|
#ifdef __ANDROID__
|
||||||
|
double delta_sum = 0.0f;
|
||||||
|
double sum = 0.0f;
|
||||||
|
#else
|
||||||
|
long double delta_sum = 0.0f;
|
||||||
|
long double sum = 0.0f;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for(i=0;i<N;i++) {
|
||||||
|
#ifdef __ANDROID__
|
||||||
|
double re, im;
|
||||||
|
if(sign < 0) {
|
||||||
|
re = cos(2 * PI * (double)i / (double)N);
|
||||||
|
im = -sin(2 * PI * (double)i / (double)N);
|
||||||
|
}else{
|
||||||
|
re = cos(2 * PI * (double)i / (double)N);
|
||||||
|
im = sin(2 * PI * (double)i / (double)N);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
long double re, im;
|
||||||
|
if(sign < 0) {
|
||||||
|
re = cosl(2 * PI * (long double)i / (long double)N);
|
||||||
|
im = -sinl(2 * PI * (long double)i / (long double)N);
|
||||||
|
}else{
|
||||||
|
re = cosl(2 * PI * (long double)i / (long double)N);
|
||||||
|
im = sinl(2 * PI * (long double)i / (long double)N);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
sum += re * re + im * im;
|
||||||
|
|
||||||
|
re = re - data[2*i];
|
||||||
|
im = im - data[2*i+1];
|
||||||
|
|
||||||
|
delta_sum += re * re + im * im;
|
||||||
|
|
||||||
|
}
|
||||||
|
#ifdef __ANDROID__
|
||||||
|
return sqrt(delta_sum) / sqrt(sum);
|
||||||
|
#else
|
||||||
|
return sqrtl(delta_sum) / sqrtl(sum);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
test_transform(int n, int sign) {
|
||||||
|
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
float __attribute__ ((aligned(32))) *input = _mm_malloc(2 * n * sizeof(float), 32);
|
||||||
|
float __attribute__ ((aligned(32))) *output = _mm_malloc(2 * n * sizeof(float), 32);
|
||||||
|
#else
|
||||||
|
float __attribute__ ((aligned(32))) *input = valloc(2 * n * sizeof(float));
|
||||||
|
float __attribute__ ((aligned(32))) *output = valloc(2 * n * sizeof(float));
|
||||||
|
#endif
|
||||||
|
int i;
|
||||||
|
for(i=0;i<n;i++) {
|
||||||
|
input[2*i] = 0.0f;
|
||||||
|
input[2*i+1] = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
input[2] = 1.0f;
|
||||||
|
|
||||||
|
ffts_plan_t *p = ffts_init_1d(i, sign);
|
||||||
|
if(p) {
|
||||||
|
ffts_execute(p, input, output);
|
||||||
|
printf(" %3d | %9d | %10E\n", sign, n, impulse_error(n, sign, output));
|
||||||
|
ffts_free(p);
|
||||||
|
}else{
|
||||||
|
printf("Plan unsupported\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main(int argc, char *argv[]) {
|
||||||
|
|
||||||
|
if(argc == 3) {
|
||||||
|
// test specific transform with test pattern and display output
|
||||||
|
int n = atoi(argv[1]);
|
||||||
|
int sign = atoi(argv[2]);
|
||||||
|
|
||||||
|
#ifdef HAVE_SSE
|
||||||
|
float __attribute__ ((aligned(32))) *input = _mm_malloc(2 * n * sizeof(float), 32);
|
||||||
|
float __attribute__ ((aligned(32))) *output = _mm_malloc(2 * n * sizeof(float), 32);
|
||||||
|
#else
|
||||||
|
float __attribute__ ((aligned(32))) *input = valloc(2 * n * sizeof(float));
|
||||||
|
float __attribute__ ((aligned(32))) *output = valloc(2 * n * sizeof(float));
|
||||||
|
#endif
|
||||||
|
int i;
|
||||||
|
for(i=0;i<n;i++) {
|
||||||
|
input[2*i] = i;
|
||||||
|
input[2*i+1] = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// input[2] = 1.0f;
|
||||||
|
|
||||||
|
ffts_plan_t *p = ffts_init_1d(i, sign);
|
||||||
|
if(p) {
|
||||||
|
ffts_execute(p, input, output);
|
||||||
|
for(i=0;i<n;i++) printf("%d %d %f %f\n", i, sign, output[2*i], output[2*i+1]);
|
||||||
|
ffts_free(p);
|
||||||
|
}else{
|
||||||
|
printf("Plan unsupported\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_NEON
|
||||||
|
_mm_free(input);
|
||||||
|
_mm_free(output);
|
||||||
|
#else
|
||||||
|
free(input);
|
||||||
|
free(output);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}else{
|
||||||
|
// test various sizes and display error
|
||||||
|
printf(" Sign | Size | L2 Error\n");
|
||||||
|
printf("------+-----------+-------------\n");
|
||||||
|
int n;
|
||||||
|
for(n=1;n<=18;n++) {
|
||||||
|
test_transform(pow(2,n), -1);
|
||||||
|
}
|
||||||
|
for(n=1;n<=18;n++) {
|
||||||
|
test_transform(pow(2,n), 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in new issue