Add FFTS v0.7

master
Anthony Blake 10 years ago committed by Timothy Pearson
parent c223a6a35f
commit 14d918151b

@ -0,0 +1,6 @@
FFTS was developed at the University of Waikato by Anthony Blake <amb@anthonix.com>
The following authors have also graciously contributed code:
Michael Zucchi <notzed@gmail.com> -- JNI java/android support
Michael Cree <mcree@orcon.net.nz> -- Architecture specific code, including support for Altivec and DEC Alpha

@ -0,0 +1,31 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

@ -0,0 +1,11 @@
# Top-level Automake input for FFTS; automake turns this into Makefile.in.

# "foreign" strictness: do not insist on the full set of GNU-standard files.
AUTOMAKE_OPTIONS = foreign
# Sub-directories that recursive targets descend into, in this order.
SUBDIRS = src tests
# Extra files shipped in the distribution tarball in addition to the defaults.
EXTRA_DIST=COPYRIGHT ffts.pc.in build_iphone.sh build_android.sh
# Flags handed to aclocal when aclocal.m4 is regenerated; local macros are in m4/.
ACLOCAL_AMFLAGS = -Im4
# Install the pkg-config file (ffts.pc, produced from ffts.pc.in by
# config.status) into $(libdir)/pkgconfig.
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = ffts.pc
# Descend into java/ only when the ENABLE_JNI conditional is true
# (presumably set by configure's --enable-jni; see README).
if ENABLE_JNI
SUBDIRS += java
endif

@ -0,0 +1,842 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
esac; \
test $$am__dry = yes; \
}
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
@ENABLE_JNI_TRUE@am__append_1 = java
subdir = .
DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(srcdir)/config.h.in \
$(srcdir)/ffts.pc.in $(top_srcdir)/configure AUTHORS \
config.guess config.sub depcomp install-sh ltmain.sh missing
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
$(top_srcdir)/m4/ax_java_options.m4 \
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
$(top_srcdir)/m4/ax_prog_jar.m4 \
$(top_srcdir)/m4/ax_prog_javac.m4 \
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = config.h
CONFIG_CLEAN_FILES = ffts.pc
CONFIG_CLEAN_VPATH_FILES =
SOURCES =
DIST_SOURCES =
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
html-recursive info-recursive install-data-recursive \
install-dvi-recursive install-exec-recursive \
install-html-recursive install-info-recursive \
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(pkgconfigdir)"
DATA = $(pkgconfig_DATA)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
$(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \
cscope distdir dist dist-all distcheck
ETAGS = etags
CTAGS = ctags
CSCOPE = cscope
DIST_SUBDIRS = src tests java
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
if test -d "$(distdir)"; then \
find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
&& rm -rf "$(distdir)" \
|| { sleep 5 && rm -rf "$(distdir)"; }; \
else :; fi
am__post_remove_distdir = $(am__remove_distdir)
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
sed_rest='s,^[^/]*/*,,'; \
sed_last='s,^.*/\([^/]*\)$$,\1,'; \
sed_butlast='s,/*[^/]*$$,,'; \
while test -n "$$dir1"; do \
first=`echo "$$dir1" | sed -e "$$sed_first"`; \
if test "$$first" != "."; then \
if test "$$first" = ".."; then \
dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
else \
first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
if test "$$first2" = "$$first"; then \
dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
else \
dir2="../$$dir2"; \
fi; \
dir0="$$dir0"/"$$first"; \
fi; \
fi; \
dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
done; \
reldir="$$dir2"
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
DIST_TARGETS = dist-gzip
distuninstallcheck_listfiles = find . -type f -print
am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
| sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JAR = @JAR@
JAVA = @JAVA@
JAVAC = @JAVAC@
JAVACFLAGS = @JAVACFLAGS@
JAVAFLAGS = @JAVAFLAGS@
JAVAPREFIX = @JAVAPREFIX@
JAVA_PATH_NAME = @JAVA_PATH_NAME@
JNI_CPPFLAGS = @JNI_CPPFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
_ACJNI_JAVAC = @_ACJNI_JAVAC@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign
SUBDIRS = src tests $(am__append_1)
EXTRA_DIST = COPYRIGHT ffts.pc.in build_iphone.sh build_android.sh
ACLOCAL_AMFLAGS = -Im4
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = ffts.pc
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
am--refresh: Makefile
@:
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
echo ' cd $(srcdir) && $(AUTOMAKE) --foreign'; \
$(am__cd) $(srcdir) && $(AUTOMAKE) --foreign \
&& exit 0; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --foreign Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
echo ' $(SHELL) ./config.status'; \
$(SHELL) ./config.status;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
$(SHELL) ./config.status --recheck
$(top_srcdir)/configure: $(am__configure_deps)
$(am__cd) $(srcdir) && $(AUTOCONF)
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
$(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
$(am__aclocal_m4_deps):
config.h: stamp-h1
@if test ! -f $@; then rm -f stamp-h1; else :; fi
@if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
@rm -f stamp-h1
cd $(top_builddir) && $(SHELL) ./config.status config.h
$(srcdir)/config.h.in: $(am__configure_deps)
($(am__cd) $(top_srcdir) && $(AUTOHEADER))
rm -f stamp-h1
touch $@
distclean-hdr:
-rm -f config.h stamp-h1
ffts.pc: $(top_builddir)/config.status $(srcdir)/ffts.pc.in
cd $(top_builddir) && $(SHELL) ./config.status $@
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
distclean-libtool:
-rm -f libtool config.lt
install-pkgconfigDATA: $(pkgconfig_DATA)
@$(NORMAL_INSTALL)
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(pkgconfigdir)'"; \
$(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgconfigdir)'"; \
$(INSTALL_DATA) $$files "$(DESTDIR)$(pkgconfigdir)" || exit $$?; \
done
uninstall-pkgconfigDATA:
@$(NORMAL_UNINSTALL)
@list='$(pkgconfig_DATA)'; test -n "$(pkgconfigdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(pkgconfigdir)'; $(am__uninstall_files_from_dir)
# This directory's subdirectories are mostly independent; you can cd
# into them and run 'make' without going through this Makefile.
# To change the values of 'make' variables: instead of editing Makefiles,
# (1) if the variable is set in 'config.status', edit 'config.status'
# (which will cause the Makefiles to be regenerated when you run 'make');
# (2) otherwise, pass the desired values on the 'make' command line.
$(RECURSIVE_TARGETS) $(RECURSIVE_CLEAN_TARGETS):
@fail= failcom='exit 1'; \
for f in x $$MAKEFLAGS; do \
case $$f in \
*=* | --[!k]*);; \
*k*) failcom='fail=yes';; \
esac; \
done; \
dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
case "$@" in \
distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
*) list='$(SUBDIRS)' ;; \
esac; \
for subdir in $$list; do \
echo "Making $$target in $$subdir"; \
if test "$$subdir" = "."; then \
dot_seen=yes; \
local_target="$$target-am"; \
else \
local_target="$$target"; \
fi; \
($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
|| eval $$failcom; \
done; \
if test "$$dot_seen" = "no"; then \
$(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
fi; test -z "$$fail"
tags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
done
ctags-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
done
cscopelist-recursive:
list='$(SUBDIRS)'; for subdir in $$list; do \
test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) cscopelist); \
done
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
include_option=--etags-include; \
empty_fix=.; \
else \
include_option=--include; \
empty_fix=; \
fi; \
list='$(SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
test ! -f $$subdir/TAGS || \
set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
fi; \
done; \
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscope: cscope.files
test ! -s cscope.files \
|| $(CSCOPE) -b -q $(AM_CSCOPEFLAGS) $(CSCOPEFLAGS) -i cscope.files $(CSCOPE_ARGS)
clean-cscope:
-rm -f cscope.files
cscope.files: clean-cscope cscopelist-recursive cscopelist
cscopelist: cscopelist-recursive $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
distdir: $(DISTFILES)
$(am__remove_distdir)
test -d "$(distdir)" || mkdir "$(distdir)"
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
$(am__make_dryrun) \
|| test -d "$(distdir)/$$subdir" \
|| $(MKDIR_P) "$(distdir)/$$subdir" \
|| exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
dir1=$$subdir; dir2="$(top_distdir)"; \
$(am__relativize); \
new_top_distdir=$$reldir; \
echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
($(am__cd) $$subdir && \
$(MAKE) $(AM_MAKEFLAGS) \
top_distdir="$$new_top_distdir" \
distdir="$$new_distdir" \
am__remove_distdir=: \
am__skip_length_check=: \
am__skip_mode_fix=: \
distdir) \
|| exit 1; \
fi; \
done
-test -n "$(am__skip_mode_fix)" \
|| find "$(distdir)" -type d ! -perm -755 \
-exec chmod u+rwx,go+rx {} \; -o \
! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
! -type d ! -perm -400 -exec chmod a+r {} \; -o \
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r "$(distdir)"
dist-gzip: distdir
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
$(am__post_remove_distdir)
dist-bzip2: distdir
tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
$(am__post_remove_distdir)
dist-lzip: distdir
tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
$(am__post_remove_distdir)
dist-xz: distdir
tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
$(am__post_remove_distdir)
dist-tarZ: distdir
tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z
$(am__post_remove_distdir)
dist-shar: distdir
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
$(am__post_remove_distdir)
dist-zip: distdir
-rm -f $(distdir).zip
zip -rq $(distdir).zip $(distdir)
$(am__post_remove_distdir)
dist dist-all:
$(MAKE) $(AM_MAKEFLAGS) $(DIST_TARGETS) am__post_remove_distdir='@:'
$(am__post_remove_distdir)
# This target untars the dist file and tries a VPATH configuration. Then
# it guarantees that the distribution is self-contained by making another
# tarfile.
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lz*) \
lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
chmod -R a-w $(distdir)
chmod u+w $(distdir)
mkdir $(distdir)/_build $(distdir)/_inst
chmod a-w $(distdir)
test -d $(distdir)/_build || exit 0; \
dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \
&& dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
$(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
&& $(MAKE) $(AM_MAKEFLAGS) check \
&& $(MAKE) $(AM_MAKEFLAGS) install \
&& $(MAKE) $(AM_MAKEFLAGS) installcheck \
&& $(MAKE) $(AM_MAKEFLAGS) uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \
distuninstallcheck \
&& chmod -R a-w "$$dc_install_base" \
&& ({ \
(cd ../.. && umask 077 && mkdir "$$dc_destdir") \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \
&& $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \
distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \
} || { rm -rf "$$dc_destdir"; exit 1; }) \
&& rm -rf "$$dc_destdir" \
&& $(MAKE) $(AM_MAKEFLAGS) dist \
&& rm -rf $(DIST_ARCHIVES) \
&& $(MAKE) $(AM_MAKEFLAGS) distcleancheck \
&& cd "$$am__cwd" \
|| exit 1
$(am__post_remove_distdir)
@(echo "$(distdir) archives ready for distribution: "; \
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
distuninstallcheck:
@test -n '$(distuninstallcheck_dir)' || { \
echo 'ERROR: trying to run $@ with an empty' \
'$$(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
$(am__cd) '$(distuninstallcheck_dir)' || { \
echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
exit 1; \
}; \
test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
fi ; \
$(distuninstallcheck_listfiles) ; \
exit 1; } >&2
distcleancheck: distclean
@if test '$(srcdir)' = . ; then \
echo "ERROR: distcleancheck can only run from a VPATH build" ; \
exit 1 ; \
fi
@test `$(distcleancheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left in build directory after distclean:" ; \
$(distcleancheck_listfiles) ; \
exit 1; } >&2
check-am: all-am
check: check-recursive
all-am: Makefile $(DATA) config.h
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(pkgconfigdir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-recursive
install-exec: install-exec-recursive
install-data: install-data-recursive
uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-recursive
clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -f Makefile
distclean-am: clean-am distclean-generic distclean-hdr \
distclean-libtool distclean-tags
dvi: dvi-recursive
dvi-am:
html: html-recursive
html-am:
info: info-recursive
info-am:
install-data-am: install-pkgconfigDATA
install-dvi: install-dvi-recursive
install-dvi-am:
install-exec-am:
install-html: install-html-recursive
install-html-am:
install-info: install-info-recursive
install-info-am:
install-man:
install-pdf: install-pdf-recursive
install-pdf-am:
install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -rf $(top_srcdir)/autom4te.cache
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-recursive
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
pdf: pdf-recursive
pdf-am:
ps: ps-recursive
ps-am:
uninstall-am: uninstall-pkgconfigDATA
.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all \
cscopelist-recursive ctags-recursive install-am install-strip \
tags-recursive
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am am--refresh check check-am clean clean-cscope \
clean-generic clean-libtool cscope cscopelist \
cscopelist-recursive ctags ctags-recursive dist dist-all \
dist-bzip2 dist-gzip dist-lzip dist-shar dist-tarZ dist-xz \
dist-zip distcheck distclean distclean-generic distclean-hdr \
distclean-libtool distclean-tags distcleancheck distdir \
distuninstallcheck dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-pkgconfigDATA install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
uninstall uninstall-am uninstall-pkgconfigDATA
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

@ -0,0 +1,27 @@
FFTS -- The Fastest Fourier Transform in the South
by Anthony Blake <anthonix@me.com>
To build for Android, edit and run build_android.sh
To build for iOS, edit and run build_iphone.sh
To build for Linux or OS X on x86, run
./configure --enable-sse --enable-single --prefix=/usr/local
make
make install
FFTS dynamically generates code at runtime. This can be disabled with
--disable-dynamic-code
For JNI targets: --enable-jni builds the JNI bindings automatically for
the host target; --enable-shared must also be passed explicitly for them to
work.
If you like FFTS, please show your support by sending a postcard to:
Anthony Blake
Department of Computer Science
The University of Waikato
Private Bag 3105
Hamilton 3240
NEW ZEALAND

9558
lib/ffts/aclocal.m4 vendored

File diff suppressed because it is too large Load Diff

@ -0,0 +1,80 @@
#!/bin/sh
# Compiles ffts for Android using the NDK GCC toolchain and, when
# ANDROID_HOME is set, builds the Android JNI library project as well.
#
# Make sure you have NDK_ROOT defined in .bashrc or .bash_profile
# Modify INSTALL_DIR, PLATFORM, TOOL and TARGET_ARCH to suit your situation
#
# Exit status: 0 on success, non-zero as soon as a required step fails.

INSTALL_DIR="`pwd`/java/android/bin"
PLATFORM=android-8
TOOL="4.6"
# Target architecture: arm, x86 or mips.
# NOTE: the configure invocation below always passes --enable-neon, which
# only makes sense for arm; adjust it as well if you change this.
TARGET_ARCH=arm

# Work out the build triplet and the NDK prebuilt-toolchain host directory.
case $(uname -s) in
    Darwin)
        CONFBUILD=i386-apple-darwin`uname -r`
        HOSTPLAT=darwin-x86
        ;;
    Linux)
        CONFBUILD=x86-unknown-linux
        HOSTPLAT=linux-`uname -m`
        ;;
    *)
        # Report on stderr and fail: a bare "exit" would return echo's status (0).
        echo "$0: Unknown platform" >&2
        exit 1
        ;;
esac

# Map the target architecture to toolchain prefix, sysroot arch and --host.
case $TARGET_ARCH in
    arm)
        TARGPLAT=arm-linux-androideabi
        ARCH=arm
        CONFTARG=arm-eabi
        ;;
    x86)
        TARGPLAT=x86
        ARCH=x86
        CONFTARG=x86
        ;;
    mips)
        ## probably wrong
        TARGPLAT=mipsel-linux-android
        ARCH=mips
        CONFTARG=mips
        ;;
    *)
        echo "$0: Unknown target" >&2
        exit 1
        ;;
esac

# Abort with a diagnostic if NDK_ROOT is unset or empty.
: "${NDK_ROOT:?}"

echo "Using: $NDK_ROOT/toolchains/${TARGPLAT}-${TOOL}/prebuilt/${HOSTPLAT}/bin"

export PATH="$NDK_ROOT/toolchains/${TARGPLAT}-${TOOL}/prebuilt/${HOSTPLAT}/bin/:$PATH"
export SYS_ROOT="$NDK_ROOT/platforms/${PLATFORM}/arch-${ARCH}/"
export CC="${TARGPLAT}-gcc --sysroot=$SYS_ROOT"
export LD="${TARGPLAT}-ld"
export AR="${TARGPLAT}-ar"
export RANLIB="${TARGPLAT}-ranlib"
export STRIP="${TARGPLAT}-strip"
export CFLAGS="-Os"

mkdir -p "$INSTALL_DIR" || exit 1
./configure --enable-neon --build=${CONFBUILD} --host=${CONFTARG} --prefix="$INSTALL_DIR" LIBS="-lc -lgcc" || exit 1

# "make clean" stays best-effort, as before; the build and install steps
# must succeed, otherwise the script would report success on a broken build.
make clean
make || exit 1
make install || exit 1

if [ -z "$ANDROID_HOME" ] ; then
    echo ""
    echo " No ANDROID_HOME defined"
    echo " Android JNI interfaces will not be built"
    echo
else
    echo
    echo "Using android_home ${ANDROID_HOME}"
    echo
    # Each step runs in a subshell so the cd does not leak into the next one.
    ( cd java/android ; "${ANDROID_HOME}/tools/android" update lib-project -p . ) || exit 1
    ( cd java/android/jni ; "${NDK_ROOT}/ndk-build" V=1 ) || exit 1
    ( cd java/android ; ant release ) || exit 1
    echo
    echo "Android library project location:"
    echo " `pwd`/java/android"
    echo
fi
exit 0

@ -0,0 +1,22 @@
#!/bin/sh
# Compiles ffts for iOS (armv7 / NEON) with clang and installs it under
# INSTALL_DIR.
#
# Modify INSTALL_DIR, SDKVER and DEVROOT to suit your situation
#
# Exit status: 0 on success, non-zero as soon as a required step fails.

INSTALL_DIR="`pwd`/build"

export SDKVER="6.1"
export DEVROOT="/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer"
export SDKROOT="$DEVROOT/SDKs/iPhoneOS$SDKVER.sdk"
export CFLAGS="-O3 -Wreturn-type -Wparentheses -Wswitch -Wno-unused-parameter -Wno-unused-variable -Wunused-value -Wno-shorten-64-to-32 -Wno-trigraphs -fpascal-strings -miphoneos-version-min=5.0 -mcpu=cortex-a9 -arch armv7 -mfpu=neon -pipe -isysroot $SDKROOT -isystem $SDKROOT/usr/include -isystem $DEVROOT/usr/include -mno-thumb -no-integrated-as"
export AR="$DEVROOT/usr/bin/ar"
export CC="clang"

mkdir -p "$INSTALL_DIR" || exit 1
./configure --enable-neon --build=i386-apple-darwin`uname -r` --host=arm-eabi --prefix="$INSTALL_DIR" || exit 1

# "make clean" stays best-effort, as before; the build and install steps
# must succeed, otherwise the script would report success on a broken build.
make clean
make || exit 1
make install || exit 1
exit 0

1537
lib/ffts/config.guess vendored

File diff suppressed because it is too large Load Diff

@ -0,0 +1,148 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* Define to disable dynamic code generation. */
#undef DYNAMIC_DISABLED
/* JNI being built. */
#undef ENABLE_JNI
/* Define to FFT in single precision. */
#undef FFTS_PREC_SINGLE
/* Set ARM float abi. */
#undef FLOAT_ABI
/* Define to 1 if you have the declaration of `memalign', and to 0 if you
don't. */
#undef HAVE_DECL_MEMALIGN
/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if
you don't. */
#undef HAVE_DECL_POSIX_MEMALIGN
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `gettimeofday' function. */
#undef HAVE_GETTIMEOFDAY
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `m' library (-lm). */
#undef HAVE_LIBM
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to FFT with ARM NEON. */
#undef HAVE_NEON
/* Define to 1 if you have the `pow' function. */
#undef HAVE_POW
/* Define to FFT with SSE. */
#undef HAVE_SSE
/* Define to 1 if stdbool.h conforms to C99. */
#undef HAVE_STDBOOL_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/socket.h> header file. */
#undef HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to FFT with ARM VFP. */
#undef HAVE_VFP
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Version number of package */
#undef VERSION
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT64_T
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef int32_t
/* Define to the equivalent of the C99 'restrict' keyword, or to
nothing if this is not supported. Do not define if restrict is
supported directly. */
#undef restrict
/* Work around a bug in Sun C++: it does not support _Restrict or
__restrict__, even though the corresponding Sun C compiler ends up with
"#define restrict _Restrict" or "#define restrict __restrict__" in the
previous line. Perhaps some future version of Sun C++ will work with
restrict; if so, hopefully it defines __RESTRICT like Sun C does. */
#if defined __SUNPRO_CC && !defined __RESTRICT
# define _Restrict
# define __restrict__
#endif
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef uint64_t

1786
lib/ffts/config.sub vendored

File diff suppressed because it is too large Load Diff

19306
lib/ffts/configure vendored

File diff suppressed because it is too large Load Diff

@ -0,0 +1,137 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
#
# Overview of the options handled below:
#   --enable-dynamic-code   runtime code generation, on by default
#   --enable-single         single-precision build, off by default
#   --enable-sse / --enable-neon / --enable-vfp   SIMD back ends, off by default
#   --with-float-abi=ABI    ARM float ABI, softfp by default
#   --enable-jni            Java bindings, off by default

AC_PREREQ([2.68])
AC_INIT(ffts, 0.7, amb@anthonix.com)
AM_INIT_AUTOMAKE(ffts, 0.7)
AC_CONFIG_MACRO_DIR([m4])
# AC_CONFIG_SRCDIR([include/common.h])
AC_CONFIG_HEADERS([config.h])
AC_CANONICAL_HOST

# Checks for programs.
AC_PROG_CXX
AC_PROG_CC
#AX_COMPILER_VENDOR
LT_INIT([disable-shared])
AM_PROG_AS

#CXX="clang++"
#CXXFLAGS="$CXXFLAGS -stdlib=libc++"
#SFFT_AR="/usr/bin/ar"
#SFFT_CFLAGS="$CFLAGS"
#SFFT_CC="$CC"

# Dynamic code generation defaults to yes; only an explicit "no" defines
# DYNAMIC_DISABLED and sets the matching automake conditional.
AC_ARG_ENABLE(dynamic-code, [AS_HELP_STRING([--enable-dynamic-code],[dynamically generate code])], sfft_dynamic=$enableval, sfft_dynamic=yes)
if test "$sfft_dynamic" = "no"; then
    AC_DEFINE(DYNAMIC_DISABLED,1,[Define to disable dynamic code generation.])
fi
AM_CONDITIONAL(DYNAMIC_DISABLED, test "$sfft_dynamic" = "no")

# FFTS_PREC_SINGLE is defined to 1 or 0 for the values yes and no; any other
# value leaves it undefined.
AC_ARG_ENABLE(single, [AS_HELP_STRING([--enable-single],[compile single-precision library])], sfft_single=$enableval, sfft_single=no)
if test "$sfft_single" = "yes"; then
    AC_DEFINE(FFTS_PREC_SINGLE,1,[Define to FFT in single precision.])
fi
if test "$sfft_single" = "no"; then
    AC_DEFINE(FFTS_PREC_SINGLE,0,[Define to FFT in single precision.])
fi

# SIMD back ends.  Each one sets a config.h define and an automake conditional.
AC_ARG_ENABLE(sse, [AS_HELP_STRING([--enable-sse],[enable SSE extensions])], have_sse=$enableval, have_sse=no)
if test "$have_sse" = "yes"; then
    SIMD=sse
    AC_DEFINE(HAVE_SSE,1,[Define to FFT with SSE.])
fi
AM_CONDITIONAL(HAVE_SSE, test "$have_sse" = "yes")

AC_ARG_ENABLE(neon, [AS_HELP_STRING([--enable-neon],[enable NEON extensions])], have_neon=$enableval, have_neon=no)
if test "$have_neon" = "yes"; then
    AC_DEFINE(HAVE_NEON,1,[Define to FFT with ARM NEON.])
fi
AM_CONDITIONAL(HAVE_NEON, test "$have_neon" = "yes")

AC_ARG_ENABLE(vfp, [AS_HELP_STRING([--enable-vfp],[enable VFP extensions])], have_vfp=$enableval, have_vfp=no)
if test "$have_vfp" = "yes"; then
    AC_DEFINE(HAVE_VFP,1,[Define to FFT with ARM VFP.])
fi
AM_CONDITIONAL(HAVE_VFP, test "$have_vfp" = "yes")

AC_ARG_WITH(float-abi, [AS_HELP_STRING([--with-float-abi=ABI],[set float abi for arm, hard or softfp (default is softfp)])],
float_abi=$withval, float_abi=softfp)

# Optional Java bindings.  The Java tool checks only run when requested.
AC_ARG_ENABLE(jni, [AS_HELP_STRING([--enable-jni],[enable JNI binding])], have_jni=$enableval, have_jni=no)
if test "$have_jni" = "yes"; then
    # Java stuff
    AX_JAVA_OPTIONS
    AC_CHECK_JAVA_HOME
    AC_CHECK_CLASSPATH
    AC_PROG_JAVAC
    # blah this whinges about something
    #AC_PROG_JAVAH
    AC_PROG_JAR
    AX_JNI_INCLUDE_DIR
    # Turn every detected JNI include directory into a -I flag.
    for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS
    do
        JNI_CPPFLAGS="$JNI_CPPFLAGS -I$JNI_INCLUDE_DIR"
    done
    AC_SUBST(JNI_CPPFLAGS, [$JNI_CPPFLAGS])
    AC_DEFINE(ENABLE_JNI,1,[JNI being built.])
fi
AM_CONDITIONAL(ENABLE_JNI, test "$have_jni" = "yes")

# Pick the -mfpu flag.  VFP is tested first, so it wins when both VFP and
# NEON are enabled; with neither enabled no -mfpu flag is added.
fpu=""
AS_IF([test "$have_vfp" = "yes"],[fpu="-mfpu=vfp"],
      [test "$have_neon" = "yes"],[fpu="-mfpu=neon"],
      [])

# The float ABI and FPU flags are only added for hosts whose name starts
# with "arm"; all other hosts keep their flags unchanged.
AC_MSG_NOTICE([host is "${host}"])
case "${host}" in
    arm* )
        CFLAGS="$CFLAGS -mfloat-abi=${float_abi} ${fpu} -std=c99"
        CCASFLAGS="$CCASFLAGS -mfloat-abi=${float_abi} ${fpu}"
        ;;
    *)
        ;;
esac

#if test "$ord_sr" = "no"; then
# AC_DEFINE(SFFT_ORD_SR,0,[Define to enable ordinary split radix.])
#fi

# Checks for libraries.
AC_CHECK_LIB([m], [cos])
AC_CHECK_DECLS([posix_memalign,
memalign],,,
[#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <malloc.h>])

# Checks for header files.
AC_CHECK_HEADERS([stdint.h stdlib.h string.h sys/socket.h sys/time.h unistd.h])

# Checks for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
AC_C_INLINE
AC_TYPE_INT32_T
AC_C_RESTRICT
AC_TYPE_SIZE_T
AC_TYPE_UINT64_T
AC_PROG_CC_STDC
AC_PROG_INSTALL
AC_PROG_LN_S
AC_PROG_LIBTOOL

# Checks for library functions.
#AC_FUNC_MALLOC
AC_CHECK_FUNCS([gettimeofday pow])

AC_CONFIG_FILES([Makefile
    src/Makefile
    tests/Makefile
    ffts.pc
    java/Makefile
])
AC_OUTPUT

@ -0,0 +1,780 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2012-07-12.20; # UTC
# Copyright (C) 1999-2012 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# A tabulation character.
tab=' '
# A newline character.
nl='
'
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
"$@"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
mv "$tmpdepfile" "$depfile"
;;
gcc)
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say).
## - Using -M directly means running the compiler twice (even worse
## than renaming).
if test -z "$gccflag"; then
gccflag=-MD,
fi
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
## The second -e expression handles DOS-style file names with drive letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
tr ' ' "$nl" < "$tmpdepfile" |
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
# The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile
# "include basename.Plo" scheme.
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
# Each line is of the form 'foo.o: dependent.h'.
# Do two passes, one to just change these to
# '$object: dependent.h' and one to simply 'dependent.h:'.
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else
# The sourcefile does not contain any dependencies, so just
# store a dummy comment line, to avoid errors with the Makefile
# "include basename.Plo" scheme.
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
;;
icc)
# Intel's C compiler and tcc (Tiny C Compiler) understand '-MD -MF file'.
# However on
# $CC -MD -MF foo.d -c -o sub/foo.o sub/foo.c
# ICC 7.0 will fill foo.d with something like
# foo.o: sub/foo.c
# foo.o: sub/foo.h
# which is wrong. We want
# sub/foo.o: sub/foo.c
# sub/foo.o: sub/foo.h
# sub/foo.c:
# sub/foo.h:
# ICC 7.1 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\':
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
# tcc 0.9.26 (FIXME still under development at the moment of writing)
# will emit a similar output, but also prepend the continuation lines
# with horizontal tabulation characters.
"$@" -MD -MF "$tmpdepfile"
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form 'foo.o: dependent.h',
# or 'foo.o: dep1.h dep2.h \', or ' dep3.h dep4.h \'.
# Do two passes, one to just change these to
# '$object: dependent.h' and one to simply 'dependent.h:'.
sed -e "s/^[ $tab][ $tab]*/ /" -e "s,^[^:]*:,$object :," \
< "$tmpdepfile" > "$depfile"
sed '
s/[ '"$tab"'][ '"$tab"']*/ /g
s/^ *//
s/ *\\*$//
s/^[^:]*: *//
/^$/d
/:$/d
s/$/ :/
' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
  # Portland's C compiler understands '-MD'.
  # Will always output deps to 'file.d' where file is the root name of the
  # source file under compilation, even if file resides in a subdirectory.
  # The object file name does not affect the name of the '.d' file.
  # pgcc 10.2 will output
  # foo.o: sub/foo.c sub/foo.h
  # and will wrap long lines using '\' :
  # foo.o: sub/foo.c ... \
  # sub/foo.h ... \
  # ...
  dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
  test "x$dir" = "x$object" && dir=
  # Use the source, not the object, to determine the base name, since
  # that's sadly what pgcc will do too.
  base=`echo "$source" | sed -e 's|^.*/||' -e 's/\.[-_a-zA-Z0-9]*$//'`
  tmpdepfile="$base.d"

  # For projects that build the same source file twice into different object
  # files, the pgcc approach of using the *source* file root name can cause
  # problems in parallel builds. Use a locking strategy to avoid stomping on
  # the same $tmpdepfile.
  lockdir="$base.d-lock"
  # The handler removes the lock and then exits: without the explicit exit
  # the shell would resume the script after a signal instead of stopping.
  # The lock directory name is quoted everywhere so that base names with
  # blanks or glob characters are handled as a single path.
  trap "
    echo '$0: caught signal, cleaning up...' >&2
    rm -rf '$lockdir'
    exit 1
  " 1 2 13 15
  numtries=100
  i=$numtries
  while test $i -gt 0 ; do
    # mkdir is a portable test-and-set.
    if mkdir "$lockdir" 2>/dev/null; then
      # This process acquired the lock.
      "$@" -MD
      stat=$?
      # Release the lock.
      rm -rf "$lockdir"
      break
    else
      ## the lock is being held by a different process,
      ## wait until the winning process is done or we timeout
      while test -d "$lockdir" && test $i -gt 0; do
        sleep 1
        i=`expr $i - 1`
      done
    fi
    i=`expr $i - 1`
  done
  trap - 1 2 13 15
  if test $i -le 0; then
    echo "$0: failed to acquire lock after $numtries attempts" >&2
    echo "$0: check lockdir '$lockdir'" >&2
    exit 1
  fi

  if test $stat -ne 0; then
    rm -f "$tmpdepfile"
    exit $stat
  fi
  rm -f "$depfile"
  # Each line is of the form `foo.o: dependent.h',
  # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
  # Do two passes, one to just change these to
  # `$object: dependent.h' and one to simply `dependent.h:'.
  sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
  # Some versions of the HPUX 10.20 sed can't process this invocation
  # correctly. Breaking it into two sed invocations is a workaround.
  sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
    sed -e 's/$/ :/' >> "$depfile"
  rm -f "$tmpdepfile"
  ;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
test "x$dir" = "x$object" && dir=
base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
if test "$libtool" = yes; then
# With Tru64 cc, shared objects can also be used to make a
# static library. This mechanism is used in libtool 1.4 series to
# handle both shared and static libraries in a single compilation.
# With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d.
#
# With libtool 1.5 this exception was removed, and libtool now
# generates 2 separate objects for the 2 libraries. These two
# compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4
tmpdepfile2=$dir$base.o.d # libtool 1.5
tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5
tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.o.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
tmpdepfile4=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -eq 0; then :
else
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
sed -e 's,^.*\.[a-z]*:['"$tab"' ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
else
echo "#dummy" > "$depfile"
fi
rm -f "$tmpdepfile"
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test "$stat" = 0; then :
else
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed 's:^['"$tab"' ]*[^:'"$tab"' ][^:][^:]*\:['"$tab"' ]*:'"$object"'\: :' > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
tr ' ' "$nl" < "$tmpdepfile" | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
obj_suffix=`echo "$object" | sed 's/^.*\././'`
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
sed '1,2d' "$tmpdepfile" | tr ' ' "$nl" | \
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E |
sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

@ -0,0 +1,10 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
Name: FFTS
Description: fast Fourier transform library
Version: @VERSION@
Libs: -L${libdir} -lffts -lm
Cflags: -I${includedir}/ffts

@ -0,0 +1,68 @@
/*
This file is part of FFTS.
Copyright (c) 2012, Anthony M. Blake
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Public interface of FFTS: plan creation, plan execution and plan disposal. */
#ifndef __FFTS_H__
#define __FFTS_H__
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C"
{
#endif /* __cplusplus */

/* Opaque transform plan. Only a forward declaration is given in this header,
 * so callers can hold and pass pointers to a plan but cannot look inside it. */
struct _ffts_plan_t;
typedef struct _ffts_plan_t ffts_plan_t;

/* Plan constructors for complex transforms.
 *   N, N1, N2  transform length(s) along each dimension
 *   rank, Ns   number of dimensions and, presumably, an array of `rank`
 *              lengths -- TODO confirm against the implementation
 *   sign       selects the transform direction; the convention for complex
 *              transforms is not stated in this header -- presumably the
 *              same -1 forwards / 1 backwards convention documented for the
 *              real variants below; TODO confirm
 * NOTE(review): the behaviour on failure (e.g. a NULL return for unsupported
 * sizes) is not visible from this header -- check the implementation. */
ffts_plan_t *ffts_init_1d(size_t N, int sign);
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);

// For real transforms, sign == -1 implies a real-to-complex forwards transform,
// and sign == 1 implies a complex-to-real backwards transform
// The output of a real-to-complex transform is N/2+1 complex numbers, where the
// redundant outputs have been omitted.
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign);
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign);

/* Executes the plan given as first argument; `input` is only read (it is
 * const-qualified) and `output` receives the result.
 * NOTE(review): element type, buffer sizes and any alignment requirements of
 * the two buffers are not specified here -- confirm against the
 * implementation before relying on them. */
void ffts_execute(ffts_plan_t * , const void *input, void *output);

/* Disposes of a plan; presumably releases everything obtained by the
 * ffts_init_* call that created it -- TODO confirm. */
void ffts_free(ffts_plan_t *);

#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif

@ -0,0 +1,527 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2011-11-20.07; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# 'make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
nl='
'
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
posix_glob='?'
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
dir_arg=
dst_arg=
copy_on_change=false
no_target_directory=
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
done
fi
case $# in
  0)
    # No operands are left after option parsing.
    # It's OK to call 'install-sh -d' without argument.
    # This can happen when creating conditional directories.
    test -n "$dir_arg" && exit 0
    # Without -d there must be at least one file to install.
    echo "$0: no input file specified." >&2
    exit 1;;
esac
# File-install mode only (no -d): arrange for a well-defined exit status when
# a signal arrives, and derive cp_umask, the umask later applied around the
# copy into the temporary file.
if test -z "$dir_arg"; then
# Exit with 128 + signal number for signals 1, 2, 13 and 15
# (conventionally HUP, INT, PIPE and TERM).
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
# Any other mode ending in an octal digit: expr does decimal arithmetic on
# the octal-looking digits, keeping the last three and subtracting them from
# 777. When stripping, '% 200' additionally reduces the leading (user)
# digit below 2 so the temp stays readable and writable by its owner.
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
# Anything else (e.g. a symbolic mode): hand the mode itself to umask,
# appending ',u+rw' when stripping.
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# Protect names problematic for 'test' and other utilities.
case $src in
-* | [=\(\)!]) src=./$src;;
esac
if test -n "$dir_arg"; then
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test -n "$no_target_directory"; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
# Prefer dirname, but fall back on a substitute if dirname fails.
dstdir=`
(dirname "$dst") 2>/dev/null ||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$dst" : 'X\(//\)[^/]' \| \
X"$dst" : 'X\(//\)$' \| \
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$dst" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
/^X\(\/\/\)[^/].*/{
s//\1/
q
}
/^X\(\/\/\)$/{
s//\1/
q
}
/^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'
`
test -d "$dstdir"
dstdir_status=$?
fi
fi
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
if (umask $mkdir_umask &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
esac
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
/*) prefix='/';;
[-=\(\)!]*) prefix='./';;
*) prefix='';;
esac
eval "$initialize_posix_glob"
oIFS=$IFS
IFS=/
$posix_glob set -f
set fnord $dstdir
shift
$posix_glob set +f
IFS=$oIFS
prefixes=
# Walk the '/'-separated components of $dstdir (already placed in "$@"),
# extending $prefix one component at a time. Either retry 'mkdir -p' on the
# whole path, or collect the missing ancestors in $prefixes for one plain
# mkdir call after the loop.
for d
do
  # Skip empty components produced by leading or doubled slashes.
  test X"$d" = X && continue

  prefix=$prefix$d
  if test -d "$prefix"; then
    # This ancestor exists, so nothing queued so far needs creating.
    prefixes=
  else
    if $posix_mkdir; then
      # Retry 'mkdir -p' on the full path under the restricted umask.
      # This has to invoke the umask builtin: 'umask=$mkdir_umask' would
      # only assign a shell variable and leave the process umask untouched,
      # so directories could be created with too-generous permissions.
      (umask $mkdir_umask &&
       $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
      # Don't fail if two instances are running concurrently.
      test -d "$prefix" || exit 1
    else
      # Queue the directory, escaping embedded single quotes so the name
      # survives the eval that runs mkdir on $prefixes.
      case $prefix in
        *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
        *) qprefix=$prefix;;
      esac
      prefixes="$prefixes '$qprefix'"
    fi
  fi
  prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
eval "$initialize_posix_glob" &&
$posix_glob set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
$posix_glob set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

@ -0,0 +1,33 @@
# Build rules for the optional Java/JNI bindings: compile the Java sources
# into ffts.jar, generate the JNI header with javah, and build libffts_jni.
#
# TODO: the ax_prog_javah thing doesn't work so this
# requires javah in the path

if ENABLE_JNI

# Every .java file below the source tree's src/ directory (uses the GNU make
# shell function, so GNU make is required).
JAVA_SRC=$(shell find $(srcdir)/src -name '*.java')

# Generated header; listing it in BUILT_SOURCES makes it get built before the
# regular targets. NOTE(review): presumably included by jni/ffts_jni.c.
BUILT_SOURCES = nz_ac_waikato_ffts_FFTS.h

all: ffts.jar

# A single recipe produces both the classes/ directory and ffts.jar. Only
# ffts.jar owns the recipe: a combined "classes ffts.jar:" rule is two
# independent targets to make, so a parallel build could run the recipe twice
# at once and race on "rm -rf classes".
# The source path is taken relative to srcdir so out-of-tree builds work.
ffts.jar: $(JAVA_SRC)
	-rm -rf classes
	mkdir classes
	$(JAVAC) -d classes -sourcepath $(srcdir)/src $(JAVA_SRC)
	$(JAR) -cf ffts.jar -C classes .

# Kept so that "make classes" continues to work.
classes: ffts.jar

lib_LTLIBRARIES = libffts_jni.la
libffts_jni_la_SOURCES = jni/ffts_jni.c
nodist_include_HEADERS = nz_ac_waikato_ffts_FFTS.h
libffts_jni_la_LIBADD = $(top_builddir)/src/libffts.la
libffts_jni_la_CFLAGS = @JNI_CPPFLAGS@ $(AM_CFLAGS) -I$(top_srcdir)/include
libffts_jni_la_LDFLAGS = -shared
pkgdata_DATA = ffts.jar

# The header is generated from the compiled classes, which exist once
# ffts.jar has been built.
nz_ac_waikato_ffts_FFTS.h: ffts.jar
	javah -cp classes nz.ac.waikato.ffts.FFTS

CLEANFILES=ffts.jar nz_ac_waikato_ffts_FFTS.h

# classes/ is a directory, so CLEANFILES (rm -f) cannot remove it.
clean-local:
	-rm -rf classes
endif

@ -0,0 +1,681 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
# TODO: the ax_prog_javah thing doesn't work so this
# requires javah in the path
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
esac; \
test $$am__dry = yes; \
}
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = java
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
$(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
$(top_srcdir)/m4/ax_java_options.m4 \
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
$(top_srcdir)/m4/ax_prog_jar.m4 \
$(top_srcdir)/m4/ax_prog_javac.m4 \
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgdatadir)" \
"$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
@ENABLE_JNI_TRUE@libffts_jni_la_DEPENDENCIES = \
@ENABLE_JNI_TRUE@ $(top_builddir)/src/libffts.la
am__libffts_jni_la_SOURCES_DIST = jni/ffts_jni.c
@ENABLE_JNI_TRUE@am_libffts_jni_la_OBJECTS = \
@ENABLE_JNI_TRUE@ libffts_jni_la-ffts_jni.lo
libffts_jni_la_OBJECTS = $(am_libffts_jni_la_OBJECTS)
libffts_jni_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(libffts_jni_la_CFLAGS) \
$(CFLAGS) $(libffts_jni_la_LDFLAGS) $(LDFLAGS) -o $@
@ENABLE_JNI_TRUE@am_libffts_jni_la_rpath = -rpath $(libdir)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libffts_jni_la_SOURCES)
DIST_SOURCES = $(am__libffts_jni_la_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
DATA = $(pkgdata_DATA)
HEADERS = $(nodist_include_HEADERS)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JAR = @JAR@
JAVA = @JAVA@
JAVAC = @JAVAC@
JAVACFLAGS = @JAVACFLAGS@
JAVAFLAGS = @JAVAFLAGS@
JAVAPREFIX = @JAVAPREFIX@
JAVA_PATH_NAME = @JAVA_PATH_NAME@
JNI_CPPFLAGS = @JNI_CPPFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
_ACJNI_JAVAC = @_ACJNI_JAVAC@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
@ENABLE_JNI_TRUE@JAVA_SRC = $(shell find $(srcdir)/src -name '*.java')
@ENABLE_JNI_TRUE@BUILT_SOURCES = nz_ac_waikato_ffts_FFTS.h
@ENABLE_JNI_TRUE@lib_LTLIBRARIES = libffts_jni.la
@ENABLE_JNI_TRUE@libffts_jni_la_SOURCES = jni/ffts_jni.c
@ENABLE_JNI_TRUE@nodist_include_HEADERS = nz_ac_waikato_ffts_FFTS.h
@ENABLE_JNI_TRUE@libffts_jni_la_LIBADD = $(top_builddir)/src/libffts.la
@ENABLE_JNI_TRUE@libffts_jni_la_CFLAGS = @JNI_CPPFLAGS@ $(AM_CFLAGS) -I$(top_srcdir)/include
@ENABLE_JNI_TRUE@libffts_jni_la_LDFLAGS = -shared
@ENABLE_JNI_TRUE@pkgdata_DATA = ffts.jar
@ENABLE_JNI_TRUE@CLEANFILES = ffts.jar nz_ac_waikato_ffts_FFTS.h
all: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu java/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu java/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
$(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
}
uninstall-libLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
done
clean-libLTLIBRARIES:
-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
@list='$(lib_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libffts_jni.la: $(libffts_jni_la_OBJECTS) $(libffts_jni_la_DEPENDENCIES) $(EXTRA_libffts_jni_la_DEPENDENCIES)
$(libffts_jni_la_LINK) $(am_libffts_jni_la_rpath) $(libffts_jni_la_OBJECTS) $(libffts_jni_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libffts_jni_la-ffts_jni.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
libffts_jni_la-ffts_jni.lo: jni/ffts_jni.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -MT libffts_jni_la-ffts_jni.lo -MD -MP -MF $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libffts_jni_la-ffts_jni.Tpo $(DEPDIR)/libffts_jni_la-ffts_jni.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='jni/ffts_jni.c' object='libffts_jni_la-ffts_jni.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libffts_jni_la_CFLAGS) $(CFLAGS) -c -o libffts_jni_la-ffts_jni.lo `test -f 'jni/ffts_jni.c' || echo '$(srcdir)/'`jni/ffts_jni.c
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
install-pkgdataDATA: $(pkgdata_DATA)
@$(NORMAL_INSTALL)
@list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(pkgdatadir)'"; \
$(MKDIR_P) "$(DESTDIR)$(pkgdatadir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pkgdatadir)'"; \
$(INSTALL_DATA) $$files "$(DESTDIR)$(pkgdatadir)" || exit $$?; \
done
uninstall-pkgdataDATA:
@$(NORMAL_UNINSTALL)
@list='$(pkgdata_DATA)'; test -n "$(pkgdatadir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(pkgdatadir)'; $(am__uninstall_files_from_dir)
install-nodist_includeHEADERS: $(nodist_include_HEADERS)
@$(NORMAL_INSTALL)
@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
$(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
$(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
done
uninstall-nodist_includeHEADERS:
@$(NORMAL_UNINSTALL)
@list='$(nodist_include_HEADERS)'; test -n "$(includedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS)
installdirs:
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgdatadir)" "$(DESTDIR)$(includedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
# install: make sure $(BUILT_SOURCES) exist, then re-invoke make for
# install-am.
install: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) install-am
# Public install/uninstall targets forward to their *-am counterparts.
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
# install-am: build everything first, then run the exec and data install
# steps in a single sub-make (command echo suppressed by '@').
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
# install-strip: run the 'install' target with $(INSTALL_STRIP_PROGRAM)
# substituted for the program installer and INSTALL_STRIP_FLAG=-s.
# When $(STRIP) is non-empty it is additionally handed to the installer
# through INSTALL_PROGRAM_ENV as STRIPPROG.
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
# Generic cleaning rules. Each rm is skipped when its file list is empty and
# its failure is ignored (leading '-').
# mostlyclean-generic has nothing to remove.
mostlyclean-generic:
# clean-generic: remove $(CLEANFILES).
clean-generic:
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
# distclean-generic: remove configure-generated files; the VPATH list is
# only removed when building outside the source directory.
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
# maintainer-clean-generic: print a warning, then remove $(BUILT_SOURCES).
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
# Empty clean-local used for the non-JNI configuration; the JNI variant with
# a recipe appears further down behind @ENABLE_JNI_TRUE@.
# NOTE(review): the @ENABLE_JNI_*@ prefixes are presumably substituted by
# configure so that only one of the two clean-local rules stays active.
@ENABLE_JNI_FALSE@clean-local:
clean: clean-am
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
mostlyclean-am
# distclean: everything clean-am/distclean-am remove, plus the dependency
# directory and the generated Makefile itself.
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
# Documentation-format targets (dvi, html, info, pdf, ps) and their install-*
# variants: the public name forwards to a *-am target, and every such *-am
# target here is empty, i.e. this directory produces no documentation.
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
# Data install step: generated headers and package data files.
install-data-am: install-nodist_includeHEADERS install-pkgdataDATA
install-dvi: install-dvi-am
install-dvi-am:
# Exec install step: the libtool libraries.
install-exec-am: install-libLTLIBRARIES
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
# maintainer-clean: as distclean, and additionally runs
# maintainer-clean-generic through maintainer-clean-am.
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
# uninstall-am: undo install-exec-am and install-data-am.
uninstall-am: uninstall-libLTLIBRARIES uninstall-nodist_includeHEADERS \
uninstall-pkgdataDATA
# .MAKE lists the targets whose recipes re-invoke $(MAKE).
.MAKE: all check install install-am install-strip
# .PHONY lists every target that names an action rather than a file, so a
# file of the same name in the build directory cannot mask it.
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-local cscopelist \
ctags distclean distclean-compile distclean-generic \
distclean-libtool distclean-tags distdir dvi dvi-am html \
html-am info info-am install install-am install-data \
install-data-am install-dvi install-dvi-am install-exec \
install-exec-am install-html install-html-am install-info \
install-info-am install-libLTLIBRARIES install-man \
install-nodist_includeHEADERS install-pdf install-pdf-am \
install-pkgdataDATA install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags uninstall uninstall-am uninstall-libLTLIBRARIES \
uninstall-nodist_includeHEADERS uninstall-pkgdataDATA
# JNI-only rules: build the Java classes and ffts.jar, generate the JNI
# header from the compiled classes, and clean the class directory.
#
# NOTE(review): 'classes ffts.jar: $(JAVA_SRC)' declares two targets that
# share one recipe; make treats them as independent, so a parallel build may
# run the recipe (which starts with 'rm -rf classes') once per target.
# NOTE(review): the header rule calls 'javah' directly instead of through a
# configure-detected variable, and passes the 'classes' directory (its first
# prerequisite, $<) as the class path.
@ENABLE_JNI_TRUE@all: ffts.jar
@ENABLE_JNI_TRUE@classes ffts.jar: $(JAVA_SRC)
@ENABLE_JNI_TRUE@ -rm -rf classes
@ENABLE_JNI_TRUE@ mkdir classes
@ENABLE_JNI_TRUE@ $(JAVAC) -d classes -sourcepath src $(JAVA_SRC)
@ENABLE_JNI_TRUE@ $(JAR) -cf ffts.jar -C classes .
@ENABLE_JNI_TRUE@nz_ac_waikato_ffts_FFTS.h: classes
@ENABLE_JNI_TRUE@ javah -cp $< nz.ac.waikato.ffts.FFTS
@ENABLE_JNI_TRUE@clean-local:
@ENABLE_JNI_TRUE@ -rm -rf classes
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

@ -0,0 +1,233 @@
/*
* This file is part of FFTS -- The Fastest Fourier Transform in the South
*
* Copyright (c) 2013, Michael Zucchi <notzed@gmail.com>
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the organization nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include <ffts.h>
#include <alloca.h>
#include <stdlib.h>
// Bit of a hack for android, as we can't build the *.h without
// the classes ... but we can't build the project without the jni.
#ifdef ANDROID
#include <jni.h>
#else
#include "nz_ac_waikato_ffts_FFTS.h"
#endif
// TODO: feature tests instead
#ifdef HAVE_SSE
#define NEEDS_ALIGNED
#endif
#ifdef NEEDS_ALIGNED
#define ALIGN_MASK 15
/*
 * Allocate `size` bytes whose address is a multiple of `align`.
 *
 * Uses posix_memalign() when it is declared, memalign() otherwise; the build
 * fails if neither is available.
 *
 * Returns the block (released by the callers in this file with free()), or
 * NULL when posix_memalign() reports an error.
 */
static void *
xmemalign(size_t align, size_t size) {
	/*
	 * HAVE_DECL_<name> macros follow the Autoconf AC_CHECK_DECLS convention:
	 * they are always defined, to 1 when the declaration exists and to 0
	 * when it does not.  They therefore have to be tested by value; a bare
	 * defined() would select the first branch even when the function is
	 * missing.
	 * NOTE(review): assumes config.h is generated that way -- confirm
	 * against configure.ac.
	 */
#if defined(HAVE_DECL_POSIX_MEMALIGN) && HAVE_DECL_POSIX_MEMALIGN
	void *r = NULL;

	if (posix_memalign(&r, align, size) != 0)
		return NULL;
	return r;
#elif defined(HAVE_DECL_MEMALIGN) && HAVE_DECL_MEMALIGN
	return memalign(align, size);
#else
#error "Require an aligning malloc"
#endif
}
#endif
/*
 * Raise java.lang.OutOfMemoryError in the calling Java code.
 *
 * env: JNI environment of the current call.
 * msg: detail message handed to ThrowNew (callers in this file pass NULL).
 *
 * When the exception class cannot be looked up, ThrowNew is not called.
 */
static void
throwOutOfMemoryError(JNIEnv *env, const char *msg) {
	jclass oomClass = (*env)->FindClass(env, "java/lang/OutOfMemoryError");

	if (!oomClass)
		return;

	(*env)->ThrowNew(env, oomClass, msg);
}
/*
 * Create a 1-d plan through ffts_init_1d(N, sign).
 *
 * Returns the plan pointer packed into a jlong.  When plan creation fails an
 * OutOfMemoryError is raised and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_11d
(JNIEnv *env, jclass jc, jint N, jint sign) {
	ffts_plan_t *plan = ffts_init_1d(N, sign);

	if (plan == NULL) {
		throwOutOfMemoryError(env, NULL);
	}

	return (jlong)plan;
}
/*
 * Create a 2-d plan through ffts_init_2d(N1, N2, sign).
 *
 * Returns the plan pointer packed into a jlong.  When plan creation fails an
 * OutOfMemoryError is raised and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_12d
(JNIEnv *env, jclass jc, jint N1, jint N2, jint sign) {
	ffts_plan_t *plan = ffts_init_2d(N1, N2, sign);

	if (plan == NULL) {
		throwOutOfMemoryError(env, NULL);
	}

	return (jlong)plan;
}
/*
 * Create an n-d plan through ffts_init_nd(); the rank is the length of jNs
 * and each element of jNs is one dimension.
 *
 * The Java int[] is copied into a temporary size_t array for the library
 * call.  The temporaries live on the heap rather than in alloca() storage
 * because their size is dictated by the caller-supplied array length, which
 * must not be allowed to exhaust the native stack.  They are released before
 * returning; the original stack buffers also ended their life at return, so
 * ffts_init_nd() cannot rely on the array afterwards in either version.
 *
 * Returns the plan pointer packed into a jlong.  When the temporaries cannot
 * be allocated or plan creation fails, an OutOfMemoryError is raised and 0 is
 * returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_complex_1nd
(JNIEnv *env, jclass jc, jintArray jNs, jint sign) {
	ffts_plan_t *plan = NULL;
	jsize n = (*env)->GetArrayLength(env, jNs);
	/* calloc(0, ...) may return NULL, so always request at least one slot. */
	size_t count = n > 0 ? (size_t)n : 1;
	size_t *Ns = calloc(count, sizeof(*Ns));
	jint *cNs = calloc(count, sizeof(*cNs));
	jsize i;

	if (Ns != NULL && cNs != NULL) {
		/* Get the int elements and convert them to the C type. */
		(*env)->GetIntArrayRegion(env, jNs, 0, n, cNs);
		for (i = 0; i < n; i++)
			Ns[i] = cNs[i];

		plan = ffts_init_nd(n, Ns, sign);
	}

	free(cNs);
	free(Ns);

	if (!plan)
		throwOutOfMemoryError(env, NULL);

	return (jlong)plan;
}
/*
 * Create a 1-d real plan through ffts_init_1d_real(N, sign).
 *
 * Returns the plan pointer packed into a jlong.  When plan creation fails an
 * OutOfMemoryError is raised and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_11d
(JNIEnv *env, jclass jc, jint N, jint sign) {
	ffts_plan_t *plan = ffts_init_1d_real(N, sign);

	if (plan == NULL) {
		throwOutOfMemoryError(env, NULL);
	}

	return (jlong)plan;
}
/*
 * Create a 2-d real plan through ffts_init_2d_real(N1, N2, sign).
 *
 * Returns the plan pointer packed into a jlong.  When plan creation fails an
 * OutOfMemoryError is raised and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_12d
(JNIEnv *env, jclass jc, jint N1, jint N2, jint sign) {
	ffts_plan_t *plan = ffts_init_2d_real(N1, N2, sign);

	if (plan == NULL) {
		throwOutOfMemoryError(env, NULL);
	}

	return (jlong)plan;
}
/*
 * Create an n-d real plan through ffts_init_nd_real(); the rank is the
 * length of jNs and each element of jNs is one dimension.
 *
 * The Java int[] is copied into a temporary size_t array for the library
 * call.  The temporaries live on the heap rather than in alloca() storage
 * because their size is dictated by the caller-supplied array length, which
 * must not be allowed to exhaust the native stack.  They are released before
 * returning; the original stack buffers also ended their life at return, so
 * ffts_init_nd_real() cannot rely on the array afterwards in either version.
 *
 * Returns the plan pointer packed into a jlong.  When the temporaries cannot
 * be allocated or plan creation fails, an OutOfMemoryError is raised and 0 is
 * returned.
 */
JNIEXPORT jlong JNICALL Java_nz_ac_waikato_ffts_FFTS_real_1nd
(JNIEnv *env, jclass jc, jintArray jNs, jint sign) {
	ffts_plan_t *plan = NULL;
	jsize n = (*env)->GetArrayLength(env, jNs);
	/* calloc(0, ...) may return NULL, so always request at least one slot. */
	size_t count = n > 0 ? (size_t)n : 1;
	size_t *Ns = calloc(count, sizeof(*Ns));
	jint *cNs = calloc(count, sizeof(*cNs));
	jsize i;

	if (Ns != NULL && cNs != NULL) {
		/* Get the int elements and convert them to the C type. */
		(*env)->GetIntArrayRegion(env, jNs, 0, n, cNs);
		for (i = 0; i < n; i++)
			Ns[i] = cNs[i];

		plan = ffts_init_nd_real(n, Ns, sign);
	}

	free(cNs);
	free(Ns);

	if (!plan)
		throwOutOfMemoryError(env, NULL);

	return (jlong)plan;
}
/*
 * Execute a plan on Java float arrays.
 *
 * p:     plan handle previously returned by one of the plan constructors.
 * size:  number of floats transferred from jsrc and to jdst.
 * jsrc:  input array, read starting at element soff.
 * jdst:  output array, written starting at element doff.
 *
 * NOTE(review): soff/doff are treated as float element indices into the Java
 * arrays in both build variants -- confirm against the Java caller.  The
 * previous code added them to void pointers (byte units), and in the aligned
 * variant added them to the native scratch buffers instead of the Java
 * arrays, which wrote past the end of the allocation for any non-zero
 * offset.
 *
 * An OutOfMemoryError is raised when scratch memory or array access cannot
 * be obtained; in that case the transform is not run.
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_execute__JJ_3FI_3FI
(JNIEnv *env, jclass jc, jlong p, jlong size, jfloatArray jsrc, jint soff, jfloatArray jdst, jint doff) {
	ffts_plan_t *plan = (ffts_plan_t *)p;
	jfloat *src, *dst;

	// TODO: check performance on android/arm

#ifdef NEEDS_ALIGNED
	// Copy through aligned scratch buffers.
	// On oracle jvm this is faster than GetFloatArrayElements()
	size_t bytes = (size_t)size * sizeof(jfloat);

	src = xmemalign(64, bytes);
	if (!src) {
		throwOutOfMemoryError(env, NULL);
		return;
	}
	dst = xmemalign(64, bytes);
	if (!dst) {
		free(src);
		throwOutOfMemoryError(env, NULL);
		return;
	}

	// The array offsets select the region of the Java arrays; the scratch
	// buffers are always used from their (aligned) start.
	(*env)->GetFloatArrayRegion(env, jsrc, soff, (jsize)size, src);
	// Skip the transform if the region copy raised an exception.
	if (!(*env)->ExceptionCheck(env)) {
		ffts_execute(plan, src, dst);
		(*env)->SetFloatArrayRegion(env, jdst, doff, (jsize)size, dst);
	}

	free(dst);
	free(src);
#else
	// This is the fastest with oracle jvm, but doesn't work with sse ...
	// (GetFloatArrayElements() was measured by the original author as the
	// slowest option and is therefore not used.)
	src = (*env)->GetPrimitiveArrayCritical(env, jsrc, NULL);
	if (!src) {
		if (!(*env)->ExceptionCheck(env))
			throwOutOfMemoryError(env, NULL);
		return;
	}
	dst = (*env)->GetPrimitiveArrayCritical(env, jdst, NULL);
	if (!dst) {
		(*env)->ReleasePrimitiveArrayCritical(env, jsrc, src, 0);
		if (!(*env)->ExceptionCheck(env))
			throwOutOfMemoryError(env, NULL);
		return;
	}

	ffts_execute(plan, src + soff, dst + doff);

	(*env)->ReleasePrimitiveArrayCritical(env, jdst, dst, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, jsrc, src, 0);
#endif
}
/*
 * Execute a plan directly on the memory behind two java.nio buffers.
 *
 * The addresses are obtained with GetDirectBufferAddress() and passed to
 * ffts_execute() unchanged; `size` is not used here.  No validation is done
 * in native code: bounds checking etc. is the Java side's responsibility.
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_execute__JJLjava_nio_FloatBuffer_2Ljava_nio_FloatBuffer_2
(JNIEnv *env, jclass jc, jlong p, jlong size, jobject jsrc, jobject jdst) {
	void *in = (*env)->GetDirectBufferAddress(env, jsrc);
	void *out = (*env)->GetDirectBufferAddress(env, jdst);

	ffts_execute((ffts_plan_t *)p, in, out);
}
/*
 * Release the plan identified by handle `p` with ffts_free().
 */
JNIEXPORT void JNICALL Java_nz_ac_waikato_ffts_FFTS_free
(JNIEnv *env, jclass jc, jlong p) {
	ffts_free((ffts_plan_t *)p);
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,60 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_classpath.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_CLASSPATH
#
# DESCRIPTION
#
# AX_CHECK_CLASSPATH just displays the CLASSPATH, for the edification of
# the user.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on other. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 5
# AX_CHECK_CLASSPATH
# ------------------
# Print the CLASSPATH in effect, or a note that none is set. Purely
# informational: no variable is set or substituted.
AU_ALIAS([AC_CHECK_CLASSPATH], [AX_CHECK_CLASSPATH])
AC_DEFUN([AX_CHECK_CLASSPATH],[
if test "x$CLASSPATH" != x; then
  echo "You have CLASSPATH $CLASSPATH, hope it is correct"
else
  echo "You have no CLASSPATH, I hope it is good"
fi
])

@ -0,0 +1,80 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_java_home.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_JAVA_HOME
#
# DESCRIPTION
#
# Check for Sun Java (JDK / JRE) installation, where the 'java' VM is in.
# If found, set the environment variable JAVA_HOME to the Java installation
# home; otherwise leave JAVA_HOME untouched, which in most cases means
# JAVA_HOME is empty.
#
# LICENSE
#
# Copyright (c) 2008 Gleen Salmon <gleensalmon@yahoo.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 6
# AX_CHECK_JAVA_HOME
# ------------------
# Determine JAVA_HOME. A value already set by the user is kept. Otherwise
# the result of /usr/libexec/java_home is used when it prints one, and
# failing that JAVA_HOME is derived from the location of the java executable
# found on PATH, extended by the bin directory of the last /usr/java entry.
AU_ALIAS([AC_CHECK_JAVA_HOME], [AX_CHECK_JAVA_HOME])
AC_DEFUN([AX_CHECK_JAVA_HOME],
[AC_MSG_CHECKING([for JAVA_HOME])
# We used a fake loop so that we can use "break" to exit when the result
# is found.
while true
do
  # If the user defined JAVA_HOME, don't touch it.
  test "${JAVA_HOME+set}" = set && break

  # On Mac OS X 10.5 and following, run /usr/libexec/java_home to get
  # the value of JAVA_HOME to use.
  # (http://developer.apple.com/library/mac/#qa/qa2001/qa1170.html).
  JAVA_HOME=`/usr/libexec/java_home 2>/dev/null`
  test x"$JAVA_HOME" != x && break

  # See if we can find the java executable, and compute from there.
  # The candidate directory and the program path are quoted so that a
  # value containing whitespace is not split into several words.
  TRY_JAVA_HOME=`ls -dr /usr/java/* 2> /dev/null | head -n 1`
  if test "x$TRY_JAVA_HOME" != x; then
    PATH=$PATH:$TRY_JAVA_HOME/bin
  fi
  AC_PATH_PROG([JAVA_PATH_NAME], [java])
  if test "x$JAVA_PATH_NAME" != x; then
    JAVA_HOME=`echo "$JAVA_PATH_NAME" | sed "s/\(.*\)[[/]]bin[[/]]java.*/\1/"`
    break
  fi

  AC_MSG_NOTICE([Could not compute JAVA_HOME])
  break
done
AC_MSG_RESULT([$JAVA_HOME])
])

@ -0,0 +1,48 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_java_options.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_JAVA_OPTIONS
#
# DESCRIPTION
#
# AX_JAVA_OPTIONS adds configure command line options used for Java m4
# macros. This Macro is optional.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on other. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Devin Weaver <ktohg@tritarget.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 6
# AX_JAVA_OPTIONS
# ---------------
# Add the --with-java-prefix, --with-javac-flags and --with-java-flags
# configure options, copy their values into JAVAPREFIX, JAVACFLAGS and
# JAVAFLAGS, and substitute those together with JAVA and JAVAC.
# The help texts go through AS_HELP_STRING so that configure --help aligns
# and wraps them like every other option, and all macro arguments are quoted.
AU_ALIAS([AC_JAVA_OPTIONS], [AX_JAVA_OPTIONS])
AC_DEFUN([AX_JAVA_OPTIONS],[
AC_ARG_WITH([java-prefix],
  [AS_HELP_STRING([--with-java-prefix=PFX],
    [prefix where Java runtime is installed (optional)])])
AC_ARG_WITH([javac-flags],
  [AS_HELP_STRING([--with-javac-flags=FLAGS],
    [flags to pass to the Java compiler (optional)])])
AC_ARG_WITH([java-flags],
  [AS_HELP_STRING([--with-java-flags=FLAGS],
    [flags to pass to the Java VM (optional)])])
JAVAPREFIX=$with_java_prefix
JAVACFLAGS=$with_javac_flags
JAVAFLAGS=$with_java_flags
AC_SUBST([JAVAPREFIX])dnl
AC_SUBST([JAVACFLAGS])dnl
AC_SUBST([JAVAFLAGS])dnl
AC_SUBST([JAVA])dnl
AC_SUBST([JAVAC])dnl
])

@ -0,0 +1,120 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_jni_include_dir.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_JNI_INCLUDE_DIR
#
# DESCRIPTION
#
# AX_JNI_INCLUDE_DIR finds include directories needed for compiling
# programs using the JNI interface.
#
# JNI include directories are usually in the Java distribution. Their
# location is deduced from the value of JAVAC. When this macro completes, a
# list of directories is left in the variable JNI_INCLUDE_DIRS.
#
# Example usage follows:
#
# AX_JNI_INCLUDE_DIR
#
# for JNI_INCLUDE_DIR in $JNI_INCLUDE_DIRS
# do
# CPPFLAGS="$CPPFLAGS -I$JNI_INCLUDE_DIR"
# done
#
# If you want to force a specific compiler:
#
# - at the configure.in level, set JAVAC=yourcompiler before calling
# AX_JNI_INCLUDE_DIR
#
# - at the configure level, setenv JAVAC
#
# Note: This macro can work with the autoconf M4 macros for Java programs.
# This particular macro is not part of the original set of macros.
#
# LICENSE
#
# Copyright (c) 2008 Don Anderson <dda@sleepycat.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 8
# AX_JNI_INCLUDE_DIR
# ------------------
# Locate the JNI headers relative to the javac executable, after resolving
# symbolic links, and leave the directories found in JNI_INCLUDE_DIRS.
# configure is aborted when JAVAC is unset, when javac cannot be found on
# the path, or when jni.h is in neither of the two candidate directories.
AU_ALIAS([AC_JNI_INCLUDE_DIR], [AX_JNI_INCLUDE_DIR])
AC_DEFUN([AX_JNI_INCLUDE_DIR],[
JNI_INCLUDE_DIRS=""
test "x$JAVAC" = x && AC_MSG_ERROR(['\$JAVAC' undefined])
AC_PATH_PROG([_ACJNI_JAVAC], [$JAVAC], [no])
test "x$_ACJNI_JAVAC" = xno && AC_MSG_ERROR([$JAVAC could not be found in path])
_ACJNI_FOLLOW_SYMLINKS("$_ACJNI_JAVAC")
# Collapse repeated slashes and drop the last path component, which leaves
# the directory that contains the resolved javac.
_JTOPDIR=`echo "$_ACJNI_FOLLOWED" | sed -e 's://*:/:g' -e 's:/[[^/]]*$::'`
# First candidate: on darwin the Headers directory one level further up,
# elsewhere the include directory next to javac.
case "$host_os" in
darwin*) _JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[[^/]]*$::'`
_JINC="$_JTOPDIR/Headers";;
*) _JINC="$_JTOPDIR/include";;
esac
_AS_ECHO_LOG([_JTOPDIR=$_JTOPDIR])
_AS_ECHO_LOG([_JINC=$_JINC])
# On Mac OS X 10.6.4, jni.h is a symlink:
# /System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers/jni.h
# -> ../../CurrentJDK/Headers/jni.h.
if test -f "$_JINC/jni.h" || test -L "$_JINC/jni.h"; then
JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JINC"
else
# Second candidate: go up one more directory level and look in its
# include directory.
_JTOPDIR=`echo "$_JTOPDIR" | sed -e 's:/[[^/]]*$::'`
if test -f "$_JTOPDIR/include/jni.h"; then
JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include"
else
AC_MSG_ERROR([cannot find java include files])
fi
fi
# get the likely subdirectories for system specific java includes
case "$host_os" in
bsdi*) _JNI_INC_SUBDIRS="bsdos";;
freebsd*) _JNI_INC_SUBDIRS="freebsd";;
linux*) _JNI_INC_SUBDIRS="linux genunix";;
osf*) _JNI_INC_SUBDIRS="alpha";;
solaris*) _JNI_INC_SUBDIRS="solaris";;
mingw*) _JNI_INC_SUBDIRS="win32";;
cygwin*) _JNI_INC_SUBDIRS="win32";;
*) _JNI_INC_SUBDIRS="genunix";;
esac
# add any subdirectories that are present
for JINCSUBDIR in $_JNI_INC_SUBDIRS
do
if test -d "$_JTOPDIR/include/$JINCSUBDIR"; then
JNI_INCLUDE_DIRS="$JNI_INCLUDE_DIRS $_JTOPDIR/include/$JINCSUBDIR"
fi
done
])
# _ACJNI_FOLLOW_SYMLINKS <path>
# Follows symbolic links on <path>,
# finally setting variable _ACJNI_FOLLOWED
#
# A link is detected by the " -> " marker in the output of ls -ld. An
# absolute link target replaces the current path; a relative target is
# appended to the directory part of the current path. The loop repeats
# until the current path is no longer reported as a link.
# ----------------------------------------
AC_DEFUN([_ACJNI_FOLLOW_SYMLINKS],[
# find the include directory relative to the javac executable
_cur="$1"
while ls -ld "$_cur" 2>/dev/null | grep " -> " >/dev/null; do
AC_MSG_CHECKING([symlink for $_cur])
_slink=`ls -ld "$_cur" | sed 's/.* -> //'`
case "$_slink" in
/*) _cur="$_slink";;
# 'X' avoids triggering unwanted echo options.
*) _cur=`echo "X$_cur" | sed -e 's/^X//' -e 's:[[^/]]*$::'`"$_slink";;
esac
AC_MSG_RESULT([$_cur])
done
_ACJNI_FOLLOWED="$_cur"
])# _ACJNI

@ -0,0 +1,52 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_jar.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAR
#
# DESCRIPTION
#
# AX_PROG_JAR tests for an existing jar program. It uses the environment
# variable JAR then tests in sequence various common jar programs.
#
# If you want to force a specific compiler:
#
# - at the configure.in level, set JAR=yourcompiler before calling
# AX_PROG_JAR
#
# - at the configure level, setenv JAR
#
# You can use the JAR variable in your Makefile.in, with @JAR@.
#
# Note: This macro depends on the autoconf M4 macros for Java programs. It
# is VERY IMPORTANT that you download that whole set, some macros depend
# on other. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission.
#
# The general documentation of those macros, as well as the sample
# configure.in, is included in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Egon Willighagen <e.willighagen@science.ru.nl>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 6
# AX_PROG_JAR
# -----------
# Set JAR to a jar program unless the user already set it, and abort
# configure when none is found.
#
# The fourth argument of AC_CHECK_PROGS is the search path; the third is the
# value assigned when nothing is found. JAVAPREFIX used to be passed in the
# third position, so it was never searched and a failed lookup left JAR set
# to the prefix directory itself. The bin directory of the prefix is now
# searched first, followed by the regular PATH.
AU_ALIAS([AC_PROG_JAR], [AX_PROG_JAR])
AC_DEFUN([AX_PROG_JAR],[
AC_REQUIRE([AC_EXEEXT])dnl
if test "x$JAVAPREFIX" = x; then
  test "x$JAR" = x && AC_CHECK_PROGS([JAR], [jar$EXEEXT])
else
  test "x$JAR" = x && AC_CHECK_PROGS([JAR], [jar], [], [$JAVAPREFIX/bin$PATH_SEPARATOR$PATH])
fi
test "x$JAR" = x && AC_MSG_ERROR([no acceptable jar program found in \$PATH])
AC_PROVIDE([$0])dnl
])

@ -0,0 +1,79 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_javac.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVAC
#
# DESCRIPTION
#
# AX_PROG_JAVAC tests an existing Java compiler. It uses the environment
# variable JAVAC then tests in sequence various common Java compilers. For
# political reasons, it starts with the free ones.
#
# If you want to force a specific compiler:
#
# - at the configure.in level, set JAVAC=yourcompiler before calling
# AX_PROG_JAVAC
#
# - at the configure level, setenv JAVAC
#
# You can use the JAVAC variable in your Makefile.in, with @JAVAC@.
#
# *Warning*: its success or failure can depend on a proper setting of the
# CLASSPATH env. variable.
#
# TODO: allow to exclude compilers (rationale: most Java programs cannot
# compile with some compilers like guavac).
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on other. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 6
# AX_PROG_JAVAC
# -------------
# Set JAVAC to a Java compiler unless the user already set it, trying
# "gcj -C", guavac, jikes and javac in that order, then verify the compiler
# with AX_PROG_JAVAC_WORKS. configure is aborted when none is found.
#
# The fourth argument of AC_CHECK_PROGS is the search path; the third is the
# value assigned when nothing is found. JAVAPREFIX used to be passed in the
# third position, so it was never searched and a failed lookup left JAVAC
# set to the prefix directory itself. The bin directory of the prefix is now
# searched first, followed by the regular PATH.
AU_ALIAS([AC_PROG_JAVAC], [AX_PROG_JAVAC])
AC_DEFUN([AX_PROG_JAVAC],[
if test "x$JAVAPREFIX" = x; then
  test "x$JAVAC" = x && AC_CHECK_PROGS([JAVAC], ["gcj -C" guavac jikes javac])
else
  test "x$JAVAC" = x && AC_CHECK_PROGS([JAVAC], ["gcj -C" guavac jikes javac], [], [$JAVAPREFIX/bin$PATH_SEPARATOR$PATH])
fi
test "x$JAVAC" = x && AC_MSG_ERROR([no acceptable Java compiler found in \$PATH])
AX_PROG_JAVAC_WORKS
AC_PROVIDE([$0])dnl
])

@ -0,0 +1,72 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_prog_javac_works.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_PROG_JAVAC_WORKS
#
# DESCRIPTION
#
# Internal use ONLY.
#
# Note: This is part of the set of autoconf M4 macros for Java programs.
# It is VERY IMPORTANT that you download the whole set, some macros depend
# on other. Unfortunately, the autoconf archive does not support the
# concept of set of macros, so I had to break it for submission. The
# general documentation, as well as the sample configure.in, is included
# in the AX_PROG_JAVA macro.
#
# LICENSE
#
# Copyright (c) 2008 Stephane Bortzmeyer <bortzmeyer@pasteur.fr>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 6
# AX_PROG_JAVAC_WORKS
# -------------------
# Compile a trivial class with $JAVAC $JAVACFLAGS and cache the outcome in
# ac_cv_prog_javac_works. A compiler failure aborts configure.
#
# The error macro terminates configure, so the failed program is written to
# the log and the temporary files are removed before it is invoked;
# previously those statements followed the error and could never run.
AU_ALIAS([AC_PROG_JAVAC_WORKS], [AX_PROG_JAVAC_WORKS])
AC_DEFUN([AX_PROG_JAVAC_WORKS],[
AC_CACHE_CHECK([if $JAVAC works], ac_cv_prog_javac_works, [
JAVA_TEST=Test.java
CLASS_TEST=Test.class
cat << \EOF > $JAVA_TEST
/* [#]line __oline__ "configure" */
public class Test {
}
EOF
if AC_TRY_COMMAND($JAVAC $JAVACFLAGS $JAVA_TEST) >/dev/null 2>&1; then
  ac_cv_prog_javac_works=yes
else
  echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
  cat $JAVA_TEST >&AS_MESSAGE_LOG_FD
  rm -f $JAVA_TEST $CLASS_TEST
  AC_MSG_ERROR([The Java compiler $JAVAC failed (see config.log, check the CLASSPATH?)])
fi
rm -f $JAVA_TEST $CLASS_TEST
])
AC_PROVIDE([$0])dnl
])

@ -0,0 +1,330 @@
#! /bin/sh
# Common stub for a few missing GNU programs while installing.
scriptversion=2012-01-06.18; # UTC
# Copyright (C) 1996-2012 Free Software Foundation, Inc.
# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Refuse to run without at least one argument.
case $# in
  0)
    echo 1>&2 "Try '$0 --help' for more information"
    exit 1
    ;;
esac
# Defaults; the --run option handling below may clear 'run' and change 'msg'.
run=:
msg="missing on your system"

# sed programs that print the word following "--output ", "--output=" or
# " -o " in a command line.
sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p'
sed_minuso='s/.* -o \([^ ]*\).*/\1/p'

# In the cases where this matters, 'missing' is being run in the
# srcdir already.
configure_ac=configure.in
if test -f configure.ac; then
  configure_ac=configure.ac
fi
# Handle the script's own options. --run executes the wrapped command and
# exits on success; --help and --version print and exit; any other word
# starting with '-' is rejected. A plain program name falls through.
case $1 in
--run)
# Try to run requested program, and just exit if it succeeds.
run=
shift
"$@" && exit 0
# Exit code 63 means version mismatch. This often happens
# when the user tries to use an ancient version of a tool on
# a file that requires a minimum version. In this case
# we should proceed as if the program had been absent, or
# as if --run hadn't been passed.
if test $? = 63; then
run=:
msg="probably too old"
fi
;;
-h|--h|--he|--hel|--help)
echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Handle 'PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
error status if there is no known handling for PROGRAM.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
--run try to run the given command, and emulate it if it fails
Supported PROGRAM values:
aclocal touch file 'aclocal.m4'
autoconf touch file 'configure'
autoheader touch file 'config.h.in'
autom4te touch the output file, or create a stub one
automake touch all 'Makefile.in' files
bison create 'y.tab.[ch]', if possible, from existing .[ch]
flex create 'lex.yy.c', if possible, from existing .c
help2man touch the output file
lex create 'lex.yy.c', if possible, from existing .c
makeinfo touch the output file
yacc create 'y.tab.[ch]', if possible, from existing .[ch]
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
'g' are ignored when checking the name.
Send bug reports to <bug-automake@gnu.org>."
exit $?
;;
-v|--v|--ve|--ver|--vers|--versi|--versio|--version)
echo "missing $scriptversion (GNU Automake)"
exit $?
;;
-*)
echo 1>&2 "$0: Unknown '$1' option"
echo 1>&2 "Try '$0 --help' for more information"
exit 1
;;
esac
# Normalize the program name to check for: strip one leading 'gnu-', 'gnu'
# or 'g' prefix. Each substitution is followed by 't', so sed stops at the
# first one that matches.
program=`echo "$1" | sed '
s/^gnu-//; t
s/^gnu//; t
s/^g//; t'`
# Now exit if we have it, but it failed. Also exit now if we
# don't have it and --version was passed (most likely to detect
# the program). This is about non-GNU programs, so use $1 not
# $program.
#
# The first test only applies after --run (which empties $run): if the
# program answers --version it exists, so the earlier failure was genuine.
case $1 in
lex*|yacc*)
# Not GNU programs, they don't have --version.
;;
*)
if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
# We have it, but it failed.
exit 1
elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
# Could not run --version or --help. This is probably someone
# running '$TOOL --version' or '$TOOL --help' to check whether
# $TOOL exists and not knowing $TOOL uses missing.
exit 1
fi
;;
esac
# If it does not exist, or fails to run (possibly an outdated version),
# try to emulate it.
case $program in
aclocal*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified 'acinclude.m4' or '${configure_ac}'. You might want
to install the Automake and Perl packages. Grab them from
any GNU archive site."
touch aclocal.m4
;;
autoconf*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified '${configure_ac}'. You might want to install the
Autoconf and GNU m4 packages. Grab them from any GNU
archive site."
touch configure
;;
autoheader*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified 'acconfig.h' or '${configure_ac}'. You might want
to install the Autoconf and GNU m4 packages. Grab them
from any GNU archive site."
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
test -z "$files" && files="config.h"
touch_files=
for f in $files; do
case $f in
*:*) touch_files="$touch_files "`echo "$f" |
sed -e 's/^[^:]*://' -e 's/:.*//'`;;
*) touch_files="$touch_files $f.in";;
esac
done
touch $touch_files
;;
automake*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified 'Makefile.am', 'acinclude.m4' or '${configure_ac}'.
You might want to install the Automake and Perl packages.
Grab them from any GNU archive site."
find . -type f -name Makefile.am -print |
sed 's/\.am$/.in/' |
while read f; do touch "$f"; done
;;
autom4te*)
echo 1>&2 "\
WARNING: '$1' is needed, but is $msg.
You might have modified some files without having the
proper tools for further handling them.
You can get '$1' as part of Autoconf from any GNU
archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
test -z "$file" || exec >$file
echo "#! /bin/sh"
echo "# Created by GNU Automake missing as a replacement of"
echo "# $ $@"
echo "exit 0"
chmod +x $file
exit 1
fi
;;
bison*|yacc*)
echo 1>&2 "\
WARNING: '$1' $msg. You should only need it if
you modified a '.y' file. You may need the Bison package
in order for those modifications to take effect. You can get
Bison from any GNU archive site."
rm -f y.tab.c y.tab.h
if test $# -ne 1; then
eval LASTARG=\${$#}
case $LASTARG in
*.y)
SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.c
fi
SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" y.tab.h
fi
;;
esac
fi
if test ! -f y.tab.h; then
echo >y.tab.h
fi
if test ! -f y.tab.c; then
echo 'main() { return 0; }' >y.tab.c
fi
;;
lex*|flex*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified a '.l' file. You may need the Flex package
in order for those modifications to take effect. You can get
Flex from any GNU archive site."
rm -f lex.yy.c
if test $# -ne 1; then
eval LASTARG=\${$#}
case $LASTARG in
*.l)
SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
if test -f "$SRCFILE"; then
cp "$SRCFILE" lex.yy.c
fi
;;
esac
fi
if test ! -f lex.yy.c; then
echo 'main() { return 0; }' >lex.yy.c
fi
;;
help2man*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified a dependency of a manual page. You may need the
Help2man package in order for those modifications to take
effect. You can get Help2man from any GNU archive site."
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -f "$file"; then
touch $file
else
test -z "$file" || exec >$file
echo ".ab help2man is required to generate this page"
exit $?
fi
;;
makeinfo*)
echo 1>&2 "\
WARNING: '$1' is $msg. You should only need it if
you modified a '.texi' or '.texinfo' file, or any other file
indirectly affecting the aspect of the manual. The spurious
call might also be the consequence of using a buggy 'make' (AIX,
DU, IRIX). You might want to install the Texinfo package or
the GNU make package. Grab either from any GNU archive site."
# The file to touch is that specified with -o ...
file=`echo "$*" | sed -n "$sed_output"`
test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"`
if test -z "$file"; then
# ... or it is the one specified with @setfilename ...
infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
file=`sed -n '
/^@setfilename/{
s/.* \([^ ]*\) *$/\1/
p
q
}' $infile`
# ... or it is derived from the source name (dir/f.texi becomes f.info)
test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info
fi
# If the file does not exist, the user really needs makeinfo;
# let's fail without touching anything.
test -f $file || exit 1
touch $file
;;
*)
echo 1>&2 "\
WARNING: '$1' is needed, and is $msg.
You might have modified some files without having the
proper tools for further handling them. Check the 'README' file,
it often tells you about the needed prerequisites for installing
this package. You may also peek at any GNU archive site, in case
some other package would contain this missing '$1' program."
exit 1
;;
esac
exit 0
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

@ -0,0 +1,34 @@
# Build and install the FFTS library as a libtool library.
lib_LTLIBRARIES = libffts.la
# C sources that are compiled for every configuration.
libffts_la_SOURCES = ffts.c ffts_small.c ffts_nd.c ffts_real.c ffts_real_nd.c patterns.c
# Private headers are listed as sources so that they end up in the distribution.
libffts_la_SOURCES += codegen.h codegen_arm.h codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h macros-neon.h macros-sse.h macros.h neon.h neon_float.h patterns.h types.h vfp.h
# Exactly one of ffts_static.c / codegen.c is built, selected by the
# DYNAMIC_DISABLED conditional (presumably set by configure -- confirm in configure.ac).
if DYNAMIC_DISABLED
libffts_la_SOURCES += ffts_static.c
else
libffts_la_SOURCES += codegen.c
endif
# Only the public header from ../include is installed, into $(includedir)/ffts.
libffts_includedir=$(includedir)/ffts
libffts_include_HEADERS = ../include/ffts.h
# Pick at most one set of assembly sources.  The nesting gives the priority
# order VFP, then NEON, then SSE; if none of the conditionals is true no
# assembly file is added.
if HAVE_VFP
libffts_la_SOURCES += vfp.s
else
if HAVE_NEON
# NEON has separate assembly files for the DYNAMIC_DISABLED and default builds.
if DYNAMIC_DISABLED
libffts_la_SOURCES += neon_static_f.s neon_static_i.s
else
libffts_la_SOURCES += neon.s
endif
else
if HAVE_SSE
libffts_la_SOURCES += sse.s
endif
endif
endif

@ -0,0 +1,666 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
esac; \
test $$am__dry = yes; \
}
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
@DYNAMIC_DISABLED_TRUE@am__append_1 = ffts_static.c
@DYNAMIC_DISABLED_FALSE@am__append_2 = codegen.c
@HAVE_VFP_TRUE@am__append_3 = vfp.s
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_4 = neon_static_f.s neon_static_i.s
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__append_5 = neon.s
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__append_6 = sse.s
subdir = src
DIST_COMMON = $(libffts_include_HEADERS) $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in $(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
$(top_srcdir)/m4/ax_java_options.m4 \
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
$(top_srcdir)/m4/ax_prog_jar.m4 \
$(top_srcdir)/m4/ax_prog_javac.m4 \
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
*) f=$$p;; \
esac;
am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
am__install_max = 40
am__nobase_strip_setup = \
srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
am__nobase_strip = \
for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
am__nobase_list = $(am__nobase_strip_setup); \
for p in $$list; do echo "$$p $$p"; done | \
sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
$(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
if (++n[$$2] == $(am__install_max)) \
{ print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
END { for (dir in files) print dir, files[dir] }'
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
am__uninstall_files_from_dir = { \
test -z "$$files" \
|| { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(libdir)" \
"$(DESTDIR)$(libffts_includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libffts_la_LIBADD =
am__libffts_la_SOURCES_DIST = ffts.c ffts_small.c ffts_nd.c \
ffts_real.c ffts_real_nd.c patterns.c codegen.h codegen_arm.h \
codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h \
ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h \
macros-neon.h macros-sse.h macros.h neon.h neon_float.h \
patterns.h types.h vfp.h ffts_static.c codegen.c vfp.s \
neon_static_f.s neon_static_i.s neon.s sse.s
@DYNAMIC_DISABLED_TRUE@am__objects_1 = ffts_static.lo
@DYNAMIC_DISABLED_FALSE@am__objects_2 = codegen.lo
@HAVE_VFP_TRUE@am__objects_3 = vfp.lo
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_4 = neon_static_f.lo \
@DYNAMIC_DISABLED_TRUE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@ neon_static_i.lo
@DYNAMIC_DISABLED_FALSE@@HAVE_NEON_TRUE@@HAVE_VFP_FALSE@am__objects_5 = neon.lo
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@am__objects_6 = \
@HAVE_NEON_FALSE@@HAVE_SSE_TRUE@@HAVE_VFP_FALSE@ sse.lo
am_libffts_la_OBJECTS = ffts.lo ffts_small.lo ffts_nd.lo ffts_real.lo \
ffts_real_nd.lo patterns.lo $(am__objects_1) $(am__objects_2) \
$(am__objects_3) $(am__objects_4) $(am__objects_5) \
$(am__objects_6)
libffts_la_OBJECTS = $(am_libffts_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS)
SOURCES = $(libffts_la_SOURCES)
DIST_SOURCES = $(am__libffts_la_SOURCES_DIST)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
HEADERS = $(libffts_include_HEADERS)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JAR = @JAR@
JAVA = @JAVA@
JAVAC = @JAVAC@
JAVACFLAGS = @JAVACFLAGS@
JAVAFLAGS = @JAVAFLAGS@
JAVAPREFIX = @JAVAPREFIX@
JAVA_PATH_NAME = @JAVA_PATH_NAME@
JNI_CPPFLAGS = @JNI_CPPFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
_ACJNI_JAVAC = @_ACJNI_JAVAC@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
lib_LTLIBRARIES = libffts.la
libffts_la_SOURCES = ffts.c ffts_small.c ffts_nd.c ffts_real.c \
ffts_real_nd.c patterns.c codegen.h codegen_arm.h \
codegen_sse.h ffts.h ffts_nd.h ffts_real.h ffts_real_nd.h \
ffts_small.h ffts_static.h macros-alpha.h macros-altivec.h \
macros-neon.h macros-sse.h macros.h neon.h neon_float.h \
patterns.h types.h vfp.h $(am__append_1) $(am__append_2) \
$(am__append_3) $(am__append_4) $(am__append_5) \
$(am__append_6)
libffts_includedir = $(includedir)/ffts
libffts_include_HEADERS = ../include/ffts.h
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj .s
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu src/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
list2="$$list2 $$p"; \
else :; fi; \
done; \
test -z "$$list2" || { \
echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
$(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
}
uninstall-libLTLIBRARIES:
@$(NORMAL_UNINSTALL)
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
for p in $$list; do \
$(am__strip_dir) \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
done
clean-libLTLIBRARIES:
-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
@list='$(lib_LTLIBRARIES)'; \
locs=`for p in $$list; do echo $$p; done | \
sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
sort -u`; \
test -z "$$locs" || { \
echo rm -f $${locs}; \
rm -f $${locs}; \
}
libffts.la: $(libffts_la_OBJECTS) $(libffts_la_DEPENDENCIES) $(EXTRA_libffts_la_DEPENDENCIES)
$(LINK) -rpath $(libdir) $(libffts_la_OBJECTS) $(libffts_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/codegen.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_nd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_real.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_real_nd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_small.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ffts_static.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/patterns.Plo@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
.s.o:
$(CCASCOMPILE) -c -o $@ $<
.s.obj:
$(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.s.lo:
$(LTCCASCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
install-libffts_includeHEADERS: $(libffts_include_HEADERS)
@$(NORMAL_INSTALL)
@list='$(libffts_include_HEADERS)'; test -n "$(libffts_includedir)" || list=; \
if test -n "$$list"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(libffts_includedir)'"; \
$(MKDIR_P) "$(DESTDIR)$(libffts_includedir)" || exit 1; \
fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
done | $(am__base_list) | \
while read files; do \
echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libffts_includedir)'"; \
$(INSTALL_HEADER) $$files "$(DESTDIR)$(libffts_includedir)" || exit $$?; \
done
uninstall-libffts_includeHEADERS:
@$(NORMAL_UNINSTALL)
@list='$(libffts_include_HEADERS)'; test -n "$(libffts_includedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
dir='$(DESTDIR)$(libffts_includedir)'; $(am__uninstall_files_from_dir)
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
installdirs:
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libffts_includedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am: install-libffts_includeHEADERS
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am: install-libLTLIBRARIES
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am: uninstall-libLTLIBRARIES \
uninstall-libffts_includeHEADERS
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool cscopelist ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am \
install-libLTLIBRARIES install-libffts_includeHEADERS \
install-man install-pdf install-pdf-am install-ps \
install-ps-am install-strip installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
uninstall-am uninstall-libLTLIBRARIES \
uninstall-libffts_includeHEADERS
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

@ -0,0 +1,731 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "codegen.h"
#include "macros.h"
#include "ffts.h"
#ifdef __APPLE__
#include <libkern/OSCacheControl.h>
#endif
#include <sys/types.h>
#include <sys/mman.h>
#ifdef HAVE_NEON
#include "codegen_arm.h"
#include "neon.h"
#elif HAVE_VFP
#include "codegen_arm.h"
#include "vfp.h"
#else
#include "codegen_sse.h"
#include "macros-sse.h"
#endif
#ifdef __ANDROID__
#include <unistd.h>
#endif
/*
 * Count the nodes of the recursive decomposition of a size-N transform
 * whose size is strictly greater than leafN.
 *
 * Every such node has five children of sizes N/4, N/8, N/8, N/4 and N/4;
 * a sub-transform of size <= leafN is a leaf and contributes 0.
 *
 * N       size of the (sub-)transform
 * leafN   largest size that is treated as a leaf
 * offset  position of this sub-transform; it is propagated to the children
 *         but does not influence the returned count
 *
 * Returns the number of non-leaf nodes, including this one.
 */
int tree_count(int N, int leafN, int offset) {
	if(N > leafN) {
		/* Children in the same order as elaborate_tree() visits them. */
		const int child_size[5] = { N/4, N/8, N/8, N/4, N/4 };
		const int child_offset[5] = {
			offset,
			offset + N/4,
			offset + N/4 + N/8,
			offset + N/2,
			offset + 3*N/4
		};
		int nodes = 1; /* this node */
		int k;

		for(k = 0; k < 5; k++) {
			nodes += tree_count(child_size[k], leafN, child_offset[k]);
		}
		return nodes;
	}

	return 0;
}
/*
 * Write the decomposition tree of a size-N transform into the array at *p.
 *
 * The tree is walked in post-order: the five children (sizes N/4, N/8, N/8,
 * N/4, N/4) are emitted first, then this node.  Each emitted node occupies
 * two size_t slots, { N, offset*2 }, and *p is advanced by two per node.
 * Sub-transforms of size <= leafN emit nothing.
 *
 * p       in/out cursor into the output array; the caller must provide room
 *         for two entries per node (see tree_count())
 * N       size of the (sub-)transform
 * leafN   largest size that is treated as a leaf
 * offset  position of this sub-transform
 */
void elaborate_tree(size_t **p, int N, int leafN, int offset) {
	if(N <= leafN) return;

	{
		const int child_size[5] = { N/4, N/8, N/8, N/4, N/4 };
		const int child_offset[5] = {
			offset,
			offset + N/4,
			offset + N/4 + N/8,
			offset + N/2,
			offset + 3*N/4
		};
		int k;

		for(k = 0; k < 5; k++) {
			elaborate_tree(p, child_size[k], leafN, child_offset[k]);
		}
	}

	/* The cursor must be read after the recursion, which has advanced it. */
	{
		size_t *slot = *p;

		slot[0] = N;
		slot[1] = offset*2;
		*p = slot + 2;
	}
}
/*
 * Sum the byte sizes of the first (n_luts - 1) lookup tables, where
 * n_luts = ctz(N / leafN).
 *
 * The first table is sized from n = 2*leafN and n doubles for each
 * following table.  On ARM the per-table sizes are half of those used on
 * other targets, except for the first table when N <= 32.
 * NOTE(review): judging by the name, the result is used as the byte offset
 * of the last table inside the LUT buffer -- confirm against the callers.
 *
 * N      transform size
 * leafN  leaf size
 *
 * Returns the accumulated size in bytes (truncated to 32 bits).  Returns 0
 * when there is at most one table, and also for the degenerate inputs
 * leafN == 0 or N < leafN.
 */
uint32_t LUT_offset(size_t N, size_t leafN) {
	size_t lut_size = 0;
	size_t n = leafN*2;
	size_t n_luts;
	size_t i;

	/*
	 * __builtin_ctzl(0) is undefined and leafN == 0 would divide by zero,
	 * so reject both before touching the ratio.
	 */
	if(leafN == 0 || N/leafN == 0) return 0;

	n_luts = __builtin_ctzl(N/leafN);

	/*
	 * "i + 1 < n_luts" rather than "i < n_luts - 1": the latter wraps to
	 * SIZE_MAX when n_luts == 0 (N == leafN) and the loop would run away.
	 */
	for(i = 0; i + 1 < n_luts; i++) {
		if(i == 0) {
#ifdef __arm__
			if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
			else lut_size += n/4 * sizeof(cdata_t);
#else
			lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
		} else {
#ifdef __arm__
			lut_size += n/8 * 3 * sizeof(cdata_t);
#else
			lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
#endif
		}
		n *= 2;
	}

	return (uint32_t)lut_size;
}
/*
 * Element type of the buffer that generated machine code is written into.
 * ARM builds address the buffer in 32-bit words, all other builds in bytes
 * (presumably because ARM instructions are fixed-width while x86
 * instructions are variable-length -- confirm).
 */
#ifdef __arm__
typedef uint32_t insns_t;
#else
typedef uint8_t insns_t;
#endif
#define P(x) (*(*p)++ = x)
/*
 * Append exactly `count` bytes of x86 NOP padding at *p and advance *p
 * past them.
 *
 * Padding of 1..9 bytes is emitted as a single multi-byte NOP.  Longer
 * padding is emitted as a run of 9-byte NOPs followed by one NOP covering
 * the remaining 1..9 bytes.
 *
 * p      in/out write cursor into the code buffer
 * count  number of padding bytes to emit; 0 emits nothing
 */
void insert_nops(uint8_t **p, uint32_t count) {
	/* Row i holds the i-byte NOP encoding; row 0 is unused. */
	static const uint8_t nop_bytes[10][9] = {
		{ 0x00 },
		{ 0x90 },
		{ 0x66, 0x90 },
		{ 0x0F, 0x1F, 0x00 },
		{ 0x0F, 0x1F, 0x40, 0x00 },
		{ 0x0F, 0x1F, 0x44, 0x00, 0x00 },
		{ 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
		{ 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
		{ 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
		{ 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }
	};
	uint32_t remaining = count;
	uint32_t k;

	/* Peel off 9-byte NOPs until a single instruction can cover the rest. */
	while(remaining > 9) {
		for(k = 0; k < 9; k++) P(nop_bytes[9][k]);
		remaining -= 9;
	}

	for(k = 0; k < remaining; k++) P(nop_bytes[remaining][k]);
}
/*
 * Pad the code buffer with NOPs so that the address `offset` bytes past the
 * new cursor is 16-byte aligned, i.e. (*p + offset) % 16 == 0 on return.
 * Between 0 and 15 padding bytes are emitted via insert_nops().
 *
 * Only x86_64 builds emit padding; on every other target this is a no-op.
 *
 * p       in/out write cursor into the code buffer
 * offset  distance from the cursor to the location that must be aligned
 */
void align_mem16(uint8_t **p, uint32_t offset) {
#ifdef __x86_64__
	/*
	 * Go through uintptr_t: casting a 64-bit pointer straight to uint32_t
	 * is a narrowing, implementation-defined conversion.  Only the low
	 * four bits matter, so wrap-around in the addition is harmless.
	 */
	uint32_t misalign = (uint32_t)(((uintptr_t)(*p) + offset) & 0xf);

	insert_nops(p, (16 - misalign) & 0xf);
#else
	(void)p;
	(void)offset;
#endif
}
/*
 * Assembles the transform routine for plan `p` into an anonymous mmap'd
 * buffer, then makes that buffer read/execute and stores the entry point
 * in p->transform.
 *
 *   p      plan being built. Reads p->i0, p->i1, p->i2, p->ws_is and the
 *          addresses of several plan fields; writes p->constants (x86-64
 *          only), p->transform_size, p->transform_base and p->transform.
 *   N      transform size.
 *   leafN  leaf size handed to tree_count()/elaborate_tree().
 *   sign   direction selector: decides which constant table is used on
 *          x86-64 and which instruction words are patched on ARM.
 *
 * The routine is built from pre-assembled templates (the regions between
 * labels such as leaf_ee/leaf_oo or neon_x4/neon_x8) that are memcpy'd into
 * the buffer and then patched in place.
 *
 * Terminates the process with exit(1) if the work list cannot be allocated,
 * if mmap fails, or if mprotect fails.
 */
void ffts_generate_func_code(ffts_plan_t *p, size_t N, size_t leafN, int sign) {
	int count = tree_count(N, leafN, 0) + 1;
	size_t *ps = malloc(count * 2 * sizeof(size_t));
	size_t *pps = ps;

	if(!ps) {
		fprintf(stderr, "NOMEM\n");
		exit(1);
	}

#ifdef __x86_64__
	if(sign < 0) p->constants = sse_constants;
	else p->constants = sse_constants_inv;
#endif

	/* ps becomes a list of (size, offset) pairs terminated by a (0, 0) pair. */
	elaborate_tree(&pps, N, leafN, 0);
	pps[0] = 0;
	pps[1] = 0;
	pps = ps;

#ifdef __arm__
	if(N < 8192) p->transform_size = 8192;
	else p->transform_size = N;
#else
	if(N < 2048) p->transform_size = 16384;
	else p->transform_size = 16384 + 2*N/8 * __builtin_ctzl(N);
#endif

#ifdef __APPLE__
	p->transform_base = mmap(NULL, p->transform_size, PROT_WRITE | PROT_READ, MAP_ANON | MAP_SHARED, -1, 0);
#else
	/* Fallback value only; never override what <sys/mman.h> already provides. */
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS 0x20
#endif
	p->transform_base = mmap(NULL, p->transform_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
#endif

	/* mmap reports failure with MAP_FAILED, not with a NULL pointer. */
	if(p->transform_base == MAP_FAILED) {
		fprintf(stderr, "NOMEM\n");
		exit(1);
	}

	insns_t *func = p->transform_base;
	insns_t *fp = func;

	/* ---- x8 sub-transform template ---- */
	insns_t *x_8_addr = fp;
#ifdef __arm__
#ifdef HAVE_NEON
	memcpy(fp, neon_x8, neon_x8_t - neon_x8);
	/*
	 * Changes adds to subtracts and vice versa to allow the computation
	 * of both the IFFT and FFT
	 */
	if(sign < 0) {
		fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
		fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[70] ^= 0x00200000; fp[74] ^= 0x00200000;
		fp[97] ^= 0x00200000; fp[98] ^= 0x00200000; fp[102] ^= 0x00200000; fp[104] ^= 0x00200000;
	}
	fp += (neon_x8_t - neon_x8) / 4;
#else
	memcpy(fp, vfp_x8, vfp_end - vfp_x8);
	if(sign > 0) {
		fp[65] ^= 0x00000040; fp[66] ^= 0x00000040; fp[68] ^= 0x00000040; fp[70] ^= 0x00000040;
		fp[103] ^= 0x00000040; fp[104] ^= 0x00000040; fp[105] ^= 0x00000040; fp[108] ^= 0x00000040;
		fp[113] ^= 0x00000040; fp[114] ^= 0x00000040; fp[117] ^= 0x00000040; fp[118] ^= 0x00000040;
	}
	fp += (vfp_end - vfp_x8) / 4;
#endif
#else
	align_mem16(&fp, 0);
	x_8_addr = fp;
	align_mem16(&fp, 5);
	memcpy(fp, x8_soft, x8_hard - x8_soft);
	fp += (x8_hard - x8_soft);
#endif

	/* ---- x4 sub-transform template ---- */
	insns_t *x_4_addr = fp;
#ifdef __arm__
#ifdef HAVE_NEON
	memcpy(fp, neon_x4, neon_x8 - neon_x4);
	if(sign < 0) {
		fp[26] ^= 0x00200000; fp[28] ^= 0x00200000; fp[31] ^= 0x00200000; fp[32] ^= 0x00200000;
	}
	fp += (neon_x8 - neon_x4) / 4;
#else
	memcpy(fp, vfp_x4, vfp_x8 - vfp_x4);
	if(sign > 0) {
		fp[36] ^= 0x00000040; fp[38] ^= 0x00000040; fp[43] ^= 0x00000040; fp[44] ^= 0x00000040;
	}
	fp += (vfp_x8 - vfp_x4) / 4;
#endif
#else
	align_mem16(&fp, 0);
	x_4_addr = fp;
	memcpy(fp, x4, x8_soft - x4);
	fp += (x8_soft - x4);
#endif

	/* ---- entry point and prologue of the generated routine ---- */
	insns_t *start = fp;
#ifdef __arm__
	*fp = PUSH_LR(); fp++;
	*fp = 0xed2d8b10; fp++; /* presumably vpush {d8-d15}; matched by 0xecbd8b10 in the epilogue */
	ADDI(&fp, 3, 1, 0);
	ADDI(&fp, 7, 1, N);
	ADDI(&fp, 5, 1, 2*N);
	ADDI(&fp, 10, 7, 2*N);
	ADDI(&fp, 4, 5, 2*N);
	ADDI(&fp, 8, 10, 2*N);
	ADDI(&fp, 6, 4, 2*N);
	ADDI(&fp, 9, 8, 2*N);
	*fp = LDRI(12, 0, ((uint32_t)&p->offsets) - ((uint32_t)p)); fp++; // load offsets into r12
	ADDI(&fp, 1, 0, 0);
	ADDI(&fp, 0, 2, 0); // mov out into r0
	*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
	MOVI(&fp, 11, p->i0);
#else
	align_mem16(&fp, 0);
	start = fp;
	/* 4c 8b 07: mov r8, [rdi] */
	*fp++ = 0x4c;
	*fp++ = 0x8b;
	*fp++ = 0x07;
	uint32_t lp_cnt = p->i0 * 4;
	MOVI(&fp, RCX, lp_cnt);
#endif

	/* ---- leaf passes ---- */
#ifdef __arm__
#ifdef HAVE_NEON
	memcpy(fp, neon_ee, neon_oo - neon_ee);
	if(sign < 0) {
		fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000;
		fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000;
		fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
	}
	fp += (neon_oo - neon_ee) / 4;
#else
	memcpy(fp, vfp_e, vfp_o - vfp_e);
	if(sign > 0) {
		fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040;
		fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040;
		fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040;
	}
	fp += (vfp_o - vfp_e) / 4;
#endif
#else
	PUSH(&fp, RBP);
	PUSH(&fp, RBX);
	PUSH(&fp, R10);
	PUSH(&fp, R11);
	PUSH(&fp, R12);
	PUSH(&fp, R13);
	PUSH(&fp, R14);
	PUSH(&fp, R15);

	int i;
	memcpy(fp, leaf_ee_init, leaf_ee - leaf_ee_init);
	fp += (leaf_ee - leaf_ee_init);

	align_mem16(&fp, 9);
	memcpy(fp, leaf_ee, leaf_oo - leaf_ee);

	/* Values patched (times 4) into the 32-bit immediates of the leaf templates. */
	uint32_t offsets[8] = {0, N, N/2, 3*N/2, N/4, 5*N/4, 7*N/4, 3*N/4};
	uint32_t offsets_o[8] = {0, N, N/2, 3*N/2, 7*N/4, 3*N/4, N/4, 5*N/4};
	uint32_t offsets_oe[8] = {7*N/4, 3*N/4, N/4, 5*N/4, 0, N, 3*N/2, N/2};

	for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_ee_offsets[i], offsets[i]*4);
	fp += (leaf_oo - leaf_ee);

	/* The order of the oo / oe / eo leaves depends on the parity of log2(N). */
	if(__builtin_ctzl(N) & 1){
		if(p->i1) {
			lp_cnt += p->i1 * 4;
			MOVI(&fp, RCX, lp_cnt);
			align_mem16(&fp, 4);
			memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
			for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
			fp += (leaf_eo - leaf_oo);
		}
		memcpy(fp, leaf_oe, leaf_end - leaf_oe);
		lp_cnt += 4;
		for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oe_offsets[i], offsets_o[i]*4);
		fp += (leaf_end - leaf_oe);
	}else{
		memcpy(fp, leaf_eo, leaf_oe - leaf_eo);
		lp_cnt += 4;
		for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_eo_offsets[i], offsets[i]*4);
		fp += (leaf_oe - leaf_eo);
		if(p->i1) {
			lp_cnt += p->i1 * 4;
			MOVI(&fp, RCX, lp_cnt);
			align_mem16(&fp, 4);
			memcpy(fp, leaf_oo, leaf_eo - leaf_oo);
			for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_oo_offsets[i], offsets_o[i]*4);
			fp += (leaf_eo - leaf_oo);
		}
	}
	if(p->i1) {
		lp_cnt += p->i1 * 4;
		MOVI(&fp, RCX, lp_cnt);
		align_mem16(&fp, 9);
		memcpy(fp, leaf_ee, leaf_oo - leaf_ee);
		for(i=0;i<8;i++) IMM32_NI(fp + sse_leaf_ee_offsets[i], offsets_oe[i]*4);
		fp += (leaf_oo - leaf_ee);
	}

	memcpy(fp, x_init, x4 - x_init);
	fp += (x4 - x_init);

	/* ---- body: one call per (size, offset) pair of the work list ---- */
	int32_t pAddr = 0;
	int32_t pN = 0;
	int32_t pLUT = 0;
	count = 2;
	while(pps[0]) {
		if(!pN) {
			MOVI(&fp, RCX, pps[0] / 4);
		}else{
			if((pps[1]*4)-pAddr) ADDI(&fp, RDX, (pps[1] * 4)- pAddr);
			if(pps[0] > leafN && pps[0] - pN) {
				/* c1 e1 ib = shl ecx, imm8 ; c1 e9 ib = shr ecx, imm8 */
				int diff = __builtin_ctzl(pps[0]) - __builtin_ctzl(pN);
				*fp++ = 0xc1;
				if(diff > 0) {
					*fp++ = 0xe1;
					*fp++ = (diff & 0xff);
				}else{
					*fp++ = 0xe9;
					*fp++ = ((-diff) & 0xff);
				}
			}
		}
		/* Advance R8 by the distance between the previous and the current LUT. */
		if(p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT)
			ADDI(&fp, R8, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
		if(pps[0] == 2*leafN) {
			CALL(&fp, x_4_addr);
		}else{
			CALL(&fp, x_8_addr);
		}
		pAddr = pps[1] * 4;
		if(pps[0] > leafN)
			pN = pps[0];
		pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;
		count += 4;
		pps += 2;
	}
#endif

#ifdef __arm__
#ifdef HAVE_NEON
	if(__builtin_ctzl(N) & 1){
		/* add rX, rY, #0 acts as a register move: swap r7<->r9 and r8<->r10 through r2 */
		ADDI(&fp, 2, 7, 0);
		ADDI(&fp, 7, 9, 0);
		ADDI(&fp, 9, 2, 0);
		ADDI(&fp, 2, 8, 0);
		ADDI(&fp, 8, 10, 0);
		ADDI(&fp, 10, 2, 0);
		if(p->i1) {
			MOVI(&fp, 11, p->i1);
			memcpy(fp, neon_oo, neon_eo - neon_oo);
			if(sign < 0) {
				fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000;
				fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000;
				fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
			}
			fp += (neon_eo - neon_oo) / 4;
		}
		*fp = LDRI(11, 1, ((uint32_t)&p->oe_ws) - ((uint32_t)p)); fp++;
		memcpy(fp, neon_oe, neon_end - neon_oe);
		if(sign < 0) {
			fp[19] ^= 0x00200000; fp[20] ^= 0x00200000; fp[22] ^= 0x00200000; fp[23] ^= 0x00200000;
			fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[40] ^= 0x00200000; fp[41] ^= 0x00200000;
			fp[64] ^= 0x00200000; fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[67] ^= 0x00200000;
		}
		fp += (neon_end - neon_oe) / 4;
	}else{
		*fp = LDRI(11, 1, ((uint32_t)&p->eo_ws) - ((uint32_t)p)); fp++;
		memcpy(fp, neon_eo, neon_oe - neon_eo);
		if(sign < 0) {
			fp[10] ^= 0x00200000; fp[11] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000;
			fp[31] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000; fp[35] ^= 0x00200000;
			fp[59] ^= 0x00200000; fp[60] ^= 0x00200000; fp[61] ^= 0x00200000; fp[62] ^= 0x00200000;
		}
		fp += (neon_oe - neon_eo) / 4;
		ADDI(&fp, 2, 7, 0);
		ADDI(&fp, 7, 9, 0);
		ADDI(&fp, 9, 2, 0);
		ADDI(&fp, 2, 8, 0);
		ADDI(&fp, 8, 10, 0);
		ADDI(&fp, 10, 2, 0);
		if(p->i1) {
			MOVI(&fp, 11, p->i1);
			memcpy(fp, neon_oo, neon_eo - neon_oo);
			if(sign < 0) {
				fp[12] ^= 0x00200000; fp[13] ^= 0x00200000; fp[14] ^= 0x00200000; fp[15] ^= 0x00200000;
				fp[27] ^= 0x00200000; fp[29] ^= 0x00200000; fp[30] ^= 0x00200000; fp[31] ^= 0x00200000;
				fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
			}
			fp += (neon_eo - neon_oo) / 4;
		}
	}
	if(p->i1) {
		ADDI(&fp, 2, 3, 0);
		ADDI(&fp, 3, 7, 0);
		ADDI(&fp, 7, 2, 0);
		ADDI(&fp, 2, 4, 0);
		ADDI(&fp, 4, 8, 0);
		ADDI(&fp, 8, 2, 0);
		ADDI(&fp, 2, 5, 0);
		ADDI(&fp, 5, 9, 0);
		ADDI(&fp, 9, 2, 0);
		ADDI(&fp, 2, 6, 0);
		ADDI(&fp, 6, 10, 0);
		ADDI(&fp, 10, 2, 0);
		ADDI(&fp, 2, 9, 0);
		ADDI(&fp, 9, 10, 0);
		ADDI(&fp, 10, 2, 0);
		*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
		MOVI(&fp, 11, p->i1);
		memcpy(fp, neon_ee, neon_oo - neon_ee);
		if(sign < 0) {
			fp[33] ^= 0x00200000; fp[37] ^= 0x00200000; fp[38] ^= 0x00200000; fp[39] ^= 0x00200000;
			fp[40] ^= 0x00200000; fp[41] ^= 0x00200000; fp[44] ^= 0x00200000; fp[45] ^= 0x00200000;
			fp[46] ^= 0x00200000; fp[47] ^= 0x00200000; fp[48] ^= 0x00200000; fp[57] ^= 0x00200000;
		}
		fp += (neon_oo - neon_ee) / 4;
	}
#else
	ADDI(&fp, 2, 7, 0);
	ADDI(&fp, 7, 9, 0);
	ADDI(&fp, 9, 2, 0);
	ADDI(&fp, 2, 8, 0);
	ADDI(&fp, 8, 10, 0);
	ADDI(&fp, 10, 2, 0);
	MOVI(&fp, 11, (p->i1>0) ? p->i1 : 1);
	memcpy(fp, vfp_o, vfp_x4 - vfp_o);
	if(sign > 0) {
		fp[22] ^= 0x00000040; fp[24] ^= 0x00000040; fp[25] ^= 0x00000040; fp[26] ^= 0x00000040;
		fp[62] ^= 0x00000040; fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[66] ^= 0x00000040;
	}
	fp += (vfp_x4 - vfp_o) / 4;
	ADDI(&fp, 2, 3, 0);
	ADDI(&fp, 3, 7, 0);
	ADDI(&fp, 7, 2, 0);
	ADDI(&fp, 2, 4, 0);
	ADDI(&fp, 4, 8, 0);
	ADDI(&fp, 8, 2, 0);
	ADDI(&fp, 2, 5, 0);
	ADDI(&fp, 5, 9, 0);
	ADDI(&fp, 9, 2, 0);
	ADDI(&fp, 2, 6, 0);
	ADDI(&fp, 6, 10, 0);
	ADDI(&fp, 10, 2, 0);
	ADDI(&fp, 2, 9, 0);
	ADDI(&fp, 9, 10, 0);
	ADDI(&fp, 10, 2, 0);
	*fp = LDRI(2, 1, ((uint32_t)&p->ee_ws) - ((uint32_t)p)); fp++;
	MOVI(&fp, 11, (p->i2>0) ? p->i2 : 1);
	memcpy(fp, vfp_e, vfp_o - vfp_e);
	if(sign > 0) {
		fp[64] ^= 0x00000040; fp[65] ^= 0x00000040; fp[68] ^= 0x00000040; fp[75] ^= 0x00000040;
		fp[76] ^= 0x00000040; fp[79] ^= 0x00000040; fp[80] ^= 0x00000040; fp[83] ^= 0x00000040;
		fp[84] ^= 0x00000040; fp[87] ^= 0x00000040; fp[91] ^= 0x00000040; fp[93] ^= 0x00000040;
	}
	fp += (vfp_o - vfp_e) / 4;
#endif
	*fp = LDRI(2, 1, ((uint32_t)&p->ws) - ((uint32_t)p)); fp++; // load p->ws into r2
	MOVI(&fp, 1, 0);

	// args: r0 - out
	//       r1 - N
	//       r2 - ws
	int32_t pAddr = 0;
	int32_t pN = 0;
	int32_t pLUT = 0;
	count = 2;
	while(pps[0]) {
		if(!pN) {
			MOVI(&fp, 1, pps[0]);
		}else{
			if((pps[1]*4)-pAddr) ADDI(&fp, 0, 0, (pps[1] * 4)- pAddr);
			if(pps[0] - pN) ADDI(&fp, 1, 1, pps[0] - pN);
		}
		if(p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT)
			ADDI(&fp, 2, 2, p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8 - pLUT);
		if(pps[0] == 2*leafN) {
			/* fp+2: the encoder is given the address two words past the branch */
			*fp = BL(fp+2, x_4_addr); fp++;
		}else if(!pps[2]){
			/* Last entry of the work list. */
#ifdef HAVE_NEON
			memcpy(fp, neon_x8_t, neon_ee - neon_x8_t);
			if(sign < 0) {
				fp[31] ^= 0x00200000; fp[32] ^= 0x00200000; fp[33] ^= 0x00200000; fp[34] ^= 0x00200000;
				fp[65] ^= 0x00200000; fp[66] ^= 0x00200000; fp[70] ^= 0x00200000; fp[74] ^= 0x00200000;
				fp[97] ^= 0x00200000; fp[98] ^= 0x00200000; fp[102] ^= 0x00200000; fp[104] ^= 0x00200000;
			}
			fp += (neon_ee - neon_x8_t) / 4;
#else
			*fp = BL(fp+2, x_8_addr); fp++;
#endif
		}else{
			*fp = BL(fp+2, x_8_addr); fp++;
		}
		pAddr = pps[1] * 4;
		pN = pps[0];
		pLUT = p->ws_is[__builtin_ctzl(pps[0]/leafN)-1]*8;
		count += 4;
		pps += 2;
	}
	*fp++ = 0xecbd8b10;
	*fp++ = POP_LR(); count++;
#else
	/* ---- epilogue: restore registers in reverse push order ---- */
	POP(&fp, R15);
	POP(&fp, R14);
	POP(&fp, R13);
	POP(&fp, R12);
	POP(&fp, R11);
	POP(&fp, R10);
	POP(&fp, RBX);
	POP(&fp, RBP);
	RET(&fp);
#endif

	free(ps);

	/* Flip the buffer from writable to executable before publishing it. */
	if (mprotect(func, p->transform_size, PROT_READ | PROT_EXEC)) {
		perror("Couldn't mprotect");
		exit(1);
	}
#ifdef __APPLE__
	sys_icache_invalidate(func, p->transform_size);
#elif __ANDROID__
	cacheflush((long)(func), (long)(func) + p->transform_size, 0);
#elif __linux__
#ifdef __GNUC__
	/* __clear_cache takes pointers delimiting the range, not integers. */
	__clear_cache((char *)func, (char *)func + p->transform_size);
#endif
#endif

	p->transform = (void *) (start);
}

@ -0,0 +1,49 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CODEGEN_H__
#define __CODEGEN_H__
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/mman.h>
#include <string.h>
#include <limits.h> /* for PAGESIZE */
#include "ffts.h"
/*
 * Generates the machine-code transform routine for the given plan (transform
 * size N, leaf size leafN, direction selected by sign) and stores its entry
 * point in the plan's `transform` field. Exits the process on failure.
 */
void ffts_generate_func_code(ffts_plan_t *, size_t N, size_t leafN, int sign);
#endif

@ -0,0 +1,101 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CODEGEN_ARM_H__
#define __CODEGEN_ARM_H__
/*
 * Builds an ARM BL instruction word (0xeb000000 | 24-bit offset).
 * The offset is (target - pos) in 4-byte words, truncated to 24 bits;
 * callers in this project pass pos as the branch address plus two words.
 */
uint32_t BL(void *pos, void *target) {
	const char *from = pos;
	const char *to = target;
	uint32_t word_offset = (uint32_t)(((to - from) / 4) & 0xffffff);
	return 0xeb000000 | word_offset;
}
/*
 * Returns the instruction word 0xe12fff10 with register number r OR'd into
 * the low bits (the ARM "BX r" encoding). r is not masked.
 */
uint32_t B(uint8_t r) {
	uint32_t insn = 0xe12fff10;
	insn |= r;
	return insn;
}
/*
 * Builds an ARM register-to-register MOV word: base 0xe1a00000 with the
 * destination register in bits 12-15 and the source register in bits 0-3.
 */
uint32_t MOV(uint8_t dst, uint8_t src) {
	uint32_t rd_field = (uint32_t)(dst & 0xf) << 12;
	uint32_t rm_field = (uint32_t)(src & 0xf);
	return 0xe1a00000 | rm_field | rd_field;
}
/*
 * Emits ARM instructions at *p (advancing it) that compute dst = src + imm.
 *
 * An ARM data-processing immediate is an 8-bit value rotated right by an even
 * amount, so the constant is split into 8-bit chunks at even bit positions:
 * the first instruction is "ADD/SUB dst, src, #chunk" and every further chunk
 * is applied with "ADD/SUB dst, dst, #chunk".
 *
 *   p    cursor into the instruction buffer; advanced by one word per
 *        emitted instruction.
 *   dst  destination register number (low 4 bits used).
 *   src  source register number (low 4 bits used).
 *   imm  signed constant; negative values are emitted as SUB of the magnitude.
 *
 * Differences from the previous implementation:
 *   - continuation instructions use dst as their source, so the result is
 *     also correct when dst != src and more than one instruction is needed;
 *   - imm == 0 no longer evaluates __builtin_ctzl(0), which is undefined;
 *   - the shift is clamped to 24 so the remaining field always fits in
 *     8 bits (clamping lower let large constants recurse without progress);
 *   - the magnitude is computed unsigned, so INT32_MIN does not overflow.
 */
void ADDI(uint32_t **p, uint8_t dst, uint8_t src, int32_t imm) {
	/* 0xe24xxxxx is SUB (immediate), 0xe28xxxxx is ADD (immediate). */
	uint32_t opcode = (imm < 0) ? 0xe2400000 : 0xe2800000;
	uint32_t mag = (imm < 0) ? (0u - (uint32_t)imm) : (uint32_t)imm;
	uint32_t shamt = 0;

	if(mag != 0) {
		shamt = __builtin_ctzl(mag);
		if(shamt > 24) shamt = 24;
		if(shamt & 1) shamt -= 1; /* rotations are expressed in steps of two bits */
	}

	uint32_t field = mag >> shamt;
	/* Rotate-right count, in units of two bits, that yields a left shift by shamt. */
	uint32_t rot = ((32 - shamt) / 2) & 0xf;

	*(*p)++ = opcode | ((src & 0xf) << 16) | ((dst & 0xf) << 12) | (rot << 8) | (field & 0xff);

	if(field > 255) {
		/* Part of the constant is still missing: apply it on top of dst. */
		uint32_t rest = mag - ((field & 0xff) << shamt);
		ADDI(p, dst, dst, (imm < 0) ? -(int32_t)rest : (int32_t)rest);
	}
}
/*
 * Builds an ARM "LDR dst, [base, #offset]" word: base opcode 0xe5900000,
 * base register in bits 16-19, destination in bits 12-15 and the low 12 bits
 * of offset as the immediate. Higher offset bits are discarded.
 */
uint32_t LDRI(uint8_t dst, uint8_t base, uint32_t offset) {
	uint32_t insn = 0xe5900000;
	insn |= (dst & 0xf) << 12;
	insn |= (base & 0xf) << 16;
	insn |= offset & 0xfff;
	return insn;
}
/*
 * Emits ARM instructions at *p (advancing it) that load the constant imm
 * into register dst: a "MOV dst, #chunk" for the lowest 8-bit chunk (taken
 * at an even bit position), followed by ADDI(dst, dst, rest) when the
 * constant does not fit a single rotated 8-bit immediate.
 *
 *   p    cursor into the instruction buffer.
 *   dst  destination register number (low 4 bits used).
 *   imm  constant to load.
 *
 * imm == 0 is handled explicitly because __builtin_ctzl(0) is undefined, and
 * the shift is clamped to 24 so that constants whose only set bits are in
 * the top byte are encoded in one instruction.
 */
void MOVI(uint32_t **p, uint8_t dst, uint32_t imm) {
	uint32_t shamt = 0;

	if(imm != 0) {
		shamt = __builtin_ctzl(imm);
		if(shamt > 24) shamt = 24;
		if(shamt & 1) shamt -= 1; /* rotations are expressed in steps of two bits */
	}

	uint32_t field = imm >> shamt;
	/* Rotate-right count, in units of two bits, that yields a left shift by shamt. */
	uint32_t rot = ((32 - shamt) / 2) & 0xf;

	*(*p)++ = 0xe3a00000 | ((dst & 0xf) << 12) | (rot << 8) | (field & 0xff);

	if(field > 255) ADDI(p, dst, dst, imm - ((field & 0xff) << shamt));
}
/*
 * Returns the prologue instruction word 0xe92d4ff0 (a store-multiple to the
 * stack whose register mask 0x4ff0 selects r4-r11 and lr).
 */
uint32_t PUSH_LR() {
	const uint32_t insn = 0xe92d4ff0;
	return insn;
}
/*
 * Returns the epilogue instruction word 0xe8bd8ff0 (a load-multiple from the
 * stack whose register mask 0x8ff0 selects r4-r11 and pc).
 */
uint32_t POP_LR() {
	const uint32_t insn = 0xe8bd8ff0;
	return insn;
}
#endif

@ -0,0 +1,195 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CODEGEN_SSE_H__
#define __CODEGEN_SSE_H__
/* NOTE(review): NEON prototypes in the SSE header; presumably left over from the ARM variant. */
void neon_x4(float *, size_t, float *);
void neon_x8(float *, size_t, float *);
void neon_x8_t(float *, size_t, float *);
/*
 * Labels delimiting the pre-assembled code templates. They are not called
 * from C: ffts_generate_func_code memcpy's the bytes between two labels
 * (e.g. leaf_ee .. leaf_oo, x4 .. x8_soft) into the generated routine.
 */
void leaf_ee_init();
void leaf_ee();
void leaf_oo();
void leaf_eo();
void leaf_oe();
void leaf_end();
void x_init();
void x4();
void x8_soft();
void x8_hard();
/* Assigned to the plan's `constants` field by ffts_generate_func_code, depending on sign. */
void sse_constants();
void sse_constants_inv();
// typedef uint8_t insns_t;
/*
 * For each leaf template, the eight byte offsets (relative to the start of
 * the copied template) at which ffts_generate_func_code patches a 32-bit
 * immediate with IMM32_NI.
 */
extern const uint32_t sse_leaf_ee_offsets[8];
extern const uint32_t sse_leaf_oo_offsets[8];
extern const uint32_t sse_leaf_eo_offsets[8];
extern const uint32_t sse_leaf_oe_offsets[8];
/*
 * Register numbers used by the encoders below. The low three bits are placed
 * in the opcode or ModRM byte; numbers >= 8 additionally make the encoders
 * emit a REX prefix bit.
 */
#define EAX 0
#define ECX 1
#define EDX 2
#define EBX 3
#define ESI 6
#define EDI 7
#define EBP 5
#define RAX 0
#define RCX 1
#define RDX 2
#define RBX 3
#define RSI 6
#define RDI 7
#define RBP 5
#define R8 8
#define R9 9
#define R10 10
#define R11 11
#define R12 12
#define R13 13
#define R14 14
#define R15 15
/* Writes the low byte of imm at *p and advances *p by one. */
void IMM8(uint8_t **p, int32_t imm) {
	uint8_t *out = *p;
	out[0] = (imm & 0xff);
	*p = out + 1;
}
/* Writes the low 16 bits of imm at *p, least significant byte first, and advances *p by two. */
void IMM16(uint8_t **p, int32_t imm) {
	uint8_t *out = *p;
	out[0] = (imm & 0x00ff);
	out[1] = (imm & 0xff00) >> 8;
	*p = out + 2;
}
/*
 * Writes imm at *p as four bytes, least significant first, and advances *p
 * by four.
 *
 * The bytes are extracted from the unsigned bit pattern: the former
 * `0xff << 24` mask overflowed a signed int, which is undefined behaviour.
 */
void IMM32(uint8_t **p, int32_t imm) {
	uint32_t bits = (uint32_t)imm;
	int i;
	for(i=0;i<4;i++) {
		*(*p)++ = (uint8_t)(bits >> (i*8));
	}
}
/*
 * Non-incrementing variant of IMM32: writes imm to p[0..3], least significant
 * byte first, without moving any cursor. Used to patch immediates inside
 * code that has already been copied.
 *
 * The bytes are extracted from the unsigned bit pattern: the former
 * `0xff << 24` mask overflowed a signed int, which is undefined behaviour.
 */
void IMM32_NI(uint8_t *p, int32_t imm) {
	uint32_t bits = (uint32_t)imm;
	int i;
	for(i=0;i<4;i++) {
		p[i] = (uint8_t)(bits >> (i*8));
	}
}
/*
 * Reads four bytes at p, least significant first, and returns them as a
 * signed 32-bit value (the inverse of IMM32_NI).
 *
 * The value is assembled in an unsigned accumulator: shifting a promoted
 * byte >= 0x80 left by 24 overflowed a signed int, which is undefined
 * behaviour.
 */
int32_t READ_IMM32(uint8_t *p) {
	uint32_t bits = 0;
	int i;
	for(i=0;i<4;i++) {
		bits |= (uint32_t)p[i] << (i*8);
	}
	return (int32_t)bits;
}
/*
 * Emits "mov r32, imm32" at *p and advances *p: an optional 0x41 prefix when
 * dst >= 8, the opcode byte 0xb8 | (dst & 7), then imm as four
 * little-endian bytes. The 32-bit form is always used, whatever the size of
 * imm.
 */
void MOVI(uint8_t **p, uint8_t dst, uint32_t imm) {
	uint8_t *out = *p;
	if(dst >= 8) {
		*out++ = 0x41;
	}
	*out++ = 0xb8 | (dst & 0x7);
	*p = out;
	IMM32(p, imm);
}
/*
 * Emits the ModRM byte (plus displacement) for a [rm + disp] memory operand
 * with register operand reg, advancing *p.
 *
 *   disp == 0            mod = 00, no displacement bytes
 *   -128 <= disp <= 127  mod = 01, 8-bit displacement
 *   otherwise            mod = 10, 32-bit displacement
 *
 * The range test previously used `||`, which is true for every value, so
 * the 32-bit branch was unreachable and large displacements were silently
 * truncated to 8 bits.
 *
 * NOTE(review): base registers whose low three bits are 4 (an SIB byte would
 * be required) or 5 with disp == 0 are not special-cased here; confirm that
 * no caller needs them.
 */
void ADDRMODE(uint8_t **p, uint8_t reg, uint8_t rm, int32_t disp) {
	uint8_t modrm = (rm & 7) | ((reg & 7) << 3);
	if(disp == 0) {
		*(*p)++ = modrm;
	}else if(disp >= -128 && disp <= 127) {
		*(*p)++ = 0x40 | modrm;
		IMM8(p, disp);
	}else{
		*(*p)++ = 0x80 | modrm;
		IMM32(p, disp);
	}
}
/*
 * Emits "lea dst, [base + disp]" at *p and advances *p: a REX prefix
 * (0x48, plus bit 2 when dst >= 8 and bit 0 when base >= 8), the opcode 0x8d,
 * then the operand bytes produced by ADDRMODE.
 */
void LEA(uint8_t **p, uint8_t dst, uint8_t base, int32_t disp) {
	uint8_t rex = 0x48;
	rex |= (dst & 0x8) >> 1;
	rex |= (base & 0x8) >> 3;
	*(*p)++ = rex;
	*(*p)++ = 0x8d;
	ADDRMODE(p, dst, base, disp);
}
/* Emits the one-byte near return (0xc3) at *p and advances *p. */
void RET(uint8_t **p) {
	uint8_t *out = *p;
	out[0] = 0xc3;
	*p = out + 1;
}
/*
 * Emits a 64-bit "add dst, imm" at *p and advances *p: REX prefix 0x49 for
 * dst >= 8 or 0x48 otherwise, opcode 0x83 with an 8-bit immediate or 0x81
 * with a 32-bit immediate, and a ModRM byte 0xc0 | (dst & 7).
 * The short form is chosen for -127..127; -128 also takes the 32-bit form.
 */
void ADDI(uint8_t **p, uint8_t dst, int32_t imm) {
	int wide = (imm > 127 || imm <= -128);

	*(*p)++ = (dst >= 8) ? 0x49 : 0x48;
	*(*p)++ = wide ? 0x81 : 0x83;
	*(*p)++ = 0xc0 | (dst & 0x7);

	if(wide) {
		IMM32(p, imm);
	}else{
		IMM8(p, imm);
	}
}
/*
 * Emits a relative "call func" at *p and advances *p: opcode 0xe8 followed
 * by a 32-bit displacement measured from the end of the five-byte
 * instruction to func.
 */
void CALL(uint8_t **p, uint8_t *func) {
	*(*p)++ = 0xe8;
	/* The displacement is relative to the byte after its own four bytes. */
	uint8_t *next_insn = *p + 4;
	IMM32(p, func - next_insn);
}
/*
 * Emits "push reg" at *p and advances *p: opcode 0x50 | (reg & 7), preceded
 * by the prefix byte 0x41 when reg >= 8.
 */
void PUSH(uint8_t **p, uint8_t reg) {
	uint8_t *out = *p;
	if(reg >= 8) {
		*out++ = 0x41;
	}
	*out++ = 0x50 | (reg & 7);
	*p = out;
}
/*
 * Emits "pop reg" at *p and advances *p: opcode 0x58 | (reg & 7), preceded
 * by the prefix byte 0x41 when reg >= 8.
 */
void POP(uint8_t **p, uint8_t reg) {
	uint8_t *out = *p;
	if(reg >= 8) {
		*out++ = 0x41;
	}
	*out++ = 0x58 | (reg & 7);
	*p = out;
}
#endif

@ -0,0 +1,398 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts.h"
#include "macros.h"
//#include "mini_macros.h"
#include "patterns.h"
#include "ffts_small.h"
#ifdef DYNAMIC_DISABLED
#include "ffts_static.h"
#else
#include "codegen.h"
#endif
#include <errno.h>
#include <sys/mman.h>
#include <string.h>
#include <limits.h> /* for PAGESIZE */
#if __APPLE__
#include <libkern/OSCacheControl.h>
#else
#endif
/*
 * Runs the plan's transform routine on `in`, writing the result to `out`.
 * Both buffers are handed to the routine as arrays of float.
 */
void ffts_execute(ffts_plan_t *p, const void * in, void * out) {
	const float *src = (const float *)in;
	float *dst = (float *)out;
	p->transform(p, src, dst);
}
/* Releases a plan by invoking the destructor stored in the plan itself. */
void ffts_free(ffts_plan_t *p) {
	(*p->destroy)(p);
}
/*
 * Destructor for plans created by ffts_init_1d: releases the twiddle LUT,
 * the index/offset tables, the transforms array, the generated-code mapping
 * (if any) and finally the plan itself.
 *
 * Exits the process if the code mapping cannot be made writable again.
 */
void ffts_free_1d(ffts_plan_t *p) {
	/* FFTS_FREE is a project macro; keep the guard since its NULL handling is not visible here. */
	if(p->ws) {
		FFTS_FREE(p->ws);
	}

	/* free(NULL) is a no-op, so these need no guards. */
	free(p->is);
	free(p->ws_is);
	free(p->offsets);
	free(p->transforms);

	if(p->transform_base) {
		if (mprotect(p->transform_base, p->transform_size, PROT_READ | PROT_WRITE)) {
			/* Capture errno first: perror() is allowed to modify it. */
			int err = errno;
			perror("Couldn't mprotect");
			exit(err);
		}
		munmap(p->transform_base, p->transform_size);
	}

	free(p);
}
/*
 * Allocates and initialises a 1-D plan for a transform of size N.
 *
 * For N >= 32 the offset and index tables and the loop counts i0/i1/i2 are
 * computed, and at the end either a static transform is selected
 * (DYNAMIC_DISABLED) or code is generated with ffts_generate_func_code.
 * For N < 32 one of the hard-coded firstpass_* routines is selected.
 * In both cases the twiddle-factor lookup tables are built into p->ws, with
 * the start of each table recorded in p->ws_is.
 *
 * sign selects the direction: it picks the _f or _b small routine, the sign
 * applied to the imaginary twiddle parts, and which half of w_data is used.
 *
 * Returns the new plan; release it through its destroy callback (ffts_free).
 *
 * NOTE(review): the result of malloc is used without a NULL check.
 */
ffts_plan_t *ffts_init_1d(size_t N, int sign) {
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
size_t leafN = 8;
size_t i;
#ifdef __arm__
//#ifdef HAVE_NEON
V MULI_SIGN;
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
//#endif
#else
V MULI_SIGN;
if(sign < 0) MULI_SIGN = VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
else MULI_SIGN = VLIT4(0.0f, -0.0f, 0.0f, -0.0f);
#endif
// Start from a fully cleared plan so that ffts_free_1d can release a partially built one.
p->transform = NULL;
p->transform_base = NULL;
p->transforms = NULL;
p->is = NULL;
p->ws_is = NULL;
p->ws = NULL;
p->offsets = NULL;
p->destroy = ffts_free_1d;
if(N >= 32) {
ffts_init_offsets(p, N, leafN);
#ifdef __arm__
#ifdef HAVE_NEON
ffts_init_is(p, N, leafN, 1);
#else
ffts_init_is(p, N, leafN, 1);
#endif
#else
ffts_init_is(p, N, leafN, 1);
#endif
// Loop counts read by the code generator; they are halved except on ARM builds without NEON.
p->i0 = N/leafN/3+1;
p->i1 = N/leafN/3;
if((N/leafN) % 3 > 1) p->i1++;
p->i2 = N/leafN/3;
#ifdef __arm__
#ifdef HAVE_NEON
p->i0/=2;
p->i1/=2;
#endif
#else
p->i0/=2;
p->i1/=2;
#endif
}else{
p->transforms = malloc(2 * sizeof(transform_index_t));
p->transforms[0] = 0;
p->transforms[1] = 1;
// NOTE(review): p->transform stays NULL when (N, sign) matches none of the cases below.
if(N == 2) p->transform = &firstpass_2;
else if(N == 4 && sign == -1) p->transform = &firstpass_4_f;
else if(N == 4 && sign == 1) p->transform = &firstpass_4_b;
else if(N == 8 && sign == -1) p->transform = &firstpass_8_f;
else if(N == 8 && sign == 1) p->transform = &firstpass_8_b;
else if(N == 16 && sign == -1) p->transform = &firstpass_16_f;
else if(N == 16 && sign == 1) p->transform = &firstpass_16_b;
p->is = NULL;
p->offsets = NULL;
}
int hardcoded = 0;
/* LUTS */
// NOTE(review): __builtin_ctzl is undefined for 0, which N/leafN (N < 8) or N/4 (N < 4) can be;
// the n_luts >= 32 test below appears to be the guard for that case.
size_t n_luts = __builtin_ctzl(N/leafN);
if(N < 32) { n_luts = __builtin_ctzl(N/4); hardcoded = 1; }
if(n_luts >= 32) n_luts = 0;
// fprintf(stderr, "n_luts = %zu\n", n_luts);
cdata_t *w;
int n = leafN*2;
if(hardcoded) n = 8;
size_t lut_size = 0;
// First pass over the levels: only add up the storage needed for all tables.
for(i=0;i<n_luts;i++) {
if(!i || hardcoded) {
#ifdef __arm__
if(N <= 32) lut_size += n/4 * 2 * sizeof(cdata_t);
else lut_size += n/4 * sizeof(cdata_t);
#else
lut_size += n/4 * 2 * sizeof(cdata_t);
#endif
// NOTE(review): n is doubled here and again at the end of the iteration, unlike in the fill loop below.
n *= 2;
} else {
#ifdef __arm__
lut_size += n/8 * 3 * sizeof(cdata_t);
#else
lut_size += n/8 * 3 * 2 * sizeof(cdata_t);
#endif
}
n *= 2;
}
// lut_size *= 16;
// fprintf(stderr, "lut size = %zu\n", lut_size);
if(n_luts) {
p->ws = FFTS_MALLOC(lut_size,32);
p->ws_is = malloc(n_luts * sizeof(size_t));
}else{
p->ws = NULL;
p->ws_is = NULL;
}
w = p->ws;
n = leafN*2;
if(hardcoded) n = 8;
#ifdef HAVE_NEON
V neg = (sign < 0) ? VLIT4(0.0f, 0.0f, 0.0f, 0.0f) : VLIT4(-0.0f, -0.0f, -0.0f, -0.0f);
#endif
// Second pass: fill the tables one level at a time; w is the write cursor into p->ws.
for(i=0;i<n_luts;i++) {
// Start of table i, counted in cdata_t elements from p->ws.
p->ws_is[i] = w - (cdata_t *)p->ws;
//fprintf(stderr, "LUT[%zu] = %d @ %08x - %zu\n", i, n, w, p->ws_is[i]);
if(!i || hardcoded) {
// First level (or every level of a hard-coded size): n/4 factors W(n, j).
cdata_t *w0 = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
size_t j;
for(j=0;j<n/4;j++) {
w0[j][0] = W_re(n,j);
w0[j][1] = W_im(n,j);
}
float *fw0 = (float *)w0;
#ifdef __arm__
if(N < 32) {
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2;
for(j=0;j<n/4;j+=2) {
// #ifdef HAVE_NEON
temp0 = VLD(fw0 + j*2);
V re, im;
re = VDUPRE(temp0);
im = VDUPIM(temp0);
#ifdef HAVE_NEON
im = VXOR(im, MULI_SIGN);
//im = IMULI(sign>0, im);
#else
im = MULI(sign>0, im);
#endif
VST(fw + j*4 , re);
VST(fw + j*4+4, im);
// #endif
}
w += n/4 * 2;
}else{
//w = FFTS_MALLOC(n/4 * sizeof(cdata_t), 32);
float *fw = (float *)w;
#ifdef HAVE_NEON
VS temp0, temp1, temp2;
for(j=0;j<n/4;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
STORESPR(fw + j*2, temp0);
}
#else
for(j=0;j<n/4;j+=1) {
fw[j*2] = fw0[j*2];
fw[j*2+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
}
#endif
w += n/4;
}
#else
//w = FFTS_MALLOC(n/4 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2;
// Two factors per iteration: a vector built from the real parts, then one from the imaginary parts with MULI_SIGN applied.
for(j=0;j<n/4;j+=2) {
temp0 = VLD(fw0 + j*2);
V re, im;
re = VDUPRE(temp0);
im = VDUPIM(temp0);
im = VXOR(im, MULI_SIGN);
VST(fw + j*4 , re);
VST(fw + j*4+4, im);
}
w += n/4 * 2;
#endif
FFTS_FREE(w0);
}else{
// Later levels: three sets of n/8 factors each, W(n, 2j), W(n, j) and W(n, j + n/8).
cdata_t *w0 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
cdata_t *w1 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
cdata_t *w2 = FFTS_MALLOC(n/8 * sizeof(cdata_t), 32);
size_t j;
for(j=0;j<n/8;j++) {
w0[j][0] = W_re(n,j*2);
w0[j][1] = W_im(n,j*2);
w1[j][0] = W_re(n,j);
w1[j][1] = W_im(n,j);
w2[j][0] = W_re(n,j + (n/8));
w2[j][1] = W_im(n,j + (n/8));
}
float *fw0 = (float *)w0;
float *fw1 = (float *)w1;
float *fw2 = (float *)w2;
#ifdef __arm__
//w = FFTS_MALLOC(n/8 * 3 * sizeof(cdata_t), 32);
float *fw = (float *)w;
#ifdef HAVE_NEON
VS temp0, temp1, temp2;
for(j=0;j<n/8;j+=4) {
temp0 = VLD2(fw0 + j*2);
temp0.val[1] = VXOR(temp0.val[1], neg);
STORESPR(fw + j*2*3, temp0);
temp1 = VLD2(fw1 + j*2);
temp1.val[1] = VXOR(temp1.val[1], neg);
STORESPR(fw + j*2*3 + 8, temp1);
temp2 = VLD2(fw2 + j*2);
temp2.val[1] = VXOR(temp2.val[1], neg);
STORESPR(fw + j*2*3 + 16, temp2);
}
#else
for(j=0;j<n/8;j+=1) {
fw[j*6] = fw0[j*2];
fw[j*6+1] = (sign < 0) ? fw0[j*2+1] : -fw0[j*2+1];
fw[j*6+2] = fw1[j*2+0];
fw[j*6+3] = (sign < 0) ? fw1[j*2+1] : -fw1[j*2+1];
fw[j*6+4] = fw2[j*2+0];
fw[j*6+5] = (sign < 0) ? fw2[j*2+1] : -fw2[j*2+1];
}
#endif
w += n/8 * 3;
#else
//w = FFTS_MALLOC(n/8 * 3 * 2 * sizeof(cdata_t), 32);
float *fw = (float *)w;
V temp0, temp1, temp2, re, im;
// Per pair of factors, 24 floats are written: re/im vectors for w0, then w1, then w2.
for(j=0;j<n/8;j+=2) {
temp0 = VLD(fw0 + j*2);
re = VDUPRE(temp0);
im = VDUPIM(temp0);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6 , re);
VST(fw + j*2*6+4, im);
temp1 = VLD(fw1 + j*2);
re = VDUPRE(temp1);
im = VDUPIM(temp1);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6+8 , re);
VST(fw + j*2*6+12, im);
temp2 = VLD(fw2 + j*2);
re = VDUPRE(temp2);
im = VDUPIM(temp2);
im = VXOR(im, MULI_SIGN);
VST(fw + j*2*6+16, re);
VST(fw + j*2*6+20, im);
}
w += n/8 * 3 * 2;
#endif
FFTS_FREE(w0);
FFTS_FREE(w1);
FFTS_FREE(w2);
}
///p->ws[i] = w;
n *= 2;
}
// NOTE(review): tmp is not used afterwards.
float *tmp = (float *)p->ws;
// Leaf constants come from the static w_data table; which part is used depends on sign.
if(sign < 0) {
p->oe_ws = (void *)(&w_data[4]);
p->ee_ws = (void *)(w_data);
p->eo_ws = (void *)(&w_data[4]);
}else{
p->oe_ws = (void *)(w_data + 12);
p->ee_ws = (void *)(w_data + 8);
p->eo_ws = (void *)(w_data + 12);
}
p->N = N;
p->lastlut = w;
p->n_luts = n_luts;
#ifdef DYNAMIC_DISABLED
if(sign < 0) {
if(N >= 32) p->transform = ffts_static_transform_f;
}else{
if(N >= 32) p->transform = ffts_static_transform_i;
}
#else
if(N>=32) ffts_generate_func_code(p, N, leafN, sign);
#endif
return p;
}

@ -0,0 +1,177 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CP_SSE_H__
#define __CP_SSE_H__
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
//#include <stdalign.h>
//#include "codegen.h"
#include "types.h"
#define PI 3.1415926535897932384626433832795028841971693993751058209
/*
 * Constant twiddle table for the base cases: 16 floats, 64-byte aligned.
 * ffts_init_1d points the plan into it as follows:
 *   sign <  0 : ee_ws = &w_data[0],  oe_ws = eo_ws = &w_data[4]
 *   otherwise : ee_ws = &w_data[8],  oe_ws = eo_ws = &w_data[12]
 * The two halves hold the same magnitudes and differ only in sign.
 * 0.7071... is sqrt(2)/2; the unsuffixed literals are doubles that are
 * converted to float when the array is initialised.
 */
static const __attribute__ ((aligned(64))) float w_data[16] = {
/* [0..3]: ee_ws points here when sign < 0 */
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
-0.70710678118654757273731092936941, -0.70710678118654746171500846685376,
/* [4..7]: oe_ws / eo_ws point here when sign < 0 */
1.0f, 0.70710678118654757273731092936941f,
-0.0f, -0.70710678118654746171500846685376,
/* [8..11]: ee_ws points here when sign >= 0 */
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
0.70710678118654757273731092936941, 0.70710678118654746171500846685376,
/* [12..15]: oe_ws / eo_ws point here when sign >= 0 */
1.0f, 0.70710678118654757273731092936941f,
0.0f, 0.70710678118654746171500846685376
};
/*
 * Real part of the twiddle factor exp(-2*pi*i*k/N), i.e. cos(-2*pi*k/N).
 * PI is a double constant, so the angle and the cosine are evaluated in
 * double precision and only the result is narrowed to float.
 */
__INLINE float W_re(float N, float k) {
    const double angle = -2.0f * PI * k / N;
    return cos(angle);
}
/*
 * Imaginary part of the twiddle factor exp(-2*pi*i*k/N), i.e. sin(-2*pi*k/N).
 * PI is a double constant, so the angle and the sine are evaluated in
 * double precision and only the result is narrowed to float.
 */
__INLINE float W_im(float N, float k) {
    const double angle = -2.0f * PI * k / N;
    return sin(angle);
}
/* Element type of the plan's `transforms` array. */
typedef size_t transform_index_t;
/* Signature of a transform routine: data buffer, size N and a lookup table. */
typedef void (*transform_func_t)(float *data, size_t N, float *LUT);
/* Short name for the plan structure declared in this header. */
typedef struct _ffts_plan_t ffts_plan_t;
/**
* Contains all the information needed to perform an FFT.
*
* DO NOT CHANGE THE ORDER OF MEMBERS:
* ASSEMBLY CODE USES HARD-CODED OFFSETS TO REFERENCE
* SOME OF THESE VARIABLES!
*/
struct _ffts_plan_t {
/**
* Offset table. NOTE(review): its contents are filled in outside this
* header — confirm the exact meaning against the plan initialisation code.
*/
ptrdiff_t *offsets;
#ifdef DYNAMIC_DISABLED
/**
* Twiddle factors
*/
void *ws;
/**
* Twiddle factors for the base cases (ffts_init_1d points these into w_data).
* Original author's notes:
* ee - 2 size x size8
* oo - 2 x size4 in parallel
* oe -
*/
void *oe_ws, *eo_ws, *ee_ws;
#else
/* Same members as above, declared with a 32-byte alignment attribute. */
void __attribute__((aligned(32))) *ws;
void __attribute__((aligned(32))) *oe_ws, *eo_ws, *ee_ws;
#endif
/**
* Pointer into an array of precomputed indexes for the input data array
*/
ptrdiff_t *is;
/**
* Twiddle factor indexes
*/
size_t *ws_is;
/**
* Sizes of the loops for the base cases (i0, i1) and the number of
* lookup tables (n_luts)
*/
size_t i0, i1, n_luts;
/**
* Size of the transform
*/
size_t N;
/* Set by ffts_init_1d to the final value of its lookup-table write pointer. */
void *lastlut;
/**
* Used in multidimensional code? (unconfirmed — original author's note)
*/
transform_index_t *transforms;
/**
* Pointer to the function that executes the FFT: arguments are the plan,
* the input buffer and the output buffer. It is either dynamically
* generated code or one of the library's C routines.
*/
void (*transform)(ffts_plan_t * , const void * , void * );
/**
* Pointer to the base memory address of the transform function
*/
void *transform_base;
/**
* Size of the memory block containing the generated code
*/
size_t transform_size;
/**
* Points to the constant variables used by the assembly code
*/
void *constants;
/* Multi-dimensional and real-transform state: */
/* Sub-plans, `rank` entries (entries may be shared between dimensions). */
struct _ffts_plan_t **plans;
/* Number of dimensions. */
int rank;
/* Per-dimension sizes; both arrays hold `rank` entries. */
size_t *Ns, *Ms;
/* Intermediate working buffer. */
void *buf;
/* Scratch buffer handed to the transpose routines. */
void *transpose_buf;
/**
* Pointer to the destroy function used to clean up the plan after use
* (differs for real and multi-dimensional transforms)
*/
void (*destroy)(ffts_plan_t *);
/**
* Coefficients for the real-valued transforms
*/
float *A, *B;
/* NOTE(review): purpose not visible in this header — presumably another base-case loop size like i0/i1; confirm. */
size_t i2;
};
/* Releases a plan. The N-d and real destroy routines call it on each of their sub-plans. */
void ffts_free(ffts_plan_t *);
/* Builds a 1-D plan for N points; sign < 0 and sign >= 0 select the two transform directions. */
ffts_plan_t *ffts_init_1d(size_t N, int sign);
/* Runs a plan: arguments are the plan, the input buffer and the output buffer. */
void ffts_execute(ffts_plan_t *, const void *, void *);
#endif

@ -0,0 +1,282 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_nd.h"
#ifdef HAVE_NEON
#include "neon.h"
#endif
/**
 * Destroys an N-d plan created by ffts_init_nd.
 *
 * ffts_init_nd lets dimensions with equal Ms[] share a single sub-plan, so a
 * sub-plan is released only for the first dimension that has a given Ms value.
 * Afterwards the size arrays, plan table, buffers and the plan itself are freed.
 */
void ffts_free_nd(ffts_plan_t *p) {
    int dim = 0;

    while (dim < p->rank) {
        ffts_plan_t *sub = p->plans[dim];
        int earlier;

        /* An earlier dimension with the same length owns the shared sub-plan. */
        for (earlier = 0; earlier < dim; earlier++) {
            if (p->Ms[earlier] == p->Ms[dim]) {
                sub = NULL;
                break;
            }
        }
        if (sub != NULL) {
            ffts_free(sub);
        }
        dim++;
    }

    free(p->Ns);
    free(p->Ms);
    free(p->plans);
    free(p->buf);
    free(p->transpose_buf);
    free(p);
}
#define TSIZE 8
#include <string.h>
/**
 * Transposes a matrix of 64-bit elements: out[i*h + j] = in[j*w + i].
 *
 * Three implementations are selected at compile time:
 *  - HAVE_NEON: each 8x8 tile is transposed into `buf` by
 *    neon_transpose_to_buf() and then copied to `out` with inline assembly.
 *    The asm uses 128-bit alignment hints, so `buf` and every destination
 *    row must be 16-byte aligned, and `buf` must hold 64 elements.
 *  - HAVE_SSE: each TSIZE x TSIZE tile is transposed into an aligned local
 *    array and copied out. Only whole tiles are processed (w / TSIZE by
 *    h / TSIZE), and aligned loads/stores are used on `in` and `out`.
 *  - otherwise: a portable element-by-element loop (previously the function
 *    silently did nothing on such targets).
 *
 * @param in   source matrix, h rows of w elements
 * @param out  destination matrix, w rows of h elements; expected to be a
 *             different buffer from `in`
 * @param w    number of columns of `in`
 * @param h    number of rows of `in`
 * @param buf  tile scratch buffer, used by the NEON path only
 */
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
#if defined(HAVE_NEON)
    size_t i, j;

    for (j = 0; j < h; j += 8) {
        for (i = 0; i < w; i += 8) {
            neon_transpose_to_buf(in + j*w + i, buf, w);

            uint64_t *p = out + i*h + j;
            uint64_t *pbuf = buf;
            uint64_t *ptemp;

            /*
             * Copy the 8x8 tile from buf to out, two destination rows
             * (128 bytes) per group of loads.
             * NOTE(review): destination rows are advanced by w elements,
             * whereas the reference formula uses a stride of h; the two agree
             * for square matrices — confirm behaviour for w != h.
             */
            __asm__ __volatile__(
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "add %[p], %[p], %[w], lsl #3\n\t"
                "vld1.32 {q8,q9}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q10,q11}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q12,q13}, [%[pbuf], :128]!\n\t"
                "vld1.32 {q14,q15}, [%[pbuf], :128]!\n\t"
                "vst1.32 {q8,q9}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q10,q11}, [%[ptemp], :128]!\n\t"
                "mov %[ptemp], %[p]\n\t"
                "vst1.32 {q12,q13}, [%[ptemp], :128]!\n\t"
                "vst1.32 {q14,q15}, [%[ptemp], :128]!\n\t"
                /* ptemp is write-only and written before the inputs are
                 * fully consumed, hence the early-clobber output. */
                : [p] "+r" (p), [pbuf] "+r" (pbuf), [ptemp] "=&r" (ptemp)
                : [w] "r" (w)
                /* Every q register the asm writes must be listed here. */
                : "memory", "q8", "q9", "q10", "q11",
                  "q12", "q13", "q14", "q15"
            );
        }
    }
#elif defined(HAVE_SSE)
    uint64_t tmp[TSIZE*TSIZE] __attribute__((aligned(64)));
    int tx, ty;
    int y;
    int tw = w / TSIZE;
    int th = h / TSIZE;

    (void)buf; /* scratch buffer is only needed by the NEON path */

    for (ty=0;ty<th;ty++) {
        for (tx=0;tx<tw;tx++) {
            uint64_t *ip0 = in + w*TSIZE*ty + tx * TSIZE;
            uint64_t *op0 = tmp;

            /* Transpose the tile into tmp, two source columns per pass:
             * each pass produces two consecutive rows of tmp. */
            for (y=0;y<TSIZE;y+=2) {
                __m128d q0 = _mm_load_pd((double *)(ip0 + 0*w));
                __m128d q1 = _mm_load_pd((double *)(ip0 + 1*w));
                __m128d q2 = _mm_load_pd((double *)(ip0 + 2*w));
                __m128d q3 = _mm_load_pd((double *)(ip0 + 3*w));
                __m128d q4 = _mm_load_pd((double *)(ip0 + 4*w));
                __m128d q5 = _mm_load_pd((double *)(ip0 + 5*w));
                __m128d q6 = _mm_load_pd((double *)(ip0 + 6*w));
                __m128d q7 = _mm_load_pd((double *)(ip0 + 7*w));
                ip0 += 2;

                __m128d t0 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(0, 0));
                __m128d t1 = _mm_shuffle_pd(q0, q1, _MM_SHUFFLE2(1, 1));
                __m128d t2 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(0, 0));
                __m128d t3 = _mm_shuffle_pd(q2, q3, _MM_SHUFFLE2(1, 1));
                __m128d t4 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(0, 0));
                __m128d t5 = _mm_shuffle_pd(q4, q5, _MM_SHUFFLE2(1, 1));
                __m128d t6 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(0, 0));
                __m128d t7 = _mm_shuffle_pd(q6, q7, _MM_SHUFFLE2(1, 1));

                _mm_store_pd((double *)(op0 + 0), t0);
                _mm_store_pd((double *)(op0 + 0 + TSIZE), t1);
                _mm_store_pd((double *)(op0 + 2 ), t2);
                _mm_store_pd((double *)(op0 + 2 + TSIZE), t3);
                _mm_store_pd((double *)(op0 + 4 ), t4);
                _mm_store_pd((double *)(op0 + 4 + TSIZE), t5);
                _mm_store_pd((double *)(op0 + 6 ), t6);
                _mm_store_pd((double *)(op0 + 6 + TSIZE), t7);
                op0 += 2*TSIZE;
            }

            /* Copy the transposed tile row by row to its place in out. */
            op0 = out + h*tx*TSIZE + ty*TSIZE;
            ip0 = tmp;
            for (y=0;y<TSIZE;y+=1) {
                __m128d q0 = _mm_load_pd((double *)(ip0 + 0));
                __m128d q1 = _mm_load_pd((double *)(ip0 + 2));
                __m128d q2 = _mm_load_pd((double *)(ip0 + 4));
                __m128d q3 = _mm_load_pd((double *)(ip0 + 6));
                _mm_store_pd((double *)(op0 + 0), q0);
                _mm_store_pd((double *)(op0 + 2), q1);
                _mm_store_pd((double *)(op0 + 4), q2);
                _mm_store_pd((double *)(op0 + 6), q3);
                op0 += h;
                ip0 += TSIZE;
            }
        }
    }
#else
    /* Portable fallback for targets with neither NEON nor SSE. */
    int col, row;

    (void)buf; /* scratch buffer is only needed by the NEON path */

    for (col = 0; col < w; col++) {
        for (row = 0; row < h; row++) {
            out[(size_t)col * (size_t)h + (size_t)row] =
                in[(size_t)row * (size_t)w + (size_t)col];
        }
    }
#endif
}
/**
 * Executes an N-d plan.
 *
 * For every dimension the matching sub-plan is run Ns[dim] times on
 * consecutive runs of Ms[dim] 64-bit elements, writing into the plan's
 * working buffer; the working buffer is then transposed into `out`.
 * The first dimension reads from `in`, later dimensions read the previous
 * result from `out`.
 *
 * @param p    plan created by ffts_init_nd
 * @param in   input data
 * @param out  output data (also used as intermediate storage)
 */
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out) {
    uint64_t *src = (uint64_t *)in;
    uint64_t *work = p->buf;
    uint64_t *dst = (uint64_t *)out;
    size_t dim, row;

    /* First dimension: transform straight from the caller's input. */
    for (row = 0; row < p->Ns[0]; row++) {
        const size_t start = row * p->Ms[0];
        p->plans[0]->transform(p->plans[0], src + start, work + start);
    }
    ffts_transpose(work, dst, p->Ms[0], p->Ns[0], p->transpose_buf);

    /* Remaining dimensions: transform the transposed result in `out`. */
    for (dim = 1; dim < p->rank; dim++) {
        for (row = 0; row < p->Ns[dim]; row++) {
            const size_t start = row * p->Ms[dim];
            p->plans[dim]->transform(p->plans[dim], dst + start, work + start);
        }
        ffts_transpose(work, dst, p->Ms[dim], p->Ns[dim], p->transpose_buf);
    }
}
/**
 * Creates an N-d plan.
 *
 * For each dimension i, Ms[i] is the total volume divided by Ns[i] and a 1-D
 * sub-plan of length Ms[i] is created; dimensions with equal Ms share one
 * sub-plan (ffts_free_nd relies on this when releasing them).
 *
 * @param rank  number of dimensions, must be at least 1
 * @param Ns    `rank` extents, each non-zero
 * @param sign  transform direction, passed through to ffts_init_1d
 * @return the new plan, or NULL if an argument is invalid, the volume
 *         overflows size_t, or any allocation / sub-plan creation fails.
 *         Release with ffts_free_nd (the plan's `destroy` hook).
 */
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign) {
    size_t vol = 1;
    ffts_plan_t *p;
    int i;

    if (rank < 1 || Ns == NULL) return NULL;

    /* Zero-initialised so that a partially built plan can be torn down. */
    p = calloc(1, sizeof *p);
    if (p == NULL) return NULL;

    p->transform = &ffts_execute_nd;
    p->destroy = &ffts_free_nd;
    p->rank = rank;

    p->Ns = calloc((size_t)rank, sizeof *p->Ns);
    p->Ms = calloc((size_t)rank, sizeof *p->Ms);
    p->plans = calloc((size_t)rank, sizeof *p->plans);
    if (p->Ns == NULL || p->Ms == NULL || p->plans == NULL) goto fail;

    for (i = 0; i < rank; i++) {
        /* A zero extent would divide by zero below; also guard the product. */
        if (Ns[i] == 0 || vol > SIZE_MAX / Ns[i]) goto fail;
        p->Ns[i] = Ns[i];
        vol *= Ns[i];
    }
    if (vol > SIZE_MAX / (sizeof(float) * 2)) goto fail;

    p->buf = valloc(sizeof(float) * 2 * vol);
    p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
    if (p->buf == NULL || p->transpose_buf == NULL) goto fail;

    for (i = 0; i < rank; i++) {
        int k;

        p->Ms[i] = vol / p->Ns[i];

        /* Reuse the sub-plan of an earlier dimension of the same length. */
        for (k = 0; k < i; k++) {
            if (p->Ms[k] == p->Ms[i])
                p->plans[i] = p->plans[k];
        }
        if (!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
        if (!p->plans[i]) goto fail;
    }

    return p;

fail:
    /* ffts_free_nd walks plans[] and Ms[]; skip that walk if either is missing. */
    if (p->plans == NULL || p->Ms == NULL) p->rank = 0;
    ffts_free_nd(p);
    return NULL;
}
/**
 * Convenience wrapper: creates a rank-2 plan with extents {N1, N2} by
 * forwarding to ffts_init_nd.
 */
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign) {
    size_t extents[2] = { N1, N2 };

    return ffts_init_nd(2, extents, sign);
}

@ -0,0 +1,58 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_ND_H__
#define __FFTS_ND_H__
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include "ffts.h"
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
/* Destroys an N-d plan: releases each distinct sub-plan, the size arrays, both buffers and the plan. */
void ffts_free_nd(ffts_plan_t *p);
/* Transposes a matrix of 64-bit elements, out[i*h + j] = in[j*w + i]; buf is tile scratch space. */
void ffts_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf);
/* Executes an N-d plan: transforms and transposes once per dimension. */
void ffts_execute_nd(ffts_plan_t *p, const void * in, void * out);
/* Creates an N-d plan for `rank` dimensions with extents Ns[0..rank-1]. */
ffts_plan_t *ffts_init_nd(int rank, size_t *Ns, int sign);
/* Creates a rank-2 plan with extents {N1, N2}. */
ffts_plan_t *ffts_init_2d(size_t N1, size_t N2, int sign);
#endif

@ -0,0 +1,226 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_real.h"
/**
 * Destroys a plan created by ffts_init_1d_real: releases the underlying
 * sub-plan, then the plan table, working buffer, both coefficient tables
 * and finally the plan itself.
 */
void ffts_free_1d_real(ffts_plan_t *p) {
    ffts_plan_t *inner = p->plans[0];

    ffts_free(inner);
    free(p->plans);
    free(p->buf);
    free(p->A);
    free(p->B);
    free(p);
}
/**
 * Executes a real-input plan (installed by ffts_init_1d_real when sign < 0).
 *
 * Steps, as implemented below:
 *  1. run the sub-plan (created with length N/2) on the input into p->buf;
 *  2. copy buf[0..1] to buf[N..N+1] so the combine step can read index N;
 *  3. combine buf[2i..2i+1] and buf[N-2i..N-2i+1] with the A and B tables
 *     into out[2i..2i+1] for i in [0, N/2);
 *  4. write out[N] = buf[0] - buf[1] and out[N+1] = 0.
 *
 * The output therefore needs room for N + 2 floats.
 *
 * @param p     plan created by ffts_init_1d_real
 * @param vin   input buffer, handed unchanged to the sub-plan
 * @param vout  output buffer, written as floats
 */
void ffts_execute_1d_real(ffts_plan_t *p, const void *vin, void *vout) {
float *out = (float *)vout;
float *buf = (float *)p->buf;
float *A = p->A;
float *B = p->B;
/* Step 1: sub-plan transform into the working buffer. */
p->plans[0]->transform(p->plans[0], vin, buf);
size_t N = p->N;
/* Step 2: wrap-around copy of the first complex element. */
buf[N] = buf[0];
buf[N+1] = buf[1];
/* Running pointers consumed by the NEON loop only. */
float *p_buf0 = buf;
float *p_buf1 = buf + N - 2;
float *p_out = out;
size_t i;
#ifdef __ARM_NEON__
/* NEON variant: two iterations of the combine step per pass; A, B, buf0 and
   out advance by 16 bytes per pass while buf1 steps backwards by 16 bytes. */
for(i=0;i<N/2;i+=2) {
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
"vld1.32 {q9}, [%[pb], :128]!\n\t"
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
"vld1.32 {q11}, [%[buf1], :64]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vneg.f32 d26, d26\n\t"
"vneg.f32 d31, d31\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d30, d31\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vadd.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout], :128]!\n\t"
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
[pout] "+r" (p_out)
:
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
#else
/* Scalar variant of the combine step (step 3). */
for(i=0;i<N/2;i++) {
out[2*i] = buf[2*i]*A[2*i] - buf[2*i+1]*A[2*i+1] + buf[N-2*i]*B[2*i] + buf[N-2*i+1]*B[2*i+1];
out[2*i+1] = buf[2*i+1]*A[2*i] + buf[2*i]*A[2*i+1] + buf[N-2*i]*B[2*i+1] - buf[N-2*i+1]*B[2*i];
// out[2*N-2*i] = out[2*i];
// out[2*N-2*i+1] = -out[2*i+1];
#endif
/* Closes whichever of the two loops was selected above. */
}
/* Step 4: final output element. */
out[N] = buf[0] - buf[1];
out[N+1] = 0.0f;
}
/**
 * Executes the other direction of a real plan (installed by
 * ffts_init_1d_real when sign >= 0).
 *
 * Steps, as implemented below:
 *  1. combine in[2i..2i+1] and in[N-2i..N-2i+1] with the A and B tables into
 *     p->buf[2i..2i+1] for i in [0, N/2);
 *  2. run the sub-plan (created with length N/2) on p->buf into the output.
 *
 * For i == 0 the combine step reads in[N] and in[N+1], so the input must
 * provide N + 2 floats.
 *
 * @param p     plan created by ffts_init_1d_real
 * @param vin   input buffer, read as floats
 * @param vout  output buffer, written by the sub-plan
 */
void ffts_execute_1d_real_inv(ffts_plan_t *p, const void *vin, void *vout) {
float *out = (float *)vout;
float *in = (float *)vin;
float *buf = (float *)p->buf;
float *A = p->A;
float *B = p->B;
size_t N = p->N;
/* Running pointers consumed by the NEON loop only. */
float *p_buf0 = in;
float *p_buf1 = in + N - 2;
float *p_out = buf;
size_t i;
#ifdef __ARM_NEON__
/* NEON variant: two iterations of the combine step per pass; A, B, buf0 and
   out advance by 16 bytes per pass while buf1 steps backwards by 16 bytes. */
for(i=0;i<N/2;i+=2) {
__asm__ __volatile__ ("vld1.32 {q8}, [%[pa], :128]!\n\t"
"vld1.32 {q9}, [%[pb], :128]!\n\t"
"vld1.32 {q10}, [%[buf0], :128]!\n\t"
"vld1.32 {q11}, [%[buf1], :64]\n\t"
"sub %[buf1], %[buf1], #16\n\t"
"vdup.32 d26, d16[1]\n\t"
"vdup.32 d27, d17[1]\n\t"
"vdup.32 d24, d16[0]\n\t"
"vdup.32 d25, d17[0]\n\t"
"vdup.32 d30, d23[1]\n\t"
"vdup.32 d31, d22[1]\n\t"
"vdup.32 d28, d23[0]\n\t"
"vdup.32 d29, d22[0]\n\t"
"vmul.f32 q13, q13, q10\n\t"
"vmul.f32 q15, q15, q9\n\t"
"vmul.f32 q12, q12, q10\n\t"
"vmul.f32 q14, q14, q9\n\t"
"vrev64.f32 q13, q13\n\t"
"vrev64.f32 q15, q15\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vneg.f32 d27, d27\n\t"
"vneg.f32 d29, d29\n\t"
"vtrn.32 d26, d27\n\t"
"vtrn.32 d28, d29\n\t"
"vadd.f32 q12, q12, q14\n\t"
"vsub.f32 q13, q13, q15\n\t"
"vadd.f32 q12, q12, q13\n\t"
"vst1.32 {q12}, [%[pout], :128]!\n\t"
: [pa] "+r" (A), [pb] "+r" (B), [buf0] "+r" (p_buf0), [buf1] "+r" (p_buf1),
[pout] "+r" (p_out)
:
: "memory", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
#else
/* Scalar variant of the combine step (step 1). */
for(i=0;i<N/2;i++) {
buf[2*i] = in[2*i]*A[2*i] + in[2*i+1]*A[2*i+1] + in[N-2*i]*B[2*i] - in[N-2*i+1]*B[2*i+1];
buf[2*i+1] = in[2*i+1]*A[2*i] - in[2*i]*A[2*i+1] - in[N-2*i]*B[2*i+1] - in[N-2*i+1]*B[2*i];
#endif
/* Closes whichever of the two loops was selected above. */
}
/* Step 2: sub-plan transform from the working buffer to the output. */
p->plans[0]->transform(p->plans[0], buf, out);
}
/**
 * Creates a 1-D plan for real-valued data of length N.
 *
 * The plan wraps a 1-D sub-plan of length N/2 and two coefficient tables,
 * A and B, of N floats each. For every i in [0, N/2):
 *   A[2i]   = s * (1 - sin(2*pi*i/N))     A[2i+1] = s * (-cos(2*pi*i/N))
 *   B[2i]   = s * (1 + sin(2*pi*i/N))     B[2i+1] = s * ( cos(2*pi*i/N))
 * with s = 0.5 when sign < 0 and s = 1.0 otherwise.
 *
 * @param N     transform length
 * @param sign  sign < 0 installs ffts_execute_1d_real, otherwise
 *              ffts_execute_1d_real_inv; also passed to ffts_init_1d
 * @return the new plan, or NULL if any allocation or the sub-plan creation
 *         fails. Release with ffts_free_1d_real (the plan's `destroy` hook).
 */
ffts_plan_t *ffts_init_1d_real(size_t N, int sign) {
    const double scale = (sign < 0) ? 0.5 : 1.0;
    size_t i; /* same type as N, so the loop bound compares without conversion */
    ffts_plan_t *p = calloc(1, sizeof *p);

    if (p == NULL) return NULL;

    if(sign < 0) p->transform = &ffts_execute_1d_real;
    else p->transform = &ffts_execute_1d_real_inv;
    p->destroy = &ffts_free_1d_real;
    p->N = N;
    p->rank = 1;

    p->plans = calloc(1, sizeof *p->plans);
    if (p->plans != NULL) p->plans[0] = ffts_init_1d(N/2, sign);
    p->buf = valloc(sizeof(float) * 2 * ((N/2) + 1));
    p->A = valloc(sizeof(float) * N);
    p->B = valloc(sizeof(float) * N);

    if (p->plans == NULL || p->plans[0] == NULL ||
        p->buf == NULL || p->A == NULL || p->B == NULL) {
        /* Undo whatever was acquired; free(NULL) is a no-op. */
        if (p->plans != NULL && p->plans[0] != NULL) ffts_free(p->plans[0]);
        free(p->plans);
        free(p->buf);
        free(p->A);
        free(p->B);
        free(p);
        return NULL;
    }

    for (i = 0; i < N/2; i++) {
        const double theta = 2.0f * PI / (double) (N) * (double) i;

        p->A[2 * i] = scale * (1.0 - sin (theta));
        p->A[2 * i + 1] = scale * (-1.0 * cos (theta));
        p->B[2 * i] = scale * (1.0 + sin (theta));
        p->B[2 * i + 1] = scale * (1.0 * cos (theta));
    }

    return p;
}

@ -0,0 +1,53 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_REAL_H__
#define __FFTS_REAL_H__
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include "ffts.h"
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
/* Creates a 1-D plan for real-valued data of length N; sign < 0 and sign >= 0 select the two directions. */
ffts_plan_t *ffts_init_1d_real(size_t N, int sign);
#endif

@ -0,0 +1,177 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_real_nd.h"
#ifdef __ARM_NEON__
#include "neon.h"
#endif
/**
 * Destroys an N-d real plan created by ffts_init_nd_real.
 *
 * Sub-plans may be shared between dimensions, so before a sub-plan is
 * released every later slot holding the same pointer is cleared; each
 * distinct sub-plan is therefore freed exactly once. Afterwards the size
 * arrays, plan table, buffers and the plan itself are freed.
 */
void ffts_free_nd_real(ffts_plan_t *p) {
    int dim;

    for (dim = 0; dim < p->rank; dim++) {
        ffts_plan_t *sub = p->plans[dim];
        int later;

        if (sub == NULL) {
            continue; /* already released through an earlier slot */
        }
        for (later = dim + 1; later < p->rank; later++) {
            if (p->plans[later] == sub) {
                p->plans[later] = NULL;
            }
        }
        ffts_free(sub);
    }

    free(p->Ns);
    free(p->Ms);
    free(p->plans);
    free(p->buf);
    free(p->transpose_buf);
    free(p);
}
/**
 * Element-by-element transpose of a matrix of 64-bit elements:
 * out[i*h + j] = in[j*w + i] for 0 <= i < w and 0 <= j < h.
 *
 * @param in   source matrix, h rows of w elements
 * @param out  destination matrix, w rows of h elements
 * @param w    number of columns of `in`
 * @param h    number of rows of `in`
 * @param buf  unused; kept so the signature matches ffts_transpose
 */
void ffts_scalar_transpose(uint64_t *in, uint64_t *out, int w, int h, uint64_t *buf) {
    const size_t cols = (size_t)w;
    const size_t rows = (size_t)h;
    size_t c = 0;

    (void)buf;

    while (c < cols) {
        size_t r = 0;
        while (r < rows) {
            out[c * rows + r] = in[r * cols + c];
            r++;
        }
        c++;
    }
}
/**
 * Executes an N-d real plan (installed by ffts_init_nd_real when sign < 0).
 *
 * First dimension: sub-plan 0 is run Ns[0] times, reading Ms[0] 32-bit
 * elements per row from `in` and writing Ms[0]/2 + 1 64-bit elements per row
 * into the working buffer, which is then transposed into `out`.
 * Remaining dimensions: the matching sub-plan is run Ns[dim] times on rows of
 * Ms[dim] 64-bit elements from `out` into the working buffer, followed by a
 * transpose back into `out`.
 *
 * @param p    plan created by ffts_init_nd_real
 * @param in   input data
 * @param out  output data (also used as intermediate storage)
 */
void ffts_execute_nd_real(ffts_plan_t *p, const void * in, void * out) {
    uint32_t *real_in = (uint32_t *)in;
    uint64_t *work = p->buf;
    uint64_t *result = (uint64_t *)out;
    size_t dim, row;

    for (row = 0; row < p->Ns[0]; row++) {
        uint32_t *src_row = real_in + (row * p->Ms[0]);
        uint64_t *dst_row = work + (row * (p->Ms[0] / 2 + 1));
        p->plans[0]->transform(p->plans[0], src_row, dst_row);
    }
    ffts_scalar_transpose(work, result, p->Ms[0] / 2 + 1, p->Ns[0], p->transpose_buf);

    for (dim = 1; dim < p->rank; dim++) {
        for (row = 0; row < p->Ns[dim]; row++) {
            const size_t start = row * p->Ms[dim];
            p->plans[dim]->transform(p->plans[dim], result + start, work + start);
        }
        ffts_scalar_transpose(work, result, p->Ms[dim], p->Ns[dim], p->transpose_buf);
    }
}
/**
 * Executes the other direction of an N-d real plan (installed by
 * ffts_init_nd_real when sign >= 0).
 *
 * Sequence: transpose `in` into the working buffer, run sub-plan 0 on Ms[0]
 * rows of Ns[0] elements into `out`, transpose back into the working buffer,
 * then run sub-plan 1 on Ms[1] rows, writing Ns[1] floats per row to `out`.
 * Only plans[0] and plans[1] are referenced here.
 *
 * @param p    plan created by ffts_init_nd_real
 * @param in   input data
 * @param out  output data (also used as intermediate storage)
 */
void ffts_execute_nd_real_inv(ffts_plan_t *p, const void * in, void * out) {
    uint64_t *src = (uint64_t *)in;
    uint64_t *work = p->buf;
    uint64_t *cplx_out = (uint64_t *)out;
    float *real_out = (float *)out;
    size_t row;

    ffts_scalar_transpose(src, work, p->Ms[0], p->Ns[0], p->transpose_buf);

    for (row = 0; row < p->Ms[0]; row++) {
        const size_t start = row * p->Ns[0];
        p->plans[0]->transform(p->plans[0], work + start, cplx_out + start);
    }

    ffts_scalar_transpose(cplx_out, work, p->Ns[0], p->Ms[0], p->transpose_buf);

    for (row = 0; row < p->Ms[1]; row++) {
        p->plans[1]->transform(p->plans[1], work + (row * (p->Ms[0])), real_out + row * p->Ns[1]);
    }
}
ffts_plan_t *ffts_init_nd_real(int rank, size_t *Ns, int sign) {
size_t vol = 1;
ffts_plan_t *p = malloc(sizeof(ffts_plan_t));
if(sign < 0) p->transform = &ffts_execute_nd_real;
else p->transform = &ffts_execute_nd_real_inv;
p->destroy = &ffts_free_nd_real;
p->rank = rank;
p->Ns = malloc(sizeof(size_t) * rank);
p->Ms = malloc(sizeof(size_t) * rank);
p->plans = malloc(sizeof(ffts_plan_t **) * rank);
int i;
for(i=0;i<rank;i++) {
p->Ns[i] = Ns[i];
vol *= Ns[i];
}
p->buf = valloc(sizeof(float) * 2 * vol);
for(i=0;i<rank;i++) {
p->Ms[i] = vol / p->Ns[i];
p->plans[i] = NULL;
int k;
if(sign < 0) {
for(k=1;k<i;k++) {
if(p->Ms[k] == p->Ms[i]) p->plans[i] = p->plans[k];
}
if(!i) p->plans[i] = ffts_init_1d_real(p->Ms[i], sign);
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ms[i], sign);
}else{
for(k=0;k<i;k++) {
if(p->Ns[k] == p->Ns[i]) p->plans[i] = p->plans[k];
}
if(i==rank-1) p->plans[i] = ffts_init_1d_real(p->Ns[i], sign);
else if(!p->plans[i]) p->plans[i] = ffts_init_1d(p->Ns[i], sign);
}
}
if(sign < 0) {
for(i=1;i<rank;i++) {
p->Ns[i] = p->Ns[i] / 2 + 1;
}
}else{
for(i=0;i<rank-1;i++) {
p->Ms[i] = p->Ms[i] / 2 + 1;
}
}
p->transpose_buf = valloc(sizeof(float) * 2 * 8 * 8);
return p;
}
/**
 * Convenience wrapper: creates a rank-2 real plan with extents {N1, N2} by
 * forwarding to ffts_init_nd_real.
 */
ffts_plan_t *ffts_init_2d_real(size_t N1, size_t N2, int sign) {
    size_t extents[2] = { N1, N2 };

    return ffts_init_nd_real(2, extents, sign);
}

@ -0,0 +1,53 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __FFTS_REAL_ND_H__
#define __FFTS_REAL_ND_H__
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include "ffts_nd.h"
#include "ffts_real.h"
#include "ffts.h"
#ifdef HAVE_NEON
#include <arm_neon.h>
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
#endif

@ -0,0 +1,156 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts.h"
#include "macros.h"
#include <stdlib.h>
#define DEBUG(x)
#include "ffts_small.h"
/*
 * Small fixed-size transform built from the L_4_4 / L_2_4 / K_N / S_4 macros,
 * each invoked with a leading argument of 0 (firstpass_16_b is the same
 * sequence with 1). Presumably the 16-point forward transform — confirm
 * against the macro definitions.
 *
 * p   : plan; only p->ws is read, as a table of floats
 * in  : input, read at data_t offsets 0..28 in steps of 4
 * out : output, written at data_t offsets 0..28 in steps of 4
 */
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out)
{
const data_t *din = (const data_t *)in;
data_t *dout = (data_t *)out;
V r0_1,r2_3,r4_5,r6_7,r8_9,r10_11,r12_13,r14_15;
float *LUT8 = p->ws;
/* Load stage: fills the eight vector registers from the input. */
L_4_4(0, din+0,din+16,din+8,din+24,&r0_1,&r2_3,&r8_9,&r10_11);
L_2_4(0, din+4,din+20,din+28,din+12,&r4_5,&r6_7,&r14_15,&r12_13);
/* Three K_N steps, each using two vectors of the table (LUT8+0/4, +8/12, +16/20). */
K_N(0, VLD(LUT8),VLD(LUT8+4),&r0_1,&r2_3,&r4_5,&r6_7);
K_N(0, VLD(LUT8+8),VLD(LUT8+12),&r0_1,&r4_5,&r8_9,&r12_13);
S_4(r0_1,r4_5,r8_9,r12_13,dout+0,dout+8,dout+16,dout+24);
K_N(0, VLD(LUT8+16),VLD(LUT8+20),&r2_3,&r6_7,&r10_11,&r14_15);
S_4(r2_3,r6_7,r10_11,r14_15,dout+4,dout+12,dout+20,dout+28);
}
/*
 * 16-point first-pass kernel; every helper is called with inv = 1
 * (presumably the backward/inverse direction, going by the _b suffix).
 *
 * p   : plan; only p->ws is read, as the start of the lookup table.
 * in  : source buffer, read as data_t at offsets 0..31 (eight 4-wide loads).
 * out : destination buffer, written as data_t at offsets 0..31.
 *
 * All loads from `in` happen inside the two leaf helpers, before any store.
 */
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    float *lut = p->ws;
    V c0, c2, c4, c6, c8, c10, c12, c14;
    V re, im;

    /* Leaf stage: load the input and fill the eight working vectors. */
    L_4_4(1, src, src + 16, src + 8, src + 24, &c0, &c2, &c8, &c10);
    L_2_4(1, src + 4, src + 20, src + 28, src + 12, &c4, &c6, &c14, &c12);

    /* Butterfly on the first four vectors, LUT entries 0 and 4. */
    re = VLD(lut);
    im = VLD(lut + 4);
    K_N(1, re, im, &c0, &c2, &c4, &c6);

    /* First output group, LUT entries 8 and 12. */
    re = VLD(lut + 8);
    im = VLD(lut + 12);
    K_N(1, re, im, &c0, &c4, &c8, &c12);
    S_4(c0, c4, c8, c12, dst, dst + 8, dst + 16, dst + 24);

    /* Second output group, LUT entries 16 and 20. */
    re = VLD(lut + 16);
    im = VLD(lut + 20);
    K_N(1, re, im, &c2, &c6, &c10, &c14);
    S_4(c2, c6, c10, c14, dst + 4, dst + 12, dst + 20, dst + 28);
}
/*
 * 8-point first-pass kernel; helpers are called with inv = 0
 * (presumably the forward direction, going by the _f suffix).
 *
 * p   : plan; the lookup table starts at p->ws + p->ws_is[0].
 * in  : source buffer, read as data_t at offsets 0..15 (four 4-wide loads).
 * out : destination buffer, written as data_t at offsets 0..15.
 */
void firstpass_8_f(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    float *lut = p->ws + p->ws_is[0];
    V c0, c2, c4, c6;
    V re, im;

    /* Leaf stage: all four input vectors are loaded here. */
    L_4_2(0, src, src + 8, src + 4, src + 12, &c0, &c2, &c4, &c6);

    /* Single butterfly using LUT entries 0 and 4, then store in order. */
    re = VLD(lut);
    im = VLD(lut + 4);
    K_N(0, re, im, &c0, &c2, &c4, &c6);
    S_4(c0, c2, c4, c6, dst, dst + 4, dst + 8, dst + 12);
}
/*
 * 8-point first-pass kernel; helpers are called with inv = 1
 * (presumably the backward/inverse direction, going by the _b suffix).
 *
 * p   : plan; the lookup table starts at p->ws + p->ws_is[0].
 * in  : source buffer, read as data_t at offsets 0..15 (four 4-wide loads).
 * out : destination buffer, written as data_t at offsets 0..15.
 */
void firstpass_8_b(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    float *lut = p->ws + p->ws_is[0];
    V c0, c2, c4, c6;
    V re, im;

    /* Leaf stage: all four input vectors are loaded here. */
    L_4_2(1, src, src + 8, src + 4, src + 12, &c0, &c2, &c4, &c6);

    /* Single butterfly using LUT entries 0 and 4, then store in order. */
    re = VLD(lut);
    im = VLD(lut + 4);
    K_N(1, re, im, &c0, &c2, &c4, &c6);
    S_4(c0, c2, c4, c6, dst, dst + 4, dst + 8, dst + 12);
}
/*
 * 4-point kernel on interleaved (re, im) pairs, scalar code only.
 * Reads in[0..7] and writes out[0..7] as data_t; the plan is not used.
 *
 * With inputs x0..x3 (pair k at offsets 2k, 2k+1) the outputs are
 *   y0 = (x0 + x2) + (x1 + x3)
 *   y2 = (x0 + x2) - (x1 + x3)
 *   y1 = (x0 - x2) - j*(x1 - x3)
 *   y3 = (x0 - x2) + j*(x1 - x3)
 * Every input is copied into a local before the first store.
 */
void firstpass_4_f(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    cdata_t x0, x2, x1, x3;
    cdata_t even_sum, even_diff, odd_sum, odd_diff;

    /* Gather the four complex inputs. */
    x0[0] = src[0];
    x0[1] = src[1];
    x2[0] = src[4];
    x2[1] = src[5];
    x1[0] = src[2];
    x1[1] = src[3];
    x3[0] = src[6];
    x3[1] = src[7];

    /* Radix-2 stage on the even pair and on the odd pair. */
    even_sum[0] = x0[0] + x2[0];
    even_sum[1] = x0[1] + x2[1];
    even_diff[0] = x0[0] - x2[0];
    even_diff[1] = x0[1] - x2[1];
    odd_sum[0] = x1[0] + x3[0];
    odd_sum[1] = x1[1] + x3[1];
    odd_diff[0] = x1[0] - x3[0];
    odd_diff[1] = x1[1] - x3[1];

    /* Outputs 0 and 2: plain sum and difference. */
    dst[0] = even_sum[0] + odd_sum[0];
    dst[1] = even_sum[1] + odd_sum[1];
    dst[4] = even_sum[0] - odd_sum[0];
    dst[5] = even_sum[1] - odd_sum[1];

    /* Outputs 1 and 3: the odd difference enters with its parts swapped. */
    dst[2] = even_diff[0] + odd_diff[1];
    dst[3] = even_diff[1] - odd_diff[0];
    dst[6] = even_diff[0] - odd_diff[1];
    dst[7] = even_diff[1] + odd_diff[0];
}
/*
 * 4-point kernel on interleaved (re, im) pairs, scalar code only.
 * Reads in[0..7] and writes out[0..7] as data_t; the plan is not used.
 *
 * With inputs x0..x3 (pair k at offsets 2k, 2k+1) the outputs are
 *   y0 = (x0 + x2) + (x1 + x3)
 *   y2 = (x0 + x2) - (x1 + x3)
 *   y1 = (x0 - x2) + j*(x1 - x3)
 *   y3 = (x0 - x2) - j*(x1 - x3)
 * i.e. the sign of the rotated term is opposite to firstpass_4_f.
 * Every input is copied into a local before the first store.
 */
void firstpass_4_b(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    cdata_t x0, x2, x1, x3;
    cdata_t even_sum, even_diff, odd_sum, odd_diff;

    /* Gather the four complex inputs. */
    x0[0] = src[0];
    x0[1] = src[1];
    x2[0] = src[4];
    x2[1] = src[5];
    x1[0] = src[2];
    x1[1] = src[3];
    x3[0] = src[6];
    x3[1] = src[7];

    /* Radix-2 stage on the even pair and on the odd pair. */
    even_sum[0] = x0[0] + x2[0];
    even_sum[1] = x0[1] + x2[1];
    even_diff[0] = x0[0] - x2[0];
    even_diff[1] = x0[1] - x2[1];
    odd_sum[0] = x1[0] + x3[0];
    odd_sum[1] = x1[1] + x3[1];
    odd_diff[0] = x1[0] - x3[0];
    odd_diff[1] = x1[1] - x3[1];

    /* Outputs 0 and 2: plain sum and difference. */
    dst[0] = even_sum[0] + odd_sum[0];
    dst[1] = even_sum[1] + odd_sum[1];
    dst[4] = even_sum[0] - odd_sum[0];
    dst[5] = even_sum[1] - odd_sum[1];

    /* Outputs 1 and 3: the odd difference enters with its parts swapped. */
    dst[2] = even_diff[0] - odd_diff[1];
    dst[3] = even_diff[1] + odd_diff[0];
    dst[6] = even_diff[0] + odd_diff[1];
    dst[7] = even_diff[1] - odd_diff[0];
}
/*
 * 2-point kernel on interleaved (re, im) pairs: writes the sum of the two
 * inputs to out[0..1] and their difference (first minus second) to
 * out[2..3].  The plan is not used.  Both inputs are read before any store.
 */
void firstpass_2(ffts_plan_t *p, const void *in, void *out)
{
    const data_t *src = (const data_t *)in;
    data_t *dst = (data_t *)out;
    cdata_t first, second, sum, diff;

    first[0] = src[0];
    first[1] = src[1];
    second[0] = src[2];
    second[1] = src[3];

    sum[0] = first[0] + second[0];
    sum[1] = first[1] + second[1];
    diff[0] = first[0] - second[0];
    diff[1] = first[1] - second[1];

    dst[0] = sum[0];
    dst[1] = sum[1];
    dst[2] = diff[0];
    dst[3] = diff[1];
}

@ -0,0 +1,13 @@
/*
 * Prototypes for the small fixed-size first-pass kernels.  Every kernel
 * takes (plan, input buffer, output buffer).
 * NOTE(review): ffts_plan_t is not declared in this header; it presumably
 * relies on ffts.h being included first -- confirm.
 */
#ifndef __FFTS_SMALL_H__
#define __FFTS_SMALL_H__
/* Size-16 kernels; the _f/_b suffixes presumably mean forward/backward. */
void firstpass_16_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_16_b(ffts_plan_t * p, const void * in, void * out);
/* Size-8 kernels. */
void firstpass_8_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_8_b(ffts_plan_t * p, const void * in, void * out);
/* Size-4 kernels. */
void firstpass_4_f(ffts_plan_t * p, const void * in, void * out);
void firstpass_4_b(ffts_plan_t * p, const void * in, void * out);
/* Size-2 kernel; a single variant with no direction suffix. */
void firstpass_2(ffts_plan_t * p, const void * in, void * out);
#endif

@ -0,0 +1,101 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "ffts_static.h"
/*
 * Recursive driver for the "_i" static NEON kernels.
 *
 * p    : plan; p->ws, p->ws_is and p->N are read.
 * data : buffer processed in place; sub-blocks are addressed in floats.
 * N    : size of the current block.  N < 16 is a no-op and N == 16 runs the
 *        x4 kernel directly.  __builtin_ctzl(N) is used as the table index,
 *        so N is presumably always a power of two -- confirm with callers.
 */
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) {
    if (N < 16) {
        return;
    }
    if (N == 16) {
        neon_static_x4_i(data, N, p->ws);
        return;
    }

    const size_t half = N >> 1;
    const size_t quarter = N >> 2;
    const size_t eighth = N >> 3;
    /* Table offset for this level, looked up by ctz(N) - 4 and doubled. */
    float *ws = ((float *)(p->ws)) + (p->ws_is[__builtin_ctzl(N)-4] << 1);

    /* Five sub-blocks, processed in this fixed order. */
    ffts_static_rec_i(p, data, quarter);
    ffts_static_rec_i(p, data + half, eighth);
    ffts_static_rec_i(p, data + half + quarter, eighth);
    ffts_static_rec_i(p, data + N, quarter);
    ffts_static_rec_i(p, data + N + half, quarter);

    /* The outermost level (N equal to the plan size) uses the _t variant. */
    if (N != p->N) {
        neon_static_x8_i(data, N, ws);
    } else {
        neon_static_x8_t_i(data, N, ws);
    }
}
/*
 * Recursive driver for the "_f" static NEON kernels.
 *
 * p    : plan; p->ws, p->ws_is and p->N are read.
 * data : buffer processed in place; sub-blocks are addressed in floats.
 * N    : size of the current block.  N < 16 is a no-op and N == 16 runs the
 *        x4 kernel directly.  __builtin_ctzl(N) is used as the table index,
 *        so N is presumably always a power of two -- confirm with callers.
 */
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) {
    if (N < 16) {
        return;
    }
    if (N == 16) {
        neon_static_x4_f(data, N, p->ws);
        return;
    }

    const size_t half = N >> 1;
    const size_t quarter = N >> 2;
    const size_t eighth = N >> 3;
    /* Table offset for this level, looked up by ctz(N) - 4 and doubled. */
    float *ws = ((float *)(p->ws)) + (p->ws_is[__builtin_ctzl(N)-4] << 1);

    /* Five sub-blocks, processed in this fixed order. */
    ffts_static_rec_f(p, data, quarter);
    ffts_static_rec_f(p, data + half, eighth);
    ffts_static_rec_f(p, data + half + quarter, eighth);
    ffts_static_rec_f(p, data + N, quarter);
    ffts_static_rec_f(p, data + N + half, quarter);

    /* The outermost level (N equal to the plan size) uses the _t variant. */
    if (N != p->N) {
        neon_static_x8_f(data, N, ws);
    } else {
        neon_static_x8_t_f(data, N, ws);
    }
}
/*
 * Entry point for the "_f" static transform: run the first pass from `in`
 * to `out`, then the recursive passes in place on `out`.
 * The first-pass routine is picked by the parity of ctz(p->N): odd selects
 * neon_static_o_f, even selects neon_static_e_f.
 */
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out) {
    const int ctz_is_odd = __builtin_ctzl(p->N) & 1;

    if (!ctz_is_odd) {
        neon_static_e_f(p, in, out);
    } else {
        neon_static_o_f(p, in, out);
    }

    ffts_static_rec_f(p, out, p->N);
}
/*
 * Entry point for the "_i" static transform: run the first pass from `in`
 * to `out`, then the recursive passes in place on `out`.
 * The first-pass routine is picked by the parity of ctz(p->N): odd selects
 * neon_static_o_i, even selects neon_static_e_i.
 */
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out) {
    const int ctz_is_odd = __builtin_ctzl(p->N) & 1;

    if (!ctz_is_odd) {
        neon_static_e_i(p, in, out);
    } else {
        neon_static_o_i(p, in, out);
    }

    ffts_static_rec_i(p, out, p->N);
}

@ -0,0 +1,46 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Public entry points of the static (non-generated) NEON transform path.
 * The _f and _i suffixes select the matching neon_static_*_f / _i kernels;
 * presumably forward and inverse -- confirm.
 */
#ifndef __FFTS_STATIC_H__
#define __FFTS_STATIC_H__
#include "ffts.h"
#include "neon.h"
/* Recursive in-place pass over `data` for a block of size N. */
void ffts_static_rec_f(ffts_plan_t *p, float *data, size_t N) ;
/* Full transform: first pass from `in` to `out`, then recursion on `out`. */
void ffts_static_transform_f(ffts_plan_t *p, const void *in, void *out);
/* Same pair of entry points for the _i kernels. */
void ffts_static_rec_i(ffts_plan_t *p, float *data, size_t N) ;
void ffts_static_transform_i(ffts_plan_t *p, const void *in, void *out);
#endif

@ -0,0 +1,206 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_ALPHA_H__
#define __MACROS_ALPHA_H__

/*
 * Portable scalar implementation of the V vector primitives.  macros.h
 * selects this header for __alpha__ and HAVE_VFP builds.  A V holds two
 * interleaved complex numbers: (r1, i1) and (r2, i2).
 */

#include <math.h>
#include <stdint.h>

#ifdef __alpha__
#define restrict
#endif

typedef struct {float r1, i1, r2, i2;} V;

/* The alignment argument is ignored: plain malloc()/free() are used. */
#define FFTS_MALLOC(d,a) malloc(d)
#define FFTS_FREE(d) free(d)

/* Build a V from four floats listed from the last lane (i2) down to r1. */
#define VLIT4(f3,f2,f1,f0) ((V){f0,f1,f2,f3})

/* Lane-wise x + y. */
static inline V VADD(V x, V y)
{
    V z;
    z.r1 = x.r1 + y.r1;
    z.i1 = x.i1 + y.i1;
    z.r2 = x.r2 + y.r2;
    z.i2 = x.i2 + y.i2;
    return z;
}

/* Lane-wise x - y. */
static inline V VSUB(V x, V y)
{
    V z;
    z.r1 = x.r1 - y.r1;
    z.i1 = x.i1 - y.i1;
    z.r2 = x.r2 - y.r2;
    z.i2 = x.i2 - y.i2;
    return z;
}

/* Lane-wise x * y (four independent real products, not a complex multiply). */
static inline V VMUL(V x, V y)
{
    V z;
    z.r1 = x.r1 * y.r1;
    z.i1 = x.i1 * y.i1;
    z.r2 = x.r2 * y.r2;
    z.i2 = x.i2 * y.i2;
    return z;
}

/*
 * XOR the bit patterns of two floats.  A union is used for the
 * reinterpretation; casting the float *value* to an integer would convert
 * it numerically (e.g. -0.0f becomes 0) and is undefined for negatives.
 */
static inline float ffts_alpha_xor_bits(float a, float b)
{
    union { float f; uint32_t u; } ua, ub;
    ua.f = a;
    ub.f = b;
    ua.u ^= ub.u;
    return ua.f;
}

/*
 * Lane-wise bitwise XOR of the float representations, matching the
 * semantics of the SIMD back ends (e.g. flipping signs with a -0.0f mask).
 */
static inline V VXOR(V x, V y)
{
    V r;
    r.r1 = ffts_alpha_xor_bits(x.r1, y.r1);
    r.i1 = ffts_alpha_xor_bits(x.i1, y.i1);
    r.r2 = ffts_alpha_xor_bits(x.r2, y.r2);
    r.i2 = ffts_alpha_xor_bits(x.i2, y.i2);
    return r;
}

/* Swap the real and imaginary lane inside each complex pair. */
static inline V VSWAPPAIRS(V x)
{
    V z;
    z.r1 = x.i1;
    z.i1 = x.r1;
    z.r2 = x.i2;
    z.i2 = x.r2;
    return z;
}

/* First complex value from x, second complex value from y. */
static inline V VBLEND(V x, V y)
{
    V z;
    z.r1 = x.r1;
    z.i1 = x.i1;
    z.r2 = y.r2;
    z.i2 = y.i2;
    return z;
}

/* Second complex value of x followed by second complex value of y. */
static inline V VUNPACKHI(V x, V y)
{
    V z;
    z.r1 = x.r2;
    z.i1 = x.i2;
    z.r2 = y.r2;
    z.i2 = y.i2;
    return z;
}

/* First complex value of x followed by first complex value of y. */
static inline V VUNPACKLO(V x, V y)
{
    V z;
    z.r1 = x.r1;
    z.i1 = x.i1;
    z.r2 = y.r1;
    z.i2 = y.i1;
    return z;
}

/* Duplicate each real lane into its imaginary neighbour. */
static inline V VDUPRE(V x)
{
    V z;
    z.r1 = x.r1;
    z.i1 = x.r1;
    z.r2 = x.r2;
    z.i2 = x.r2;
    return z;
}

/* Duplicate each imaginary lane into its real neighbour. */
static inline V VDUPIM(V x)
{
    V z;
    z.r1 = x.i1;
    z.i1 = x.i1;
    z.r2 = x.i2;
    z.i2 = x.i2;
    return z;
}

/* Lane-wise re*d - im*swap(d), where swap exchanges each (re, im) pair. */
static inline V IMUL(V d, V re, V im)
{
    re = VMUL(re, d);
    im = VMUL(im, VSWAPPAIRS(d));
    return VSUB(re, im);
}

/* Lane-wise re*d + im*swap(d), where swap exchanges each (re, im) pair. */
static inline V IMULJ(V d, V re, V im)
{
    re = VMUL(re, d);
    im = VMUL(im, VSWAPPAIRS(d));
    return VADD(re, im);
}

/* Negate the real lanes when inv is non-zero, else the imaginary lanes. */
static inline V MULI(int inv, V x)
{
    V z;
    if (inv) {
        z.r1 = -x.r1;
        z.i1 = x.i1;
        z.r2 = -x.r2;
        z.i2 = x.i2;
    } else {
        z.r1 = x.r1;
        z.i1 = -x.i1;
        z.r2 = x.r2;
        z.i2 = -x.i2;
    }
    return z;
}

/*
 * Multiply both complex values by j (inv == 0) or by -j (inv != 0):
 * (r, i) becomes (-i, r) or (i, -r) respectively.
 */
static inline V IMULI(int inv, V x)
{
    return VSWAPPAIRS(MULI(inv, x));
}

/* Load four consecutive floats starting at s as a V. */
static inline V VLD(const void *s)
{
    const V *d = (const V *)s;
    return *d;
}

/* Store the four lanes of s to consecutive floats starting at d. */
static inline void VST(void *d, V s)
{
    V *r = (V *)d;
    *r = s;
}

#endif

@ -0,0 +1,137 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_ALTIVEC_H__
#define __MACROS_ALTIVEC_H__

/*
 * AltiVec implementation of the V vector primitives (selected by macros.h
 * for __powerpc__ builds).  A V is one `vector float` holding two
 * interleaved complex numbers.
 */

#include <math.h>
#include <altivec.h>

#define restrict

typedef vector float V;
typedef vector unsigned char VUC;

/*
 * Allocation: Apple toolchains use vec_malloc()/vec_free(); everything else
 * uses memalign(16, ...).  The predefined macro is spelled __APPLE__; the
 * lowercase spelling is never defined, which left the first branch dead.
 */
#ifdef __APPLE__
#define FFTS_MALLOC(d,a) vec_malloc(d)
#define FFTS_FREE(d) vec_free(d)
#else
/* It appears vec_malloc() and friends are not implemented on Linux */
#include <malloc.h>
#define FFTS_MALLOC(d,a) memalign(16,d)
#define FFTS_FREE(d) free(d)
#endif

/* Note: arguments are in element order here (f0 is element 0). */
#define VLIT4(f0,f1,f2,f3) ((V){f0, f1, f2, f3})

/* Lane-wise arithmetic; VMUL is a multiply-add with a zero addend. */
#define VADD(x,y) vec_add(x,y)
#define VSUB(x,y) vec_sub(x,y)
#define VMUL(x,y) vec_madd(x,y,(V){0})
#define VMULADD(x,y,z) vec_madd(x,y,z)
#define VNMULSUB(x,y,z) vec_nmsub(x,y,z)
#define VXOR(x,y) vec_xor((x),(y))

/* Swap the two 4-byte elements inside each 8-byte pair. */
#define VSWAPPAIRS(x) \
vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03, \
0x0c,0x0d,0x0e,0x0f,0x08,0x09,0x0a,0x0b})
/* Bytes 0-7 of x followed by bytes 8-15 of y. */
#define VBLEND(x,y) \
vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
/* Bytes 8-15 of x followed by bytes 8-15 of y. */
#define VUNPACKHI(x,y) \
vec_perm(x,y,(VUC){0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f, \
0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f})
/* Bytes 0-7 of x followed by bytes 0-7 of y. */
#define VUNPACKLO(x,y) \
vec_perm(x,y,(VUC){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, \
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17})
/* Duplicate the first element of each pair (both operands are x). */
#define VDUPRE(x) \
vec_perm(x,x,(VUC){0x00,0x01,0x02,0x03,0x00,0x01,0x02,0x03, \
0x18,0x19,0x1a,0x1b,0x18,0x19,0x1a,0x1b})
/* Duplicate the second element of each pair (both operands are x). */
#define VDUPIM(x) \
vec_perm(x,x,(VUC){0x04,0x05,0x06,0x07,0x04,0x05,0x06,0x07, \
0x1c,0x1d,0x1e,0x1f,0x1c,0x1d,0x1e,0x1f})

/* Lane-wise re*d - im*swap(d), where swap exchanges each (re, im) pair. */
static inline V IMUL(V d, V re, V im)
{
im = VMUL(im, VSWAPPAIRS(d));
re = VMUL(re, d);
return VSUB(re, im);
}

/* Lane-wise re*d + im*swap(d), computed with one multiply-add. */
static inline V IMULJ(V d, V re, V im)
{
im = VMUL(im, VSWAPPAIRS(d));
return VMULADD(re, d, im);
}

/*
 * MULI flips the sign bit of elements 0 and 2 when inv is non-zero, and of
 * elements 1 and 3 otherwise.  Two spellings exist only because of the
 * compiler problem noted below.
 */
#ifndef __GNUC__
/* gcc (4.6 and 4.7) ICEs on this code! */
static inline V MULI(int inv, V x)
{
return VXOR(x, inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f));
}
#else
/* but compiles this fine... */
static inline V MULI(int inv, V x)
{
V t;
t = inv ? VLIT4(-0.0f,0.0f,-0.0f,0.0f) : VLIT4(0.0f,-0.0f,0.0f,-0.0f);
return VXOR(x, t);
}
#endif

/* MULI followed by a swap of each (re, im) pair. */
static inline V IMULI(int inv, V x)
{
return VSWAPPAIRS(MULI(inv, x));
}

/* Load one vector from s (dereferenced as a V, so s must suit that type). */
static inline V VLD(const void *s)
{
V *d = (V *)s;
return *d;
}

/* Store one vector to d (dereferenced as a V, so d must suit that type). */
static inline void VST(void *d, V s)
{
V *r = (V *)d;
*r = s;
}

#endif

@ -0,0 +1,96 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_NEON_H__
#define __MACROS_NEON_H__

/*
 * ARM NEON implementation of the V vector primitives (selected by macros.h
 * when HAVE_NEON is defined).  A V is one float32x4_t.
 */

#include "neon.h"
#include <arm_neon.h>

typedef float32x4_t V;
typedef float32x4x2_t VS;

/* Lane-wise arithmetic; ADD/SUB/MUL expand to the same intrinsics as the V-prefixed names. */
#define ADD vaddq_f32
#define SUB vsubq_f32
#define MUL vmulq_f32
#define VADD vaddq_f32
#define VSUB vsubq_f32
#define VMUL vmulq_f32
/* Bitwise XOR of the float bit patterns, via reinterpretation as u32. */
#define VXOR(x,y) (vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(x), vreinterpretq_u32_f32(y))))

/* Loads and stores (plain and 2-way de-interleaving). */
#define VST vst1q_f32
#define VLD vld1q_f32
#define VST2 vst2q_f32
#define VLD2 vld2q_f32

/* Lane shuffles. */
#define VSWAPPAIRS(x) (vrev64q_f32(x))
#define VUNPACKHI(a,b) (vcombine_f32(vget_high_f32(a), vget_high_f32(b)))
#define VUNPACKLO(a,b) (vcombine_f32(vget_low_f32(a), vget_low_f32(b)))
#define VBLEND(x,y) (vcombine_f32(vget_low_f32(x), vget_high_f32(y)))

/* Build a V from four values given last lane first (f0 ends up in lane 0). */
__INLINE V VLIT4(data_t f3, data_t f2, data_t f1, data_t f0) {
    data_t __attribute__ ((aligned(16))) lanes[4] = {f0, f1, f2, f3};
    return VLD(lanes);
}

#define VDUPRE(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),0), vdup_lane_f32(vget_high_f32(r),0))
#define VDUPIM(r) vcombine_f32(vdup_lane_f32(vget_low_f32(r),1), vdup_lane_f32(vget_high_f32(r),1))

/* The alignment argument is ignored; valloc() is used for every request. */
#define FFTS_MALLOC(d,a) (valloc(d))
#define FFTS_FREE(d) (free(d))

/* Store both halves of a vector pair to eight consecutive values at addr. */
__INLINE void STORESPR(data_t * addr, VS p) {
    VST(addr, p.val[0]);
    VST(addr + 4, p.val[1]);
}

/*
 * Flip the sign of lanes 0 and 2 (inv != 0) or lanes 1 and 3 (inv == 0),
 * then swap the two lanes of each pair.
 */
__INLINE V IMULI(int inv, V a) {
    const V sign_mask = inv ? VLIT4(0.0f, -0.0f, 0.0f, -0.0f)
                            : VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
    return VSWAPPAIRS(VXOR(a, sign_mask));
}

/* Lane-wise re*d - im*swap(d). */
__INLINE V IMUL(V d, V re, V im) {
    const V direct = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));
    return VSUB(direct, crossed);
}

/* Lane-wise re*d + im*swap(d). */
__INLINE V IMULJ(V d, V re, V im) {
    const V direct = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));
    return VADD(direct, crossed);
}

#endif

@ -0,0 +1,84 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __SSE_FLOAT_H__
#define __SSE_FLOAT_H__

/*
 * SSE implementation of the V vector primitives (selected by macros.h when
 * HAVE_SSE is defined).  A V is one __m128.
 */

#include <xmmintrin.h>

typedef __m128 V;

/* Lane-wise arithmetic and bitwise XOR. */
#define VADD _mm_add_ps
#define VSUB _mm_sub_ps
#define VMUL _mm_mul_ps
#define VXOR _mm_xor_ps

/* Loads and stores use the aligned forms of the intrinsics. */
#define VST _mm_store_ps
#define VLD _mm_load_ps

/* Lane shuffles. */
#define VSWAPPAIRS(x) (_mm_shuffle_ps(x,x,_MM_SHUFFLE(2,3,0,1)))
#define VUNPACKHI(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,3,2)))
#define VUNPACKLO(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(1,0,1,0)))
#define VBLEND(x,y) (_mm_shuffle_ps(x,y,_MM_SHUFFLE(3,2,1,0)))

/* Arguments are given last lane first, as for _mm_set_ps. */
#define VLIT4 _mm_set_ps

#define VDUPRE(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(2,2,0,0)))
#define VDUPIM(r) (_mm_shuffle_ps(r,r,_MM_SHUFFLE(3,3,1,1)))

#define FFTS_MALLOC(d,a) (_mm_malloc(d,a))
#define FFTS_FREE(d) (_mm_free(d))

/*
 * Flip the sign of lanes 0 and 2 (inv != 0) or lanes 1 and 3 (inv == 0),
 * then swap the two lanes of each pair.
 */
__INLINE V IMULI(int inv, V a) {
    const V sign_mask = inv ? VLIT4(0.0f, -0.0f, 0.0f, -0.0f)
                            : VLIT4(-0.0f, 0.0f, -0.0f, 0.0f);
    const V flipped = VXOR(a, sign_mask);
    return VSWAPPAIRS(flipped);
}

/* Lane-wise re*d - im*swap(d). */
__INLINE V IMUL(V d, V re, V im) {
    const V direct = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));
    return VSUB(direct, crossed);
}

/* Lane-wise re*d + im*swap(d). */
__INLINE V IMULJ(V d, V re, V im) {
    const V direct = VMUL(re, d);
    const V crossed = VMUL(im, VSWAPPAIRS(d));
    return VADD(direct, crossed);
}

#endif

@ -0,0 +1,161 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2013, Michael J. Cree <mcree@orcon.net.nz>
Copyright (c) 2012, 2013, Anthony M. Blake <amb@anthonix.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MACROS_H__
#define __MACROS_H__

/*
 * Back-end selection plus the shared leaf/butterfly helpers built on the
 * V primitives (VLD, VST, VADD, VSUB, IMUL, IMULJ, IMULI, VUNPACK*, VBLEND).
 */

#if defined(HAVE_NEON)
#include "macros-neon.h"
#elif defined(__alpha__)
#include "macros-alpha.h"
#elif defined(__powerpc__)
#include "macros-altivec.h"
#endif

#ifdef HAVE_VFP
#include "macros-alpha.h"
#endif

#ifdef HAVE_SSE
#include "macros-sse.h"
#endif

/* Replace (*a, *b) by (VUNPACKLO(*a, *b), VUNPACKHI(*a, *b)). */
static inline void TX2(V *a, V *b)
{
    const V lo = VUNPACKLO(*a, *b);
    const V hi = VUNPACKHI(*a, *b);

    *a = lo;
    *b = hi;
}

/*
 * In-place butterfly over four vectors.  *r2 and *r3 are first combined
 * with (re, im) through IMUL and IMULJ; their sum is added to/subtracted
 * from *r0, and their IMULI-rotated difference is subtracted from/added to
 * *r1.  All four inputs are read before the first store.
 */
static inline void K_N(int inv, V re, V im, V *r0, V *r1, V *r2, V *r3)
{
    const V u0 = *r0;
    const V u1 = *r1;
    const V prod_a = IMUL(*r2, re, im);
    const V prod_b = IMULJ(*r3, re, im);
    const V sum = VADD(prod_a, prod_b);
    const V rot = IMULI(inv, VSUB(prod_a, prod_b));

    *r2 = VSUB(u0, sum);
    *r0 = VADD(u0, sum);
    *r3 = VADD(u1, rot);
    *r1 = VSUB(u1, rot);
}

/* Store four vectors to four destinations, in argument order. */
static inline void S_4(V r0, V r1, V r2, V r3,
                       data_t * restrict o0, data_t * restrict o1,
                       data_t * restrict o2, data_t * restrict o3)
{
    VST(o0, r0);
    VST(o1, r1);
    VST(o2, r2);
    VST(o3, r3);
}

/*
 * Leaf helper: loads four vectors and produces four outputs.  *r0 and *r1
 * take the low halves of the plain sums/differences; *r3 and *r2 take the
 * high halves after a further add/sub stage in which the first difference
 * has been rotated by IMULI.
 */
static inline void L_2_4(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    const V x0 = VLD(i0);
    const V x1 = VLD(i1);
    const V x2 = VLD(i2);
    const V x3 = VLD(i3);

    const V sum01 = VADD(x0, x1);
    const V diff01 = VSUB(x0, x1);
    const V sum23 = VADD(x2, x3);
    const V diff23 = VSUB(x2, x3);

    *r0 = VUNPACKLO(sum01, diff01);
    *r1 = VUNPACKLO(sum23, diff23);

    const V rot01 = IMULI(inv, diff01);
    const V y0 = VADD(sum23, sum01);
    const V y2 = VSUB(sum23, sum01);
    const V y1 = VSUB(diff23, rot01);
    const V y3 = VADD(diff23, rot01);

    *r3 = VUNPACKHI(y0, y1);
    *r2 = VUNPACKHI(y2, y3);
}

/*
 * Leaf helper: loads four vectors, runs two add/sub stages (the second
 * difference is rotated by IMULI), transposes the results pairwise with
 * TX2 and writes them to *r0, *r2, *r1, *r3 in that order.
 */
static inline void L_4_4(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    const V x0 = VLD(i0);
    const V x1 = VLD(i1);
    const V x2 = VLD(i2);
    const V x3 = VLD(i3);

    const V sum01 = VADD(x0, x1);
    const V diff01 = VSUB(x0, x1);
    const V sum23 = VADD(x2, x3);
    const V rot23 = IMULI(inv, VSUB(x2, x3));

    V y0 = VADD(sum01, sum23);
    V y2 = VSUB(sum01, sum23);
    V y1 = VSUB(diff01, rot23);
    V y3 = VADD(diff01, rot23);

    TX2(&y0, &y1);
    TX2(&y2, &y3);

    *r0 = y0;
    *r2 = y1;
    *r1 = y2;
    *r3 = y3;
}

/*
 * Leaf helper: loads four vectors; the last two are first recombined with
 * VBLEND.  *r2 and *r3 take the high halves of the first-stage sums and
 * differences (before the rotation); *r0 and *r1 take the low halves of
 * the second stage, in which the second difference is rotated by IMULI.
 */
static inline void L_4_2(int inv,
                         const data_t * restrict i0, const data_t * restrict i1,
                         const data_t * restrict i2, const data_t * restrict i3,
                         V *r0, V *r1, V *r2, V *r3)
{
    const V x0 = VLD(i0);
    const V x1 = VLD(i1);
    const V raw2 = VLD(i2);
    const V raw3 = VLD(i3);

    const V x2 = VBLEND(raw2, raw3);
    const V x3 = VBLEND(raw3, raw2);

    const V sum01 = VADD(x0, x1);
    const V diff01 = VSUB(x0, x1);
    const V sum23 = VADD(x2, x3);
    const V diff23 = VSUB(x2, x3);

    *r2 = VUNPACKHI(sum01, diff01);
    *r3 = VUNPACKHI(sum23, diff23);

    const V rot23 = IMULI(inv, diff23);
    const V y0 = VADD(sum01, sum23);
    const V y2 = VSUB(sum01, sum23);
    const V y1 = VSUB(diff01, rot23);
    const V y3 = VADD(diff01, rot23);

    *r0 = VUNPACKLO(y0, y1);
    *r1 = VUNPACKLO(y2, y3);
}

#endif

@ -0,0 +1,65 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * C prototypes for the hand-written NEON assembly routines.
 */
#ifndef __NEON_H__
#define __NEON_H__
#include "ffts.h"
/* Kernels taking (float pointer, size, float pointer); in the assembly the
   third argument is annotated as the LUT. */
void neon_x4(float *, size_t, float *);
void neon_x8(float *, size_t, float *);
void neon_x8_t(float *, size_t, float *);
/* Declared with empty parameter lists.
   NOTE(review): presumably code labels used as addresses rather than
   functions called from C -- confirm. */
void neon_ee();
void neon_oo();
void neon_eo();
void neon_oe();
void neon_end();
/* Transpose helpers operating on 64-bit elements. */
void neon_transpose(uint64_t *in, uint64_t *out, int w, int h);
void neon_transpose_to_buf(uint64_t *in, uint64_t *out, int w);
//typedef struct _ffts_plan_t ffts_plan_t;
/* Static-path kernels, _f variants (used by ffts_static_transform_f and
   ffts_static_rec_f). */
void neon_static_e_f(ffts_plan_t * , const void * , void * );
void neon_static_o_f(ffts_plan_t * , const void * , void * );
void neon_static_x4_f(float *, size_t, float *);
void neon_static_x8_f(float *, size_t, float *);
void neon_static_x8_t_f(float *, size_t, float *);
/* Static-path kernels, _i variants (used by ffts_static_transform_i and
   ffts_static_rec_i). */
void neon_static_e_i(ffts_plan_t * , const void * , void * );
void neon_static_o_i(ffts_plan_t * , const void * , void * );
void neon_static_x4_i(float *, size_t, float *);
void neon_static_x8_i(float *, size_t, float *);
void neon_static_x8_t_i(float *, size_t, float *);
#endif

@ -0,0 +1,738 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@ neon_x4: in-place pass over four 32-byte blocks of the buffer at r0.
@ Register use: r0 = data pointer, r1 = size, r2 = LUT pointer; presumably
@ the three arguments of the C prototype in neon.h -- confirm.
@ NOTE(review): r4-r6 and q4-q7 are written with no save or restore visible
@ here; callers presumably account for that -- confirm before calling from C.
.align 4
#ifdef __APPLE__
.globl _neon_x4
_neon_x4:
#else
.globl neon_x4
neon_x4:
#endif
@ add r3, r0, #0
@ Block addresses: r0, r4 = r0 + 2*r1, r5 = r0 + 4*r1, r6 = r4 + 4*r1
@ (byte offsets).  Each block is loaded as two q registers.
vld1.32 {q8,q9}, [r0, :128]
add r4, r0, r1, lsl #1
vld1.32 {q10,q11}, [r4, :128]
add r5, r0, r1, lsl #2
vld1.32 {q12,q13}, [r5, :128]
add r6, r4, r1, lsl #2
vld1.32 {q14,q15}, [r6, :128]
@ Two LUT vectors.
vld1.32 {q2,q3}, [r2, :128]
@ Eight lane-wise products of the r5/r6 blocks with the two LUT vectors.
vmul.f32 q0, q13, q3
vmul.f32 q5, q12, q2
vmul.f32 q1, q14, q2
vmul.f32 q4, q14, q3
vmul.f32 q14, q12, q3
vmul.f32 q13, q13, q2
vmul.f32 q12, q15, q3
vmul.f32 q2, q15, q2
@ Combine the products pairwise (two differences, two sums).
vsub.f32 q0, q5, q0
vadd.f32 q13, q13, q14
vadd.f32 q12, q12, q1
vsub.f32 q1, q2, q4
@ Sum and difference of the two combined results.
vadd.f32 q15, q0, q12
vsub.f32 q12, q0, q12
vadd.f32 q14, q13, q1
vsub.f32 q13, q13, q1
@ Final add/sub against the r0 and r4 blocks, interleaved with the stores.
vadd.f32 q0, q8, q15
vadd.f32 q1, q9, q14
vsub.f32 q2, q10, q13 @
vsub.f32 q4, q8, q15
vadd.f32 q3, q11, q12 @
vst1.32 {q0,q1}, [r0, :128]
vsub.f32 q5, q9, q14
vadd.f32 q6, q10, q13 @
vsub.f32 q7, q11, q12 @
@ Write the remaining three blocks back to where they were loaded from.
vst1.32 {q2,q3}, [r4, :128]
vst1.32 {q4,q5}, [r5, :128]
vst1.32 {q6,q7}, [r6, :128]
bx lr
@ neon_x8
@ In-place pass over eight equally spaced data streams.
@ Register use, as read from the code below:
@   r0  = address of stream 0; stream k starts at r0 + k*r1 (r3..r10)
@   r1  = byte distance between streams; also sets the trip count (r1 >> 5)
@   r2  = address of the multiplier table (LUT), walked through r12
@   r11 = negative loop counter, counted up to zero
@ Each loop pass reads 96 bytes from the LUT (three q2/q3 pairs), reads
@ 32 bytes from every stream, and writes 32 bytes back to every stream,
@ advancing each stream pointer by 32 bytes.
@ r3-r12 and q0-q15 are overwritten and nothing is saved in this routine;
@ it returns with bx lr.
@ The counter starts at -(r1 >> 5) and the loop only ends when it reaches
@ exactly zero, so r1 >> 5 must be non-zero.
@ NOTE(review): lines ending in a bare @ use the opposite add/sub from the
@ corresponding lines of neon_static_x8_f; presumably direction-dependent
@ sign choices - confirm.
.align 4
#ifdef __APPLE__
.globl _neon_x8
_neon_x8:
#else
.globl neon_x8
neon_x8:
#endif
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = 0 - (r1 >> 5)
sub r11, r11, r1, lsr #5
neon_x8_loop:
@ First LUT pair: applied to data3 (q14/q15) and data2 (q10/q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Flags for the bne at the bottom are set here; every instruction between
@ this point and the branch is a NEON operation that leaves them untouched.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair: applied to data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Intermediate results for data1 and data3 are stored without writeback;
@ they are reloaded from r4 and r6 later in this same pass.
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
@ Final results for data0, data2, data4 and data6 are stored with writeback.
vst1.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst1.32 {q2,q3}, [r5, :128]!
@ Third LUT pair: applied to data7 (q14/q15) and data5 (q12/q13).
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst1.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst1.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Reload the intermediate data1 / data3 values written earlier in this pass.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
@ Final results for data1, data3, data5 and data7 are stored with writeback.
vst1.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst1.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst1.32 {q4,q5}, [r8, :128]!
vst1.32 {q6,q7}, [r10, :128]!
bne neon_x8_loop
bx lr
@ neon_x8_t
@ Same pointer setup, loop structure and arithmetic as neon_x8; the only
@ instruction differences are the eight final stores, which use vst2.32
@ (two-register interleaving store) instead of vst1.32. The two
@ intermediate stores to r4 / r6 remain vst1.32.
@ Register use, as read from the code below:
@   r0  = address of stream 0; stream k starts at r0 + k*r1 (r3..r10)
@   r1  = byte distance between streams; trip count is r1 >> 5
@   r2  = address of the multiplier table (LUT), walked through r12
@ r3-r12 and q0-q15 are overwritten and nothing is saved in this routine.
@ NOTE(review): the closing bx lr is commented out, so as assembled control
@ continues into whatever follows this routine. Presumably this block is
@ copied as a template and the tail is supplied by the caller - confirm
@ before calling it directly.
.align 4
#ifdef __APPLE__
.globl _neon_x8_t
_neon_x8_t:
#else
.globl neon_x8_t
neon_x8_t:
#endif
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = 0 - (r1 >> 5); counted up to zero by the adds in the loop.
sub r11, r11, r1, lsr #5
neon_x8_t_loop:
@ First LUT pair: applied to data3 (q14/q15) and data2 (q10/q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Sets the flags consumed by the bne at the bottom of the loop.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair: applied to data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Intermediate data1 / data3 values: plain store, no writeback, reloaded below.
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
@ Final results: interleaving stores with writeback (vst1.32 in neon_x8).
vst2.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst2.32 {q2,q3}, [r5, :128]!
@ Third LUT pair: applied to data7 (q14/q15) and data5 (q12/q13).
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst2.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst2.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Reload the intermediate data1 / data3 values written earlier in this pass.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
vst2.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst2.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst2.32 {q4,q5}, [r8, :128]!
vst2.32 {q6,q7}, [r10, :128]!
bne neon_x8_t_loop
@ No return instruction is assembled here (see the note in the header).
@bx lr
@ neon_ee
@ Loop that reads 16 bytes from each of eight input pointers per pass and
@ writes 128 bytes to two output locations selected through an offset table.
@ assumes r0 = out
@ r1 = in ?
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 & lr = temps
@
@ On entry r2 also supplies the address of 16 bytes of constants, loaded
@ once into d16/d17 before the loop (presumably twiddle factors - confirm).
@ Each pass consumes two 32-bit offsets from r12; an output address is
@ r0 + (offset << 2), and 64 bytes are written at each of the two addresses.
@ r11 must be non-zero on entry: it is decremented before it is tested.
@ There is no return instruction and lr is overwritten, so control runs on
@ into the code that follows (NOTE(review): presumably a code template
@ rather than a callable function - confirm).
@ NOTE(review): lines ending in @ or @- use the opposite add/sub from the
@ matching lines in neon_static_e_f; presumably direction-dependent signs.
.align 4
#ifdef __APPLE__
.globl _neon_ee
_neon_ee:
#else
.globl neon_ee
neon_ee:
#endif
vld1.32 {d16, d17}, [r2, :128]
_neon_ee_loop:
@ De-interleaving loads: 16 bytes from each data pointer, with writeback.
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
@ Sets the flags for the bne at the bottom; the ldr/add/NEON instructions
@ in between do not modify them.
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
@ Products of the two differences (q1, q2) with the constants in d16/d17.
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @-
vadd.f32 d7, d31, d26 @-
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @-
vsub.f32 d4, d18, d15 @-
vsub.f32 d13, d19, d14 @-
vadd.f32 d12, d18, d15 @-
vsub.f32 d15, d31, d26 @-
@ Fetch the two output offsets and turn them into addresses relative to r0.
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @-
@ Interleaving stores: two consecutive 32-byte writes at each address.
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop
@ neon_oo
@ Loop with the same input/output pattern as neon_ee but without any
@ multiplication by constants: only additions, subtractions and transposes.
@ assumes r0 = out
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 & lr = temps
@
@ Each pass reads 16 bytes from each of the eight data pointers (with
@ writeback), consumes two 32-bit offsets from r12 and writes 64 bytes at
@ r0 + (offset << 2) for each of them.
@ r11 must be non-zero on entry: it is decremented before it is tested.
@ There is no return instruction and lr is overwritten, so control runs on
@ into the code that follows (NOTE(review): presumably a code template -
@ confirm).
@ NOTE(review): lines ending in a bare @ use the opposite add/sub from the
@ matching lines of the oo loop in neon_static_e_f.
.align 4
#ifdef __APPLE__
.globl _neon_oo
_neon_oo:
#else
.globl neon_oo
neon_oo:
#endif
_neon_oo_loop:
@ First half: inputs from r3-r6, results in q0-q3.
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
@ Sets the flags for the bne at the bottom of the loop.
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vsub.f32 d7, d19, d16 @
vadd.f32 d3, d19, d16 @
vadd.f32 d6, d18, d17 @
vsub.f32 d2, d18, d17 @
@ Second half: inputs from r7-r10, results in q4-q7.
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vsub.f32 d15, d19, d16 @
ldr lr, [r12], #4
vadd.f32 d11, d19, d16 @
vadd.f32 d14, d18, d17 @
vsub.f32 d10, d18, d17 @
@ Output addresses are r0 + (offset << 2).
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_loop
@ neon_eo
@ Straight-line (non-looping) block: reads 16 bytes from each of the eight
@ data pointers once and writes 64 bytes at each of two output addresses.
@ assumes r0 = out
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = addr of twiddle
@ r2 & lr = temps
@
@ Two 32-bit offsets are consumed from r12; the output addresses are
@ r0 + (offset << 2). The address in lr receives two 32-byte vst1 stores,
@ the address in r2 receives one 64-byte vstmia at the very end.
@ 16 bytes of constants are loaded from r11 into d20/d21.
@ There is no return instruction and lr is overwritten, so control runs on
@ into the code that follows (NOTE(review): presumably a code template -
@ confirm).
@ NOTE(review): lines ending in a bare @ use the opposite add/sub from the
@ matching lines of the eo section in neon_static_e_f.
.align 4
#ifdef __APPLE__
.globl _neon_eo
_neon_eo:
#else
.globl neon_eo
neon_eo:
#endif
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
vld2.32 {q0}, [r7, :128]! @tag4
vsub.f32 q11, q13, q12
vld2.32 {q8}, [r6, :128]! @tag3
vadd.f32 q12, q13, q12
vsub.f32 q10, q9, q8
vadd.f32 q8, q9, q8
vadd.f32 q9, q12, q8
vadd.f32 d9, d23, d20 @
vsub.f32 d11, d23, d20 @
vsub.f32 q8, q12, q8
vsub.f32 d8, d22, d21 @
vadd.f32 d10, d22, d21 @
ldr r2, [r12], #4
@ Constants from r11; q10 is free again because d20/d21 were consumed above.
vld1.32 {d20, d21}, [r11, :128]
ldr lr, [r12], #4
vtrn.32 q9, q4
add r2, r0, r2, lsl #2
vtrn.32 q8, q5
add lr, r0, lr, lsl #2
vswp d9,d10
@ First 32 bytes written at the lr address.
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
vld2.32 {q13}, [r10, :128]! @tag7
vld2.32 {q15}, [r9, :128]! @tag6
vld2.32 {q11}, [r8, :128]! @tag5
vsub.f32 q14, q15, q13
vsub.f32 q12, q0, q11
vadd.f32 q11, q0, q11
vadd.f32 q13, q15, q13
vadd.f32 d13, d29, d24 @
vadd.f32 q15, q13, q11
vsub.f32 d12, d28, d25 @
vsub.f32 d15, d29, d24 @
vadd.f32 d14, d28, d25 @
vtrn.32 q15, q6
vsub.f32 q15, q13, q11
vtrn.32 q15, q7
vswp d13, d14
@ Second 32 bytes written at the lr address.
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
vtrn.32 q13, q14
vtrn.32 q11, q12
@ Products with the constants in d20/d21, combined pairwise below.
vmul.f32 d24, d26, d21
vmul.f32 d28, d27, d20
vmul.f32 d25, d26, d20
vmul.f32 d26, d27, d21
vmul.f32 d27, d22, d21
vmul.f32 d30, d23, d20
vmul.f32 d29, d23, d21
vmul.f32 d22, d22, d20
vsub.f32 d21, d28, d24
vadd.f32 d20, d26, d25
vadd.f32 d25, d30, d27
vsub.f32 d24, d22, d29
vadd.f32 q11, q12, q10
vsub.f32 q10, q12, q10
vadd.f32 q0, q9, q11
vsub.f32 q2, q9, q11
vadd.f32 d3, d17, d20 @
vsub.f32 d7, d17, d20 @
vsub.f32 d2, d16, d21 @
vadd.f32 d6, d16, d21 @
vswp d1, d2
vswp d5, d6
@ 64 bytes (q0-q3) written at the r2 address.
vstmia r2!, {q0-q3}
@ neon_oe
@ Straight-line (non-looping) block, counterpart of neon_eo: reads once from
@ each of the eight data pointers and writes 64 bytes at each of two output
@ addresses.
@ assumes r0 = out
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = addr of twiddle
@ r2 & lr = temps
@
@ Unlike the other blocks, r5 and r6 are read with plain vld1.32 (no
@ de-interleave); the vorr/vtrn sequence that follows rearranges those two
@ registers before they are combined.
@ Two 32-bit offsets are consumed from r12; the output addresses are
@ r0 + (offset << 2). The r2 address receives two 32-byte vst1 stores, the
@ lr address receives one 64-byte vstmia at the very end.
@ There is no return instruction and lr is overwritten, so control runs on
@ into the code that follows (NOTE(review): presumably a code template -
@ confirm).
@ NOTE(review): lines ending in a bare @ use the opposite add/sub from the
@ matching lines of the oe section in neon_static_o_f.
.align 4
#ifdef __APPLE__
.globl _neon_oe
_neon_oe:
#else
.globl neon_oe
neon_oe:
#endif
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vld2.32 {q15}, [r10, :128]!
@ Register copies (vorr with identical operands) feeding the vtrn pair below.
vorr d25, d17, d17
vorr d24, d20, d20
vorr d20, d16, d16
vsub.f32 q9, q13, q11
vadd.f32 q11, q13, q11
ldr r2, [r12], #4
vtrn.32 d24, d25
ldr lr, [r12], #4
vtrn.32 d20, d21
add r2, r0, r2, lsl #2
vsub.f32 q8, q10, q12
add lr, r0, lr, lsl #2
vadd.f32 q10, q10, q12
vadd.f32 q0, q11, q10
vadd.f32 d25, d19, d16 @
vsub.f32 d27, d19, d16 @
vsub.f32 q1, q11, q10
vsub.f32 d24, d18, d17 @
vadd.f32 d26, d18, d17 @
vtrn.32 q0, q12
vtrn.32 q1, q13
@ Constants from r11 replace the previous contents of q12.
vld1.32 {d24, d25}, [r11, :128]
vswp d1, d2
@ First 32 bytes written at the r2 address.
vst1.32 {q0, q1}, [r2, :128]!
vld2.32 {q0}, [r9, :128]!
vadd.f32 q1, q0, q15
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vsub.f32 q15, q0, q15
vsub.f32 q0, q14, q13
vadd.f32 q3, q14, q13
vadd.f32 q2, q3, q1
vadd.f32 d29, d1, d30 @
vsub.f32 d27, d1, d30 @
vsub.f32 q3, q3, q1
vsub.f32 d28, d0, d31 @
vadd.f32 d26, d0, d31 @
vtrn.32 q2, q14
vtrn.32 q3, q13
vswp d5, d6
@ Second 32 bytes written at the r2 address.
vst1.32 {q2, q3}, [r2, :128]!
vtrn.32 q11, q9
vtrn.32 q10, q8
@ Products with the constants in d24/d25, combined pairwise below.
vmul.f32 d20, d18, d25
vmul.f32 d22, d19, d24
vmul.f32 d21, d19, d25
vmul.f32 d18, d18, d24
vmul.f32 d19, d16, d25
vmul.f32 d30, d17, d24
vmul.f32 d23, d16, d24
vmul.f32 d24, d17, d25
vadd.f32 d17, d22, d20
vsub.f32 d16, d18, d21
vsub.f32 d21, d30, d19
vadd.f32 d20, d24, d23
vadd.f32 q9, q8, q10
vsub.f32 q8, q8, q10
vadd.f32 q4, q14, q9
vsub.f32 q6, q14, q9
vadd.f32 d11, d27, d16 @
vsub.f32 d15, d27, d16 @
vsub.f32 d10, d26, d17 @
vadd.f32 d14, d26, d17 @
vswp d9, d10
vswp d13, d14
@ 64 bytes (q4-q7) written at the lr address.
vstmia lr!, {q4-q7}
@ neon_end
@ Exported label placed directly after neon_oe, followed by a single bx lr.
@ NOTE(review): presumably used as an end-of-section marker so that the
@ size of the preceding block can be computed from label addresses -
@ confirm against the code that references it.
.align 4
#ifdef __APPLE__
.globl _neon_end
_neon_end:
#else
.globl neon_end
neon_end:
#endif
bx lr
@ neon_transpose
@ Copies data from r0 to r1 while exchanging rows and columns in 2x2 tiles
@ of 8-byte elements.
@ Register use, as read from the code below:
@   r0 = source address, read sequentially two rows at a time
@   r1 = destination address
@   r2 = source row length in 8-byte elements (second row is at r0 + r2*8)
@   r3 = destination row length in 8-byte elements (rows are r3*8 bytes
@        apart) and the number of source rows processed
@ (NOTE(review): each 8-byte element is presumably one complex float -
@ confirm with the caller.)
@ The counters are decremented by 4 (inner) and 2 (outer) and the loops end
@ only on exactly zero, so r2 must be a positive multiple of 4 and r3 a
@ positive multiple of 2.
@ r4-r8 are saved and restored; q8-q15 are modified and not saved.
.align 4
#ifdef __APPLE__
.globl _neon_transpose
_neon_transpose:
#else
.globl neon_transpose
neon_transpose:
#endif
push {r4-r8}
@ vpush {q8-q9}
@ r5 = remaining source rows.
mov r5, r3
_neon_transpose_col:
@ r7 / r8 = two adjacent destination rows; r4 = remaining elements in the
@ current source rows; r6 = second source row.
mov r7, r1
add r8, r1, r3, lsl #3
mov r4, r2
add r6, r0, r2, lsl #3
_neon_transpose_row:
@ Four elements (32 bytes) from each of the two source rows.
vld1.32 {q8,q9}, [r0, :128]!
@ vld1.32 {q10,q11}, [r0, :128]!
vld1.32 {q12,q13}, [r6, :128]!
@ vld1.32 {q14,q15}, [r6, :128]!
sub r4, r4, #4
@ Flags set here are consumed by the bne at the end of the inner loop; the
@ NEON and add instructions in between leave them unchanged.
cmp r4, #0
@ Exchange the off-diagonal 8-byte elements of each 2x2 tile.
vswp d17,d24
vswp d19,d26
@ NOTE(review): q10, q11, q14 and q15 are never loaded (their loads are
@ commented out above), so the next two swaps have no effect on the output.
vswp d21,d28
vswp d23,d30
vst1.32 {q8}, [r7, :128]
vst1.32 {q12}, [r8, :128]
@ Step both destination pointers down by two destination rows.
add r7, r7, r3, lsl #4
add r8, r8, r3, lsl #4
vst1.32 {q9}, [r7, :128]
vst1.32 {q13}, [r8, :128]
add r7, r7, r3, lsl #4
add r8, r8, r3, lsl #4
@@vst1.32 {q10}, [r7, :128]
@@vst1.32 {q14}, [r8, :128]
@@add r7, r7, r3, lsl #4
@@add r8, r8, r3, lsl #4
@@vst1.32 {q11}, [r7, :128]
@@vst1.32 {q15}, [r8, :128]
@@add r7, r7, r3, lsl #4
@@add r8, r8, r3, lsl #4
bne _neon_transpose_row
sub r5, r5, #2
cmp r5, #0
@ r0 already points at the end of the first row; skip the second row, which
@ was read through r6. The destination moves right by two elements.
add r0, r0, r2, lsl #3
add r1, r1, #16
bne _neon_transpose_col
@ vpop {q8-q9}
pop {r4-r8}
bx lr
@ neon_transpose_to_buf
@ Transposes a fixed 8x8 tile of 8-byte elements from a strided source into
@ a contiguous 512-byte buffer.
@ Register use, as read from the code below:
@   r0 = source address of the tile
@   r1 = destination buffer; destination rows are 64 bytes apart
@   r2 = source row length in 8-byte elements (rows are r2*8 bytes apart)
@ The tile size is hard-coded: both counters start at 8.
@ (NOTE(review): each 8-byte element is presumably one complex float -
@ confirm with the caller.)
@ r4-r10 are saved and restored; q8, q9, q12 and q13 are modified and not
@ saved.
.align 4
#ifdef __APPLE__
.globl _neon_transpose_to_buf
_neon_transpose_to_buf:
#else
.globl neon_transpose_to_buf
neon_transpose_to_buf:
#endif
push {r4-r10}
@ r5 = remaining source rows (two are handled per outer pass).
mov r5, #8
_neon_transpose_to_buf_col:
@ r4 = remaining elements in the current pair of source rows.
mov r4, #8
@ r6 = second source row; r7-r10 = four consecutive destination rows.
add r6, r0, r2, lsl #3
mov r7, r1
add r8, r1, #64
add r9, r1, #128
add r10, r1, #192
_neon_transpose_to_buf_row:
@ Four elements (32 bytes) from each of the two source rows.
vld1.32 {q8,q9}, [r0, :128]!
vld1.32 {q12,q13}, [r6, :128]!
sub r4, r4, #4
@ Flags set here are consumed by the bne at the end of the inner loop.
cmp r4, #0
@ Exchange the off-diagonal 8-byte elements of each 2x2 tile.
vswp d17,d24
vswp d19,d26
vst1.32 {q8}, [r7, :128]
vst1.32 {q12}, [r8, :128]
vst1.32 {q9}, [r9, :128]
vst1.32 {q13}, [r10, :128]
@ Move on by four destination rows (4 * 64 bytes).
add r7, r7, #256
add r8, r8, #256
add r9, r9, #256
add r10, r10, #256
bne _neon_transpose_to_buf_row
sub r5, r5, #2
cmp r5, #0
@ Undo the 64 bytes the inner loop advanced r0, then step two source rows.
sub r0, r0, #64
add r0, r0, r2, lsl #4
@ The destination moves right by two elements.
add r1, r1, #16
bne _neon_transpose_to_buf_col
pop {r4-r10}
bx lr

File diff suppressed because it is too large Load Diff

@ -0,0 +1,956 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@ neon_static_e_f
@ Callable routine that runs four stages in sequence over the same set of
@ eight input pointers: an ee loop, one eo step, an oo loop and a second
@ ee loop. The instruction sequences match neon_ee, neon_eo and neon_oo
@ except that the add/sub on the lines ending in @ is reversed.
@ Arguments, as read from the code below:
@   r0 = pointer to a plan structure p; fields read here:
@          [p, #0]  -> r12, used as the output offset table
@          [p, #12] -> address of constants for the eo step
@          [p, #16] -> p->ee_ws, constants for the ee loops
@          [p, #28] -> p->i0, trip count of the first ee loop
@          [p, #32] -> p->i1, trip count of the oo loop and second ee loop
@          [p, #40] -> p->N, used as the byte distance between input pointers
@   r1 = input address
@   r2 = output address
@ r4-r11, lr and d8-d15 are saved on entry and restored on exit; the
@ routine returns by popping the saved lr into pc.
@ The first ee loop has no zero check, so p->i0 must be non-zero. The oo
@ loop and the second ee loop are skipped when p->i1 is zero.
@ NOTE(review): the _f suffix presumably denotes the forward transform -
@ confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_e_f
_neon_static_e_f:
#else
.globl neon_static_e_f
neon_static_e_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
@ Eight input pointers spaced lr bytes apart, assigned in the order
@ r3, r7, r5, r10, r4, r8, r6, r9.
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
@ From here on r1 = p and r0 = output base.
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
@ Stage 1: ee loop, p->i0 passes.
_neon_ee_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop
@ Stage 2: single eo step, with constants loaded from the address stored
@ at [p, #12].
ldr r11, [r1, #12]
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
vld2.32 {q0}, [r7, :128]! @tag4
vsub.f32 q11, q13, q12
vld2.32 {q8}, [r6, :128]! @tag3
vadd.f32 q12, q13, q12
vsub.f32 q10, q9, q8
vadd.f32 q8, q9, q8
vadd.f32 q9, q12, q8
vsub.f32 d9, d23, d20 @
vadd.f32 d11, d23, d20 @
vsub.f32 q8, q12, q8
vadd.f32 d8, d22, d21 @
vsub.f32 d10, d22, d21 @
ldr r2, [r12], #4
vld1.32 {d20, d21}, [r11, :128]
ldr lr, [r12], #4
vtrn.32 q9, q4
add r2, r0, r2, lsl #2
vtrn.32 q8, q5
add lr, r0, lr, lsl #2
vswp d9,d10
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
vld2.32 {q13}, [r10, :128]! @tag7
vld2.32 {q15}, [r9, :128]! @tag6
vld2.32 {q11}, [r8, :128]! @tag5
vsub.f32 q14, q15, q13
vsub.f32 q12, q0, q11
vadd.f32 q11, q0, q11
vadd.f32 q13, q15, q13
vsub.f32 d13, d29, d24 @
vadd.f32 q15, q13, q11
vadd.f32 d12, d28, d25 @
vadd.f32 d15, d29, d24 @
vsub.f32 d14, d28, d25 @
vtrn.32 q15, q6
vsub.f32 q15, q13, q11
vtrn.32 q15, q7
vswp d13, d14
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
vtrn.32 q13, q14
vtrn.32 q11, q12
vmul.f32 d24, d26, d21
vmul.f32 d28, d27, d20
vmul.f32 d25, d26, d20
vmul.f32 d26, d27, d21
vmul.f32 d27, d22, d21
vmul.f32 d30, d23, d20
vmul.f32 d29, d23, d21
vmul.f32 d22, d22, d20
vsub.f32 d21, d28, d24
vadd.f32 d20, d26, d25
vadd.f32 d25, d30, d27
vsub.f32 d24, d22, d29
vadd.f32 q11, q12, q10
vsub.f32 q10, q12, q10
vadd.f32 q0, q9, q11
vsub.f32 q2, q9, q11
vsub.f32 d3, d17, d20 @
vadd.f32 d7, d17, d20 @
vadd.f32 d2, d16, d21 @
vsub.f32 d6, d16, d21 @
vswp d1, d2
vswp d5, d6
vstmia r2!, {q0-q3}
@ Exchange r7 with r9 and r8 with r10 (r2 is the scratch register).
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_loop_exit
@ Stage 3: oo loop, p->i1 passes.
_neon_oo_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vadd.f32 d7, d19, d16 @
vsub.f32 d3, d19, d16 @
vsub.f32 d6, d18, d17 @
vadd.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vadd.f32 d15, d19, d16 @
ldr lr, [r12], #4
vsub.f32 d11, d19, d16 @
vsub.f32 d14, d18, d17 @
vadd.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_loop
_neon_oo_loop_exit:
@ Exchange r3 with r7, r4 with r8, r5 with r9 and r6 with r10, then
@ exchange r9 with r10 (r2 is the scratch register throughout).
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
@ Reload the ee constants: q8 was overwritten by the previous stages.
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
@ Stage 4: second ee loop, p->i1 passes, same body as stage 1.
_neon_ee_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop2
_neon_ee_loop2_exit:
@ Restore the saved registers and return (saved lr is popped into pc).
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_o_f
@ Callable routine that runs four stages in sequence over the same set of
@ eight input pointers: an ee loop, an oo loop, one oe step and a second
@ ee loop. The instruction sequences match neon_ee, neon_oo and neon_oe
@ except that the add/sub on the lines ending in @ is reversed.
@ Arguments, as read from the code below:
@   r0 = pointer to a plan structure p; fields read here:
@          [p, #0]  -> r12, used as the output offset table
@          [p, #8]  -> address of constants for the oe step
@          [p, #16] -> p->ee_ws, constants for the ee loops
@          [p, #28] -> p->i0, trip count of the first ee loop
@          [p, #32] -> p->i1, trip count of the oo loop and second ee loop
@          [p, #40] -> p->N, used as the byte distance between input pointers
@   r1 = input address
@   r2 = output address
@ r4-r11, lr and d8-d15 are saved on entry and restored on exit; the
@ routine returns by popping the saved lr into pc.
@ The first ee loop has no zero check, so p->i0 must be non-zero. The oo
@ loop and the second ee loop are skipped when p->i1 is zero; the oe step
@ always runs.
@ NOTE(review): the _f suffix presumably denotes the forward transform -
@ confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_o_f
_neon_static_o_f:
#else
.globl neon_static_o_f
neon_static_o_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
@ Eight input pointers spaced lr bytes apart, assigned in the order
@ r3, r7, r5, r10, r4, r8, r6, r9.
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
ldr r12, [r0]
@ From here on r1 = p and r0 = output base.
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
vld1.32 {d16, d17}, [r2, :128]
@ Stage 1: ee loop, p->i0 passes.
_neon_ee_o_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop
@ Exchange r7 with r9 and r8 with r10 (r2 is the scratch register).
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_oo_o_loop_exit
@ Stage 2: oo loop, p->i1 passes.
_neon_oo_o_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vadd.f32 d7, d19, d16 @
vsub.f32 d3, d19, d16 @
vsub.f32 d6, d18, d17 @
vadd.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vadd.f32 d15, d19, d16 @
ldr lr, [r12], #4
vsub.f32 d11, d19, d16 @
vsub.f32 d14, d18, d17 @
vadd.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_o_loop
_neon_oo_o_loop_exit:
@ Stage 3: single oe step, with constants loaded from the address stored
@ at [p, #8].
ldr r11, [r1, #8]
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vld2.32 {q15}, [r10, :128]!
vorr d25, d17, d17
vorr d24, d20, d20
vorr d20, d16, d16
vsub.f32 q9, q13, q11
vadd.f32 q11, q13, q11
ldr r2, [r12], #4
vtrn.32 d24, d25
ldr lr, [r12], #4
vtrn.32 d20, d21
add r2, r0, r2, lsl #2
vsub.f32 q8, q10, q12
add lr, r0, lr, lsl #2
vadd.f32 q10, q10, q12
vadd.f32 q0, q11, q10
vsub.f32 d25, d19, d16 @
vadd.f32 d27, d19, d16 @
vsub.f32 q1, q11, q10
vadd.f32 d24, d18, d17 @
vsub.f32 d26, d18, d17 @
vtrn.32 q0, q12
vtrn.32 q1, q13
vld1.32 {d24, d25}, [r11, :128]
vswp d1, d2
vst1.32 {q0, q1}, [r2, :128]!
vld2.32 {q0}, [r9, :128]!
vadd.f32 q1, q0, q15
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vsub.f32 q15, q0, q15
vsub.f32 q0, q14, q13
vadd.f32 q3, q14, q13
vadd.f32 q2, q3, q1
vsub.f32 d29, d1, d30 @
vadd.f32 d27, d1, d30 @
vsub.f32 q3, q3, q1
vadd.f32 d28, d0, d31 @
vsub.f32 d26, d0, d31 @
vtrn.32 q2, q14
vtrn.32 q3, q13
vswp d5, d6
vst1.32 {q2, q3}, [r2, :128]!
vtrn.32 q11, q9
vtrn.32 q10, q8
vmul.f32 d20, d18, d25
vmul.f32 d22, d19, d24
vmul.f32 d21, d19, d25
vmul.f32 d18, d18, d24
vmul.f32 d19, d16, d25
vmul.f32 d30, d17, d24
vmul.f32 d23, d16, d24
vmul.f32 d24, d17, d25
vadd.f32 d17, d22, d20
vsub.f32 d16, d18, d21
vsub.f32 d21, d30, d19
vadd.f32 d20, d24, d23
vadd.f32 q9, q8, q10
vsub.f32 q8, q8, q10
vadd.f32 q4, q14, q9
vsub.f32 q6, q14, q9
vsub.f32 d11, d27, d16 @
vadd.f32 d15, d27, d16 @
vadd.f32 d10, d26, d17 @
vsub.f32 d14, d26, d17 @
vswp d9, d10
vswp d13, d14
vstmia lr!, {q4-q7}
@ Exchange r3 with r7, r4 with r8, r5 with r9 and r6 with r10, then
@ exchange r9 with r10 (r2 is the scratch register throughout).
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
@ Reload the ee constants: q8 was overwritten by the previous stages.
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_o_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
@ Stage 4: second ee loop, p->i1 passes, same body as stage 1.
_neon_ee_o_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vsub.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vadd.f32 d31, d5, d2 @
vadd.f32 d28, d4, d3 @
vsub.f32 d30, d4, d3 @
vsub.f32 d5, d19, d14 @
vsub.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vadd.f32 d6, d30, d27 @
vadd.f32 d4, d18, d15 @
vadd.f32 d13, d19, d14 @
vsub.f32 d12, d18, d15 @
vadd.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vsub.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop2
_neon_ee_o_loop2_exit:
@ Restore the saved registers and return (saved lr is popped into pc).
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_x4_f
@ Callable counterpart of neon_x4: same loads, products and stores, wrapped
@ in a register-saving prologue and epilogue, with the add/sub reversed on
@ the four lines that end in a bare @.
@ Arguments, as read from the code below:
@   r0 = address of group 0; groups 1..3 are at r0 + 2*r1, r0 + 4*r1 and
@        r0 + 6*r1
@   r1 = byte step used to derive the other three addresses
@   r2 = address of 32 bytes of multiplier data (loaded into q2/q3)
@ The four 32-byte groups are updated in place. r4-r6, lr and d8-d15 are
@ saved and restored; q0-q3 and q8-q15 are modified and not saved. The
@ routine returns by popping the saved lr into pc.
@ NOTE(review): the _f suffix presumably denotes the forward transform -
@ confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_x4_f
_neon_static_x4_f:
#else
.globl neon_static_x4_f
neon_static_x4_f:
#endif
@ add r3, r0, #0
push {r4, r5, r6, lr}
vstmdb sp!, {d8-d15}
@ Load the four data groups and the multiplier pair q2/q3.
vld1.32 {q8,q9}, [r0, :128]
add r4, r0, r1, lsl #1
vld1.32 {q10,q11}, [r4, :128]
add r5, r0, r1, lsl #2
vld1.32 {q12,q13}, [r5, :128]
add r6, r4, r1, lsl #2
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q2,q3}, [r2, :128]
@ Eight element-wise products of groups 2 and 3 with q2/q3, reduced below to
@   q0  = q12*q2 - q13*q3     q13 = q13*q2 + q12*q3
@   q12 = q15*q3 + q14*q2     q1  = q15*q2 - q14*q3
vmul.f32 q0, q13, q3
vmul.f32 q5, q12, q2
vmul.f32 q1, q14, q2
vmul.f32 q4, q14, q3
vmul.f32 q14, q12, q3
vmul.f32 q13, q13, q2
vmul.f32 q12, q15, q3
vmul.f32 q2, q15, q2
vsub.f32 q0, q5, q0
vadd.f32 q13, q13, q14
vadd.f32 q12, q12, q1
vsub.f32 q1, q2, q4
@ Sums and differences of the two products, then of those with groups 0 and 1.
vadd.f32 q15, q0, q12
vsub.f32 q12, q0, q12
vadd.f32 q14, q13, q1
vsub.f32 q13, q13, q1
vadd.f32 q0, q8, q15
vadd.f32 q1, q9, q14
vadd.f32 q2, q10, q13 @
vsub.f32 q4, q8, q15
vsub.f32 q3, q11, q12 @
vst1.32 {q0,q1}, [r0, :128]
vsub.f32 q5, q9, q14
vsub.f32 q6, q10, q13 @
vadd.f32 q7, q11, q12 @
@ Write the results back over the four input groups.
vst1.32 {q2,q3}, [r4, :128]
vst1.32 {q4,q5}, [r5, :128]
vst1.32 {q6,q7}, [r6, :128]
@ Restore the saved registers and return (saved lr is popped into pc).
vldmia sp!, {d8-d15}
pop {r4, r5, r6, pc}
@ neon_static_x8_f
@ Callable counterpart of neon_x8: same pointer setup, loop and stores,
@ wrapped in a register-saving prologue and epilogue, with the add/sub
@ reversed on the lines that end in a bare @.
@ Arguments, as read from the code below:
@   r0 = address of stream 0; stream k starts at r0 + k*r1 (r3..r10)
@   r1 = byte distance between streams; trip count is r1 >> 5
@   r2 = address of the multiplier table (LUT), walked through r12
@ Each loop pass reads 96 bytes from the LUT and updates 32 bytes of every
@ stream in place, advancing each stream pointer by 32 bytes.
@ r4-r11, lr and d8-d15 are saved and restored; r3, r12, q0-q3 and q8-q15
@ are modified and not saved. The routine returns by popping the saved lr
@ into pc.
@ The counter starts at -(r1 >> 5) and the loop only ends when it reaches
@ exactly zero, so r1 >> 5 must be non-zero.
@ NOTE(review): the _f suffix presumably denotes the forward transform -
@ confirm.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_f
_neon_static_x8_f:
#else
.globl neon_static_x8_f
neon_static_x8_f:
#endif
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = 0 - (r1 >> 5); counted up to zero by the adds in the loop.
sub r11, r11, r1, lsr #5
neon_x8_loop:
@ First LUT pair: applied to data3 (q14/q15) and data2 (q10/q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Sets the flags consumed by the bne at the bottom of the loop; only NEON
@ instructions, which leave the flags unchanged, lie in between.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair: applied to data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vadd.f32 q4, q12, q15 @
vsub.f32 q6, q12, q15 @
vsub.f32 q5, q13, q14 @
vadd.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Intermediate data1 / data3 values: stored without writeback and reloaded
@ from r4 and r6 later in this same pass.
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vadd.f32 q2, q8, q10 @
vsub.f32 q3, q9, q12 @
@ Final results for data0, data2, data4 and data6 are stored with writeback.
vst1.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vadd.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst1.32 {q2,q3}, [r5, :128]!
@ Third LUT pair: applied to data7 (q14/q15) and data5 (q12/q13).
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst1.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst1.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Reload the intermediate data1 / data3 values written earlier in this pass.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vadd.f32 q2, q10, q15 @
vsub.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
@ Final results for data1, data3, data5 and data7 are stored with writeback.
vst1.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vsub.f32 q6, q10, q15 @
vst1.32 {q2,q3}, [r6, :128]!
vadd.f32 q7, q11, q14 @
vst1.32 {q4,q5}, [r8, :128]!
vst1.32 {q6,q7}, [r10, :128]!
bne neon_x8_loop
@ Restore the saved registers and return (saved lr is popped into pc).
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_x8_t_f
@ In-place pass over eight data streams that lie r1 bytes apart.
@   r0 = address of stream 0 (data0)
@   r1 = distance in bytes between consecutive streams; the loop below
@        executes r1/32 times (assumes r1 >= 32 - TODO confirm with callers)
@   r2 = address of the LUT; 96 bytes of it are consumed per iteration
@ Each iteration reads 32 bytes from every stream, combines them with the
@ LUT values and writes 32 bytes back to every stream, advancing all eight
@ stream pointers by 32 bytes. Partial results for data1 and data3 are
@ parked in place with vst1.32 (no write-back) and re-read later in the same
@ iteration. All final stores use vst2.32, so the two registers of each
@ stored pair are written element-interleaved.
@ Instructions that end in a bare @ are the adds/subs whose sense is the
@ opposite in neon_static_x8_t_i.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_t_f
_neon_static_x8_t_f:
#else
.globl neon_static_x8_t_f
neon_static_x8_t_f:
#endif
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = -(r1 / 32): negative iteration counter that counts up to zero.
sub r11, r11, r1, lsr #5
neon_x8_t_loop:
@ First LUT pair (q2,q3) is applied to data3 (q14,q15) and data2 (q10,q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Bump the counter here; the flags are consumed by the bne at the loop end.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair, used for data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vadd.f32 q4, q12, q15 @
vsub.f32 q6, q12, q15 @
vsub.f32 q5, q13, q14 @
vadd.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Park the partial result for data1 in place (pointer not advanced).
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
@ Park the partial result for data3 in place (pointer not advanced).
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vadd.f32 q2, q8, q10 @
vsub.f32 q3, q9, q12 @
@ Final results for data0, data2, data4 and data6: interleaving stores
@ with write-back.
vst2.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vadd.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst2.32 {q2,q3}, [r5, :128]!
@ Third LUT pair, applied to data7 (q14,q15) and data5 (q12,q13).
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst2.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst2.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Re-read the partial result parked at data1 earlier in this iteration.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
@ Re-read the partial result parked at data3 earlier in this iteration.
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vadd.f32 q2, q10, q15 @
vsub.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
@ Final results for data1, data3, data5 and data7.
vst2.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vsub.f32 q6, q10, q15 @
vst2.32 {q2,q3}, [r6, :128]!
vadd.f32 q7, q11, q14 @
vst2.32 {q4,q5}, [r8, :128]!
vst2.32 {q6,q7}, [r10, :128]!
@ Loop until the counter incremented by the adds above reaches zero.
bne neon_x8_t_loop
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}

@ -0,0 +1,955 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
@ neon_static_e_i
@   r0 = address of the plan structure p; its fields are read at fixed
@        byte offsets (0, 12, 16, 28, 32 and 40)
@   r1 = input base address
@   r2 = output base address
@ Eight input pointers are formed at multiples of the value loaded from
@ p+40, which is used as a byte distance:
@   r3=+0  r7=+1  r5=+2  r10=+3  r4=+4  r8=+5  r6=+6  r9=+7
@ The word at p+0 is a pointer (r12) to a stream of 32-bit output indices.
@ Each stage reads two indices per step and stores to out + 4*index.
@ vld2.32 with a single q register reads 16 bytes and de-interleaves them:
@ even-numbered words land in the low d half, odd-numbered words in the
@ high d half.
@ Stage order: _neon_ee_loop (p->i0 steps), one straight-line step that
@ uses the constants at p+12, _neon_oo_loop (p->i1 steps),
@ _neon_ee_loop2 (p->i1 steps). Input pointers are exchanged between stages.
@ NOTE(review): instructions ending in a bare @ look like the adds/subs that
@ differ from the forward-transform counterpart - confirm against that file.
.align 4
#ifdef __APPLE__
.globl _neon_static_e_i
_neon_static_e_i:
#else
.globl neon_static_e_i
neon_static_e_i:
#endif
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
@ r12 = word at p+0: cursor into the output index stream.
ldr r12, [r0]
@ From here on r1 holds p and r0 holds the output base.
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
@ q8 (d16,d17) = 16 bytes of constants from p->ee_ws, kept for the loop.
vld1.32 {d16, d17}, [r2, :128]
@ Counted loop, p->i0 steps. The count is not checked for zero before
@ entry, unlike the p->i1 loops further down.
_neon_ee_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
@ Decrement the step counter; the flags are consumed by the bne at the end
@ of the loop.
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
@ Products of the two differences (q1, q2) with the constants in d16/d17.
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
@ Fetch the next two output indices; r2 and lr become out + 4*index.
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
@ 64 bytes go to each of the two output locations (two 32-byte
@ interleaving stores apiece).
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop
@ Single straight-line step: one 16-byte load from each of the eight
@ streams (tag0..tag7). r11 = word at p+12, the address of 16 bytes of
@ constants that are loaded into q10 below.
ldr r11, [r1, #12]
vld2.32 {q9}, [r5, :128]! @tag2
vld2.32 {q13}, [r3, :128]! @tag0
vld2.32 {q12}, [r4, :128]! @tag1
vld2.32 {q0}, [r7, :128]! @tag4
vsub.f32 q11, q13, q12
vld2.32 {q8}, [r6, :128]! @tag3
vadd.f32 q12, q13, q12
vsub.f32 q10, q9, q8
vadd.f32 q8, q9, q8
vadd.f32 q9, q12, q8
vadd.f32 d9, d23, d20 @
vsub.f32 d11, d23, d20 @
vsub.f32 q8, q12, q8
vsub.f32 d8, d22, d21 @
vadd.f32 d10, d22, d21 @
ldr r2, [r12], #4
vld1.32 {d20, d21}, [r11, :128]
ldr lr, [r12], #4
vtrn.32 q9, q4
add r2, r0, r2, lsl #2
vtrn.32 q8, q5
add lr, r0, lr, lsl #2
vswp d9,d10
@ First 32 bytes for the location addressed by lr.
vst1.32 {d8,d9,d10,d11}, [lr, :128]!
vld2.32 {q13}, [r10, :128]! @tag7
vld2.32 {q15}, [r9, :128]! @tag6
vld2.32 {q11}, [r8, :128]! @tag5
vsub.f32 q14, q15, q13
vsub.f32 q12, q0, q11
vadd.f32 q11, q0, q11
vadd.f32 q13, q15, q13
vadd.f32 d13, d29, d24 @
vadd.f32 q15, q13, q11
vsub.f32 d12, d28, d25 @
vsub.f32 d15, d29, d24 @
vadd.f32 d14, d28, d25 @
vtrn.32 q15, q6
vsub.f32 q15, q13, q11
vtrn.32 q15, q7
vswp d13, d14
@ Second 32 bytes for the location addressed by lr.
vst1.32 {d12,d13,d14,d15}, [lr, :128]!
vtrn.32 q13, q14
vtrn.32 q11, q12
@ Products with the constants loaded from p+12 (d20, d21).
vmul.f32 d24, d26, d21
vmul.f32 d28, d27, d20
vmul.f32 d25, d26, d20
vmul.f32 d26, d27, d21
vmul.f32 d27, d22, d21
vmul.f32 d30, d23, d20
vmul.f32 d29, d23, d21
vmul.f32 d22, d22, d20
vsub.f32 d21, d28, d24
vadd.f32 d20, d26, d25
vadd.f32 d25, d30, d27
vsub.f32 d24, d22, d29
vadd.f32 q11, q12, q10
vsub.f32 q10, q12, q10
vadd.f32 q0, q9, q11
vsub.f32 q2, q9, q11
vadd.f32 d3, d17, d20 @
vsub.f32 d7, d17, d20 @
vsub.f32 d2, d16, d21 @
vadd.f32 d6, d16, d21 @
vswp d1, d2
vswp d5, d6
@ 64 bytes for the location addressed by r2.
vstmia r2!, {q0-q3}
@ Exchange r7 with r9 and r8 with r10, using r2 as scratch.
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
@ Skip the loop entirely when p->i1 is zero.
cmp r11, #0
beq _neon_oo_loop_exit
@ Counted loop, p->i1 steps. No multiplies here: only adds, subs and
@ transposes on eight 16-byte loads per step.
_neon_oo_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vsub.f32 d7, d19, d16 @
vadd.f32 d3, d19, d16 @
vadd.f32 d6, d18, d17 @
vsub.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vsub.f32 d15, d19, d16 @
ldr lr, [r12], #4
vadd.f32 d11, d19, d16 @
vadd.f32 d14, d18, d17 @
vsub.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_loop
_neon_oo_loop_exit:
@ Exchange r3 with r7, r4 with r8, r5 with r9 and r6 with r10, then
@ exchange r9 with r10 (r2 is the scratch register throughout).
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
@ Reload the constants pointer from p+16 and the step count from p+32.
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
@ Same instruction sequence as _neon_ee_loop, run p->i1 times on the
@ exchanged pointers.
_neon_ee_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_loop2
_neon_ee_loop2_exit:
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_o_i
@   r0 = address of the plan structure p; its fields are read at fixed
@        byte offsets (0, 8, 16, 28, 32 and 40)
@   r1 = input base address
@   r2 = output base address
@ Eight input pointers are formed at multiples of the value loaded from
@ p+40, which is used as a byte distance:
@   r3=+0  r7=+1  r5=+2  r10=+3  r4=+4  r8=+5  r6=+6  r9=+7
@ The word at p+0 is a pointer (r12) to a stream of 32-bit output indices.
@ Each stage reads two indices per step and stores to out + 4*index.
@ vld2.32 with a single q register reads 16 bytes and de-interleaves them:
@ even-numbered words land in the low d half, odd-numbered words in the
@ high d half.
@ Stage order: _neon_ee_o_loop (p->i0 steps), _neon_oo_o_loop (p->i1
@ steps), one straight-line step that uses the constants at p+8,
@ _neon_ee_o_loop2 (p->i1 steps). Input pointers are exchanged between
@ stages.
@ NOTE(review): instructions ending in a bare @ look like the adds/subs that
@ differ from the forward-transform counterpart - confirm against that file.
.align 4
#ifdef __APPLE__
.globl _neon_static_o_i
_neon_static_o_i:
#else
.globl neon_static_o_i
neon_static_o_i:
#endif
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
ldr lr, [r0, #40] @ this is p->N
add r3, r1, #0
add r7, r1, lr
add r5, r7, lr
add r10, r5, lr
add r4, r10, lr
add r8, r4, lr
add r6, r8, lr
add r9, r6, lr
@ r12 = word at p+0: cursor into the output index stream.
ldr r12, [r0]
@ From here on r1 holds p and r0 holds the output base.
add r1, r0, #0
add r0, r2, #0
ldr r2, [r1, #16] @ this is p->ee_ws
ldr r11, [r1, #28] @ this is p->i0
@ q8 (d16,d17) = 16 bytes of constants from p->ee_ws, kept for the loop.
vld1.32 {d16, d17}, [r2, :128]
@ Counted loop, p->i0 steps. The count is not checked for zero before
@ entry, unlike the p->i1 loops further down.
_neon_ee_o_loop:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
@ Decrement the step counter; the flags are consumed by the bne at the end
@ of the loop.
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
@ Products of the two differences (q1, q2) with the constants in d16/d17.
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
@ Fetch the next two output indices; r2 and lr become out + 4*index.
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
@ 64 bytes go to each of the two output locations (two 32-byte
@ interleaving stores apiece).
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop
@ Exchange r7 with r9 and r8 with r10, using r2 as scratch.
add r2, r7, #0
add r7, r9, #0
add r9, r2, #0
add r2, r8, #0
add r8, r10, #0
add r10, r2, #0
ldr r11, [r1, #32] @ this is p->i1
@ Skip the loop entirely when p->i1 is zero.
cmp r11, #0
beq _neon_oo_o_loop_exit
@ Counted loop, p->i1 steps. No multiplies here: only adds, subs and
@ transposes on eight 16-byte loads per step.
_neon_oo_o_loop:
vld2.32 {q8}, [r6, :128]!
vld2.32 {q9}, [r5, :128]!
vld2.32 {q10}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vadd.f32 q11, q9, q8
vsub.f32 q8, q9, q8
vsub.f32 q9, q13, q10
vadd.f32 q12, q13, q10
subs r11, r11, #1
vld2.32 {q10}, [r7, :128]!
vld2.32 {q13}, [r9, :128]!
vsub.f32 q2, q12, q11
vsub.f32 d7, d19, d16 @
vadd.f32 d3, d19, d16 @
vadd.f32 d6, d18, d17 @
vsub.f32 d2, d18, d17 @
vld2.32 {q9}, [r8, :128]!
vld2.32 {q8}, [r10, :128]!
vadd.f32 q0, q12, q11
vadd.f32 q11, q13, q8
vadd.f32 q12, q10, q9
vsub.f32 q8, q13, q8
vsub.f32 q9, q10, q9
vsub.f32 q6, q12, q11
vadd.f32 q4, q12, q11
vtrn.32 q0, q2
ldr r2, [r12], #4
vsub.f32 d15, d19, d16 @
ldr lr, [r12], #4
vadd.f32 d11, d19, d16 @
vadd.f32 d14, d18, d17 @
vsub.f32 d10, d18, d17 @
add r2, r0, r2, lsl #2
vtrn.32 q1, q3
add lr, r0, lr, lsl #2
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_oo_o_loop
_neon_oo_o_loop_exit:
@ Single straight-line step. r11 = word at p+8, the address of 16 bytes of
@ constants that are loaded into q12 below. The loads from r5 and r6 are
@ plain vld1 (not de-interleaved); the other six streams use vld2.
ldr r11, [r1, #8]
vld1.32 {q8}, [r5, :128]!
vld1.32 {q10}, [r6, :128]!
vld2.32 {q11}, [r4, :128]!
vld2.32 {q13}, [r3, :128]!
vld2.32 {q15}, [r10, :128]!
@ vorr with identical source operands is a plain d-register copy.
vorr d25, d17, d17
vorr d24, d20, d20
vorr d20, d16, d16
vsub.f32 q9, q13, q11
vadd.f32 q11, q13, q11
ldr r2, [r12], #4
vtrn.32 d24, d25
ldr lr, [r12], #4
vtrn.32 d20, d21
add r2, r0, r2, lsl #2
vsub.f32 q8, q10, q12
add lr, r0, lr, lsl #2
vadd.f32 q10, q10, q12
vadd.f32 q0, q11, q10
vadd.f32 d25, d19, d16 @
vsub.f32 d27, d19, d16 @
vsub.f32 q1, q11, q10
vsub.f32 d24, d18, d17 @
vadd.f32 d26, d18, d17 @
vtrn.32 q0, q12
vtrn.32 q1, q13
@ q12 is now reused for the constants from p+8.
vld1.32 {d24, d25}, [r11, :128]
vswp d1, d2
@ First 32 bytes for the location addressed by r2.
vst1.32 {q0, q1}, [r2, :128]!
vld2.32 {q0}, [r9, :128]!
vadd.f32 q1, q0, q15
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vsub.f32 q15, q0, q15
vsub.f32 q0, q14, q13
vadd.f32 q3, q14, q13
vadd.f32 q2, q3, q1
vadd.f32 d29, d1, d30 @
vsub.f32 d27, d1, d30 @
vsub.f32 q3, q3, q1
vsub.f32 d28, d0, d31 @
vadd.f32 d26, d0, d31 @
vtrn.32 q2, q14
vtrn.32 q3, q13
vswp d5, d6
@ Second 32 bytes for the location addressed by r2.
vst1.32 {q2, q3}, [r2, :128]!
vtrn.32 q11, q9
vtrn.32 q10, q8
@ Products with the constants loaded from p+8 (d24, d25).
vmul.f32 d20, d18, d25
vmul.f32 d22, d19, d24
vmul.f32 d21, d19, d25
vmul.f32 d18, d18, d24
vmul.f32 d19, d16, d25
vmul.f32 d30, d17, d24
vmul.f32 d23, d16, d24
vmul.f32 d24, d17, d25
vadd.f32 d17, d22, d20
vsub.f32 d16, d18, d21
vsub.f32 d21, d30, d19
vadd.f32 d20, d24, d23
vadd.f32 q9, q8, q10
vsub.f32 q8, q8, q10
vadd.f32 q4, q14, q9
vsub.f32 q6, q14, q9
vadd.f32 d11, d27, d16 @
vsub.f32 d15, d27, d16 @
vsub.f32 d10, d26, d17 @
vadd.f32 d14, d26, d17 @
vswp d9, d10
vswp d13, d14
@ 64 bytes for the location addressed by lr.
vstmia lr!, {q4-q7}
@ Exchange r3 with r7, r4 with r8, r5 with r9 and r6 with r10, then
@ exchange r9 with r10 (r2 is the scratch register throughout).
add r2, r3, #0
add r3, r7, #0
add r7, r2, #0
add r2, r4, #0
add r4, r8, #0
add r8, r2, #0
add r2, r5, #0
add r5, r9, #0
add r9, r2, #0
add r2, r6, #0
add r6, r10, #0
add r10, r2, #0
add r2, r9, #0
add r9, r10, #0
add r10, r2, #0
@ Reload the constants pointer from p+16 and the step count from p+32.
ldr r2, [r1, #16]
ldr r11, [r1, #32] @ this is p->i1
cmp r11, #0
beq _neon_ee_o_loop2_exit
vld1.32 {d16, d17}, [r2, :128]
@ Same instruction sequence as _neon_ee_o_loop, run p->i1 times on the
@ exchanged pointers.
_neon_ee_o_loop2:
vld2.32 {q15}, [r10, :128]!
vld2.32 {q13}, [r8, :128]!
vld2.32 {q14}, [r7, :128]!
vld2.32 {q9}, [r4, :128]!
vld2.32 {q10}, [r3, :128]!
vld2.32 {q11}, [r6, :128]!
vld2.32 {q12}, [r5, :128]!
vsub.f32 q1, q14, q13
vld2.32 {q0}, [r9, :128]!
subs r11, r11, #1
vsub.f32 q2, q0, q15
vadd.f32 q0, q0, q15
vmul.f32 d10, d2, d17
vmul.f32 d11, d3, d16
vmul.f32 d12, d3, d17
vmul.f32 d6, d4, d17
vmul.f32 d7, d5, d16
vmul.f32 d8, d4, d16
vmul.f32 d9, d5, d17
vmul.f32 d13, d2, d16
vsub.f32 d7, d7, d6
vadd.f32 d11, d11, d10
vsub.f32 q1, q12, q11
vsub.f32 q2, q10, q9
vadd.f32 d6, d9, d8
vadd.f32 q4, q14, q13
vadd.f32 q11, q12, q11
vadd.f32 q12, q10, q9
vsub.f32 d10, d13, d12
vsub.f32 q7, q4, q0
vsub.f32 q9, q12, q11
vsub.f32 q13, q5, q3
vadd.f32 d29, d5, d2 @
vadd.f32 q5, q5, q3
vadd.f32 q10, q4, q0
vadd.f32 q11, q12, q11
vsub.f32 d31, d5, d2 @
vsub.f32 d28, d4, d3 @
vadd.f32 d30, d4, d3 @
vadd.f32 d5, d19, d14 @
vadd.f32 d7, d31, d26 @
vadd.f32 q1, q14, q5
vadd.f32 q0, q11, q10
vsub.f32 d6, d30, d27 @
vsub.f32 d4, d18, d15 @
vsub.f32 d13, d19, d14 @
vadd.f32 d12, d18, d15 @
vsub.f32 d15, d31, d26 @
ldr r2, [r12], #4
vtrn.32 q1, q3
ldr lr, [r12], #4
vtrn.32 q0, q2
add r2, r0, r2, lsl #2
vsub.f32 q4, q11, q10
add lr, r0, lr, lsl #2
vsub.f32 q5, q14, q5
vadd.f32 d14, d30, d27 @
vst2.32 {q0,q1}, [r2, :128]!
vst2.32 {q2,q3}, [lr, :128]!
vtrn.32 q4, q6
vtrn.32 q5, q7
vst2.32 {q4,q5}, [r2, :128]!
vst2.32 {q6,q7}, [lr, :128]!
bne _neon_ee_o_loop2
_neon_ee_o_loop2_exit:
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_x4_i
@ Single in-place step (no loop) over four 32-byte blocks.
@   r0 = address of block 0
@   r1 = byte stride; the other blocks are at r0 + 2*r1 (r4), r0 + 4*r1 (r5)
@        and r0 + 6*r1 (r6)
@   r2 = address of 32 bytes of LUT data, loaded into q2,q3
@ Blocks 2 and 3 (r5, r6) are multiplied by the LUT values, the products
@ are combined, and the results are added to / subtracted from blocks 0
@ and 1. All four blocks are then overwritten at their original addresses.
@ NOTE(review): instructions ending in a bare @ look like the adds/subs that
@ differ from the forward-transform counterpart - confirm against that file.
.align 4
#ifdef __APPLE__
.globl _neon_static_x4_i
_neon_static_x4_i:
#else
.globl neon_static_x4_i
neon_static_x4_i:
#endif
@ add r3, r0, #0
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, lr}
vstmdb sp!, {d8-d15}
vld1.32 {q8,q9}, [r0, :128]
add r4, r0, r1, lsl #1
vld1.32 {q10,q11}, [r4, :128]
add r5, r0, r1, lsl #2
vld1.32 {q12,q13}, [r5, :128]
add r6, r4, r1, lsl #2
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q2,q3}, [r2, :128]
@ Eight products of blocks 2 and 3 with the LUT pair; each output below is
@ one add or one sub of two of them.
vmul.f32 q0, q13, q3
vmul.f32 q5, q12, q2
vmul.f32 q1, q14, q2
vmul.f32 q4, q14, q3
vmul.f32 q14, q12, q3
vmul.f32 q13, q13, q2
vmul.f32 q12, q15, q3
vmul.f32 q2, q15, q2
vsub.f32 q0, q5, q0
vadd.f32 q13, q13, q14
vadd.f32 q12, q12, q1
vsub.f32 q1, q2, q4
@ Sums and differences of the two multiplied blocks.
vadd.f32 q15, q0, q12
vsub.f32 q12, q0, q12
vadd.f32 q14, q13, q1
vsub.f32 q13, q13, q1
@ Combine with blocks 0 (q8,q9) and 1 (q10,q11) and write everything back.
vadd.f32 q0, q8, q15
vadd.f32 q1, q9, q14
vsub.f32 q2, q10, q13 @
vsub.f32 q4, q8, q15
vadd.f32 q3, q11, q12 @
vst1.32 {q0,q1}, [r0, :128]
vsub.f32 q5, q9, q14
vadd.f32 q6, q10, q13 @
vsub.f32 q7, q11, q12 @
vst1.32 {q2,q3}, [r4, :128]
vst1.32 {q4,q5}, [r5, :128]
vst1.32 {q6,q7}, [r6, :128]
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, pc}
@ neon_static_x8_i
@ In-place pass over eight data streams that lie r1 bytes apart.
@   r0 = address of stream 0 (data0)
@   r1 = distance in bytes between consecutive streams; the loop below
@        executes r1/32 times (assumes r1 >= 32 - TODO confirm with callers)
@   r2 = address of the LUT; 96 bytes of it are consumed per iteration
@ Each iteration reads 32 bytes from every stream, combines them with the
@ LUT values and writes 32 bytes back to every stream with vst1.32,
@ advancing all eight stream pointers by 32 bytes. Partial results for
@ data1 and data3 are parked in place (no write-back) and re-read later in
@ the same iteration.
@ NOTE(review): instructions ending in a bare @ look like the adds/subs that
@ differ from the forward-transform counterpart - confirm against that file.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_i
_neon_static_x8_i:
#else
.globl neon_static_x8_i
neon_static_x8_i:
#endif
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = -(r1 / 32): negative iteration counter that counts up to zero.
sub r11, r11, r1, lsr #5
neon_x8_loop:
@ First LUT pair (q2,q3) is applied to data3 (q14,q15) and data2 (q10,q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Bump the counter here; the flags are consumed by the bne at the loop end.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair, used for data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Park the partial result for data1 in place (pointer not advanced).
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
@ Park the partial result for data3 in place (pointer not advanced).
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
@ Final results for data0, data2, data4 and data6, stored with write-back.
vst1.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst1.32 {q2,q3}, [r5, :128]!
@ Third LUT pair, applied to data7 (q14,q15) and data5 (q12,q13).
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst1.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst1.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Re-read the partial result parked at data1 earlier in this iteration.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
@ Re-read the partial result parked at data3 earlier in this iteration.
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
@ Final results for data1, data3, data5 and data7.
vst1.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst1.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst1.32 {q4,q5}, [r8, :128]!
vst1.32 {q6,q7}, [r10, :128]!
@ Loop until the counter incremented by the adds above reaches zero.
bne neon_x8_loop
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
@ neon_static_x8_t_i
@ In-place pass over eight data streams that lie r1 bytes apart.
@   r0 = address of stream 0 (data0)
@   r1 = distance in bytes between consecutive streams; the loop below
@        executes r1/32 times (assumes r1 >= 32 - TODO confirm with callers)
@   r2 = address of the LUT; 96 bytes of it are consumed per iteration
@ Each iteration reads 32 bytes from every stream, combines them with the
@ LUT values and writes 32 bytes back to every stream, advancing all eight
@ stream pointers by 32 bytes. Partial results for data1 and data3 are
@ parked in place with vst1.32 (no write-back) and re-read later in the same
@ iteration. All final stores use vst2.32, so the two registers of each
@ stored pair are written element-interleaved; that is the only difference
@ from the instruction sequence of neon_static_x8_i.
@ Instructions that end in a bare @ are the adds/subs whose sense is the
@ opposite in neon_static_x8_t_f.
.align 4
#ifdef __APPLE__
.globl _neon_static_x8_t_i
_neon_static_x8_t_i:
#else
.globl neon_static_x8_t_i
neon_static_x8_t_i:
#endif
@ Save the core registers and d8-d15 that the body overwrites.
push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
vstmdb sp!, {d8-d15}
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
@ r11 = -(r1 / 32): negative iteration counter that counts up to zero.
sub r11, r11, r1, lsr #5
neon_x8_t_loop:
@ First LUT pair (q2,q3) is applied to data3 (q14,q15) and data2 (q10,q11).
vld1.32 {q2,q3}, [r12, :128]!
vld1.32 {q14,q15}, [r6, :128]
vld1.32 {q10,q11}, [r5, :128]
@ Bump the counter here; the flags are consumed by the bne at the loop end.
adds r11, r11, #1
vmul.f32 q12, q15, q2
vmul.f32 q8, q14, q3
vmul.f32 q13, q14, q2
vmul.f32 q9, q10, q3
vmul.f32 q1, q10, q2
vmul.f32 q0, q11, q2
vmul.f32 q14, q11, q3
vmul.f32 q15, q15, q3
@ Second LUT pair, used for data6 and data4 further down.
vld1.32 {q2,q3}, [r12, :128]!
vsub.f32 q10, q12, q8
vadd.f32 q11, q0, q9
vadd.f32 q8, q15, q13
vld1.32 {q12,q13}, [r4, :128]
vsub.f32 q9, q1, q14
vsub.f32 q15, q11, q10
vsub.f32 q14, q9, q8
vsub.f32 q4, q12, q15 @
vadd.f32 q6, q12, q15 @
vadd.f32 q5, q13, q14 @
vsub.f32 q7, q13, q14 @
vld1.32 {q14,q15}, [r9, :128]
vld1.32 {q12,q13}, [r7, :128]
vmul.f32 q1, q14, q2
vmul.f32 q0, q14, q3
@ Park the partial result for data1 in place (pointer not advanced).
vst1.32 {q4,q5}, [r4, :128]
vmul.f32 q14, q15, q3
vmul.f32 q4, q15, q2
vadd.f32 q15, q9, q8
@ Park the partial result for data3 in place (pointer not advanced).
vst1.32 {q6,q7}, [r6, :128]
vmul.f32 q8, q12, q3
vmul.f32 q5, q13, q3
vmul.f32 q12, q12, q2
vmul.f32 q9, q13, q2
vadd.f32 q14, q14, q1
vsub.f32 q13, q4, q0
vadd.f32 q0, q9, q8
vld1.32 {q8,q9}, [r3, :128]
vadd.f32 q1, q11, q10
vsub.f32 q12, q12, q5
vadd.f32 q11, q8, q15
vsub.f32 q8, q8, q15
vadd.f32 q2, q12, q14
vsub.f32 q10, q0, q13
vadd.f32 q15, q0, q13
vadd.f32 q13, q9, q1
vsub.f32 q9, q9, q1
vsub.f32 q12, q12, q14
vadd.f32 q0, q11, q2
vadd.f32 q1, q13, q15
vsub.f32 q4, q11, q2
vsub.f32 q2, q8, q10 @
vadd.f32 q3, q9, q12 @
@ Final results for data0, data2, data4 and data6: interleaving stores
@ with write-back.
vst2.32 {q0,q1}, [r3, :128]!
vsub.f32 q5, q13, q15
vld1.32 {q14,q15}, [r10, :128]
vsub.f32 q7, q9, q12 @
vld1.32 {q12,q13}, [r8, :128]
vst2.32 {q2,q3}, [r5, :128]!
@ Third LUT pair, applied to data7 (q14,q15) and data5 (q12,q13).
vld1.32 {q2,q3}, [r12, :128]!
vadd.f32 q6, q8, q10 @
vmul.f32 q8, q14, q2
vst2.32 {q4,q5}, [r7, :128]!
vmul.f32 q10, q15, q3
vmul.f32 q9, q13, q3
vmul.f32 q11, q12, q2
vmul.f32 q14, q14, q3
vst2.32 {q6,q7}, [r9, :128]!
vmul.f32 q15, q15, q2
vmul.f32 q12, q12, q3
vmul.f32 q13, q13, q2
vadd.f32 q10, q10, q8
vsub.f32 q11, q11, q9
@ Re-read the partial result parked at data1 earlier in this iteration.
vld1.32 {q8,q9}, [r4, :128]
vsub.f32 q14, q15, q14
vadd.f32 q15, q13, q12
vadd.f32 q13, q11, q10
vadd.f32 q12, q15, q14
vsub.f32 q15, q15, q14
vsub.f32 q14, q11, q10
@ Re-read the partial result parked at data3 earlier in this iteration.
vld1.32 {q10,q11}, [r6, :128]
vadd.f32 q0, q8, q13
vadd.f32 q1, q9, q12
vsub.f32 q2, q10, q15 @
vadd.f32 q3, q11, q14 @
vsub.f32 q4, q8, q13
@ Final results for data1, data3, data5 and data7.
vst2.32 {q0,q1}, [r4, :128]!
vsub.f32 q5, q9, q12
vadd.f32 q6, q10, q15 @
vst2.32 {q2,q3}, [r6, :128]!
vsub.f32 q7, q11, q14 @
vst2.32 {q4,q5}, [r8, :128]!
vst2.32 {q6,q7}, [r10, :128]!
@ Loop until the counter incremented by the adds above reaches zero.
bne neon_x8_t_loop
@ Restore d8-d15 and the core registers; popping into pc returns.
vldmia sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}

@ -0,0 +1,208 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "patterns.h"
/*
 * Write four addresses into d[0..3]: offset + (k << stride), with k taken in
 * the order 0, 2, 1, 3. An address that comes out negative is wrapped once by
 * adding N.
 */
void permute_addr(int N, int offset, int stride, int *d) {
	static const int order[4] = {0, 2, 1, 3};
	int k = 0;

	while (k < 4) {
		int addr = offset + (order[k] << stride);
		d[k] = (addr < 0) ? addr + N : addr;
		k++;
	}
}
/*
 * Recursively append groups of four input indices to the table at *is,
 * advancing *is by four for every group written.
 *
 *   is      cursor into the index table; moved past each group emitted
 *   bigN    value added to negative indices to wrap them
 *   N       size of the current sub-problem; nothing happens for N < 4
 *   poffset index offset of the current sub-problem (may be negative)
 *   offset  only adjusted and passed down the recursion
 *   stride  log2 of the index spacing at this level
 *   even    only passed down the recursion
 *   VL      a group is emitted only when twice its first (wrapped) index is
 *           a multiple of 2*VL
 *
 * Every stored index is doubled.
 */
void ffts_hardcodedleaf_is_rec(ptrdiff_t **is, int bigN, int N, int poffset, int offset, int stride, int even, int VL) {
	int k;

	if (N == 4) {
		int addr[4];

		permute_addr(bigN, poffset, stride, addr);
		if ((addr[0] * 2) % (VL * 2) == 0) {
			ptrdiff_t *out = *is;

			for (k = 0; k < 4; k++) {
				out[k] = addr[k] * 2;
			}
			*is = out + 4;
		}
		return;
	}

	if (N < 4) {
		return;
	}

	/* N > 4 from here on. */
	ffts_hardcodedleaf_is_rec(is, bigN, N/2, poffset, offset, stride + 1, even, VL);

	if (N/4 >= 4) {
		/* Both quarter-size branches are large enough to recurse into. */
		ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset+(1<<stride), offset+(N/2), stride + 2, 0, VL);
		ffts_hardcodedleaf_is_rec(is, bigN, N/4, poffset-(1<<stride), offset+(3*N/4), stride + 2, 0, VL);
		return;
	}

	/* Quarter-size branches are below four points: emit them directly. */
	{
		const int step = 1 << stride;
		int first = poffset + step;

		if (first < 0) {
			first += bigN;
		}
		first *= 2;

		if (first % (VL * 2) == 0) {
			ptrdiff_t *out = *is;
			const int quad = 1 << (stride + 2);

			out[0] = poffset + step;
			out[1] = poffset + step + quad;
			out[2] = poffset - step;
			out[3] = poffset - step + quad;
			for (k = 0; k < 4; k++) {
				if (out[k] < 0) {
					out[k] += bigN;
				}
				out[k] *= 2;
			}
			*is = out + 4;
		}
	}
}
/*
 * Build the input index table p->is and the loop counts p->i0 and p->i1.
 *
 *   p      plan to fill in; p->is is newly allocated with room for N/VL
 *          ptrdiff_t entries and is left NULL if that allocation fails
 *   N      transform size
 *   leafN  leaf size
 *   VL     vector length, forwarded to ffts_hardcodedleaf_is_rec
 *
 * p->i0 and p->i1 are set whether or not the allocation succeeds.
 */
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL) {
	int i, i0 = N/leafN/3+1, i1 = N/leafN/3, i2 = N/leafN/3;
	int stride = 0;
	int ratio;
	ptrdiff_t *is;

	/*
	 * stride = floor(log2(N/leafN)), computed with integers: truncating
	 * log(x)/log(2) to int can come out one too small when the quotient
	 * rounds just below the exact exponent.
	 */
	for (ratio = N/leafN; ratio > 1; ratio >>= 1) {
		stride++;
	}

	if ((N/leafN) % 3 > 1) i1++;

	p->i0 = i0;
	p->i1 = i1;

	p->is = malloc(N/VL * sizeof(ptrdiff_t));
	if (!p->is) {
		return; /* out of memory: p->is stays NULL for the caller to detect */
	}
	is = p->is;

	for (i = 0; i < i0; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
	}
	for (i = i0; i < i0+i1; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i, 0, stride+1, 1, VL);
		ffts_hardcodedleaf_is_rec(&is, N, leafN/2, i-(1<<stride), 0, stride+1, 1, VL);
	}
	for (i = 0-i2; i < 0; i++) {
		ffts_hardcodedleaf_is_rec(&is, N, leafN, i, 0, stride, 1, VL);
	}
}
/**
 * Recursively record one (input offset, output offset) pair per leaf.
 *
 * A sub-problem is a leaf when N == leafN on the even path, or when
 * N <= leafN on any other path. For a leaf, the pair is stored at
 * offsets[2*(ooffset/leafN)] (twice the input offset) and at the following
 * element (the output offset as given). Input offsets may be negative.
 * Non-leaf sub-problems of size 4 or less write nothing.
 *
 * @param offsets  table of pairs, indexed by ooffset/leafN
 * @param leafN    leaf size
 * @param N        size of the current sub-problem
 * @param ioffset  input offset of the current sub-problem
 * @param ooffset  output offset of the current sub-problem
 * @param stride   log2 of the input spacing at this level
 * @param even     non-zero while on the even (first-half) path
 */
void ffts_elaborate_offsets(ptrdiff_t *offsets, int leafN, int N, int ioffset, int ooffset, int stride, int even) {
	const int is_leaf = even ? (N == leafN) : (N <= leafN);

	if (is_leaf) {
		ptrdiff_t *pair = &offsets[2*(ooffset/leafN)];

		pair[0] = ioffset*2;
		pair[1] = ooffset;
		return;
	}

	if (N <= 4) {
		return;
	}

	/* Half-size branch stays on the current path; quarter-size ones do not. */
	ffts_elaborate_offsets(offsets, leafN, N/2, ioffset, ooffset, stride+1, even);
	ffts_elaborate_offsets(offsets, leafN, N/4, ioffset+(1<<stride), ooffset+N/2, stride+2, 0);
	if (N/4 >= leafN) {
		ffts_elaborate_offsets(offsets, leafN, N/4, ioffset-(1<<stride), ooffset+3*N/4, stride+2, 0);
	}
}
/*
 * qsort comparator for (input offset, output offset) pairs: orders by the
 * first ptrdiff_t of each element.
 *
 * Returns -1, 0 or 1. A three-way comparison is used instead of returning
 * the difference, because a ptrdiff_t difference truncated to int can lose
 * its sign (or overflow) for operands that are far apart.
 */
int compare_offsets(const void *a, const void *b) {
	const ptrdiff_t lhs = *(const ptrdiff_t *)a;
	const ptrdiff_t rhs = *(const ptrdiff_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
/*
 * Reverse the order of the low n bits of a: bit i of a becomes bit n-1-i of
 * the result. Bits of a at position n and above are ignored.
 *
 * n must be in the range 0..32; n <= 0 yields 0.
 *
 * The shifts are done on an unsigned 32-bit one so that bit 31 can be
 * reached without shifting into the sign bit of an int.
 */
uint32_t reverse_bits(uint32_t a, int n) {
	uint32_t x = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (a & ((uint32_t)1 << i)) {
			x |= (uint32_t)1 << (n-i-1);
		}
	}
	return x;
}
/*
 * Build p->offsets: one entry per leaf (N/leafN entries), each holding twice
 * the output offset of a leaf, ordered by ascending input offset.
 *
 *   p      plan to fill in; p->offsets is newly allocated and is left NULL
 *          if either allocation fails
 *   N      transform size
 *   leafN  leaf size
 */
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN) {
	/* Same integer expressions as the allocation and loop bounds need,
	 * evaluated once and held as unsigned counts. */
	const size_t n_slots = (size_t)(2 * N/leafN);
	const size_t n_leaves = (size_t)(N/leafN);
	ptrdiff_t *offsets;
	size_t i;

	p->offsets = NULL;

	/* Scratch table of (input offset, output offset) pairs. */
	offsets = malloc(n_slots * sizeof(ptrdiff_t));
	if (!offsets) {
		return;
	}

	ffts_elaborate_offsets(offsets, leafN, N, 0, 0, 1, 1);

	/* Wrap negative input offsets into the range starting at zero. */
	for (i = 0; i < n_slots; i += 2) {
		if (offsets[i] < 0) offsets[i] = N + offsets[i];
	}

	/* Order the pairs by input offset. */
	qsort(offsets, n_leaves, 2 * sizeof(ptrdiff_t), compare_offsets);

	p->offsets = malloc(n_leaves * sizeof(ptrdiff_t));
	if (p->offsets) {
		for (i = 0; i < n_leaves; i++) {
			p->offsets[i] = offsets[i*2+1]*2;
		}
	}

	free(offsets);
}
/*
int tree_count(int N, int leafN, int offset) {
if(N <= leafN) return 0;
int count = 0;
count += tree_count(N/4, leafN, offset);
count += tree_count(N/8, leafN, offset + N/4);
count += tree_count(N/8, leafN, offset + N/4 + N/8);
count += tree_count(N/4, leafN, offset + N/2);
count += tree_count(N/4, leafN, offset + 3*N/4);
return 1 + count;
}
void elaborate_tree(transform_index_t **p, int N, int leafN, int offset) {
if(N <= leafN) return;
elaborate_tree(p, N/4, leafN, offset);
elaborate_tree(p, N/8, leafN, offset + N/4);
elaborate_tree(p, N/8, leafN, offset + N/4 + N/8);
elaborate_tree(p, N/4, leafN, offset + N/2);
elaborate_tree(p, N/4, leafN, offset + 3*N/4);
(*p)[0] = N;
(*p)[1] = offset*2;
(*p)+=2;
}
void ffts_init_tree(ffts_plan_t *p, int N, int leafN) {
int count = tree_count(N, leafN, 0) + 1;
transform_index_t *ps = p->transforms = malloc(count * 2 * sizeof(transform_index_t));
//printf("count = %d\n", count);
elaborate_tree(&ps, N, leafN, 0);
#ifdef __ARM_NEON__
ps -= 2;
#endif
ps[0] = 0;
ps[1] = 0;
//int i;
//for(i=0;i<count;i++) {
// fprintf(stderr, "%lu %lu - %d\n", p->transforms[i*2], p->transforms[i*2+1],
// __builtin_ctzl(p->transforms[i*2]) - 5);
//}
}
*/

@ -0,0 +1,44 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * patterns.h -- declarations of the plan set-up helpers.
 * Only the prototypes live here; the definitions are in another file.
 */
#ifndef __PATTERNS_H__
#define __PATTERNS_H__
#include "ffts.h"
/*
 * Both helpers take the plan to fill in plus integer sizing arguments.
 * NOTE(review): N is presumably the transform size, leafN the leaf size and
 * VL a vector length -- confirm against the definitions before relying on it.
 */
void ffts_init_is(ffts_plan_t *p, int N, int leafN, int VL);
void ffts_init_offsets(ffts_plan_t *p, int N, int leafN);
/* ffts_init_tree is not declared: its prototype is kept only as a comment. */
//void ffts_init_tree(ffts_plan_t *p, int N, int leafN);
#endif

@ -0,0 +1,878 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * Three exported labels with no instructions of their own. _neon_x4, _neon_x8
 * and _neon_x8_t simply mark the address of whatever is assembled next.
 * NOTE(review): presumably placeholders so that references to the NEON
 * routines still resolve in this SSE build -- confirm against the callers.
 */
.globl _neon_x4
.align 4
_neon_x4:
.globl _neon_x8
.align 4
_neon_x8:
.globl _neon_x8_t
.align 4
_neon_x8_t:
/*
 * leaf_ee_init: set-up fragment placed ahead of the leaf loops.
 * It loads a 64-bit pointer from byte offset 0xe0 of the object addressed by
 * rdi into r9 (the leaf code reads its constant vectors through r9) and
 * clears eax, which the leaf code uses as its running index.
 * There is no ret: control continues straight into leaf_ee.
 * NOTE(review): offset 0xe0 is presumably the constants pointer inside
 * ffts_plan_t -- confirm against the struct layout.
 */
#ifdef __APPLE__
.globl _leaf_ee_init
_leaf_ee_init:
#else
.globl leaf_ee_init
leaf_ee_init:
#endif
#lea L_sse_constants(%rip), %r9
movq 0xe0(%rdi), %r9
xorl %eax, %eax
/*
 * leaf_ee: first leaf loop.
 *
 * Ahead of the loop head LEAF_EE_1, xmm0 is loaded from 32(%r9) and xmm8 from
 * (%r9). Each pass of the loop then
 *   - loads eight 16-byte vectors from rsi + rax*4 + disp, one at every
 *     LEAF_EE_const_N label. Each disp is written as the placeholder 0xFECA;
 *     the table sse_leaf_ee_offsets further down records where each of those
 *     displacement fields sits. NOTE(review): presumably the code generator
 *     overwrites the placeholders with real input offsets before this code is
 *     run -- the patching code is not in this file;
 *   - reads a third constant vector from 16(%r9);
 *   - sign-extends two 32-bit entries of the offsets array, the one at
 *     r8 + rax*4 and the one 8 bytes after it, into r11 and r12;
 *   - stores four result vectors at rdx + r11*4 and four at rdx + r12*4;
 *   - adds 4 to rax and repeats until rax equals rcx.
 * The exit test is an equality test made after the increment, so rcx has to
 * be a non-zero value that rax reaches in steps of 4.
 * No ret follows the loop: control runs on into leaf_oo.
 * The loop head should land at leaf_ee + 9 (the two loads ahead of it encode
 * to 5 and 4 bytes), which is the address the alignment note below refers to.
 */
# eax is loop counter (init to 0)
# rcx is loop max count
# rsi is 'in' base pointer
# rdx is 'out' base pointer
# r8 is offsets pointer
# r9 is constants pointer
# scratch: rax r11 r12
# .align 4, 0x90
# _leaf_ee + 9 needs 16 byte alignment
#ifdef __APPLE__
.globl _leaf_ee
_leaf_ee:
#else
.globl leaf_ee
leaf_ee:
#endif
movaps 32(%r9), %xmm0 #83.5
movaps (%r9), %xmm8 #83.5
LEAF_EE_1:
LEAF_EE_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #83.5
LEAF_EE_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #83.5
movaps %xmm7, %xmm6 #83.5
LEAF_EE_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
movaps %xmm12, %xmm11 #83.5
subps %xmm10, %xmm12 #83.5
addps %xmm10, %xmm11 #83.5
xorps %xmm8, %xmm12 #83.5
LEAF_EE_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #83.5
LEAF_EE_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #83.5
addps %xmm9, %xmm6 #83.5
subps %xmm9, %xmm7 #83.5
LEAF_EE_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm13 #83.5
movaps %xmm10, %xmm9 #83.5
LEAF_EE_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm3 #83.5
movaps %xmm6, %xmm5 #83.5
LEAF_EE_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm14 #83.5
movaps %xmm3, %xmm15 #83.5
shufps $177, %xmm12, %xmm12 #83.5
movaps %xmm7, %xmm4 #83.5
movslq (%r8, %rax, 4), %r11 #83.44
subps %xmm13, %xmm10 #83.5
subps %xmm14, %xmm3 #83.5
addps %xmm11, %xmm5 #83.5
subps %xmm11, %xmm6 #83.5
subps %xmm12, %xmm4 #83.5
addps %xmm12, %xmm7 #83.5
addps %xmm13, %xmm9 #83.5
addps %xmm14, %xmm15 #83.5
movaps 16(%r9), %xmm12 #83.5
movaps %xmm9, %xmm1 #83.5
movaps 16(%r9), %xmm11 #83.5
movaps %xmm5, %xmm2 #83.5
mulps %xmm10, %xmm12 #83.5
subps %xmm15, %xmm9 #83.5
addps %xmm15, %xmm1 #83.5
mulps %xmm3, %xmm11 #83.5
addps %xmm1, %xmm2 #83.5
subps %xmm1, %xmm5 #83.5
shufps $177, %xmm10, %xmm10 #83.5
xorps %xmm8, %xmm9 #83.5
shufps $177, %xmm3, %xmm3 #83.5
movaps %xmm6, %xmm1 #83.5
mulps %xmm0, %xmm10 #83.5
movaps %xmm4, %xmm13 #83.5
mulps %xmm0, %xmm3 #83.5
subps %xmm10, %xmm12 #83.5
addps %xmm3, %xmm11 #83.5
movaps %xmm12, %xmm3 #83.5
movaps %xmm7, %xmm14 #83.5
shufps $177, %xmm9, %xmm9 #83.5
subps %xmm11, %xmm12 #83.5
addps %xmm11, %xmm3 #83.5
subps %xmm9, %xmm1 #83.5
addps %xmm9, %xmm6 #83.5
addps %xmm3, %xmm4 #83.5
subps %xmm3, %xmm13 #83.5
xorps %xmm8, %xmm12 #83.5
movaps %xmm2, %xmm3 #83.5
shufps $177, %xmm12, %xmm12 #83.5
movaps %xmm6, %xmm9 #83.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movlhps %xmm4, %xmm3 #83.5
addq $4, %rax
shufps $238, %xmm4, %xmm2 #83.5
movaps %xmm1, %xmm4 #83.5
#movntdq %xmm3, (%rdx,%r11,4) #83.5
subps %xmm12, %xmm7 #83.5
addps %xmm12, %xmm14 #83.5
movlhps %xmm7, %xmm4 #83.5
shufps $238, %xmm7, %xmm1 #83.5
movaps %xmm5, %xmm7 #83.5
movlhps %xmm13, %xmm7 #83.5
movlhps %xmm14, %xmm9 #83.5
shufps $238, %xmm13, %xmm5 #83.5
shufps $238, %xmm14, %xmm6 #83.5
movaps %xmm3, (%rdx,%r11,4) #83.5
movaps %xmm4, 16(%rdx,%r11,4) #83.5
movaps %xmm7, 32(%rdx,%r11,4) #83.5
movaps %xmm9, 48(%rdx,%r11,4) #83.5
movaps %xmm2, (%rdx,%r12,4) #83.5
movaps %xmm1, 16(%rdx,%r12,4) #83.5
movaps %xmm5, 32(%rdx,%r12,4) #83.5
movaps %xmm6, 48(%rdx,%r12,4) #83.5
cmpq %rcx, %rax
jne LEAF_EE_1
/*
 * leaf_oo: second leaf loop.
 *
 * xmm5 is loaded once from (%r9) ahead of the loop head LEAF_OO_1 and is only
 * used as an xorps operand. Each pass then
 *   - loads eight 16-byte vectors from rsi + rax*4 + disp at the
 *     LEAF_OO_const_N labels (disp is the 0xFECA placeholder whose position
 *     is recorded in sse_leaf_oo_offsets);
 *   - combines them with additions, subtractions, xorps and shuffles only,
 *     there is no multiply in this fragment;
 *   - sign-extends the 32-bit offsets at r8 + rax*4 and 8 bytes after it into
 *     r11 and r12, then stores four vectors at rdx + r11*4 and four at
 *     rdx + r12*4;
 *   - adds 4 to rax and repeats until rax equals rcx.
 * rax is not reset here; it carries whatever value it had on entry.
 * No ret follows the loop: control runs on into leaf_eo.
 * The loop head should land at leaf_oo + 4 (the single load ahead of it
 * encodes to 4 bytes), which is the address the alignment note refers to.
 */
# _leaf_oo + 4 needs to be 16 byte aligned
#ifdef __APPLE__
.globl _leaf_oo
_leaf_oo:
#else
.globl leaf_oo
leaf_oo:
#endif
movaps (%r9), %xmm5 #92.7
LEAF_OO_1:
LEAF_OO_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #93.5
movaps %xmm4, %xmm6 #93.5
LEAF_OO_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #93.5
LEAF_OO_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm10 #93.5
addps %xmm7, %xmm6 #93.5
subps %xmm7, %xmm4 #93.5
LEAF_OO_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm8 #93.5
movaps %xmm10, %xmm9 #93.5
LEAF_OO_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm1 #93.5
movaps %xmm6, %xmm3 #93.5
LEAF_OO_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm11 #93.5
movaps %xmm1, %xmm2 #93.5
LEAF_OO_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm14 #93.5
movaps %xmm4, %xmm15 #93.5
LEAF_OO_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #93.5
movaps %xmm14, %xmm13 #93.5
movslq (%r8, %rax, 4), %r11 #83.44
subps %xmm8, %xmm10 #93.5
addps %xmm8, %xmm9 #93.5
addps %xmm11, %xmm2 #93.5
subps %xmm12, %xmm14 #93.5
subps %xmm11, %xmm1 #93.5
addps %xmm12, %xmm13 #93.5
addps %xmm9, %xmm3 #93.5
subps %xmm9, %xmm6 #93.5
xorps %xmm5, %xmm10 #93.5
xorps %xmm5, %xmm14 #93.5
shufps $177, %xmm10, %xmm10 #93.5
movaps %xmm2, %xmm9 #93.5
shufps $177, %xmm14, %xmm14 #93.5
movaps %xmm6, %xmm7 #93.5
movslq 8(%r8, %rax, 4), %r12 #83.59
addq $4, %rax #92.18
addps %xmm10, %xmm4 #93.5
addps %xmm13, %xmm9 #93.5
subps %xmm13, %xmm2 #93.5
subps %xmm10, %xmm15 #93.5
movaps %xmm1, %xmm13 #93.5
movaps %xmm2, %xmm8 #93.5
movlhps %xmm4, %xmm7 #93.5
subps %xmm14, %xmm13 #93.5
addps %xmm14, %xmm1 #93.5
shufps $238, %xmm4, %xmm6 #93.5
movaps %xmm3, %xmm14 #93.5
movaps %xmm9, %xmm4 #93.5
movlhps %xmm15, %xmm14 #93.5
movlhps %xmm13, %xmm4 #93.5
movlhps %xmm1, %xmm8 #93.5
shufps $238, %xmm15, %xmm3 #93.5
shufps $238, %xmm13, %xmm9 #93.5
shufps $238, %xmm1, %xmm2 #93.5
movaps %xmm14, (%rdx,%r11,4) #93.5
movaps %xmm7, 16(%rdx,%r11,4) #93.5
movaps %xmm4, 32(%rdx,%r11,4) #93.5
movaps %xmm8, 48(%rdx,%r11,4) #93.5
movaps %xmm3, (%rdx,%r12,4) #93.5
movaps %xmm6, 16(%rdx,%r12,4) #93.5
movaps %xmm9, 32(%rdx,%r12,4) #93.5
movaps %xmm2, 48(%rdx,%r12,4) #93.5
cmpq %rcx, %rax
jne LEAF_OO_1 # Prob 95% #92.14
/*
 * leaf_eo: straight-line leaf fragment (no loop, one pass per execution).
 *
 * It loads eight 16-byte vectors from rsi + rax*4 + disp at the
 * LEAF_EO_const_N labels (disp is the 0xFECA placeholder whose position is
 * recorded in sse_leaf_eo_offsets), reads the constant vectors at (%r9),
 * 0x30(%r9) and 0x40(%r9), and sign-extends the 32-bit offsets at r8 + rax*4
 * and 8 bytes after it into r11 and r12. Four vectors are stored at
 * rdx + r12*4 (two of them before the second group of inputs is loaded) and
 * four at rdx + r11*4. rax is advanced by 4 part-way through, after all of
 * the rax-indexed loads have been issued.
 * No ret at the end: control runs on into leaf_oe.
 */
#ifdef __APPLE__
.globl _leaf_eo
_leaf_eo:
#else
.globl leaf_eo
leaf_eo:
#endif
LEAF_EO_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #88.5
LEAF_EO_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #88.5
movaps %xmm9, %xmm11 #88.5
LEAF_EO_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm5 #88.5
movaps %xmm7, %xmm6 #88.5
LEAF_EO_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
subps %xmm5, %xmm7 #88.5
addps %xmm4, %xmm11 #88.5
subps %xmm4, %xmm9 #88.5
addps %xmm5, %xmm6 #88.5
movaps (%r9), %xmm3 #88.5
movaps %xmm11, %xmm10 #88.5
xorps %xmm3, %xmm7 #88.5
movaps %xmm9, %xmm8 #88.5
shufps $177, %xmm7, %xmm7 #88.5
addps %xmm6, %xmm10 #88.5
subps %xmm6, %xmm11 #88.5
subps %xmm7, %xmm8 #88.5
addps %xmm7, %xmm9 #88.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movaps %xmm10, %xmm2 #88.5
movslq (%r8, %rax, 4), %r11 #83.44
movaps %xmm11, %xmm1 #88.5
shufps $238, %xmm8, %xmm10 #88.5
shufps $238, %xmm9, %xmm11 #88.5
movaps %xmm10, (%rdx,%r12,4) #88.5
movaps %xmm11, 16(%rdx,%r12,4) #88.5
LEAF_EO_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm15 #88.5
LEAF_EO_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #88.5
movaps %xmm15, %xmm14 #88.5
LEAF_EO_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm4 #88.5
addps %xmm12, %xmm14 #88.5
subps %xmm12, %xmm15 #88.5
LEAF_EO_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm13 #88.5
movaps %xmm4, %xmm5 #88.5
movaps %xmm14, %xmm7 #88.5
addps %xmm13, %xmm5 #88.5
subps %xmm13, %xmm4 #88.5
movlhps %xmm8, %xmm2 #88.5
movaps %xmm5, %xmm8 #88.5
movlhps %xmm15, %xmm7 #88.5
xorps %xmm3, %xmm15 #88.5
movaps %xmm5, %xmm6 #88.5
subps %xmm14, %xmm5 #88.5
addps %xmm14, %xmm6 #88.5
movlhps %xmm9, %xmm1 #88.5
movaps %xmm4, %xmm14 #88.5
movlhps %xmm4, %xmm8 #88.5
movaps %xmm1, %xmm12 #88.5
shufps $177, %xmm15, %xmm15 #88.5
movaps 0x30(%r9), %xmm11 #88.5
addq $4, %rax #90.5
subps %xmm15, %xmm14 #88.5
mulps %xmm7, %xmm11 #88.5
addps %xmm15, %xmm4 #88.5
movaps 0x30(%r9), %xmm9 #88.5
movaps 0x40(%r9), %xmm15 #88.5
shufps $177, %xmm7, %xmm7 #88.5
mulps %xmm8, %xmm9 #88.5
mulps %xmm15, %xmm7 #88.5
shufps $177, %xmm8, %xmm8 #88.5
subps %xmm7, %xmm11 #88.5
mulps %xmm15, %xmm8 #88.5
movaps %xmm11, %xmm10 #88.5
addps %xmm8, %xmm9 #88.5
shufps $238, %xmm14, %xmm6 #88.5
subps %xmm9, %xmm11 #88.5
addps %xmm9, %xmm10 #88.5
xorps %xmm3, %xmm11 #88.5
movaps %xmm2, %xmm3 #88.5
shufps $177, %xmm11, %xmm11 #88.5
subps %xmm10, %xmm3 #88.5
addps %xmm10, %xmm2 #88.5
addps %xmm11, %xmm12 #88.5
subps %xmm11, %xmm1 #88.5
shufps $238, %xmm4, %xmm5 #88.5
movaps %xmm5, 48(%rdx,%r12,4) #88.5
movaps %xmm6, 32(%rdx,%r12,4) #88.5
movaps %xmm2, (%rdx,%r11,4) #88.5
movaps %xmm1, 16(%rdx,%r11,4) #88.5
movaps %xmm3, 32(%rdx,%r11,4) #88.5
movaps %xmm12, 48(%rdx,%r11,4) #88.5
/*
 * leaf_oe: straight-line leaf fragment (no loop, one pass per execution).
 *
 * xmm0 is loaded from (%r9) and used as the xorps operand throughout. The
 * fragment loads eight 16-byte vectors from rsi + rax*4 + disp at the
 * LEAF_OE_const_N labels (disp is the 0xFECA placeholder whose position is
 * recorded in sse_leaf_oe_offsets), multiplies by the constant vectors at
 * 0x30(%r9) and 0x40(%r9), and sign-extends the 32-bit offsets at r8 + rax*4
 * and 8 bytes after it into r11 and r12. Four vectors are stored at
 * rdx + r11*4 and four at rdx + r12*4. rax is advanced by 4 after the last
 * rax-indexed load.
 * No ret at the end: control runs on to the leaf_end label.
 */
#ifdef __APPLE__
.globl _leaf_oe
_leaf_oe:
#else
.globl leaf_oe
leaf_oe:
#endif
movaps (%r9), %xmm0 #59.5
#movaps 0x20(%r9), %xmm1 #59.5
LEAF_OE_const_2:
movaps 0xFECA(%rsi,%rax,4), %xmm6 #70.5
LEAF_OE_const_3:
movaps 0xFECA(%rsi,%rax,4), %xmm8 #70.5
movaps %xmm6, %xmm10 #70.5
shufps $228, %xmm8, %xmm10 #70.5
movaps %xmm10, %xmm9 #70.5
shufps $228, %xmm6, %xmm8 #70.5
LEAF_OE_const_0:
movaps 0xFECA(%rsi,%rax,4), %xmm12 #70.5
LEAF_OE_const_1:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
movaps %xmm12, %xmm14 #70.5
movslq (%r8, %rax, 4), %r11 #83.44
addps %xmm8, %xmm9 #70.5
subps %xmm8, %xmm10 #70.5
addps %xmm7, %xmm14 #70.5
subps %xmm7, %xmm12 #70.5
movaps %xmm9, %xmm4 #70.5
movaps %xmm14, %xmm13 #70.5
shufps $238, %xmm10, %xmm4 #70.5
xorps %xmm0, %xmm10 #70.5
shufps $177, %xmm10, %xmm10 #70.5
movaps %xmm12, %xmm11 #70.5
movaps %xmm14, %xmm5 #70.5
addps %xmm9, %xmm13 #70.5
subps %xmm10, %xmm11 #70.5
subps %xmm9, %xmm14 #70.5
shufps $238, %xmm12, %xmm5 #70.5
addps %xmm10, %xmm12 #70.5
movslq 8(%r8, %rax, 4), %r12 #83.59
movlhps %xmm11, %xmm13 #70.5
movaps %xmm13, (%rdx,%r11,4) #70.5
movaps 0x30(%r9), %xmm13 #70.5
movlhps %xmm12, %xmm14 #70.5
movaps 0x40(%r9), %xmm12 #70.5
mulps %xmm5, %xmm13 #70.5
shufps $177, %xmm5, %xmm5 #70.5
mulps %xmm12, %xmm5 #70.5
movaps %xmm14, 16(%rdx,%r11,4) #70.5
subps %xmm5, %xmm13 #70.5
movaps 0x30(%r9), %xmm5 #70.5
mulps %xmm4, %xmm5 #70.5
shufps $177, %xmm4, %xmm4 #70.5
mulps %xmm12, %xmm4 #70.5
LEAF_OE_const_4:
movaps 0xFECA(%rsi,%rax,4), %xmm9 #70.5
addps %xmm4, %xmm5 #70.5
LEAF_OE_const_6:
movaps 0xFECA(%rsi,%rax,4), %xmm7 #70.5
movaps %xmm9, %xmm3 #70.5
LEAF_OE_const_7:
movaps 0xFECA(%rsi,%rax,4), %xmm2 #70.5
movaps %xmm7, %xmm6 #70.5
LEAF_OE_const_5:
movaps 0xFECA(%rsi,%rax,4), %xmm15 #70.5
movaps %xmm13, %xmm4 #70.5
subps %xmm2, %xmm7 #70.5
addps %xmm15, %xmm3 #70.5
subps %xmm15, %xmm9 #70.5
addps %xmm2, %xmm6 #70.5
subps %xmm5, %xmm13 #70.5
addps %xmm5, %xmm4 #70.5
xorps %xmm0, %xmm7 #70.5
addq $4, %rax #72.5
movaps %xmm3, %xmm2 #70.5
shufps $177, %xmm7, %xmm7 #70.5
movaps %xmm9, %xmm8 #70.5
xorps %xmm0, %xmm13 #70.5
addps %xmm6, %xmm2 #70.5
subps %xmm7, %xmm8 #70.5
subps %xmm6, %xmm3 #70.5
addps %xmm7, %xmm9 #70.5
movaps %xmm2, %xmm10 #70.5
movaps %xmm3, %xmm11 #70.5
shufps $238, %xmm8, %xmm2 #70.5
shufps $238, %xmm9, %xmm3 #70.5
movaps %xmm2, %xmm14 #70.5
shufps $177, %xmm13, %xmm13 #70.5
subps %xmm4, %xmm14 #70.5
addps %xmm4, %xmm2 #70.5
movaps %xmm3, %xmm4 #70.5
subps %xmm13, %xmm3 #70.5
addps %xmm13, %xmm4 #70.5
movlhps %xmm8, %xmm10 #70.5
movlhps %xmm9, %xmm11 #70.5
movaps %xmm10, 32(%rdx,%r11,4) #70.5
movaps %xmm11, 48(%rdx,%r11,4) #70.5
movaps %xmm2, (%rdx,%r12,4) #70.5
movaps %xmm3, 16(%rdx,%r12,4) #70.5
movaps %xmm14, 32(%rdx,%r12,4) #70.5
movaps %xmm4, 48(%rdx,%r12,4) #70.5
/*
 * leaf_end: exported label with no instructions of its own; it marks the
 * first byte after leaf_oe.
 * NOTE(review): presumably used together with the leaf_* labels to measure
 * the size of each leaf fragment -- confirm against the code generator.
 */
#ifdef __APPLE__
.globl _leaf_end
_leaf_end:
#else
.globl leaf_end
leaf_end:
#endif
/*
 * x_init: set-up fragment for the x4 / x8 code.
 * Loads the first constant vector at (%r9) into xmm3, which x4 and x8_soft
 * use as their xorps operand, and loads a 64-bit pointer from byte offset
 * 0x20 of the object addressed by rdi into r8, which x4 reads its
 * coefficient vectors through.
 * There is no ret: in this file the x4 label follows immediately.
 * NOTE(review): offset 0x20 is presumably a coefficient table pointer inside
 * ffts_plan_t -- confirm against the struct layout.
 */
#ifdef __APPLE__
.globl _x_init
_x_init:
#else
.globl x_init
x_init:
#endif
#movaps L_sse_constants(%rip), %xmm3 #34.3
movaps (%r9), %xmm3 #34.3
movq 0x20(%rdi),%r8
/*
 * x4: in-place pass over the 128 bytes at rdx, handled as eight 16-byte
 * vectors at offsets 0, 16, ... 112.
 *
 * First half: the vectors at 64 and 96 are multiplied by the coefficient
 * vectors at (%r8) and 16(%r8), combined, and added to / subtracted from the
 * vectors at 0 and 32; results go back to offsets 0, 32, 64 and 96.
 * Second half: the same pattern on the vectors at 80 and 112 with the
 * coefficients at 32(%r8) and 48(%r8), combined with the vectors at 16 and
 * 48; results go back to offsets 16, 48, 80 and 112.
 * xmm3 is used as the xorps operand and is not loaded here; x_init loads it.
 * All accesses are movaps, so rdx and r8 must be 16-byte aligned.
 * Ends with ret.
 */
#ifdef __APPLE__
.globl _x4
_x4:
#else
.globl x4
x4:
#endif
movaps 64(%rdx), %xmm0 #34.3
movaps 96(%rdx), %xmm1 #34.3
movaps (%rdx), %xmm7 #34.3
movaps (%r8), %xmm4 #const
movaps %xmm7, %xmm9 #34.3
movaps %xmm4, %xmm6 #34.3
movaps 16(%r8), %xmm2 #const
mulps %xmm0, %xmm6 #34.3
mulps %xmm1, %xmm4 #34.3
shufps $177, %xmm0, %xmm0 #34.3
shufps $177, %xmm1, %xmm1 #34.3
mulps %xmm2, %xmm0 #34.3
mulps %xmm1, %xmm2 #34.3
subps %xmm0, %xmm6 #34.3
addps %xmm2, %xmm4 #34.3
movaps %xmm6, %xmm5 #34.3
subps %xmm4, %xmm6 #34.3
addps %xmm4, %xmm5 #34.3
movaps 32(%rdx), %xmm8 #34.3
xorps %xmm3, %xmm6 #34.3
shufps $177, %xmm6, %xmm6 #34.3
movaps %xmm8, %xmm10 #34.3
movaps 112(%rdx), %xmm12 #34.3
subps %xmm5, %xmm9 #34.3
addps %xmm5, %xmm7 #34.3
addps %xmm6, %xmm10 #34.3
subps %xmm6, %xmm8 #34.3
movaps %xmm7, (%rdx) #34.3
movaps %xmm8, 32(%rdx) #34.3
movaps %xmm9, 64(%rdx) #34.3
movaps %xmm10, 96(%rdx) #34.3
movaps 32(%r8), %xmm14 #const #34.3
movaps 80(%rdx), %xmm11 #34.3
movaps %xmm14, %xmm0 #34.3
movaps 48(%r8), %xmm13 #const #34.3
mulps %xmm11, %xmm0 #34.3
mulps %xmm12, %xmm14 #34.3
shufps $177, %xmm11, %xmm11 #34.3
shufps $177, %xmm12, %xmm12 #34.3
mulps %xmm13, %xmm11 #34.3
mulps %xmm12, %xmm13 #34.3
subps %xmm11, %xmm0 #34.3
addps %xmm13, %xmm14 #34.3
movaps %xmm0, %xmm15 #34.3
subps %xmm14, %xmm0 #34.3
addps %xmm14, %xmm15 #34.3
xorps %xmm3, %xmm0 #34.3
movaps 16(%rdx), %xmm1 #34.3
movaps 48(%rdx), %xmm2 #34.3
movaps %xmm1, %xmm4 #34.3
shufps $177, %xmm0, %xmm0 #34.3
movaps %xmm2, %xmm5 #34.3
addps %xmm15, %xmm1 #34.3
subps %xmm0, %xmm2 #34.3
subps %xmm15, %xmm4 #34.3
addps %xmm0, %xmm5 #34.3
movaps %xmm1, 16(%rdx) #34.3
movaps %xmm2, 48(%rdx) #34.3
movaps %xmm4, 80(%rdx) #34.3
movaps %xmm5, 112(%rdx) #34.3
ret
/*
 * x8_soft: in-place loop over eight data streams, with all addresses
 * computed at run time (no patched displacements).
 *
 * Set-up: rax = 0, rbx = rdx, rsi = r8, and r9, r10, ... r15 are formed by
 * repeatedly adding rcx*4 bytes, so the eight stream pointers rbx, r9 ... r15
 * start at rdx and are spaced rcx*4 bytes apart.
 * Each pass of X8_soft_loop loads one 16-byte vector from every stream at
 * byte offset rax*4, reads six coefficient vectors (96 bytes) at rsi and then
 * advances rsi by 96, and stores eight result vectors back to the same eight
 * locations. rax is advanced by 4 and the loop repeats until rax equals rcx.
 * xmm3 is used as the xorps operand and is not loaded here; x_init loads it.
 * Ends with ret.
 * The routine overwrites rbx, rsi and r9-r15 without saving them, so the
 * caller has to preserve any of those it still needs. In particular r9 no
 * longer points at the constants once this code has run.
 */
# _x8_soft + 5 needs to be 16 byte aligned
#ifdef __APPLE__
.globl _x8_soft
_x8_soft:
#else
.globl x8_soft
x8_soft:
#endif
xorl %eax, %eax
movq %rdx, %rbx
movq %r8, %rsi
leaq (%rdx,%rcx,4), %r9
leaq (%r9,%rcx,4), %r10
leaq (%r10,%rcx,4), %r11
leaq (%r11,%rcx,4), %r12
leaq (%r12,%rcx,4), %r13
leaq (%r13,%rcx,4), %r14
leaq (%r14,%rcx,4), %r15
X8_soft_loop:
movaps (%rsi), %xmm9
movaps (%r10,%rax,4), %xmm6
movaps %xmm9, %xmm11
movaps (%r11,%rax,4), %xmm7
movaps 16(%rsi), %xmm8
mulps %xmm6, %xmm11
mulps %xmm7, %xmm9
shufps $177, %xmm6, %xmm6
mulps %xmm8, %xmm6
shufps $177, %xmm7, %xmm7
subps %xmm6, %xmm11
mulps %xmm7, %xmm8
movaps %xmm11, %xmm10
addps %xmm8, %xmm9
movaps 32(%rsi), %xmm15
addps %xmm9, %xmm10
subps %xmm9, %xmm11
movaps (%rbx,%rax,4), %xmm5
movaps %xmm15, %xmm6
movaps (%r12,%rax,4), %xmm12
movaps %xmm5, %xmm2
movaps (%r14,%rax,4), %xmm13
xorps %xmm3, %xmm11 #const
movaps 48(%rsi), %xmm14
subps %xmm10, %xmm2
mulps %xmm12, %xmm6
addps %xmm10, %xmm5
mulps %xmm13, %xmm15
movaps 64(%rsi), %xmm10
movaps %xmm5, %xmm0
shufps $177, %xmm12, %xmm12
shufps $177, %xmm13, %xmm13
mulps %xmm14, %xmm12
mulps %xmm13, %xmm14
subps %xmm12, %xmm6
addps %xmm14, %xmm15
movaps (%r13,%rax,4), %xmm7
movaps %xmm10, %xmm13
movaps (%r15,%rax,4), %xmm8
movaps %xmm6, %xmm12
movaps 80(%rsi), %xmm9
addq $96, %rsi
mulps %xmm7, %xmm13
subps %xmm15, %xmm6
addps %xmm15, %xmm12
mulps %xmm8, %xmm10
subps %xmm12, %xmm0
addps %xmm12, %xmm5
shufps $177, %xmm7, %xmm7
xorps %xmm3, %xmm6 #const
shufps $177, %xmm8, %xmm8
movaps %xmm2, %xmm12
mulps %xmm9, %xmm7
mulps %xmm8, %xmm9
subps %xmm7, %xmm13
addps %xmm9, %xmm10
movaps (%r9,%rax,4), %xmm4
shufps $177, %xmm11, %xmm11
movaps %xmm4, %xmm1
shufps $177, %xmm6, %xmm6
addps %xmm11, %xmm1
subps %xmm11, %xmm4
addps %xmm6, %xmm12
subps %xmm6, %xmm2
movaps %xmm13, %xmm11
movaps %xmm4, %xmm14
movaps %xmm1, %xmm6
subps %xmm10, %xmm13
addps %xmm10, %xmm11
xorps %xmm3, %xmm13 #const
addps %xmm11, %xmm4
subps %xmm11, %xmm14
shufps $177, %xmm13, %xmm13
movaps %xmm5, (%rbx,%rax,4)
movaps %xmm4, (%r9,%rax,4)
movaps %xmm2, (%r10,%rax,4)
subps %xmm13, %xmm1
addps %xmm13, %xmm6
movaps %xmm1, (%r11,%rax,4)
movaps %xmm0, (%r12,%rax,4)
movaps %xmm14, (%r13,%rax,4)
movaps %xmm12, (%r14,%rax,4)
movaps %xmm6, (%r15,%rax,4)
addq $4, %rax
cmpq %rcx, %rax
jne X8_soft_loop
ret
/*
 * x8_hard: the same eight-stream pass as x8_soft, but with every data
 * address written as rdx + rax*4 + disp, where disp is the 0xFECA
 * placeholder. The loads sit at the X8_const_N labels and the stores at the
 * X8_const1_N labels.
 * NOTE(review): presumably the code generator replaces each placeholder with
 * the real stream offset before this fragment is run -- the patching code and
 * any offset table for these labels are not in this file.
 *
 * xmm5 is loaded once from (%r9) and used as the xorps operand. Each pass of
 * X8_loop reads six coefficient vectors (96 bytes) at r8, advances r8 by 96,
 * loads eight data vectors, stores eight result vectors, adds 4 to rax and
 * repeats until rax equals rcx. rax is not initialised in this fragment.
 * There is no ret after the loop; in this file the offset tables follow
 * directly, so the fragment is not callable as it stands.
 */
#ifdef __APPLE__
.globl _x8_hard
_x8_hard:
#else
.globl x8_hard
x8_hard:
#endif
movaps (%r9), %xmm5
X8_loop:
movaps (%r8), %xmm9
X8_const_2:
movaps 0xFECA(%rdx,%rax,4), %xmm6
movaps %xmm9, %xmm11
X8_const_3:
movaps 0xFECA(%rdx,%rax,4), %xmm7
movaps 16(%r8), %xmm8
mulps %xmm6, %xmm11
mulps %xmm7, %xmm9
shufps $177, %xmm6, %xmm6
mulps %xmm8, %xmm6
shufps $177, %xmm7, %xmm7
subps %xmm6, %xmm11
mulps %xmm7, %xmm8
movaps %xmm11, %xmm10
addps %xmm8, %xmm9
movaps 32(%r8), %xmm15
addps %xmm9, %xmm10
subps %xmm9, %xmm11
X8_const_0:
movaps 0xFECA(%rdx,%rax,4), %xmm3
movaps %xmm15, %xmm6
X8_const_4:
movaps 0xFECA(%rdx,%rax,4), %xmm12
movaps %xmm3, %xmm2
X8_const_6:
movaps 0xFECA(%rdx,%rax,4), %xmm13
xorps %xmm5, %xmm11
movaps 48(%r8), %xmm14
subps %xmm10, %xmm2
mulps %xmm12, %xmm6
addps %xmm10, %xmm3
mulps %xmm13, %xmm15
movaps 64(%r8), %xmm10
movaps %xmm3, %xmm0
shufps $177, %xmm12, %xmm12
shufps $177, %xmm13, %xmm13
mulps %xmm14, %xmm12
mulps %xmm13, %xmm14
subps %xmm12, %xmm6
addps %xmm14, %xmm15
X8_const_5:
movaps 0xFECA(%rdx,%rax,4), %xmm7
movaps %xmm10, %xmm13
X8_const_7:
movaps 0xFECA(%rdx,%rax,4), %xmm8
movaps %xmm6, %xmm12
movaps 80(%r8), %xmm9
addq $96, %r8
mulps %xmm7, %xmm13
subps %xmm15, %xmm6
addps %xmm15, %xmm12
mulps %xmm8, %xmm10
subps %xmm12, %xmm0
addps %xmm12, %xmm3
shufps $177, %xmm7, %xmm7
xorps %xmm5, %xmm6
shufps $177, %xmm8, %xmm8
movaps %xmm2, %xmm12
mulps %xmm9, %xmm7
mulps %xmm8, %xmm9
subps %xmm7, %xmm13
addps %xmm9, %xmm10
X8_const_1:
movaps 0xFECA(%rdx,%rax,4), %xmm4
shufps $177, %xmm11, %xmm11
movaps %xmm4, %xmm1
shufps $177, %xmm6, %xmm6
addps %xmm11, %xmm1
subps %xmm11, %xmm4
addps %xmm6, %xmm12
subps %xmm6, %xmm2
movaps %xmm13, %xmm11
movaps %xmm4, %xmm14
movaps %xmm1, %xmm6
subps %xmm10, %xmm13
addps %xmm10, %xmm11
xorps %xmm5, %xmm13
addps %xmm11, %xmm4
subps %xmm11, %xmm14
shufps $177, %xmm13, %xmm13
X8_const1_0:
movaps %xmm3, 0xFECA(%rdx,%rax,4)
X8_const1_1:
movaps %xmm4, 0xFECA(%rdx,%rax,4)
X8_const1_2:
movaps %xmm2, 0xFECA(%rdx,%rax,4)
subps %xmm13, %xmm1
addps %xmm13, %xmm6
X8_const1_3:
movaps %xmm1, 0xFECA(%rdx,%rax,4)
X8_const1_4:
movaps %xmm0, 0xFECA(%rdx,%rax,4)
X8_const1_5:
movaps %xmm14, 0xFECA(%rdx,%rax,4)
X8_const1_6:
movaps %xmm12, 0xFECA(%rdx,%rax,4)
X8_const1_7:
movaps %xmm6, 0xFECA(%rdx,%rax,4)
addq $4, %rax
cmpq %rcx, %rax
jne X8_loop
/*
 * Patch tables for the four leaf fragments (ee, oo, eo, oe): one table of
 * eight 32-bit entries per fragment. The whole set is emitted twice, with
 * leading underscores when __APPLE__ is defined and without otherwise.
 *
 * Entry N is (LEAF_xx_const_N - leaf_xx) plus 4 or 5, that is, the byte
 * distance from the start of the fragment to a point 4 or 5 bytes into the
 * movaps at that label. The addend is 5 exactly where that movaps targets
 * xmm8-xmm15 and 4 where it targets xmm0-xmm7, which matches the extra prefix
 * byte the high registers need; each entry should therefore be the position
 * of the 32-bit displacement field (the 0xFECA placeholder) inside its
 * instruction.
 * NOTE(review): presumably read by the code generator when it patches real
 * offsets into copies of the leaf code -- the consumer is not in this file.
 *
 * Keep the addends in step with the destination registers if any of the
 * placeholder loads is ever changed, and keep both copies of the tables
 * identical apart from the underscore.
 */
#ifdef __APPLE__
.globl _sse_leaf_ee_offsets
.globl _sse_leaf_oo_offsets
.globl _sse_leaf_eo_offsets
.globl _sse_leaf_oe_offsets
.align 4
_sse_leaf_ee_offsets:
.long LEAF_EE_const_0-_leaf_ee+0x4
.long LEAF_EE_const_1-_leaf_ee+0x5
.long LEAF_EE_const_2-_leaf_ee+0x5
.long LEAF_EE_const_3-_leaf_ee+0x5
.long LEAF_EE_const_4-_leaf_ee+0x5
.long LEAF_EE_const_5-_leaf_ee+0x5
.long LEAF_EE_const_6-_leaf_ee+0x4
.long LEAF_EE_const_7-_leaf_ee+0x5
_sse_leaf_oo_offsets:
.long LEAF_OO_const_0-_leaf_oo+0x4
.long LEAF_OO_const_1-_leaf_oo+0x4
.long LEAF_OO_const_2-_leaf_oo+0x5
.long LEAF_OO_const_3-_leaf_oo+0x5
.long LEAF_OO_const_4-_leaf_oo+0x4
.long LEAF_OO_const_5-_leaf_oo+0x5
.long LEAF_OO_const_6-_leaf_oo+0x5
.long LEAF_OO_const_7-_leaf_oo+0x5
_sse_leaf_eo_offsets:
.long LEAF_EO_const_0-_leaf_eo+0x5
.long LEAF_EO_const_1-_leaf_eo+0x4
.long LEAF_EO_const_2-_leaf_eo+0x4
.long LEAF_EO_const_3-_leaf_eo+0x4
.long LEAF_EO_const_4-_leaf_eo+0x5
.long LEAF_EO_const_5-_leaf_eo+0x5
.long LEAF_EO_const_6-_leaf_eo+0x4
.long LEAF_EO_const_7-_leaf_eo+0x5
_sse_leaf_oe_offsets:
.long LEAF_OE_const_0-_leaf_oe+0x5
.long LEAF_OE_const_1-_leaf_oe+0x4
.long LEAF_OE_const_2-_leaf_oe+0x4
.long LEAF_OE_const_3-_leaf_oe+0x5
.long LEAF_OE_const_4-_leaf_oe+0x5
.long LEAF_OE_const_5-_leaf_oe+0x5
.long LEAF_OE_const_6-_leaf_oe+0x4
.long LEAF_OE_const_7-_leaf_oe+0x4
#else
.globl sse_leaf_ee_offsets
.globl sse_leaf_oo_offsets
.globl sse_leaf_eo_offsets
.globl sse_leaf_oe_offsets
.align 4
sse_leaf_ee_offsets:
.long LEAF_EE_const_0-leaf_ee+0x4
.long LEAF_EE_const_1-leaf_ee+0x5
.long LEAF_EE_const_2-leaf_ee+0x5
.long LEAF_EE_const_3-leaf_ee+0x5
.long LEAF_EE_const_4-leaf_ee+0x5
.long LEAF_EE_const_5-leaf_ee+0x5
.long LEAF_EE_const_6-leaf_ee+0x4
.long LEAF_EE_const_7-leaf_ee+0x5
sse_leaf_oo_offsets:
.long LEAF_OO_const_0-leaf_oo+0x4
.long LEAF_OO_const_1-leaf_oo+0x4
.long LEAF_OO_const_2-leaf_oo+0x5
.long LEAF_OO_const_3-leaf_oo+0x5
.long LEAF_OO_const_4-leaf_oo+0x4
.long LEAF_OO_const_5-leaf_oo+0x5
.long LEAF_OO_const_6-leaf_oo+0x5
.long LEAF_OO_const_7-leaf_oo+0x5
sse_leaf_eo_offsets:
.long LEAF_EO_const_0-leaf_eo+0x5
.long LEAF_EO_const_1-leaf_eo+0x4
.long LEAF_EO_const_2-leaf_eo+0x4
.long LEAF_EO_const_3-leaf_eo+0x4
.long LEAF_EO_const_4-leaf_eo+0x5
.long LEAF_EO_const_5-leaf_eo+0x5
.long LEAF_EO_const_6-leaf_eo+0x4
.long LEAF_EO_const_7-leaf_eo+0x5
sse_leaf_oe_offsets:
.long LEAF_OE_const_0-leaf_oe+0x5
.long LEAF_OE_const_1-leaf_oe+0x4
.long LEAF_OE_const_2-leaf_oe+0x4
.long LEAF_OE_const_3-leaf_oe+0x5
.long LEAF_OE_const_4-leaf_oe+0x5
.long LEAF_OE_const_5-leaf_oe+0x5
.long LEAF_OE_const_6-leaf_oe+0x4
.long LEAF_OE_const_7-leaf_oe+0x4
#endif
/*
 * Constant tables in the data section, 16-byte aligned (.p2align 4).
 * Each table is five rows of four 32-bit words, i.e. five 16-byte vectors at
 * byte offsets 0, 16, 32, 48 (0x30) and 64 (0x40). Read as IEEE-754 single
 * precision: 0x3f800000 is 1.0, 0x3f3504f3 is about 0.70710677, 0xbf3504f3 is
 * its negative, and 0x80000000 is the sign bit alone.
 *
 * sse_constants:
 *   row 0: sign bit set in words 1 and 3 only (an xorps with it negates those
 *          two lanes);
 *   row 1: 0.7071 in every word;
 *   row 2: -0.7071, +0.7071, -0.7071, +0.7071;
 *   row 3: 1.0, 1.0, 0.7071, 0.7071;
 *   row 4: 0.0, 0.0, -0.7071, +0.7071.
 * sse_constants_inv has the same layout with the signs of rows 0, 2 and 4
 * flipped (row 0 sets the sign bit in words 0 and 2 instead).
 * NOTE(review): the row offsets match the 0, 16, 32, 0x30 and 0x40 offsets
 * the leaf code applies to r9, so r9 presumably points at one of these two
 * tables; the _inv table is presumably for the inverse transform -- confirm
 * against the code that sets up the plan.
 */
#ifdef __APPLE__
.data
#else
.section .data
#endif
.p2align 4
#ifdef __APPLE__
.globl _sse_constants
_sse_constants:
#else
.globl sse_constants
sse_constants:
#endif
.long 0x00000000,0x80000000,0x00000000,0x80000000
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
.long 0xbf3504f3,0x3f3504f3,0xbf3504f3,0x3f3504f3
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
.long 0x00000000,0x00000000,0xbf3504f3,0x3f3504f3
#ifdef __APPLE__
.globl _sse_constants_inv
_sse_constants_inv:
#else
.globl sse_constants_inv
sse_constants_inv:
#endif
.long 0x80000000,0x00000000,0x80000000,0x00000000
.long 0x3f3504f3,0x3f3504f3,0x3f3504f3,0x3f3504f3
.long 0x3f3504f3,0xbf3504f3,0x3f3504f3,0xbf3504f3
.long 0x3f800000,0x3f800000,0x3f3504f3,0x3f3504f3
.long 0x00000000,0x00000000,0x3f3504f3,0xbf3504f3

@ -0,0 +1,49 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __TYPES_H__
#define __TYPES_H__

/* Scalar sample type: single-precision float. */
typedef float data_t;

/*
 * Complex sample type. When the macro `complex` is visible (as it is after
 * including <complex.h>) the native complex float type is used; otherwise a
 * complex value is stored as an array of two floats.
 */
#ifndef complex
typedef float cdata_t[2];
#else
typedef complex float cdata_t;
#endif

/* Marker for small helpers that must always be inlined (GCC/Clang attribute). */
#define __INLINE static inline __attribute__((always_inline))

#endif /* __TYPES_H__ */

@ -0,0 +1,45 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, 2013 The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * vfp.h -- C-visible names for the labels exported by the ARM VFP assembly
 * source. All five are declared with empty parentheses, so the compiler does
 * no argument checking on them.
 */
#ifndef __VFP_H__
#define __VFP_H__
#include "ffts.h"
/*
 * NOTE(review): the assembly behind vfp_e and vfp_o takes its inputs in fixed
 * registers and has no return instruction after its loop, so these symbols
 * are presumably meant to be address-taken or copied by the code generator
 * rather than called from C -- confirm against the users of this header.
 */
void vfp_e();
void vfp_o();
void vfp_x4();
void vfp_x8();
/* NOTE(review): presumably a label marking the end of the VFP code -- confirm. */
void vfp_end();
#endif

@ -0,0 +1,473 @@
/*
This file is part of FFTS -- The Fastest Fourier Transform in the South
Copyright (c) 2012, 2013 Anthony M. Blake <amb@anthonix.com>
Copyright (c) 2012, 2013 The University of Waikato
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * vfp_e: scalar VFP leaf loop. Each pass of _vfp_e_loop
 *   - loads two single-precision constants from [r2] and [r2, #8];
 *   - loads one (re, im) pair of floats through each of r3-r10 (the x0..x7
 *     named in the comments) and advances every one of those pointers by
 *     8 bytes;
 *   - fetches a 32-bit offset from [r12], post-incrementing r12 by 4, and
 *     forms lr = r0 + offset * 4;
 *   - writes sixteen floats to lr + 0 through lr + 60;
 *   - decrements r11 and branches back while the result is non-zero.
 * The flags set by the subs survive to the closing bne because only VFP
 * loads, stores and arithmetic come between them.
 * lr is overwritten as a scratch register and there is no return instruction
 * after the loop, so control continues into whatever is assembled next.
 */
@ assumes r0 = out
@ r1 = in ?
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 = const pointer
@ & lr = temps
.align 4
#ifdef __APPLE__
.globl _vfp_e
_vfp_e:
#else
.globl vfp_e
vfp_e:
#endif
_vfp_e_loop:
vldr s15, [r2, #8]
vldr s2, [r3] @ x0
vldr s0, [r3, #4]
vldr s4, [r4] @ x1
vldr s11, [r2]
vldr s10, [r7] @ x4
vldr s3, [r7, #4]
vldr s8, [r8] @ x5
vldr s1, [r8, #4]
vldr s14, [r9] @ x6
vldr s9, [r9, #4]
vldr s6, [r10] @ x7
vldr s12, [r10, #4]
vsub.f32 s18, s3, s1
vsub.f32 s7, s10, s8
vsub.f32 s5, s14, s6
vadd.f32 s6, s14, s6
vldr s24, [r5, #4]
vsub.f32 s14, s9, s12
vldr s22, [r6, #4]
vadd.f32 s8, s10, s8
vldr s28, [r6] @ x3
vldr s17, [r5] @ x2
vadd.f32 s10, s9, s12
vmul.f32 s13, s18, s15
vmul.f32 s9, s7, s11
vmul.f32 s16, s5, s11
vmul.f32 s18, s18, s11
vmul.f32 s30, s14, s11
vldr s11, [r4, #4]
add r3, r3, #8
add r4, r4, #8
add r5, r5, #8
add r6, r6, #8
add r7, r7, #8
add r8, r8, #8
add r9, r9, #8
add r10, r10, #8
vmul.f32 s12, s5, s15
vmul.f32 s20, s14, s15
vadd.f32 s5, s2, s4
vadd.f32 s3, s3, s1
vmul.f32 s15, s7, s15
vadd.f32 s1, s24, s22
vsub.f32 s7, s24, s22
vadd.f32 s24, s17, s28
vadd.f32 s26, s0, s11
vsub.f32 s14, s9, s13
vsub.f32 s2, s2, s4
vadd.f32 s4, s16, s20
vsub.f32 s22, s0, s11
vsub.f32 s16, s17, s28
vadd.f32 s9, s5, s24
vadd.f32 s28, s18, s15
vadd.f32 s13, s8, s6
vsub.f32 s5, s5, s24
vsub.f32 s24, s8, s6
vadd.f32 s11, s26, s1
vsub.f32 s12, s30, s12
vadd.f32 s20, s3, s10
vsub.f32 s15, s3, s10
vsub.f32 s3, s26, s1
vadd.f32 s18, s9, s13
vadd.f32 s10, s14, s4
vadd.f32 s6, s2, s7 @
vsub.f32 s0, s2, s7 @
vadd.f32 s26, s11, s20
vsub.f32 s4, s14, s4
vsub.f32 s8, s22, s16 @
vadd.f32 s1, s28, s12
ldr lr, [r12], #4
add lr, r0, lr, lsl #2
subs r11, r11, #1
vstr s18, [lr]
vsub.f32 s2, s28, s12
vadd.f32 s12, s22, s16 @
vsub.f32 s16, s3, s24 @
vsub.f32 s13, s9, s13
vstr s26, [lr, #4]
vadd.f32 s28, s5, s15 @
vsub.f32 s7, s5, s15 @
vadd.f32 s14, s6, s10
vadd.f32 s5, s8, s1
vadd.f32 s9, s0, s2 @
vsub.f32 s2, s0, s2 @
vsub.f32 s11, s11, s20
vstr s28, [lr, #16]
vadd.f32 s3, s3, s24 @
vstr s16, [lr, #20]
vsub.f32 s6, s6, s10
vstr s13, [lr, #32]
vsub.f32 s13, s12, s4 @
vsub.f32 s8, s8, s1
vadd.f32 s0, s12, s4 @
vstr s11, [lr, #36]
vstr s7, [lr, #48]
vstr s3, [lr, #52]
vstr s14, [lr, #8]
vstr s5, [lr, #12]
vstr s9, [lr, #24]
vstr s13, [lr, #28]
vstr s6, [lr, #40]
vstr s8, [lr, #44]
vstr s2, [lr, #56]
vstr s0, [lr, #60]
bne _vfp_e_loop
/*
 * vfp_o: scalar VFP leaf loop built from additions and subtractions only
 * (no multiplies, no constant table). Each pass of _vfp_o_loop
 *   - fetches a 32-bit offset from [r12], post-incrementing r12 by 4, and
 *     forms r2 = r0 + offset * 4 as the output address;
 *   - loads one (re, im) pair through each of r3-r6, combines the four pairs
 *     and writes eight floats to r2 + 0 through r2 + 28;
 *   - does the same with the pairs read through r7-r10 and writes eight
 *     floats to r2 + 32 through r2 + 60;
 *   - advances r3-r10 by 8 bytes each;
 *   - decrements r11 (done near the top of the pass) and branches back at the
 *     end while the result was non-zero. No instruction in between changes
 *     the flags.
 * r2 is overwritten as a scratch register; lr is not touched here. There is
 * no return instruction after the loop, so control continues into whatever
 * is assembled next.
 */
@ assumes r0 = out
@ r1 = in ?
@
@ r12 = offsets
@ r3-r10 = data pointers
@ r11 = loop iterations
@ r2 & lr = temps
.align 4
#ifdef __APPLE__
.globl _vfp_o
_vfp_o:
#else
.globl vfp_o
vfp_o:
#endif
_vfp_o_loop:
vldr s4, [r3] @ x0
vldr s0, [r3, #4]
vldr s6, [r4] @ x1
vldr s5, [r4, #4]
vldr s7, [r5] @ x2
vldr s1, [r5, #4]
vldr s3, [r6] @ x3
vldr s8, [r6, #4]
subs r11, r11, #1
ldr r2, [r12], #4
add r2, r0, r2, lsl #2
vadd.f32 s2, s4, s6
vadd.f32 s14, s0, s5
vadd.f32 s10, s1, s8
vsub.f32 s4, s4, s6
vsub.f32 s0, s0, s5
vadd.f32 s12, s7, s3
vsub.f32 s6, s7, s3
vsub.f32 s8, s1, s8
vadd.f32 s5, s14, s10
vsub.f32 s10, s14, s10
vadd.f32 s7, s2, s12
vsub.f32 s1, s0, s6 @
vsub.f32 s12, s2, s12
vadd.f32 s3, s4, s8 @
vsub.f32 s2, s4, s8 @
vadd.f32 s0, s0, s6 @
vstr s7, [r2]
vldr s7, [r9] @ x2
vstr s5, [r2, #4]
vstr s3, [r2, #8]
vstr s1, [r2, #12]
vstr s12, [r2, #16]
vstr s10, [r2, #20]
vstr s2, [r2, #24]
vstr s0, [r2, #28]
vldr s4, [r7] @ x0
vldr s0, [r7, #4]
vldr s6, [r8] @ x1
vldr s5, [r8, #4]
vldr s3, [r10] @ x3
vldr s8, [r10, #4]
vldr s1, [r9, #4]
add r3, r3, #8
add r4, r4, #8
add r5, r5, #8
add r6, r6, #8
add r7, r7, #8
add r8, r8, #8
add r9, r9, #8
add r10, r10, #8
vadd.f32 s2, s4, s6
vadd.f32 s14, s0, s5
vadd.f32 s10, s1, s8
vsub.f32 s4, s4, s6
vsub.f32 s0, s0, s5
vadd.f32 s12, s7, s3
vsub.f32 s6, s7, s3
vsub.f32 s8, s1, s8
vadd.f32 s5, s14, s10
vsub.f32 s10, s14, s10
vadd.f32 s7, s2, s12
vsub.f32 s1, s0, s6 @
vsub.f32 s12, s2, s12
vadd.f32 s3, s4, s8 @
vsub.f32 s2, s4, s8 @
vadd.f32 s0, s0, s6 @
vstr s7, [r2, #32]
vstr s5, [r2, #36]
vstr s3, [r2, #40]
vstr s1, [r2, #44]
vstr s12, [r2, #48]
vstr s10, [r2, #52]
vstr s2, [r2, #56]
vstr s0, [r2, #60]
bne _vfp_o_loop
/*
 * vfp_x4: in-place pass over four data streams, returning with bx lr.
 *
 * Inputs as read by the code: r0 = base of the data, r1 = a size from which
 * the stream spacing is derived, r2 = base of the coefficient table.
 * Set-up: r3 = r0, r4 = r0 + r1*2, r5 = r0 + r1*4, r6 = r4 + r1*4, r7 = r2,
 * and r11 = 4 as a fixed iteration count.
 * Each pass of _vfp_x4_loop loads one (re, im) pair of floats through each of
 * r3-r6 and one pair of coefficients from r7 (then advances r7 by 8), forms
 * products of the r5 and r6 values with the coefficient pair, combines them
 * with the r3 and r4 values, and stores four result pairs back through
 * r3-r6, advancing each of those pointers by 8 bytes.
 * The subs on r11 is issued early; nothing after it changes the flags, so the
 * closing bne still sees its result.
 * r3-r7 and r11 are overwritten without being saved.
 * NOTE(review): r1 is presumably a byte count or element count for the block
 * being processed -- confirm the unit against the caller.
 */
.align 4
#ifdef __APPLE__
.globl _vfp_x4
_vfp_x4:
#else
.globl vfp_x4
vfp_x4:
#endif
add r3, r0, #0
add r7, r2, #0
add r4, r0, r1, lsl #1
add r5, r0, r1, lsl #2
add r6, r4, r1, lsl #2
mov r11, #4
_vfp_x4_loop:
vldr s8, [r3, #0]
vldr s9, [r3, #4]
vldr s10, [r4, #0]
vldr s11, [r4, #4]
vldr s12, [r5, #0]
vldr s13, [r5, #4]
vldr s14, [r6, #0]
vldr s15, [r6, #4]
vldr s2, [r7, #0]
vldr s3, [r7, #4]
add r7, r7, #8
subs r11, r11, #1
vmul.f32 s0, s13, s3
vmul.f32 s5, s12, s2
vmul.f32 s1, s14, s2
vmul.f32 s4, s14, s3
vmul.f32 s14, s12, s3
vmul.f32 s13, s13, s2
vmul.f32 s12, s15, s3
vmul.f32 s2, s15, s2
vsub.f32 s0, s5, s0
vadd.f32 s13, s13, s14
vadd.f32 s12, s12, s1
vsub.f32 s1, s2, s4
vadd.f32 s15, s0, s12
vsub.f32 s12, s0, s12
vadd.f32 s14, s13, s1
vsub.f32 s13, s13, s1
vadd.f32 s0, s8, s15
vadd.f32 s1, s9, s14
vadd.f32 s2, s10, s13 @
vsub.f32 s4, s8, s15
vsub.f32 s3, s11, s12 @
vstr s0, [r3, #0]
vstr s1, [r3, #4]
add r3, r3, #8
vsub.f32 s5, s9, s14
vsub.f32 s6, s10, s13 @
vadd.f32 s7, s11, s12 @
vstr s2, [r4, #0]
vstr s3, [r4, #4]
add r4, r4, #8
vstr s4, [r5, #0]
vstr s5, [r5, #4]
add r5, r5, #8
vstr s6, [r6, #0]
vstr s7, [r6, #4]
add r6, r6, #8
bne _vfp_x4_loop
bx lr
@ vfp_x8: in-place pass over eight complex-float streams.
@
@ Inputs as used by the code below:
@   r0 = base address of the data (stream 0)
@   r1 = stride term; stream k starts at r0 + k*r1 for k = 0..7
@   r2 = address of the lookup table, consumed 24 bytes (three float pairs)
@        per iteration
@ The loop counter starts at -(r1 >> 3) and counts up to zero, so the body
@ runs r1/8 times. If r1 is below 8 the counter starts at zero and the loop
@ does not terminate until the counter wraps around.
@
@ Each iteration handles the even streams (0, 2, 4, 6) first and then the
@ odd streams (1, 3, 5, 7). The products formed from x2 and x3 (kept in s4,
@ s6, s12 and s14) and the x1 values (s2, s0) stay live from the first half
@ into the second.
@
@ Writes r3-r12 and a range of VFP registers up to s30 without saving them.
@ NOTE(review): r4-r11 and s16-s31 are callee-saved under the ARM procedure
@ call standard; presumably the caller preserves them - confirm.
.align 4
#ifdef __APPLE__
.globl _vfp_x8
_vfp_x8:
#else
.globl vfp_x8
vfp_x8:
#endif
mov r11, #0
add r3, r0, #0 @ data0
add r5, r0, r1, lsl #1 @ data2
add r4, r0, r1 @ data1
add r7, r5, r1, lsl #1 @ data4
add r6, r5, r1 @ data3
add r9, r7, r1, lsl #1 @ data6
add r8, r7, r1 @ data5
add r10, r9, r1 @ data7
add r12, r2, #0 @ LUT
sub r11, r11, r1, lsr #3
_vfp_x8_loop:
vldr s10, [r3, #0] @ x0-re
vldr s8, [r3, #4] @ x0-im
vldr s2, [r4, #0] @ x1-re
vldr s0, [r4, #4] @ x1-im
vldr s6, [r5, #0] @ x2-re
vldr s4, [r5, #4] @ x2-im
vldr s13, [r6, #0] @ x3-re
vldr s15, [r6, #4] @ x3-im
@ First LUT pair (s7, s11): applied to x2 and x3.
vldr s7, [r12]
vldr s11, [r12, #4]
vldr s5, [r7, #0] @ x4-re
vldr s1, [r7, #4] @ x4-im
vldr s28, [r9, #0] @ x6-re
vldr s18, [r9, #4] @ x6-im
@ Advance the counter here; no later instruction in the loop body sets the
@ flags, so the closing bne still sees this result.
adds r11, r11, #1
@ Partial products for x2 and x3. The second LUT pair (s13 from offset 8,
@ s24 from offset 12) is loaded in between, once the registers are free.
vmul.f32 s14, s15, s7
vldr s24, [r12, #12]
vmul.f32 s12, s13, s11
vmul.f32 s26, s13, s7
vldr s13, [r12, #8]
vmul.f32 s3, s4, s11
vmul.f32 s15, s15, s11
vmul.f32 s16, s4, s7
vmul.f32 s9, s6, s7
vmul.f32 s11, s6, s11
@ Partial products for x4 and x6 with the second LUT pair, interleaved with
@ the sums that finish the x2 and x3 products (s6, s14 and s4, s12).
vmul.f32 s7, s18, s24
vmul.f32 s20, s1, s24
vmul.f32 s30, s5, s13
vadd.f32 s4, s26, s15
vsub.f32 s12, s14, s12
vsub.f32 s6, s9, s3
vadd.f32 s14, s16, s11
vmul.f32 s22, s28, s13
vmul.f32 s26, s28, s24
vmul.f32 s18, s18, s13
vmul.f32 s5, s5, s24
vmul.f32 s1, s1, s13
@ Add/subtract network producing the new values for streams 0, 2, 4, 6.
vsub.f32 s9, s30, s20
vadd.f32 s16, s14, s12
vadd.f32 s3, s22, s7
vadd.f32 s15, s6, s4
vsub.f32 s11, s18, s26
vadd.f32 s18, s1, s5
vadd.f32 s13, s8, s16
vadd.f32 s1, s9, s3
vadd.f32 s7, s10, s15
vsub.f32 s15, s10, s15
vsub.f32 s10, s9, s3
vadd.f32 s5, s18, s11
vsub.f32 s11, s18, s11
vsub.f32 s8, s8, s16
vadd.f32 s20, s7, s1
vsub.f32 s7, s7, s1
vadd.f32 s18, s13, s5
vadd.f32 s16, s15, s11 @
vsub.f32 s9, s8, s10 @
vsub.f32 s3, s13, s5
vsub.f32 s1, s15, s11 @
@ Write back streams 0, 2, 4, 6 and advance their pointers.
vstr s20, [r3]
vadd.f32 s8, s8, s10 @
vstr s18, [r3, #4]
add r3, r3, #8
vstr s16, [r5]
vstr s9, [r5, #4]
add r5, r5, #8
vstr s7, [r7]
vstr s3, [r7, #4]
add r7, r7, #8
vstr s1, [r9]
vstr s8, [r9, #4]
add r9, r9, #8
@ Second half: load x5 and x7 plus the third LUT pair (s1, s15), then step
@ the LUT pointer past all three pairs.
vldr s10, [r8, #0] @ x5-re
vldr s8, [r8, #4] @ x5-im
vldr s5, [r10, #0] @ x7-re
vldr s11, [r10, #4] @ x7-im
vldr s1, [r12, #16]
vldr s15, [r12, #20]
add r12, r12, #24
@ Partial products for x5 and x7 with the third LUT pair.
vmul.f32 s9, s5, s1
vmul.f32 s3, s11, s15
vmul.f32 s13, s10, s1
vmul.f32 s7, s8, s15
vmul.f32 s5, s5, s15
vmul.f32 s11, s11, s1
vmul.f32 s10, s10, s15
vmul.f32 s15, s8, s1
@ Reuse the x2/x3 products kept in s14, s12, s6, s4 from the first half and
@ combine them with x1 (s2, s0) and the new x5/x7 products.
vsub.f32 s1, s14, s12
vadd.f32 s8, s9, s3
vsub.f32 s3, s6, s4
vsub.f32 s12, s13, s7
vsub.f32 s5, s11, s5
vadd.f32 s7, s15, s10
vadd.f32 s4, s2, s1 @
vsub.f32 s2, s2, s1 @
vsub.f32 s6, s0, s3 @
vadd.f32 s10, s12, s8
vsub.f32 s9, s12, s8
vadd.f32 s0, s0, s3 @
vsub.f32 s1, s7, s5
vadd.f32 s14, s7, s5
vadd.f32 s7, s4, s10
vsub.f32 s8, s4, s10
vsub.f32 s12, s0, s9 @
vadd.f32 s3, s2, s1 @
vadd.f32 s5, s6, s14
vsub.f32 s4, s6, s14
vsub.f32 s2, s2, s1 @
vadd.f32 s0, s0, s9 @
@ Write back streams 1, 3, 5, 7 and advance their pointers.
vstr s7, [r4]
vstr s5, [r4, #4]
add r4, r4, #8
vstr s3, [r6]
vstr s12, [r6, #4]
add r6, r6, #8
vstr s8, [r8]
vstr s4, [r8, #4]
add r8, r8, #8
vstr s2, [r10]
vstr s0, [r10, #4]
add r10, r10, #8
bne _vfp_x8_loop
bx lr
@ vfp_end: exported label placed after the last VFP routine; its body is a
@ single return.
@ NOTE(review): presumably used as an end marker so that other code can
@ locate or measure the routines that precede it - confirm against the users
@ of this symbol.
.align 4
#ifdef __APPLE__
.globl _vfp_end
_vfp_end:
#else
.globl vfp_end
vfp_end:
#endif
bx lr

@ -0,0 +1,4 @@
# Build the "test" program from test.c. The noinst_ prefix means it is built
# by "make" but never installed.
noinst_PROGRAMS = test
test_SOURCES = test.c
# Link the test program against the libtool library built in src/.
test_LDADD = $(top_builddir)/src/libffts.la

@ -0,0 +1,532 @@
# Makefile.in generated by automake 1.12.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2012 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
@SET_MAKE@
VPATH = @srcdir@
am__make_dryrun = \
{ \
am__dry=no; \
case $$MAKEFLAGS in \
*\\[\ \ ]*) \
echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
| grep '^AM OK$$' >/dev/null || am__dry=yes;; \
*) \
for am__flg in $$MAKEFLAGS; do \
case $$am__flg in \
*=*|--*) ;; \
*n*) am__dry=yes; break;; \
esac; \
done;; \
esac; \
test $$am__dry = yes; \
}
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkglibexecdir = $(libexecdir)/@PACKAGE@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
noinst_PROGRAMS = test$(EXEEXT)
subdir = tests
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
$(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_classpath.m4 \
$(top_srcdir)/m4/ax_check_java_home.m4 \
$(top_srcdir)/m4/ax_java_options.m4 \
$(top_srcdir)/m4/ax_jni_include_dir.m4 \
$(top_srcdir)/m4/ax_prog_jar.m4 \
$(top_srcdir)/m4/ax_prog_javac.m4 \
$(top_srcdir)/m4/ax_prog_javac_works.m4 \
$(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
PROGRAMS = $(noinst_PROGRAMS)
am_test_OBJECTS = test.$(OBJEXT)
test_OBJECTS = $(am_test_OBJECTS)
test_DEPENDENCIES = $(top_builddir)/src/libffts.la
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(test_SOURCES)
DIST_SOURCES = $(test_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
AR = @AR@
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
CC = @CC@
CCAS = @CCAS@
CCASDEPMODE = @CCASDEPMODE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
CFLAGS = @CFLAGS@
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DEFS = @DEFS@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DSYMUTIL = @DSYMUTIL@
DUMPBIN = @DUMPBIN@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
JAR = @JAR@
JAVA = @JAVA@
JAVAC = @JAVAC@
JAVACFLAGS = @JAVACFLAGS@
JAVAFLAGS = @JAVAFLAGS@
JAVAPREFIX = @JAVAPREFIX@
JAVA_PATH_NAME = @JAVA_PATH_NAME@
JNI_CPPFLAGS = @JNI_CPPFLAGS@
LD = @LD@
LDFLAGS = @LDFLAGS@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
LIBTOOL = @LIBTOOL@
LIPO = @LIPO@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
MAKEINFO = @MAKEINFO@
MANIFEST_TOOL = @MANIFEST_TOOL@
MKDIR_P = @MKDIR_P@
NM = @NM@
NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
RANLIB = @RANLIB@
SED = @SED@
SET_MAKE = @SET_MAKE@
SHELL = @SHELL@
STRIP = @STRIP@
VERSION = @VERSION@
_ACJNI_JAVAC = @_ACJNI_JAVAC@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
abs_top_srcdir = @abs_top_srcdir@
ac_ct_AR = @ac_ct_AR@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
builddir = @builddir@
datadir = @datadir@
datarootdir = @datarootdir@
docdir = @docdir@
dvidir = @dvidir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
htmldir = @htmldir@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localedir = @localedir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
sysconfdir = @sysconfdir@
target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
test_SOURCES = test.c
test_LDADD = $(top_builddir)/src/libffts.la
all: all-am
.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
&& { if test -f $@; then exit 0; else break; fi; }; \
exit 1;; \
esac; \
done; \
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu tests/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu tests/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
*) \
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(top_srcdir)/configure: $(am__configure_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
clean-noinstPROGRAMS:
@list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
echo " rm -f" $$list; \
rm -f $$list || exit $$?; \
test -n "$(EXEEXT)" || exit 0; \
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
test$(EXEEXT): $(test_OBJECTS) $(test_DEPENDENCIES) $(EXTRA_test_DEPENDENCIES)
@rm -f test$(EXEEXT)
$(LINK) $(test_OBJECTS) $(test_LDADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test.Po@am__quote@
.c.o:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c $<
.c.obj:
@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
.c.lo:
@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
mkid -fID $$unique
tags: TAGS
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
set x; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
test -n "$$unique" || unique=$$empty_fix; \
if test $$# -gt 0; then \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
"$$@" $$unique; \
else \
$(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$unique; \
fi; \
fi
ctags: CTAGS
CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) '{ files[$$0] = 1; nonempty = 1; } \
END { if (nonempty) { for (i in files) print i; }; }'`; \
test -z "$(CTAGS_ARGS)$$unique" \
|| $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
$$unique
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
cscopelist: $(HEADERS) $(SOURCES) $(LISP)
list='$(SOURCES) $(HEADERS) $(LISP)'; \
case "$(srcdir)" in \
[\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
*) sdir=$(subdir)/$(srcdir) ;; \
esac; \
for i in $$list; do \
if test -f "$$i"; then \
echo "$(subdir)/$$i"; \
else \
echo "$$sdir/$$i"; \
fi; \
done >> $(top_builddir)/cscope.files
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
distdir: $(DISTFILES)
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
list='$(DISTFILES)'; \
dist_files=`for file in $$list; do echo $$file; done | \
sed -e "s|^$$srcdirstrip/||;t" \
-e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
case $$dist_files in \
*/*) $(MKDIR_P) `echo "$$dist_files" | \
sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
sort -u` ;; \
esac; \
for file in $$dist_files; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
if test -d $$d/$$file; then \
dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
if test -d "$(distdir)/$$file"; then \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
fi; \
cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
else \
test -f "$(distdir)/$$file" \
|| cp -p $$d/$$file "$(distdir)/$$file" \
|| exit 1; \
fi; \
done
check-am: all-am
check: check-am
all-am: Makefile $(PROGRAMS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
install; \
else \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
"INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
fi
mostlyclean-generic:
clean-generic:
distclean-generic:
-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \
mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-tags
dvi: dvi-am
dvi-am:
html: html-am
html-am:
info: info-am
info-am:
install-data-am:
install-dvi: install-dvi-am
install-dvi-am:
install-exec-am:
install-html: install-html-am
install-html-am:
install-info: install-info-am
install-info-am:
install-man:
install-pdf: install-pdf-am
install-pdf-am:
install-ps: install-ps-am
install-ps-am:
installcheck-am:
maintainer-clean: maintainer-clean-am
-rm -rf ./$(DEPDIR)
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
pdf-am:
ps: ps-am
ps-am:
uninstall-am:
.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstPROGRAMS cscopelist ctags distclean \
distclean-compile distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am html html-am info info-am \
install install-am install-data install-data-am install-dvi \
install-dvi-am install-exec install-exec-am install-html \
install-html-am install-info install-info-am install-man \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

@ -0,0 +1,176 @@
/*
This file is part of SFFT.
Copyright (c) 2012, Anthony M. Blake
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the organization nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL ANTHONY M. BLAKE BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#ifdef __ARM_NEON__
#endif
#ifdef HAVE_SSE
#include <xmmintrin.h>
#endif
#include "../include/ffts.h"
#define PI 3.1415926535897932384626433832795028841971693993751058209
/*
 * Relative L2 error between a transform output and the analytic response
 * to a unit impulse at index 1.
 *
 * N     number of complex points.
 * sign  transform direction; a negative sign expects exp(-2*pi*i*k/N),
 *       otherwise exp(+2*pi*i*k/N).
 * data  2*N floats, interleaved as re, im.
 *
 * Returns sqrt(sum |expected - data|^2) / sqrt(sum |expected|^2).
 * Accumulation is done in long double, except on Android where double is
 * used instead.
 */
float impulse_error(int N, int sign, float *data) {
#ifdef __ANDROID__
    double err_energy = 0.0f;
    double ref_energy = 0.0f;
#else
    long double err_energy = 0.0f;
    long double ref_energy = 0.0f;
#endif
    int k = 0;

    while (k < N) {
        /* Expected spectrum sample for bin k. */
#ifdef __ANDROID__
        double phase = 2 * PI * (double)k / (double)N;
        double re = cos(phase);
        double im = (sign < 0) ? -sin(phase) : sin(phase);
#else
        long double phase = 2 * PI * (long double)k / (long double)N;
        long double re = cosl(phase);
        long double im = (sign < 0) ? -sinl(phase) : sinl(phase);
#endif
        ref_energy += re * re + im * im;

        /* Reuse re/im as the difference between expected and measured. */
        re = re - data[2*k];
        im = im - data[2*k+1];
        err_energy += re * re + im * im;

        k++;
    }

#ifdef __ANDROID__
    return sqrt(err_energy) / sqrt(ref_energy);
#else
    return sqrtl(err_energy) / sqrtl(ref_energy);
#endif
}
/*
 * Run one n-point transform on a unit impulse placed at complex index 1 and
 * print one table row with the sign, the size and the relative L2 error
 * reported by impulse_error().
 *
 * n     transform size in complex points; must be at least 2 so that the
 *       impulse at index 1 fits inside the buffer.
 * sign  transform direction, passed through to ffts_init_1d().
 *
 * Returns 1 when the transform was executed, 0 when the size is too small,
 * memory could not be allocated, or no plan is available for this size.
 * Both buffers are released on every path.
 */
int
test_transform(int n, int sign) {
    float *input = NULL;
    float *output = NULL;
    ffts_plan_t *p = NULL;
    size_t bytes;
    int ok = 0;
    int i;

    if (n < 2) {
        printf("Size %d too small for impulse test\n", n);
        return 0;
    }

    /* Widen before multiplying so the byte count is computed in size_t. */
    bytes = 2 * (size_t)n * sizeof(float);

    /* Alignment comes from the allocator, not from the pointer variable. */
#ifdef HAVE_SSE
    input = _mm_malloc(bytes, 32);
    output = _mm_malloc(bytes, 32);
#else
    input = valloc(bytes);
    output = valloc(bytes);
#endif

    if (input == NULL || output == NULL) {
        printf("Out of memory\n");
        goto cleanup;
    }

    for (i = 0; i < n; i++) {
        input[2*i] = 0.0f;
        input[2*i+1] = 0.0f;
    }
    input[2] = 1.0f;  /* real part of complex sample 1 */

    p = ffts_init_1d(n, sign);
    if (p) {
        ffts_execute(p, input, output);
        printf(" %3d | %9d | %10E\n", sign, n, impulse_error(n, sign, output));
        ffts_free(p);
        ok = 1;
    } else {
        printf("Plan unsupported\n");
    }

cleanup:
    /* Release with the deallocator that matches the allocator used above. */
#ifdef HAVE_SSE
    _mm_free(input);
    _mm_free(output);
#else
    free(input);
    free(output);
#endif
    return ok;
}
/*
 * Test driver.
 *
 * With two arguments (size, sign): transform a ramp signal (re = index,
 * im = 0) of the given size and print every output bin as
 * "index sign re im".
 *
 * With any other argument count: print an error table for the sizes
 * 2^1 .. 2^18, first with sign -1 and then with sign +1.
 *
 * Returns 0 on completion (including the "Plan unsupported" case, as
 * before) and 1 when the size argument is not positive or the buffers
 * cannot be allocated.
 */
int
main(int argc, char *argv[]) {
    if (argc == 3) {
        int n = atoi(argv[1]);
        int sign = atoi(argv[2]);
        float *input;
        float *output;
        ffts_plan_t *p;
        size_t bytes;
        int status = 0;
        int i;

        if (n <= 0) {
            fprintf(stderr, "Invalid transform size: %s\n", argv[1]);
            return 1;
        }

        /* Widen before multiplying so the byte count is computed in size_t. */
        bytes = 2 * (size_t)n * sizeof(float);

#ifdef HAVE_SSE
        input = _mm_malloc(bytes, 32);
        output = _mm_malloc(bytes, 32);
#else
        input = valloc(bytes);
        output = valloc(bytes);
#endif

        if (input == NULL || output == NULL) {
            fprintf(stderr, "Out of memory\n");
            status = 1;
        } else {
            for (i = 0; i < n; i++) {
                input[2*i] = i;
                input[2*i+1] = 0.0f;
            }

            p = ffts_init_1d(n, sign);
            if (p) {
                ffts_execute(p, input, output);
                for (i = 0; i < n; i++) printf("%d %d %f %f\n", i, sign, output[2*i], output[2*i+1]);
                ffts_free(p);
            } else {
                printf("Plan unsupported\n");
            }
        }

        /* The guard must match the one used for allocation above, so that
           each buffer is released by the matching deallocator. */
#ifdef HAVE_SSE
        _mm_free(input);
        _mm_free(output);
#else
        free(input);
        free(output);
#endif
        return status;
    } else {
        /* Sweep the power-of-two sizes and print the error for each. */
        int log2n;

        printf(" Sign | Size | L2 Error\n");
        printf("------+-----------+-------------\n");

        /* Integer shift gives the exact size; no floating-point rounding. */
        for (log2n = 1; log2n <= 18; log2n++) {
            test_transform(1 << log2n, -1);
        }
        for (log2n = 1; log2n <= 18; log2n++) {
            test_transform(1 << log2n, 1);
        }
    }
    return 0;
}
Loading…
Cancel
Save