iOS ARM support

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@659 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2011-06-14 22:16:50 +00:00
parent b8c6ee38b0
commit 4346f91fcb
4 changed files with 98 additions and 44 deletions

View File

@@ -140,18 +140,40 @@ fi
# Test whether the assembler is suitable and supports NEON instructions
AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[
ac_good_gnu_arm_assembler=no
ac_save_CC="$CC"
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-x assembler-with-cpp $CFLAGS"
CFLAGS="$CCASFLAGS -x assembler-with-cpp"
CC="$CCAS"
AC_COMPILE_IFELSE([[
.text
.fpu neon
.arch armv7a
.object_arch armv4
.arm
.altmacro
pld [r0]
vmovn.u16 d0, q0]], ac_good_gnu_arm_assembler=yes)
ac_use_gas_preprocessor=no
if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
CC="gas-preprocessor.pl $CCAS"
AC_COMPILE_IFELSE([[
.text
.fpu neon
.arch armv7a
.object_arch armv4
.arm
pld [r0]
vmovn.u16 d0, q0]], ac_use_gas_preprocessor=yes)
fi
CFLAGS="$ac_save_CFLAGS"
CC="$ac_save_CC"
if test "x$ac_use_gas_preprocessor" = "xyes" ; then
CCAS="gas-preprocessor.pl $CCAS"
AC_SUBST([CCAS])
ac_good_gnu_arm_assembler=yes
fi
if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then
$1
else

View File

@@ -260,6 +260,21 @@ AC_SUBST(JAVA_RPM_CONTENTS_1)
AC_SUBST(JAVA_RPM_CONTENTS_2)
AC_SUBST(RPM_CONFIG_ARGS)
# optionally force using gas-preprocessor.pl for compatibility testing
AC_ARG_WITH([gas-preprocessor],
AC_HELP_STRING([--with-gas-preprocessor],[Force using gas-preprocessor.pl on ARM.]))
if test "x${with_gas_preprocessor}" = "xyes"; then
case $host_os in
darwin*)
CCAS="gas-preprocessor.pl -fix-unreq $CC"
;;
*)
CCAS="gas-preprocessor.pl -no-fix-unreq $CC"
;;
esac
AC_SUBST([CCAS])
fi
# SIMD is optional
AC_ARG_WITH([simd],
AC_HELP_STRING([--without-simd],[Omit SIMD extensions.]))

View File

@@ -29,7 +29,7 @@
static unsigned int simd_support = ~0;
#ifdef __linux__
#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
@@ -100,14 +100,21 @@ LOCAL(void)
init_simd (void)
{
char *env = NULL;
/* bufsize is only consumed by the /proc/cpuinfo runtime-detection path, which
 * is compiled out whenever __ARM_NEON__ is defined.  The parentheses make the
 * !__ARM_NEON__ test apply to the Android macros as well as to __linux__
 * (&& binds tighter than ||), so the variable is not declared unused there. */
#if !defined(__ARM_NEON__) && \
    (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
int bufsize = 1024; /* an initial guess for the line buffer size limit */
#endif
if (simd_support != ~0)
return;
simd_support = 0;
#ifdef __linux__
#if defined(__ARM_NEON__)
simd_support |= JSIMD_ARM_NEON;
#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
/* We still have a chance to use NEON regardless of globally used
* -mcpu/-mfpu options passed to gcc by performing runtime detection via
* /proc/cpuinfo parsing on linux/android */
while (!parse_proc_cpuinfo(bufsize)) {
bufsize *= 2;
if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)

View File

@@ -30,28 +30,33 @@
.fpu neon
.arch armv7a
.object_arch armv4
.altmacro
.arm
/*****************************************************************************/
/* Supplementary macro for setting function attributes */
.macro asm_function fname
.func fname
.global fname
#ifdef __APPLE__
.func _\fname
.globl _\fname
_\fname:
#else
.func \fname
.global \fname
#ifdef __ELF__
.hidden fname
.type fname, %function
.hidden \fname
.type \fname, %function
#endif
\fname:
#endif
fname:
.endm
/* Transpose a block of 4x4 coefficients in four 64-bit registers */
.macro transpose_4x4 x0, x1, x2, x3
vtrn.16 x0, x1
vtrn.16 x2, x3
vtrn.32 x0, x2
vtrn.32 x1, x3
vtrn.16 \x0, \x1
vtrn.16 \x2, \x3
vtrn.32 \x0, \x2
vtrn.32 \x1, \x3
.endm
/*****************************************************************************/
@@ -224,7 +229,7 @@ asm_function jsimd_idct_ifast_neon
.irp x, d4, d6, d8, d10, d12, d14, d16, d18
ldr TMP, [OUTPUT_BUF], #4
add TMP, TMP, OUTPUT_COL
vst1.8 {x}, [TMP]!
vst1.8 {\x}, [TMP]!
.endr
vpop {d8-d15}
@@ -252,22 +257,16 @@ asm_function jsimd_idct_ifast_neon
* Colorspace conversion YCbCr -> RGB
*/
.balign 16
jsimd_ycc_rgb_neon_consts:
.short 0, 0, 0, 0
.short 22971, -11277, -23401, 29033
.short -128, -128, -128, -128
.short -128, -128, -128, -128
.macro do_load size
.if size == 8
.if \size == 8
vld1.8 {d4}, [U]!
vld1.8 {d5}, [V]!
vld1.8 {d0}, [Y]!
pld [Y, #64]
pld [U, #64]
pld [V, #64]
.elseif size == 4
.elseif \size == 4
vld1.8 {d4[0]}, [U]!
vld1.8 {d4[1]}, [U]!
vld1.8 {d4[2]}, [U]!
@@ -280,14 +279,14 @@ jsimd_ycc_rgb_neon_consts:
vld1.8 {d0[1]}, [Y]!
vld1.8 {d0[2]}, [Y]!
vld1.8 {d0[3]}, [Y]!
.elseif size == 2
.elseif \size == 2
vld1.8 {d4[4]}, [U]!
vld1.8 {d4[5]}, [U]!
vld1.8 {d5[4]}, [V]!
vld1.8 {d5[5]}, [V]!
vld1.8 {d0[4]}, [Y]!
vld1.8 {d0[5]}, [Y]!
.elseif size == 1
.elseif \size == 1
vld1.8 {d4[6]}, [U]!
vld1.8 {d5[6]}, [V]!
vld1.8 {d0[6]}, [Y]!
@@ -297,34 +296,34 @@ jsimd_ycc_rgb_neon_consts:
.endm
.macro do_store bpp, size
.if bpp == 24
.if size == 8
.if \bpp == 24
.if \size == 8
vst3.8 {d10, d11, d12}, [RGB]!
.elseif size == 4
.elseif \size == 4
vst3.8 {d10[0], d11[0], d12[0]}, [RGB]!
vst3.8 {d10[1], d11[1], d12[1]}, [RGB]!
vst3.8 {d10[2], d11[2], d12[2]}, [RGB]!
vst3.8 {d10[3], d11[3], d12[3]}, [RGB]!
.elseif size == 2
.elseif \size == 2
vst3.8 {d10[4], d11[4], d12[4]}, [RGB]!
vst3.8 {d10[5], d11[5], d12[5]}, [RGB]!
.elseif size == 1
.elseif \size == 1
vst3.8 {d10[6], d11[6], d12[6]}, [RGB]!
.else
.error unsupported macroblock size
.endif
.elseif bpp == 32
.if size == 8
.elseif \bpp == 32
.if \size == 8
vst4.8 {d10, d11, d12, d13}, [RGB]!
.elseif size == 4
.elseif \size == 4
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]!
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]!
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]!
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]!
.elseif size == 2
.elseif \size == 2
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]!
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]!
.elseif size == 1
.elseif \size == 1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]!
.else
.error unsupported macroblock size
@@ -356,12 +355,23 @@ jsimd_ycc_rgb_neon_consts:
vaddw.u8 q10, q10, d0
vaddw.u8 q12, q12, d0
vaddw.u8 q14, q14, d0
vqmovun.s16 d1&g_offs, q10
vqmovun.s16 d1&r_offs, q12
vqmovun.s16 d1&b_offs, q14
vqmovun.s16 d1\g_offs, q10
vqmovun.s16 d1\r_offs, q12
vqmovun.s16 d1\b_offs, q14
.endm
asm_function jsimd_ycc_&colorid&_convert_neon
/* Apple gas crashes on adrl; work around that by using adr instead.
 * This requires a separate copy of these constants for each function.
 */
.balign 16
jsimd_ycc_\colorid\()_neon_consts:
.short 0, 0, 0, 0
.short 22971, -11277, -23401, 29033
.short -128, -128, -128, -128
.short -128, -128, -128, -128
asm_function jsimd_ycc_\colorid\()_convert_neon
OUTPUT_WIDTH .req r0
INPUT_BUF .req r1
INPUT_ROW .req r2
@@ -379,7 +389,7 @@ asm_function jsimd_ycc_&colorid&_convert_neon
N .req ip
/* Load constants to d1, d2, d3 (d0 is just used for padding) */
adrl ip, jsimd_ycc_rgb_neon_consts
adr ip, jsimd_ycc_\colorid\()_neon_consts
vld1.16 {d0, d1, d2, d3}, [ip, :128]
/* Save ARM registers and handle input arguments */
@@ -414,7 +424,7 @@ asm_function jsimd_ycc_&colorid&_convert_neon
1:
do_load 8
do_yuv_to_rgb
do_store bpp, 8
do_store \bpp, 8
subs N, N, #8
bge 1b
tst N, #7
@@ -435,15 +445,15 @@ asm_function jsimd_ycc_&colorid&_convert_neon
do_yuv_to_rgb
tst N, #4
beq 6f
do_store bpp, 4
do_store \bpp, 4
6:
tst N, #2
beq 7f
do_store bpp, 2
do_store \bpp, 2
7:
tst N, #1
beq 8f
do_store bpp, 1
do_store \bpp, 1
8:
subs NUM_ROWS, NUM_ROWS, #1
bgt 0b