iOS ARM support

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@659 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2011-06-14 22:16:50 +00:00
parent b8c6ee38b0
commit 4346f91fcb
4 changed files with 98 additions and 44 deletions

View File

@@ -140,18 +140,40 @@ fi
# Test whether the assembler is suitable and supports NEON instructions # Test whether the assembler is suitable and supports NEON instructions
AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[ AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[
ac_good_gnu_arm_assembler=no ac_good_gnu_arm_assembler=no
ac_save_CC="$CC"
ac_save_CFLAGS="$CFLAGS" ac_save_CFLAGS="$CFLAGS"
CFLAGS="-x assembler-with-cpp $CFLAGS" CFLAGS="$CCASFLAGS -x assembler-with-cpp"
CC="$CCAS"
AC_COMPILE_IFELSE([[ AC_COMPILE_IFELSE([[
.text .text
.fpu neon .fpu neon
.arch armv7a .arch armv7a
.object_arch armv4 .object_arch armv4
.arm .arm
.altmacro
pld [r0] pld [r0]
vmovn.u16 d0, q0]], ac_good_gnu_arm_assembler=yes) vmovn.u16 d0, q0]], ac_good_gnu_arm_assembler=yes)
ac_use_gas_preprocessor=no
if test "x$ac_good_gnu_arm_assembler" = "xno" ; then
CC="gas-preprocessor.pl $CCAS"
AC_COMPILE_IFELSE([[
.text
.fpu neon
.arch armv7a
.object_arch armv4
.arm
pld [r0]
vmovn.u16 d0, q0]], ac_use_gas_preprocessor=yes)
fi
CFLAGS="$ac_save_CFLAGS" CFLAGS="$ac_save_CFLAGS"
CC="$ac_save_CC"
if test "x$ac_use_gas_preprocessor" = "xyes" ; then
CCAS="gas-preprocessor.pl $CCAS"
AC_SUBST([CCAS])
ac_good_gnu_arm_assembler=yes
fi
if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then if test "x$ac_good_gnu_arm_assembler" = "xyes" ; then
$1 $1
else else

View File

@@ -260,6 +260,21 @@ AC_SUBST(JAVA_RPM_CONTENTS_1)
AC_SUBST(JAVA_RPM_CONTENTS_2) AC_SUBST(JAVA_RPM_CONTENTS_2)
AC_SUBST(RPM_CONFIG_ARGS) AC_SUBST(RPM_CONFIG_ARGS)
# --with-gas-preprocessor: route assembly through gas-preprocessor.pl even
# when it is not strictly required, so the wrapper can be compatibility-tested.
# Only the .unreq handling switch differs between Darwin and other hosts.
AC_ARG_WITH([gas-preprocessor],
AC_HELP_STRING([--with-gas-preprocessor],[Force using gas-preprocessor.pl on ARM.]))
if test "x${with_gas_preprocessor}" = "xyes"; then
# Pick the .unreq switch for this host first, then build CCAS in one place.
case $host_os in
darwin*) gas_pp_unreq_opt="-fix-unreq" ;;
*) gas_pp_unreq_opt="-no-fix-unreq" ;;
esac
CCAS="gas-preprocessor.pl $gas_pp_unreq_opt $CC"
AC_SUBST([CCAS])
fi
# SIMD is optional # SIMD is optional
AC_ARG_WITH([simd], AC_ARG_WITH([simd],
AC_HELP_STRING([--without-simd],[Omit SIMD extensions.])) AC_HELP_STRING([--without-simd],[Omit SIMD extensions.]))

View File

@@ -29,7 +29,7 @@
static unsigned int simd_support = ~0; static unsigned int simd_support = ~0;
#ifdef __linux__ #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
@@ -100,14 +100,21 @@ LOCAL(void)
init_simd (void) init_simd (void)
{ {
char *env = NULL; char *env = NULL;
/* bufsize is consumed only by the /proc/cpuinfo probing loop, which is
 * compiled out whenever __ARM_NEON__ is defined.  The OS tests must be
 * parenthesized: && binds tighter than ||, so without the parentheses the
 * variable was still declared (and left unused) on ANDROID/__ANDROID__
 * builds that define __ARM_NEON__. */
#if !defined(__ARM_NEON__) && (defined(__linux__) || defined(ANDROID) || defined(__ANDROID__))
int bufsize = 1024; /* an initial guess for the line buffer size limit */ int bufsize = 1024; /* an initial guess for the line buffer size limit */
#endif
if (simd_support != ~0) if (simd_support != ~0)
return; return;
simd_support = 0; simd_support = 0;
#ifdef __linux__ #if defined(__ARM_NEON__)
simd_support |= JSIMD_ARM_NEON;
#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
/* We still have a chance to use NEON regardless of globally used
* -mcpu/-mfpu options passed to gcc by performing runtime detection via
* /proc/cpuinfo parsing on linux/android */
while (!parse_proc_cpuinfo(bufsize)) { while (!parse_proc_cpuinfo(bufsize)) {
bufsize *= 2; bufsize *= 2;
if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)

View File

@@ -30,28 +30,33 @@
.fpu neon .fpu neon
.arch armv7a .arch armv7a
.object_arch armv4 .object_arch armv4
.altmacro
.arm .arm
/*****************************************************************************/ /*****************************************************************************/
/* Supplementary macro for setting function attributes */ /* Supplementary macro for setting function attributes */
/*
 * asm_function fname
 *
 * Opens an exported function: emits .func for the symbol, gives it global
 * visibility and defines its entry label.
 *
 *   - When __APPLE__ is defined the symbol is emitted as _<fname> (leading
 *     underscore) and exported with .globl.
 *   - Otherwise the plain name is used; when __ELF__ is defined the symbol
 *     is additionally marked .hidden and typed as %function.
 *
 * The macro does not emit a matching .endfunc.
 * NOTE(review): confirm each user closes the function with .endfunc.
 *
 * NOTE(review): this listing shows the pre-change and post-change text of
 * each line side by side; the post-change side references the argument as
 * \fname, the pre-change side as bare fname.
 */
.macro asm_function fname .macro asm_function fname
.func fname #ifdef __APPLE__
.global fname .func _\fname
.globl _\fname
/* Apple branch: entry label carries the leading underscore */
_\fname:
#else
.func \fname
.global \fname
#ifdef __ELF__ #ifdef __ELF__
.hidden fname .hidden \fname
.type fname, %function .type \fname, %function
#endif
/* non-Apple branch: entry label is the plain name */
\fname:
#endif #endif
fname:
.endm .endm
/* Transpose a block of 4x4 coefficients in four 64-bit registers */ /* Transpose a block of 4x4 coefficients in four 64-bit registers */
/*
 * transpose_4x4 x0, x1, x2, x3
 *
 * Works only on the four registers passed as arguments: first a vtrn.16
 * step on the pairs (x0, x1) and (x2, x3), then a vtrn.32 step on the
 * pairs (x0, x2) and (x1, x3).
 *
 * NOTE(review): each line below shows the pre-change text (bare argument
 * names) next to the post-change text (arguments referenced as \x0..\x3).
 */
.macro transpose_4x4 x0, x1, x2, x3 .macro transpose_4x4 x0, x1, x2, x3
/* step 1: 16-bit element transpose within each adjacent register pair */
vtrn.16 x0, x1 vtrn.16 \x0, \x1
vtrn.16 x2, x3 vtrn.16 \x2, \x3
/* step 2: 32-bit element transpose across the two pairs */
vtrn.32 x0, x2 vtrn.32 \x0, \x2
vtrn.32 x1, x3 vtrn.32 \x1, \x3
.endm .endm
/*****************************************************************************/ /*****************************************************************************/
@@ -224,7 +229,7 @@ asm_function jsimd_idct_ifast_neon
.irp x, d4, d6, d8, d10, d12, d14, d16, d18 .irp x, d4, d6, d8, d10, d12, d14, d16, d18
ldr TMP, [OUTPUT_BUF], #4 ldr TMP, [OUTPUT_BUF], #4
add TMP, TMP, OUTPUT_COL add TMP, TMP, OUTPUT_COL
vst1.8 {x}, [TMP]! vst1.8 {\x}, [TMP]!
.endr .endr
vpop {d8-d15} vpop {d8-d15}
@@ -252,22 +257,16 @@ asm_function jsimd_idct_ifast_neon
* Colorspace conversion YCbCr -> RGB * Colorspace conversion YCbCr -> RGB
*/ */
.balign 16
jsimd_ycc_rgb_neon_consts:
.short 0, 0, 0, 0
.short 22971, -11277, -23401, 29033
.short -128, -128, -128, -128
.short -128, -128, -128, -128
.macro do_load size .macro do_load size
.if size == 8 .if \size == 8
vld1.8 {d4}, [U]! vld1.8 {d4}, [U]!
vld1.8 {d5}, [V]! vld1.8 {d5}, [V]!
vld1.8 {d0}, [Y]! vld1.8 {d0}, [Y]!
pld [Y, #64] pld [Y, #64]
pld [U, #64] pld [U, #64]
pld [V, #64] pld [V, #64]
.elseif size == 4 .elseif \size == 4
vld1.8 {d4[0]}, [U]! vld1.8 {d4[0]}, [U]!
vld1.8 {d4[1]}, [U]! vld1.8 {d4[1]}, [U]!
vld1.8 {d4[2]}, [U]! vld1.8 {d4[2]}, [U]!
@@ -280,14 +279,14 @@ jsimd_ycc_rgb_neon_consts:
vld1.8 {d0[1]}, [Y]! vld1.8 {d0[1]}, [Y]!
vld1.8 {d0[2]}, [Y]! vld1.8 {d0[2]}, [Y]!
vld1.8 {d0[3]}, [Y]! vld1.8 {d0[3]}, [Y]!
.elseif size == 2 .elseif \size == 2
vld1.8 {d4[4]}, [U]! vld1.8 {d4[4]}, [U]!
vld1.8 {d4[5]}, [U]! vld1.8 {d4[5]}, [U]!
vld1.8 {d5[4]}, [V]! vld1.8 {d5[4]}, [V]!
vld1.8 {d5[5]}, [V]! vld1.8 {d5[5]}, [V]!
vld1.8 {d0[4]}, [Y]! vld1.8 {d0[4]}, [Y]!
vld1.8 {d0[5]}, [Y]! vld1.8 {d0[5]}, [Y]!
.elseif size == 1 .elseif \size == 1
vld1.8 {d4[6]}, [U]! vld1.8 {d4[6]}, [U]!
vld1.8 {d5[6]}, [V]! vld1.8 {d5[6]}, [V]!
vld1.8 {d0[6]}, [Y]! vld1.8 {d0[6]}, [Y]!
@@ -297,34 +296,34 @@ jsimd_ycc_rgb_neon_consts:
.endm .endm
.macro do_store bpp, size .macro do_store bpp, size
.if bpp == 24 .if \bpp == 24
.if size == 8 .if \size == 8
vst3.8 {d10, d11, d12}, [RGB]! vst3.8 {d10, d11, d12}, [RGB]!
.elseif size == 4 .elseif \size == 4
vst3.8 {d10[0], d11[0], d12[0]}, [RGB]! vst3.8 {d10[0], d11[0], d12[0]}, [RGB]!
vst3.8 {d10[1], d11[1], d12[1]}, [RGB]! vst3.8 {d10[1], d11[1], d12[1]}, [RGB]!
vst3.8 {d10[2], d11[2], d12[2]}, [RGB]! vst3.8 {d10[2], d11[2], d12[2]}, [RGB]!
vst3.8 {d10[3], d11[3], d12[3]}, [RGB]! vst3.8 {d10[3], d11[3], d12[3]}, [RGB]!
.elseif size == 2 .elseif \size == 2
vst3.8 {d10[4], d11[4], d12[4]}, [RGB]! vst3.8 {d10[4], d11[4], d12[4]}, [RGB]!
vst3.8 {d10[5], d11[5], d12[5]}, [RGB]! vst3.8 {d10[5], d11[5], d12[5]}, [RGB]!
.elseif size == 1 .elseif \size == 1
vst3.8 {d10[6], d11[6], d12[6]}, [RGB]! vst3.8 {d10[6], d11[6], d12[6]}, [RGB]!
.else .else
.error unsupported macroblock size .error unsupported macroblock size
.endif .endif
.elseif bpp == 32 .elseif \bpp == 32
.if size == 8 .if \size == 8
vst4.8 {d10, d11, d12, d13}, [RGB]! vst4.8 {d10, d11, d12, d13}, [RGB]!
.elseif size == 4 .elseif \size == 4
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]! vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [RGB]!
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]! vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [RGB]!
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]! vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [RGB]!
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]! vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [RGB]!
.elseif size == 2 .elseif \size == 2
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]! vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [RGB]!
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]! vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [RGB]!
.elseif size == 1 .elseif \size == 1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]! vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [RGB]!
.else .else
.error unsupported macroblock size .error unsupported macroblock size
@@ -356,12 +355,23 @@ jsimd_ycc_rgb_neon_consts:
vaddw.u8 q10, q10, d0 vaddw.u8 q10, q10, d0
vaddw.u8 q12, q12, d0 vaddw.u8 q12, q12, d0
vaddw.u8 q14, q14, d0 vaddw.u8 q14, q14, d0
vqmovun.s16 d1&g_offs, q10 vqmovun.s16 d1\g_offs, q10
vqmovun.s16 d1&r_offs, q12 vqmovun.s16 d1\r_offs, q12
vqmovun.s16 d1&b_offs, q14 vqmovun.s16 d1\b_offs, q14
.endm .endm
asm_function jsimd_ycc_&colorid&_convert_neon /* Apple gas crashes on adrl, work around that by using adr.
* But this requires a copy of these constants for each function.
*/
/*
 * Constant table emitted once per \colorid expansion, four rows of four
 * 16-bit values (32 bytes in total).  The convert function takes its address
 * with adr and loads all four rows at once with
 * vld1.16 {d0, d1, d2, d3}, [ip, :128]; the 16-byte alignment requested here
 * matches that :128 alignment hint.
 */
.balign 16
jsimd_ycc_\colorid\()_neon_consts:
/* row 0 -> d0: padding only, not used as a constant */
.short 0, 0, 0, 0
/* row 1 -> d1.  NOTE(review): presumably the fixed-point YCbCr->RGB
 * multipliers; confirm scaling against do_yuv_to_rgb */
.short 22971, -11277, -23401, 29033
/* rows 2 and 3 -> d2, d3.  NOTE(review): presumably the -128 chroma
 * re-centering offset; confirm against do_yuv_to_rgb */
.short -128, -128, -128, -128
.short -128, -128, -128, -128
asm_function jsimd_ycc_\colorid\()_convert_neon
OUTPUT_WIDTH .req r0 OUTPUT_WIDTH .req r0
INPUT_BUF .req r1 INPUT_BUF .req r1
INPUT_ROW .req r2 INPUT_ROW .req r2
@@ -379,7 +389,7 @@ asm_function jsimd_ycc_&colorid&_convert_neon
N .req ip N .req ip
/* Load constants to d1, d2, d3 (d0 is just used for padding) */ /* Load constants to d1, d2, d3 (d0 is just used for padding) */
adrl ip, jsimd_ycc_rgb_neon_consts adr ip, jsimd_ycc_\colorid\()_neon_consts
vld1.16 {d0, d1, d2, d3}, [ip, :128] vld1.16 {d0, d1, d2, d3}, [ip, :128]
/* Save ARM registers and handle input arguments */ /* Save ARM registers and handle input arguments */
@@ -414,7 +424,7 @@ asm_function jsimd_ycc_&colorid&_convert_neon
1: 1:
do_load 8 do_load 8
do_yuv_to_rgb do_yuv_to_rgb
do_store bpp, 8 do_store \bpp, 8
subs N, N, #8 subs N, N, #8
bge 1b bge 1b
tst N, #7 tst N, #7
@@ -435,15 +445,15 @@ asm_function jsimd_ycc_&colorid&_convert_neon
do_yuv_to_rgb do_yuv_to_rgb
tst N, #4 tst N, #4
beq 6f beq 6f
do_store bpp, 4 do_store \bpp, 4
6: 6:
tst N, #2 tst N, #2
beq 7f beq 7f
do_store bpp, 2 do_store \bpp, 2
7: 7:
tst N, #1 tst N, #1
beq 8f beq 8f
do_store bpp, 1 do_store \bpp, 1
8: 8:
subs NUM_ROWS, NUM_ROWS, #1 subs NUM_ROWS, NUM_ROWS, #1
bgt 0b bgt 0b