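The AltiVec code actually works on 32-bit PowerPC platforms as well, so change the "powerpc64" token to "powerpc" throughout the build system (the configure host-CPU check, the SIMD_POWERPC Automake conditional, and the jsimd_powerpc*.c source file names). Also clean up the shift code, which wasn't building properly on OS X: replace the `<<` operator applied to vector operands with vec_sl(), and turn PRE_MULTIPLY_SCALE_BITS from a #define into a vector shift count created with vec_splat_u16(2).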
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1406 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -481,9 +481,9 @@ if test "x${with_simd}" != "xno"; then
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
powerpc64)
|
||||
AC_MSG_RESULT([yes (powerpc64)])
|
||||
simd_arch=powerpc64
|
||||
powerpc*)
|
||||
AC_MSG_RESULT([yes (powerpc)])
|
||||
simd_arch=powerpc
|
||||
;;
|
||||
*)
|
||||
AC_MSG_RESULT([no ("$host_cpu")])
|
||||
@@ -510,7 +510,7 @@ AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
|
||||
AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
|
||||
AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"])
|
||||
AM_CONDITIONAL([SIMD_MIPS], [test "x$simd_arch" = "xmips"])
|
||||
AM_CONDITIONAL([SIMD_POWERPC64], [test "x$simd_arch" = "xpowerpc64"])
|
||||
AM_CONDITIONAL([SIMD_POWERPC], [test "x$simd_arch" = "xpowerpc"])
|
||||
AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
|
||||
AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])
|
||||
|
||||
|
||||
@@ -70,9 +70,9 @@ libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S
|
||||
|
||||
endif
|
||||
|
||||
if SIMD_POWERPC64
|
||||
if SIMD_POWERPC
|
||||
|
||||
libsimd_la_SOURCES = jsimd_powerpc64.c jsimd_powerpc64_altivec.c
|
||||
libsimd_la_SOURCES = jsimd_powerpc.c jsimd_powerpc_altivec.c
|
||||
libsimd_la_CFLAGS = -maltivec
|
||||
|
||||
endif
|
||||
|
||||
@@ -67,8 +67,6 @@
|
||||
col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \
|
||||
}
|
||||
|
||||
#define PRE_MULTIPLY_SCALE_BITS 2
|
||||
|
||||
static const __vector short constants __attribute__((aligned(16))) =
|
||||
{
|
||||
98 << 5, /* FIX(0.382683433) */
|
||||
@@ -90,7 +88,7 @@ static const __vector short constants __attribute__((aligned(16))) =
|
||||
out4 = vec_sub(tmp10, tmp11); \
|
||||
\
|
||||
z1 = vec_add(tmp12, tmp13); \
|
||||
z1 = z1 << PRE_MULTIPLY_SCALE_BITS; \
|
||||
z1 = vec_sl(z1, PRE_MULTIPLY_SCALE_BITS); \
|
||||
z1 = vec_madds(z1, PW_0707, zero); \
|
||||
\
|
||||
out2 = vec_add(tmp13, z1); \
|
||||
@@ -102,8 +100,8 @@ static const __vector short constants __attribute__((aligned(16))) =
|
||||
tmp11 = vec_add(tmp5, tmp6); \
|
||||
tmp12 = vec_add(tmp6, tmp7); \
|
||||
\
|
||||
tmp10 = tmp10 << PRE_MULTIPLY_SCALE_BITS; \
|
||||
tmp12 = tmp12 << PRE_MULTIPLY_SCALE_BITS; \
|
||||
tmp10 = vec_sl(tmp10, PRE_MULTIPLY_SCALE_BITS); \
|
||||
tmp12 = vec_sl(tmp12, PRE_MULTIPLY_SCALE_BITS); \
|
||||
z5 = vec_sub(tmp10, tmp12); \
|
||||
z5 = vec_madds(z5, PW_0382, zero); \
|
||||
\
|
||||
@@ -113,7 +111,7 @@ static const __vector short constants __attribute__((aligned(16))) =
|
||||
z4 = vec_madds(tmp12, PW_1306, zero); \
|
||||
z4 = vec_add(z4, z5); \
|
||||
\
|
||||
tmp11 = tmp11 << PRE_MULTIPLY_SCALE_BITS; \
|
||||
tmp11 = vec_sl(tmp11, PRE_MULTIPLY_SCALE_BITS); \
|
||||
z3 = vec_madds(tmp11, PW_0707, zero); \
|
||||
\
|
||||
z11 = vec_add(tmp7, z3); \
|
||||
@@ -140,6 +138,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data)
|
||||
PW_0541 = vec_splat(constants, 1),
|
||||
PW_0707 = vec_splat(constants, 2),
|
||||
PW_1306 = vec_splat(constants, 3);
|
||||
__vector unsigned short PRE_MULTIPLY_SCALE_BITS = vec_splat_u16(2);
|
||||
|
||||
/* Pass 1: process rows. */
|
||||
|
||||
Reference in New Issue
Block a user