The AltiVec code actually works on 32-bit PowerPC platforms as well, so change the "powerpc64" token to "powerpc". Also clean up the shift code, which wasn't building properly on OS X.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1406 632fc199-4ca6-4c93-a231-07263d6284db
Author: DRC
Date: 2014-09-05 07:23:12 +00:00
Parent: cd2d8e1cc7
Commit: 7affbfc241
4 changed files with 11 additions and 12 deletions
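
For context on the shift cleanup below: applying "<<" to an AltiVec vector operand is a GNU C extension, which is presumably why the code did not build with the OS X toolchain of the era; the portable spelling is the vec_sl() intrinsic, whose shift count must itself be a vector, hence vec_splat_u16(2). A minimal standalone sketch (not part of the patch; the file name and printout are illustrative):

/* shift_sketch.c -- compile with "gcc -maltivec shift_sketch.c".
   Contrasts the portable vec_sl() form adopted by this commit with the
   GNU-extension "<<" form that failed to build on OS X. */
#include <altivec.h>
#include <stdio.h>

int main (void)
{
  __vector short v = { 1, 2, 3, 4, 5, 6, 7, 8 };
  /* vec_sl() requires a vector shift count; splat the constant 2 into
     all eight 16-bit elements. */
  __vector unsigned short two = vec_splat_u16(2);
  union { __vector short v; short s[8]; } u;
  int i;

  v = vec_sl(v, two);   /* portable: each element shifted left by 2 */
  /* v = v << 2; */     /* GNU C vector extension; not portable */

  u.v = v;
  for (i = 0; i < 8; i++)
    printf("%d ", u.s[i]);  /* prints: 4 8 12 16 20 24 28 32 */
  printf("\n");
  return 0;
}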

View File

@@ -481,9 +481,9 @@ if test "x${with_simd}" != "xno"; then
       fi
     fi
     ;;
-  powerpc64)
-    AC_MSG_RESULT([yes (powerpc64)])
-    simd_arch=powerpc64
+  powerpc*)
+    AC_MSG_RESULT([yes (powerpc)])
+    simd_arch=powerpc
     ;;
   *)
     AC_MSG_RESULT([no ("$host_cpu")])
@@ -510,7 +510,7 @@ AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
 AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
 AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"])
 AM_CONDITIONAL([SIMD_MIPS], [test "x$simd_arch" = "xmips"])
-AM_CONDITIONAL([SIMD_POWERPC64], [test "x$simd_arch" = "xpowerpc64"])
+AM_CONDITIONAL([SIMD_POWERPC], [test "x$simd_arch" = "xpowerpc"])
 AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
 AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])

View File

@@ -70,9 +70,9 @@ libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S
 endif
-if SIMD_POWERPC64
-libsimd_la_SOURCES = jsimd_powerpc64.c jsimd_powerpc64_altivec.c
+if SIMD_POWERPC
+libsimd_la_SOURCES = jsimd_powerpc.c jsimd_powerpc_altivec.c
 libsimd_la_CFLAGS = -maltivec
 endif

View File

@@ -67,8 +67,6 @@
   col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \
 }

-#define PRE_MULTIPLY_SCALE_BITS 2
-
 static const __vector short constants __attribute__((aligned(16))) =
 {
   98 << 5, /* FIX(0.382683433) */
@@ -90,7 +88,7 @@ static const __vector short constants __attribute__((aligned(16))) =
   out4 = vec_sub(tmp10, tmp11); \
   \
   z1 = vec_add(tmp12, tmp13); \
-  z1 = z1 << PRE_MULTIPLY_SCALE_BITS; \
+  z1 = vec_sl(z1, PRE_MULTIPLY_SCALE_BITS); \
   z1 = vec_madds(z1, PW_0707, zero); \
   \
   out2 = vec_add(tmp13, z1); \
@@ -102,8 +100,8 @@ static const __vector short constants __attribute__((aligned(16))) =
   tmp11 = vec_add(tmp5, tmp6); \
   tmp12 = vec_add(tmp6, tmp7); \
   \
-  tmp10 = tmp10 << PRE_MULTIPLY_SCALE_BITS; \
-  tmp12 = tmp12 << PRE_MULTIPLY_SCALE_BITS; \
+  tmp10 = vec_sl(tmp10, PRE_MULTIPLY_SCALE_BITS); \
+  tmp12 = vec_sl(tmp12, PRE_MULTIPLY_SCALE_BITS); \
   z5 = vec_sub(tmp10, tmp12); \
   z5 = vec_madds(z5, PW_0382, zero); \
   \
@@ -113,7 +111,7 @@ static const __vector short constants __attribute__((aligned(16))) =
   z4 = vec_madds(tmp12, PW_1306, zero); \
   z4 = vec_add(z4, z5); \
   \
-  tmp11 = tmp11 << PRE_MULTIPLY_SCALE_BITS; \
+  tmp11 = vec_sl(tmp11, PRE_MULTIPLY_SCALE_BITS); \
   z3 = vec_madds(tmp11, PW_0707, zero); \
   \
   z11 = vec_add(tmp7, z3); \
@@ -140,6 +138,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data)
     PW_0541 = vec_splat(constants, 1),
     PW_0707 = vec_splat(constants, 2),
     PW_1306 = vec_splat(constants, 3);
+  __vector unsigned short PRE_MULTIPLY_SCALE_BITS = vec_splat_u16(2);

   /* Pass 1: process rows. */
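
Assuming the standard semantics of vec_madds (the vmhaddshs instruction: per 16-bit element, ((a * b) >> 15) + c, saturated), the fixed-point bookkeeping behind the splatted shift count works out as follows. The table entries are stored as FIX(x) << 5, where FIX(x) means x * 2^8, so pre-shifting the input left by PRE_MULTIPLY_SCALE_BITS = 2 makes the scale factors cancel exactly:

  ((in << 2) * (FIX(x) << 5)) >> 15 = in * x * 2^(2+8+5-15) = in * x

For example, vec_madds(vec_sl(in, PRE_MULTIPLY_SCALE_BITS), PW_0382, zero) yields in * 0.382683433 with no residual scaling.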