The AltiVec code actually works on 32-bit PowerPC platforms as well, so change the "powerpc64" token to "powerpc". Also clean up the shift code, which wasn't building properly on OS X.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1406 632fc199-4ca6-4c93-a231-07263d6284db
Author: DRC
Date: 2014-09-05 07:23:12 +00:00
Parent: cd2d8e1cc7
Commit: 7affbfc241
4 changed files with 11 additions and 12 deletions
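
For context on the shift cleanup below: applying "<<" to an AltiVec vector operand is a GNU C extension, which is presumably why the code did not build with the OS X toolchain of the era; the portable spelling is the vec_sl() intrinsic, whose shift count must itself be a vector, hence vec_splat_u16(2). A minimal standalone sketch (not part of the patch; the file name and printout are illustrative):

/* shift_sketch.c -- compile with "gcc -maltivec shift_sketch.c".
   Contrasts the portable vec_sl() form adopted by this commit with the
   GNU-extension "<<" form that failed to build on OS X. */
#include <altivec.h>
#include <stdio.h>

int main (void)
{
  __vector short v = { 1, 2, 3, 4, 5, 6, 7, 8 };
  /* vec_sl() requires a vector shift count; splat the constant 2 into
     all eight 16-bit elements. */
  __vector unsigned short two = vec_splat_u16(2);
  union { __vector short v; short s[8]; } u;
  int i;

  v = vec_sl(v, two);   /* portable: each element shifted left by 2 */
  /* v = v << 2; */     /* GNU C vector extension; not portable */

  u.v = v;
  for (i = 0; i < 8; i++)
    printf("%d ", u.s[i]);  /* prints: 4 8 12 16 20 24 28 32 */
  printf("\n");
  return 0;
}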

View File

@@ -481,9 +481,9 @@ if test "x${with_simd}" != "xno"; then
       fi
     fi
     ;;
-  powerpc64)
-    AC_MSG_RESULT([yes (powerpc64)])
-    simd_arch=powerpc64
+  powerpc*)
+    AC_MSG_RESULT([yes (powerpc)])
+    simd_arch=powerpc
     ;;
   *)
     AC_MSG_RESULT([no ("$host_cpu")])
@@ -510,7 +510,7 @@ AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
 AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
 AM_CONDITIONAL([SIMD_ARM_64], [test "x$simd_arch" = "xaarch64"])
 AM_CONDITIONAL([SIMD_MIPS], [test "x$simd_arch" = "xmips"])
-AM_CONDITIONAL([SIMD_POWERPC64], [test "x$simd_arch" = "xpowerpc64"])
+AM_CONDITIONAL([SIMD_POWERPC], [test "x$simd_arch" = "xpowerpc"])
 AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
 AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])

View File

@@ -70,9 +70,9 @@ libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2_asm.h jsimd_mips_dspr2.S
 endif
-if SIMD_POWERPC64
-libsimd_la_SOURCES = jsimd_powerpc64.c jsimd_powerpc64_altivec.c
+if SIMD_POWERPC
+libsimd_la_SOURCES = jsimd_powerpc.c jsimd_powerpc_altivec.c
 libsimd_la_CFLAGS = -maltivec
 endif

View File

@@ -67,8 +67,6 @@
   col##7 = vec_mergel(col67e, col67o); /* col7=(07 17 27 37 47 57 67 77) */ \
 }

-#define PRE_MULTIPLY_SCALE_BITS 2
-
 static const __vector short constants __attribute__((aligned(16))) =
 {
   98 << 5, /* FIX(0.382683433) */
@@ -90,7 +88,7 @@ static const __vector short constants __attribute__((aligned(16))) =
   out4 = vec_sub(tmp10, tmp11); \
   \
   z1 = vec_add(tmp12, tmp13); \
-  z1 = z1 << PRE_MULTIPLY_SCALE_BITS; \
+  z1 = vec_sl(z1, PRE_MULTIPLY_SCALE_BITS); \
   z1 = vec_madds(z1, PW_0707, zero); \
   \
   out2 = vec_add(tmp13, z1); \
@@ -102,8 +100,8 @@ static const __vector short constants __attribute__((aligned(16))) =
   tmp11 = vec_add(tmp5, tmp6); \
   tmp12 = vec_add(tmp6, tmp7); \
   \
-  tmp10 = tmp10 << PRE_MULTIPLY_SCALE_BITS; \
-  tmp12 = tmp12 << PRE_MULTIPLY_SCALE_BITS; \
+  tmp10 = vec_sl(tmp10, PRE_MULTIPLY_SCALE_BITS); \
+  tmp12 = vec_sl(tmp12, PRE_MULTIPLY_SCALE_BITS); \
   z5 = vec_sub(tmp10, tmp12); \
   z5 = vec_madds(z5, PW_0382, zero); \
   \
@@ -113,7 +111,7 @@ static const __vector short constants __attribute__((aligned(16))) =
   z4 = vec_madds(tmp12, PW_1306, zero); \
   z4 = vec_add(z4, z5); \
   \
-  tmp11 = tmp11 << PRE_MULTIPLY_SCALE_BITS; \
+  tmp11 = vec_sl(tmp11, PRE_MULTIPLY_SCALE_BITS); \
   z3 = vec_madds(tmp11, PW_0707, zero); \
   \
   z11 = vec_add(tmp7, z3); \
@@ -140,6 +138,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data)
     PW_0541 = vec_splat(constants, 1),
     PW_0707 = vec_splat(constants, 2),
     PW_1306 = vec_splat(constants, 3);
+  __vector unsigned short PRE_MULTIPLY_SCALE_BITS = vec_splat_u16(2);

   /* Pass 1: process rows. */
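
Assuming the standard semantics of vec_madds (the vmhaddshs instruction: per 16-bit element, ((a * b) >> 15) + c, saturated), the fixed-point bookkeeping behind the splatted shift count works out as follows. The table entries are stored as FIX(x) << 5, where FIX(x) means x * 2^8, so pre-shifting the input left by PRE_MULTIPLY_SCALE_BITS = 2 makes the scale factors cancel exactly:

  ((in << 2) * (FIX(x) << 5)) >> 15 = in * x * 2^(2+8+5-15) = in * x

For example, vec_madds(vec_sl(in, PRE_MULTIPLY_SCALE_BITS), PW_0382, zero) yields in * 0.382683433 with no residual scaling.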