Document the fact that the AltiVec implementation uses the same modified algorithms as the SSE2 implementation

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1473 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2014-12-23 02:42:59 +00:00
parent 45453085e7
commit ff30c63934
3 changed files with 85 additions and 1 deletions

View File

@@ -46,6 +46,16 @@
#define DO_FDCT_COMMON(PASS) \
{ \
/* (Original) \
* z1 = (tmp12 + tmp13) * 0.541196100; \
* data2 = z1 + tmp13 * 0.765366865; \
* data6 = z1 + tmp12 * -1.847759065; \
* \
* (This implementation) \
* data2 = tmp13 * (0.541196100 + 0.765366865) + tmp12 * 0.541196100; \
* data6 = tmp13 * 0.541196100 + tmp12 * (0.541196100 - 1.847759065); \
*/ \
\
tmp1312l = vec_mergeh(tmp13, tmp12); \
tmp1312h = vec_mergel(tmp13, tmp12); \
\
@@ -67,6 +77,16 @@
z3 = vec_add(tmp4, tmp6); \
z4 = vec_add(tmp5, tmp7); \
\
/* (Original) \
* z5 = (z3 + z4) * 1.175875602; \
* z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \
* z3 += z5; z4 += z5; \
* \
* (This implementation; both lines read the original z3 and z4) \
* z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \
* z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \
*/ \
\
z34l = vec_mergeh(z3, z4); \
z34h = vec_mergel(z3, z4); \
\
@@ -75,6 +95,23 @@
z4l = vec_msums(z34l, pw_f117_f078, pd_descale_p##PASS); \
z4h = vec_msums(z34h, pw_f117_f078, pd_descale_p##PASS); \
\
/* (Original) \
* z1 = tmp4 + tmp7; z2 = tmp5 + tmp6; \
* tmp4 = tmp4 * 0.298631336; tmp5 = tmp5 * 2.053119869; \
* tmp6 = tmp6 * 3.072711026; tmp7 = tmp7 * 1.501321110; \
* z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \
* data7 = tmp4 + z1 + z3; data5 = tmp5 + z2 + z4; \
* data3 = tmp6 + z2 + z3; data1 = tmp7 + z1 + z4; \
* \
* (This implementation; the four products all read the original tmp4-tmp7) \
* tmp4 = tmp4 * (0.298631336 - 0.899976223) + tmp7 * -0.899976223; \
* tmp5 = tmp5 * (2.053119869 - 2.562915447) + tmp6 * -2.562915447; \
* tmp6 = tmp5 * -2.562915447 + tmp6 * (3.072711026 - 2.562915447); \
* tmp7 = tmp4 * -0.899976223 + tmp7 * (1.501321110 - 0.899976223); \
* data7 = tmp4 + z3; data5 = tmp5 + z4; \
* data3 = tmp6 + z3; data1 = tmp7 + z4; \
*/ \
\
tmp47l = vec_mergeh(tmp4, tmp7); \
tmp47h = vec_mergel(tmp4, tmp7); \
\

View File

@@ -77,6 +77,16 @@
\
tmp7 = vec_add(z11, z13); \
\
/* To avoid overflow... \
* \
* (Original) \
* tmp12 = -2.613125930 * z10 + z5; \
* \
* (This implementation) \
* tmp12 = (-1.613125930 - 1) * z10 + z5; \
* = -1.613125930 * z10 - z10 + z5; \
*/ \
\
z5 = vec_add(z10s, z12s); \
z5 = vec_madds(z5, pw_F1847, zero); \
\

View File

@@ -46,7 +46,17 @@
#define DO_IDCT(in, PASS) \
{ \
/* Even part */ \
/* Even part \
* \
* (Original) \
* z1 = (z2 + z3) * 0.541196100; \
* tmp2 = z1 + z3 * -1.847759065; \
* tmp3 = z1 + z2 * 0.765366865; \
* \
* (This implementation) \
* tmp2 = z2 * 0.541196100 + z3 * (0.541196100 - 1.847759065); \
* tmp3 = z2 * (0.541196100 + 0.765366865) + z3 * 0.541196100; \
*/ \
\
in##26l = vec_mergeh(in##2, in##6); \
in##26h = vec_mergel(in##2, in##6); \
@@ -88,6 +98,16 @@
z3 = vec_add(in##3, in##7); \
z4 = vec_add(in##1, in##5); \
\
/* (Original) \
* z5 = (z3 + z4) * 1.175875602; \
* z3 = z3 * -1.961570560; z4 = z4 * -0.390180644; \
* z3 += z5; z4 += z5; \
* \
* (This implementation; both lines read the original z3 and z4) \
* z3 = z3 * (1.175875602 - 1.961570560) + z4 * 1.175875602; \
* z4 = z3 * 1.175875602 + z4 * (1.175875602 - 0.390180644); \
*/ \
\
z34l = vec_mergeh(z3, z4); \
z34h = vec_mergel(z3, z4); \
\
@@ -96,6 +116,23 @@
z4l = vec_msums(z34l, pw_f117_f078, zero32); \
z4h = vec_msums(z34h, pw_f117_f078, zero32); \
\
/* (Original) \
* z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \
* tmp0 = tmp0 * 0.298631336; tmp1 = tmp1 * 2.053119869; \
* tmp2 = tmp2 * 3.072711026; tmp3 = tmp3 * 1.501321110; \
* z1 = z1 * -0.899976223; z2 = z2 * -2.562915447; \
* tmp0 += z1 + z3; tmp1 += z2 + z4; \
* tmp2 += z2 + z3; tmp3 += z1 + z4; \
* \
* (This implementation; the four products all read the original tmp0-tmp3) \
* tmp0 = tmp0 * (0.298631336 - 0.899976223) + tmp3 * -0.899976223; \
* tmp1 = tmp1 * (2.053119869 - 2.562915447) + tmp2 * -2.562915447; \
* tmp2 = tmp1 * -2.562915447 + tmp2 * (3.072711026 - 2.562915447); \
* tmp3 = tmp0 * -0.899976223 + tmp3 * (1.501321110 - 0.899976223); \
* tmp0 += z3; tmp1 += z4; \
* tmp2 += z3; tmp3 += z4; \
*/ \
\
in##71l = vec_mergeh(in##7, in##1); \
in##71h = vec_mergel(in##7, in##1); \
\