Make the formatting and naming of variables and constants more consistent
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1496 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -38,7 +38,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
__vector unsigned char rgb4 = {0};
|
||||
#endif
|
||||
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
|
||||
__vector unsigned short y01, y23, cr01, cr23, cb01, cb23;
|
||||
__vector unsigned short yl, yh, crl, crh, cbl, cbh;
|
||||
__vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3;
|
||||
|
||||
/* Constants */
|
||||
@@ -49,7 +49,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
__vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) };
|
||||
__vector int pd_onehalf = { __4X(ONE_HALF) },
|
||||
pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) };
|
||||
__vector unsigned char zero = { __16X(0) },
|
||||
__vector unsigned char pb_zero = { __16X(0) },
|
||||
shift_pack_index =
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
|
||||
@@ -168,14 +168,14 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
|
||||
* support unsigned vectors.
|
||||
*/
|
||||
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
|
||||
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
|
||||
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
|
||||
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
|
||||
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
|
||||
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
|
||||
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
|
||||
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
|
||||
rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0);
|
||||
bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0);
|
||||
rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1);
|
||||
bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1);
|
||||
rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2);
|
||||
bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2);
|
||||
rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3);
|
||||
bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3);
|
||||
|
||||
/* (Original)
|
||||
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||
@@ -203,11 +203,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
* descaling the 32-bit results (right-shifting by 16 bits) and then
|
||||
* packing them.
|
||||
*/
|
||||
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||
shift_pack_index);
|
||||
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||
shift_pack_index);
|
||||
y = vec_pack(y01, y23);
|
||||
yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||
shift_pack_index);
|
||||
yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||
shift_pack_index);
|
||||
y = vec_pack(yl, yh);
|
||||
vec_st(y, 0, outptr0);
|
||||
|
||||
/* Calculate Cb values */
|
||||
@@ -223,11 +223,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
(__vector unsigned int)cb2);
|
||||
cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000,
|
||||
(__vector unsigned int)cb3);
|
||||
cb01 = vec_perm((__vector unsigned short)cb0,
|
||||
(__vector unsigned short)cb1, shift_pack_index);
|
||||
cb23 = vec_perm((__vector unsigned short)cb2,
|
||||
(__vector unsigned short)cb3, shift_pack_index);
|
||||
cb = vec_pack(cb01, cb23);
|
||||
cbl = vec_perm((__vector unsigned short)cb0,
|
||||
(__vector unsigned short)cb1, shift_pack_index);
|
||||
cbh = vec_perm((__vector unsigned short)cb2,
|
||||
(__vector unsigned short)cb3, shift_pack_index);
|
||||
cb = vec_pack(cbl, cbh);
|
||||
vec_st(cb, 0, outptr1);
|
||||
|
||||
/* Calculate Cr values */
|
||||
@@ -243,11 +243,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
||||
(__vector unsigned int)cr2);
|
||||
cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000,
|
||||
(__vector unsigned int)cr3);
|
||||
cr01 = vec_perm((__vector unsigned short)cr0,
|
||||
(__vector unsigned short)cr1, shift_pack_index);
|
||||
cr23 = vec_perm((__vector unsigned short)cr2,
|
||||
(__vector unsigned short)cr3, shift_pack_index);
|
||||
cr = vec_pack(cr01, cr23);
|
||||
crl = vec_perm((__vector unsigned short)cr0,
|
||||
(__vector unsigned short)cr1, shift_pack_index);
|
||||
crh = vec_perm((__vector unsigned short)cr2,
|
||||
(__vector unsigned short)cr3, shift_pack_index);
|
||||
cr = vec_pack(crl, crh);
|
||||
vec_st(cr, 0, outptr2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,14 +39,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
||||
__vector unsigned char rgb4 = {0};
|
||||
#endif
|
||||
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
|
||||
__vector unsigned short y01, y23;
|
||||
__vector unsigned short yl, yh;
|
||||
__vector int y0, y1, y2, y3;
|
||||
|
||||
/* Constants */
|
||||
__vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
|
||||
pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) };
|
||||
__vector int pd_onehalf = { __4X(ONE_HALF) };
|
||||
__vector unsigned char zero = { __16X(0) },
|
||||
__vector unsigned char pb_zero = { __16X(0) },
|
||||
shift_pack_index =
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
|
||||
@@ -163,14 +163,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
||||
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
|
||||
* support unsigned vectors.
|
||||
*/
|
||||
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
|
||||
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
|
||||
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
|
||||
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
|
||||
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
|
||||
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
|
||||
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
|
||||
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
|
||||
rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0);
|
||||
bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0);
|
||||
rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1);
|
||||
bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1);
|
||||
rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2);
|
||||
bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2);
|
||||
rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3);
|
||||
bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3);
|
||||
|
||||
/* (Original)
|
||||
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||
@@ -194,11 +194,11 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
||||
* descaling the 32-bit results (right-shifting by 16 bits) and then
|
||||
* packing them.
|
||||
*/
|
||||
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||
shift_pack_index);
|
||||
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||
shift_pack_index);
|
||||
y = vec_pack(y01, y23);
|
||||
yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
|
||||
shift_pack_index);
|
||||
yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
|
||||
shift_pack_index);
|
||||
y = vec_pack(yl, yh);
|
||||
vec_st(y, 0, outptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,15 +35,16 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
|
||||
int outrow, outcol;
|
||||
JDIMENSION output_cols = width_blocks * DCTSIZE;
|
||||
JSAMPROW inptr, outptr;
|
||||
__vector unsigned char tmpa, tmpb, out;
|
||||
__vector unsigned short tmpae, tmpao, tmpbe, tmpbo, outl, outh;
|
||||
|
||||
__vector unsigned char this0, next0, out;
|
||||
__vector unsigned short this0e, this0o, next0e, next0o, outl, outh;
|
||||
|
||||
/* Constants */
|
||||
__vector unsigned short bias = { __4X2(0, 1) },
|
||||
one = { __8X(1) };
|
||||
__vector unsigned short pw_bias = { __4X2(0, 1) },
|
||||
pw_one = { __8X(1) };
|
||||
__vector unsigned char even_odd_index =
|
||||
{ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
|
||||
zero = { __16X(0) };
|
||||
pb_zero = { __16X(0) };
|
||||
|
||||
expand_right_edge(input_data, max_v_samp_factor, image_width,
|
||||
output_cols * 2);
|
||||
@@ -55,22 +56,22 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
|
||||
for (outcol = output_cols; outcol > 0;
|
||||
outcol -= 16, inptr += 32, outptr += 16) {
|
||||
|
||||
tmpa = vec_ld(0, inptr);
|
||||
tmpa = vec_perm(tmpa, tmpa, even_odd_index);
|
||||
tmpae = (__vector unsigned short)vec_mergeh(zero, tmpa);
|
||||
tmpao = (__vector unsigned short)vec_mergel(zero, tmpa);
|
||||
outl = vec_add(tmpae, tmpao);
|
||||
outl = vec_add(outl, bias);
|
||||
outl = vec_sr(outl, one);
|
||||
this0 = vec_ld(0, inptr);
|
||||
this0 = vec_perm(this0, this0, even_odd_index);
|
||||
this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0);
|
||||
this0o = (__vector unsigned short)vec_mergel(pb_zero, this0);
|
||||
outl = vec_add(this0e, this0o);
|
||||
outl = vec_add(outl, pw_bias);
|
||||
outl = vec_sr(outl, pw_one);
|
||||
|
||||
if (outcol > 16) {
|
||||
tmpb = vec_ld(16, inptr);
|
||||
tmpb = vec_perm(tmpb, tmpb, even_odd_index);
|
||||
tmpbe = (__vector unsigned short)vec_mergeh(zero, tmpb);
|
||||
tmpbo = (__vector unsigned short)vec_mergel(zero, tmpb);
|
||||
outh = vec_add(tmpbe, tmpbo);
|
||||
outh = vec_add(outh, bias);
|
||||
outh = vec_sr(outh, one);
|
||||
next0 = vec_ld(16, inptr);
|
||||
next0 = vec_perm(next0, next0, even_odd_index);
|
||||
next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0);
|
||||
next0o = (__vector unsigned short)vec_mergel(pb_zero, next0);
|
||||
outh = vec_add(next0e, next0o);
|
||||
outh = vec_add(outh, pw_bias);
|
||||
outh = vec_sr(outh, pw_one);
|
||||
} else
|
||||
outh = vec_splat_u16(0);
|
||||
|
||||
@@ -90,16 +91,17 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
|
||||
int inrow, outrow, outcol;
|
||||
JDIMENSION output_cols = width_blocks * DCTSIZE;
|
||||
JSAMPROW inptr0, inptr1, outptr;
|
||||
__vector unsigned char tmp0a, tmp0b, tmp1a, tmp1b, out;
|
||||
__vector unsigned short tmp0ae, tmp0ao, tmp0be, tmp0bo, tmp1ae, tmp1ao,
|
||||
tmp1be, tmp1bo, out0l, out0h, out1l, out1h, outl, outh;
|
||||
|
||||
__vector unsigned char this0, next0, this1, next1, out;
|
||||
__vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o,
|
||||
next1e, next1o, out0l, out0h, out1l, out1h, outl, outh;
|
||||
|
||||
/* Constants */
|
||||
__vector unsigned short bias = { __4X2(1, 2) },
|
||||
two = { __8X(2) };
|
||||
__vector unsigned short pw_bias = { __4X2(1, 2) },
|
||||
pw_two = { __8X(2) };
|
||||
__vector unsigned char even_odd_index =
|
||||
{ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
|
||||
zero = { __16X(0) };
|
||||
pb_zero = { __16X(0) };
|
||||
|
||||
expand_right_edge(input_data, max_v_samp_factor, image_width,
|
||||
output_cols * 2);
|
||||
@@ -107,45 +109,45 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
|
||||
for (inrow = 0, outrow = 0; outrow < v_samp_factor;
|
||||
inrow += 2, outrow++) {
|
||||
|
||||
outptr = output_data[outrow];
|
||||
inptr0 = input_data[inrow];
|
||||
inptr1 = input_data[inrow + 1];
|
||||
outptr = output_data[outrow];
|
||||
|
||||
for (outcol = output_cols; outcol > 0;
|
||||
outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) {
|
||||
|
||||
tmp0a = vec_ld(0, inptr0);
|
||||
tmp0a = vec_perm(tmp0a, tmp0a, even_odd_index);
|
||||
tmp0ae = (__vector unsigned short)vec_mergeh(zero, tmp0a);
|
||||
tmp0ao = (__vector unsigned short)vec_mergel(zero, tmp0a);
|
||||
out0l = vec_add(tmp0ae, tmp0ao);
|
||||
this0 = vec_ld(0, inptr0);
|
||||
this0 = vec_perm(this0, this0, even_odd_index);
|
||||
this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0);
|
||||
this0o = (__vector unsigned short)vec_mergel(pb_zero, this0);
|
||||
out0l = vec_add(this0e, this0o);
|
||||
|
||||
tmp1a = vec_ld(0, inptr1);
|
||||
tmp1a = vec_perm(tmp1a, tmp1a, even_odd_index);
|
||||
tmp1ae = (__vector unsigned short)vec_mergeh(zero, tmp1a);
|
||||
tmp1ao = (__vector unsigned short)vec_mergel(zero, tmp1a);
|
||||
out1l = vec_add(tmp1ae, tmp1ao);
|
||||
this1 = vec_ld(0, inptr1);
|
||||
this1 = vec_perm(this1, this1, even_odd_index);
|
||||
this1e = (__vector unsigned short)vec_mergeh(pb_zero, this1);
|
||||
this1o = (__vector unsigned short)vec_mergel(pb_zero, this1);
|
||||
out1l = vec_add(this1e, this1o);
|
||||
|
||||
outl = vec_add(out0l, out1l);
|
||||
outl = vec_add(outl, bias);
|
||||
outl = vec_sr(outl, two);
|
||||
outl = vec_add(outl, pw_bias);
|
||||
outl = vec_sr(outl, pw_two);
|
||||
|
||||
if (outcol > 16) {
|
||||
tmp0b = vec_ld(16, inptr0);
|
||||
tmp0b = vec_perm(tmp0b, tmp0b, even_odd_index);
|
||||
tmp0be = (__vector unsigned short)vec_mergeh(zero, tmp0b);
|
||||
tmp0bo = (__vector unsigned short)vec_mergel(zero, tmp0b);
|
||||
out0h = vec_add(tmp0be, tmp0bo);
|
||||
next0 = vec_ld(16, inptr0);
|
||||
next0 = vec_perm(next0, next0, even_odd_index);
|
||||
next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0);
|
||||
next0o = (__vector unsigned short)vec_mergel(pb_zero, next0);
|
||||
out0h = vec_add(next0e, next0o);
|
||||
|
||||
tmp1b = vec_ld(16, inptr1);
|
||||
tmp1b = vec_perm(tmp1b, tmp1b, even_odd_index);
|
||||
tmp1be = (__vector unsigned short)vec_mergeh(zero, tmp1b);
|
||||
tmp1bo = (__vector unsigned short)vec_mergel(zero, tmp1b);
|
||||
out1h = vec_add(tmp1be, tmp1bo);
|
||||
next1 = vec_ld(16, inptr1);
|
||||
next1 = vec_perm(next1, next1, even_odd_index);
|
||||
next1e = (__vector unsigned short)vec_mergeh(pb_zero, next1);
|
||||
next1o = (__vector unsigned short)vec_mergel(pb_zero, next1);
|
||||
out1h = vec_add(next1e, next1o);
|
||||
|
||||
outh = vec_add(out0h, out1h);
|
||||
outh = vec_add(outh, bias);
|
||||
outh = vec_sr(outh, two);
|
||||
outh = vec_add(outh, pw_bias);
|
||||
outh = vec_sr(outh, pw_two);
|
||||
} else
|
||||
outh = vec_splat_u16(0);
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
__vector unsigned char rgb3, out4;
|
||||
#endif
|
||||
__vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh,
|
||||
crl, crh, rl, rh, gl, gh, bl, bh, g0s, g1s, g2s, g3s;
|
||||
crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w;
|
||||
__vector int g0, g1, g2, g3;
|
||||
|
||||
/* Constants
|
||||
@@ -47,11 +47,10 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
__vector short pw_f0402 = { __8X(F_0_402 >> 1) },
|
||||
pw_mf0228 = { __8X(-F_0_228 >> 1) },
|
||||
pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) },
|
||||
pw_one = { __8X(1) },
|
||||
pw_255 = { __8X(255) },
|
||||
pw_one = { __8X(1) }, pw_255 = { __8X(255) },
|
||||
pw_cj = { __8X(CENTERJSAMPLE) };
|
||||
__vector int pd_onehalf = { __4X(ONE_HALF) };
|
||||
__vector unsigned char zero = { __16X(0) },
|
||||
__vector unsigned char pb_zero = { __16X(0) },
|
||||
shift_pack_index =
|
||||
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
|
||||
|
||||
@@ -70,18 +69,18 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
/* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
|
||||
* support unsigned vectors.
|
||||
*/
|
||||
yl = (__vector signed short)vec_mergeh(zero, y);
|
||||
yh = (__vector signed short)vec_mergel(zero, y);
|
||||
yl = (__vector signed short)vec_mergeh(pb_zero, y);
|
||||
yh = (__vector signed short)vec_mergel(pb_zero, y);
|
||||
|
||||
cb = vec_ld(0, inptr1);
|
||||
cbl = (__vector signed short)vec_mergeh(zero, cb);
|
||||
cbh = (__vector signed short)vec_mergel(zero, cb);
|
||||
cbl = (__vector signed short)vec_mergeh(pb_zero, cb);
|
||||
cbh = (__vector signed short)vec_mergel(pb_zero, cb);
|
||||
cbl = vec_sub(cbl, pw_cj);
|
||||
cbh = vec_sub(cbh, pw_cj);
|
||||
|
||||
cr = vec_ld(0, inptr2);
|
||||
crl = (__vector signed short)vec_mergeh(zero, cr);
|
||||
crh = (__vector signed short)vec_mergel(zero, cr);
|
||||
crl = (__vector signed short)vec_mergeh(pb_zero, cr);
|
||||
crh = (__vector signed short)vec_mergel(pb_zero, cr);
|
||||
crl = vec_sub(crl, pw_cj);
|
||||
crh = vec_sub(crh, pw_cj);
|
||||
|
||||
@@ -119,14 +118,14 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
|
||||
rl = vec_add(rl, yl);
|
||||
rh = vec_add(rh, yh);
|
||||
|
||||
g0s = vec_mergeh(cbl, crl);
|
||||
g1s = vec_mergel(cbl, crl);
|
||||
g0 = vec_msums(g0s, pw_mf0344_f0285, pd_onehalf);
|
||||
g1 = vec_msums(g1s, pw_mf0344_f0285, pd_onehalf);
|
||||
g2s = vec_mergeh(cbh, crh);
|
||||
g3s = vec_mergel(cbh, crh);
|
||||
g2 = vec_msums(g2s, pw_mf0344_f0285, pd_onehalf);
|
||||
g3 = vec_msums(g3s, pw_mf0344_f0285, pd_onehalf);
|
||||
g0w = vec_mergeh(cbl, crl);
|
||||
g1w = vec_mergel(cbl, crl);
|
||||
g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf);
|
||||
g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf);
|
||||
g2w = vec_mergeh(cbh, crh);
|
||||
g3w = vec_mergel(cbh, crh);
|
||||
g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf);
|
||||
g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf);
|
||||
/* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
|
||||
* each dword into a new 16-bit vector, which is the equivalent of
|
||||
* descaling the 32-bit results (right-shifting by 16 bits) and then
|
||||
|
||||
@@ -35,12 +35,13 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
JSAMPROW inptr, outptr;
|
||||
int inrow, col;
|
||||
|
||||
__vector unsigned char block, last, next, lastblock, nextblock = {0}, out;
|
||||
__vector short blocke, blocko, blockl, blockh, lastl, lasth, nextl, nexth,
|
||||
outle, outhe, outlo, outho;
|
||||
__vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0,
|
||||
out;
|
||||
__vector short this0e, this0o, this0l, this0h, last0l, last0h,
|
||||
next0l, next0h, outle, outhe, outlo, outho;
|
||||
|
||||
/* Constants */
|
||||
__vector unsigned char pb_three = { __16X(3) }, pb_zero = { __16X(0) },
|
||||
__vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) },
|
||||
last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14},
|
||||
last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30},
|
||||
next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
|
||||
@@ -52,44 +53,44 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
inptr = input_data[inrow];
|
||||
outptr = output_data[inrow];
|
||||
|
||||
block = vec_ld(0, inptr);
|
||||
last = vec_perm(block, block, last_index_col0);
|
||||
lastblock = block;
|
||||
this0 = vec_ld(0, inptr);
|
||||
p_last0 = vec_perm(this0, this0, last_index_col0);
|
||||
last0 = this0;
|
||||
|
||||
for (col = 0; col < downsampled_width;
|
||||
col += 16, inptr += 16, outptr += 32) {
|
||||
|
||||
if (col > 0) {
|
||||
last = vec_perm(lastblock, block, last_index);
|
||||
lastblock = block;
|
||||
p_last0 = vec_perm(last0, this0, last_index);
|
||||
last0 = this0;
|
||||
}
|
||||
|
||||
if (downsampled_width - col <= 16)
|
||||
next = vec_perm(block, block, next_index_lastcol);
|
||||
p_next0 = vec_perm(this0, this0, next_index_lastcol);
|
||||
else {
|
||||
nextblock = vec_ld(16, inptr);
|
||||
next = vec_perm(block, nextblock, next_index);
|
||||
next0 = vec_ld(16, inptr);
|
||||
p_next0 = vec_perm(this0, next0, next_index);
|
||||
}
|
||||
|
||||
blocke = (__vector short)vec_mule(block, pb_three);
|
||||
blocko = (__vector short)vec_mulo(block, pb_three);
|
||||
blockl = vec_mergeh(blocke, blocko);
|
||||
blockh = vec_mergel(blocke, blocko);
|
||||
this0e = (__vector short)vec_mule(this0, pb_three);
|
||||
this0o = (__vector short)vec_mulo(this0, pb_three);
|
||||
this0l = vec_mergeh(this0e, this0o);
|
||||
this0h = vec_mergel(this0e, this0o);
|
||||
|
||||
lastl = (__vector short)vec_mergeh(pb_zero, last);
|
||||
lasth = (__vector short)vec_mergel(pb_zero, last);
|
||||
lastl = vec_add(lastl, pw_one);
|
||||
lasth = vec_add(lasth, pw_one);
|
||||
last0l = (__vector short)vec_mergeh(pb_zero, p_last0);
|
||||
last0h = (__vector short)vec_mergel(pb_zero, p_last0);
|
||||
last0l = vec_add(last0l, pw_one);
|
||||
last0h = vec_add(last0h, pw_one);
|
||||
|
||||
nextl = (__vector short)vec_mergeh(pb_zero, next);
|
||||
nexth = (__vector short)vec_mergel(pb_zero, next);
|
||||
nextl = vec_add(nextl, pw_two);
|
||||
nexth = vec_add(nexth, pw_two);
|
||||
next0l = (__vector short)vec_mergeh(pb_zero, p_next0);
|
||||
next0h = (__vector short)vec_mergel(pb_zero, p_next0);
|
||||
next0l = vec_add(next0l, pw_two);
|
||||
next0h = vec_add(next0h, pw_two);
|
||||
|
||||
outle = vec_add(blockl, lastl);
|
||||
outhe = vec_add(blockh, lasth);
|
||||
outlo = vec_add(blockl, nextl);
|
||||
outho = vec_add(blockh, nexth);
|
||||
outle = vec_add(this0l, last0l);
|
||||
outhe = vec_add(this0h, last0h);
|
||||
outlo = vec_add(this0l, next0l);
|
||||
outho = vec_add(this0h, next0h);
|
||||
outle = vec_sr(outle, (__vector unsigned short)pw_two);
|
||||
outhe = vec_sr(outhe, (__vector unsigned short)pw_two);
|
||||
outlo = vec_sr(outlo, (__vector unsigned short)pw_two);
|
||||
@@ -102,7 +103,7 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
(__vector unsigned char)outho, merge_pack_index);
|
||||
vec_st(out, 16, outptr);
|
||||
|
||||
block = nextblock;
|
||||
this0 = next0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -118,8 +119,8 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1;
|
||||
int inrow, outrow, col;
|
||||
|
||||
__vector unsigned char block_1, block0, block1, out;
|
||||
__vector short block_1l, block_1h, block0l, block0h, block1l, block1h,
|
||||
__vector unsigned char this_1, this0, this1, out;
|
||||
__vector short this_1l, this_1h, this0l, this0h, this1l, this1h,
|
||||
lastcolsum_1h, lastcolsum1h,
|
||||
p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h,
|
||||
thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h,
|
||||
@@ -147,26 +148,26 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
outptr0 = output_data[outrow++];
|
||||
outptr1 = output_data[outrow++];
|
||||
|
||||
block0 = vec_ld(0, inptr0);
|
||||
block0l = (__vector short)vec_mergeh(pb_zero, block0);
|
||||
block0h = (__vector short)vec_mergel(pb_zero, block0);
|
||||
block0l = vec_mladd(block0l, pw_three, pw_zero);
|
||||
block0h = vec_mladd(block0h, pw_three, pw_zero);
|
||||
this0 = vec_ld(0, inptr0);
|
||||
this0l = (__vector short)vec_mergeh(pb_zero, this0);
|
||||
this0h = (__vector short)vec_mergel(pb_zero, this0);
|
||||
this0l = vec_mladd(this0l, pw_three, pw_zero);
|
||||
this0h = vec_mladd(this0h, pw_three, pw_zero);
|
||||
|
||||
block_1 = vec_ld(0, inptr_1);
|
||||
block_1l = (__vector short)vec_mergeh(pb_zero, block_1);
|
||||
block_1h = (__vector short)vec_mergel(pb_zero, block_1);
|
||||
thiscolsum_1l = vec_add(block0l, block_1l);
|
||||
thiscolsum_1h = vec_add(block0h, block_1h);
|
||||
this_1 = vec_ld(0, inptr_1);
|
||||
this_1l = (__vector short)vec_mergeh(pb_zero, this_1);
|
||||
this_1h = (__vector short)vec_mergel(pb_zero, this_1);
|
||||
thiscolsum_1l = vec_add(this0l, this_1l);
|
||||
thiscolsum_1h = vec_add(this0h, this_1h);
|
||||
lastcolsum_1h = thiscolsum_1h;
|
||||
p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0);
|
||||
p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index);
|
||||
|
||||
block1 = vec_ld(0, inptr1);
|
||||
block1l = (__vector short)vec_mergeh(pb_zero, block1);
|
||||
block1h = (__vector short)vec_mergel(pb_zero, block1);
|
||||
thiscolsum1l = vec_add(block0l, block1l);
|
||||
thiscolsum1h = vec_add(block0h, block1h);
|
||||
this1 = vec_ld(0, inptr1);
|
||||
this1l = (__vector short)vec_mergeh(pb_zero, this1);
|
||||
this1h = (__vector short)vec_mergel(pb_zero, this1);
|
||||
thiscolsum1l = vec_add(this0l, this1l);
|
||||
thiscolsum1h = vec_add(this0h, this1h);
|
||||
lastcolsum1h = thiscolsum1h;
|
||||
p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0);
|
||||
p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index);
|
||||
@@ -191,25 +192,25 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
|
||||
p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h,
|
||||
next_index_lastcol);
|
||||
} else {
|
||||
block0 = vec_ld(16, inptr0);
|
||||
block0l = (__vector short)vec_mergeh(pb_zero, block0);
|
||||
block0h = (__vector short)vec_mergel(pb_zero, block0);
|
||||
block0l = vec_mladd(block0l, pw_three, pw_zero);
|
||||
block0h = vec_mladd(block0h, pw_three, pw_zero);
|
||||
this0 = vec_ld(16, inptr0);
|
||||
this0l = (__vector short)vec_mergeh(pb_zero, this0);
|
||||
this0h = (__vector short)vec_mergel(pb_zero, this0);
|
||||
this0l = vec_mladd(this0l, pw_three, pw_zero);
|
||||
this0h = vec_mladd(this0h, pw_three, pw_zero);
|
||||
|
||||
block_1 = vec_ld(16, inptr_1);
|
||||
block_1l = (__vector short)vec_mergeh(pb_zero, block_1);
|
||||
block_1h = (__vector short)vec_mergel(pb_zero, block_1);
|
||||
nextcolsum_1l = vec_add(block0l, block_1l);
|
||||
nextcolsum_1h = vec_add(block0h, block_1h);
|
||||
this_1 = vec_ld(16, inptr_1);
|
||||
this_1l = (__vector short)vec_mergeh(pb_zero, this_1);
|
||||
this_1h = (__vector short)vec_mergel(pb_zero, this_1);
|
||||
nextcolsum_1l = vec_add(this0l, this_1l);
|
||||
nextcolsum_1h = vec_add(this0h, this_1h);
|
||||
p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index);
|
||||
p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index);
|
||||
|
||||
block1 = vec_ld(16, inptr1);
|
||||
block1l = (__vector short)vec_mergeh(pb_zero, block1);
|
||||
block1h = (__vector short)vec_mergel(pb_zero, block1);
|
||||
nextcolsum1l = vec_add(block0l, block1l);
|
||||
nextcolsum1h = vec_add(block0h, block1h);
|
||||
this1 = vec_ld(16, inptr1);
|
||||
this1l = (__vector short)vec_mergeh(pb_zero, this1);
|
||||
this1h = (__vector short)vec_mergel(pb_zero, this1);
|
||||
nextcolsum1l = vec_add(this0l, this1l);
|
||||
nextcolsum1h = vec_add(this0h, this1h);
|
||||
p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index);
|
||||
p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index);
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
\
|
||||
z1 = vec_add(tmp12, tmp13); \
|
||||
z1 = vec_sl(z1, pre_multiply_scale_bits); \
|
||||
z1 = vec_madds(z1, pw_0707, zero); \
|
||||
z1 = vec_madds(z1, pw_0707, pw_zero); \
|
||||
\
|
||||
out2 = vec_add(tmp13, z1); \
|
||||
out6 = vec_sub(tmp13, z1); \
|
||||
@@ -70,13 +70,13 @@
|
||||
tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \
|
||||
tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \
|
||||
z5 = vec_sub(tmp10, tmp12); \
|
||||
z5 = vec_madds(z5, pw_0382, zero); \
|
||||
z5 = vec_madds(z5, pw_0382, pw_zero); \
|
||||
\
|
||||
z2 = vec_madds(tmp10, pw_0541, z5); \
|
||||
z4 = vec_madds(tmp12, pw_1306, z5); \
|
||||
\
|
||||
tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \
|
||||
z3 = vec_madds(tmp11, pw_0707, zero); \
|
||||
z3 = vec_madds(tmp11, pw_0707, pw_zero); \
|
||||
\
|
||||
z11 = vec_add(tmp7, z3); \
|
||||
z13 = vec_sub(tmp7, z3); \
|
||||
@@ -98,7 +98,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data)
|
||||
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||
|
||||
/* Constants */
|
||||
__vector short zero = vec_splat_s16(0),
|
||||
__vector short pw_zero = { __8X(0) },
|
||||
pw_0382 = { __8X(F_0_382 << CONST_SHIFT) },
|
||||
pw_0541 = { __8X(F_0_541 << CONST_SHIFT) },
|
||||
pw_0707 = { __8X(F_0_707 << CONST_SHIFT) },
|
||||
|
||||
@@ -54,7 +54,7 @@
|
||||
\
|
||||
tmp12 = vec_sub(in##2, in##6); \
|
||||
tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \
|
||||
tmp12 = vec_madds(tmp12, pw_F1414, zero); \
|
||||
tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \
|
||||
tmp12 = vec_sub(tmp12, tmp13); \
|
||||
\
|
||||
tmp0 = vec_add(tmp10, tmp13); \
|
||||
@@ -73,7 +73,7 @@
|
||||
\
|
||||
tmp11 = vec_sub(z11, z13); \
|
||||
tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \
|
||||
tmp11 = vec_madds(tmp11, pw_F1414, zero); \
|
||||
tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \
|
||||
\
|
||||
tmp7 = vec_add(z11, z13); \
|
||||
\
|
||||
@@ -88,9 +88,9 @@
|
||||
*/ \
|
||||
\
|
||||
z5 = vec_add(z10s, z12s); \
|
||||
z5 = vec_madds(z5, pw_F1847, zero); \
|
||||
z5 = vec_madds(z5, pw_F1847, pw_zero); \
|
||||
\
|
||||
tmp10 = vec_madds(z12s, pw_F1082, zero); \
|
||||
tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \
|
||||
tmp10 = vec_sub(tmp10, z5); \
|
||||
tmp12 = vec_madds(z10s, pw_MF1613, z5); \
|
||||
tmp12 = vec_sub(tmp12, z10); \
|
||||
@@ -115,6 +115,8 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
JSAMPARRAY output_buf, JDIMENSION output_col)
|
||||
{
|
||||
short *dct_table = (short *)dct_table_;
|
||||
int *outptr;
|
||||
|
||||
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
|
||||
@@ -122,10 +124,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
z5, z10, z10s, z11, z12s, z13,
|
||||
out0, out1, out2, out3, out4, out5, out6, out7;
|
||||
__vector signed char outb;
|
||||
int *outptr;
|
||||
|
||||
/* Constants */
|
||||
__vector short zero = { __8X(0) },
|
||||
__vector short pw_zero = { __8X(0) },
|
||||
pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) },
|
||||
pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) },
|
||||
pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) },
|
||||
@@ -154,9 +155,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
tmp1 = vec_or(tmp1, tmp3);
|
||||
|
||||
quant0 = vec_ld(0, dct_table);
|
||||
col0 = vec_mladd(col0, quant0, zero);
|
||||
col0 = vec_mladd(col0, quant0, pw_zero);
|
||||
|
||||
if (vec_all_eq(tmp1, zero)) {
|
||||
if (vec_all_eq(tmp1, pw_zero)) {
|
||||
/* AC terms all zero */
|
||||
|
||||
row0 = vec_splat(col0, 0);
|
||||
@@ -178,13 +179,13 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
quant6 = vec_ld(96, dct_table);
|
||||
quant7 = vec_ld(112, dct_table);
|
||||
|
||||
col1 = vec_mladd(col1, quant1, zero);
|
||||
col2 = vec_mladd(col2, quant2, zero);
|
||||
col3 = vec_mladd(col3, quant3, zero);
|
||||
col4 = vec_mladd(col4, quant4, zero);
|
||||
col5 = vec_mladd(col5, quant5, zero);
|
||||
col6 = vec_mladd(col6, quant6, zero);
|
||||
col7 = vec_mladd(col7, quant7, zero);
|
||||
col1 = vec_mladd(col1, quant1, pw_zero);
|
||||
col2 = vec_mladd(col2, quant2, pw_zero);
|
||||
col3 = vec_mladd(col3, quant3, pw_zero);
|
||||
col4 = vec_mladd(col4, quant4, pw_zero);
|
||||
col5 = vec_mladd(col5, quant5, pw_zero);
|
||||
col6 = vec_mladd(col6, quant6, pw_zero);
|
||||
col7 = vec_mladd(col7, quant7, pw_zero);
|
||||
|
||||
DO_IDCT(col);
|
||||
|
||||
|
||||
@@ -61,10 +61,10 @@
|
||||
in##26l = vec_mergeh(in##2, in##6); \
|
||||
in##26h = vec_mergel(in##2, in##6); \
|
||||
\
|
||||
tmp3l = vec_msums(in##26l, pw_f130_f054, zero32); \
|
||||
tmp3h = vec_msums(in##26h, pw_f130_f054, zero32); \
|
||||
tmp2l = vec_msums(in##26l, pw_f054_mf130, zero32); \
|
||||
tmp2h = vec_msums(in##26h, pw_f054_mf130, zero32); \
|
||||
tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \
|
||||
tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \
|
||||
tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \
|
||||
tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \
|
||||
\
|
||||
tmp0 = vec_add(in##0, in##4); \
|
||||
tmp1 = vec_sub(in##0, in##4); \
|
||||
@@ -111,10 +111,10 @@
|
||||
z34l = vec_mergeh(z3, z4); \
|
||||
z34h = vec_mergel(z3, z4); \
|
||||
\
|
||||
z3l = vec_msums(z34l, pw_mf078_f117, zero32); \
|
||||
z3h = vec_msums(z34h, pw_mf078_f117, zero32); \
|
||||
z4l = vec_msums(z34l, pw_f117_f078, zero32); \
|
||||
z4h = vec_msums(z34h, pw_f117_f078, zero32); \
|
||||
z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \
|
||||
z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \
|
||||
z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \
|
||||
z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \
|
||||
\
|
||||
/* (Original) \
|
||||
* z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \
|
||||
@@ -210,6 +210,8 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
JSAMPARRAY output_buf, JDIMENSION output_col)
|
||||
{
|
||||
short *dct_table = (short *)dct_table_;
|
||||
int *outptr;
|
||||
|
||||
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
col0, col1, col2, col3, col4, col5, col6, col7,
|
||||
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
|
||||
@@ -223,10 +225,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h,
|
||||
out5l, out5h, out6l, out6h, out7l, out7h;
|
||||
__vector signed char outb;
|
||||
int *outptr;
|
||||
|
||||
/* Constants */
|
||||
__vector short zero16 = { __8X(0) },
|
||||
__vector short pw_zero = { __8X(0) },
|
||||
pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) },
|
||||
pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) },
|
||||
pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) },
|
||||
@@ -236,7 +237,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) },
|
||||
pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) };
|
||||
__vector unsigned short pass1_bits = { __8X(PASS1_BITS) };
|
||||
__vector int zero32 = { __4X(0) },
|
||||
__vector int pd_zero = { __4X(0) },
|
||||
pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) },
|
||||
pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) };
|
||||
__vector unsigned int descale_p1 = { __4X(DESCALE_P1) },
|
||||
@@ -263,9 +264,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
tmp1 = vec_or(tmp1, tmp3);
|
||||
|
||||
quant0 = vec_ld(0, dct_table);
|
||||
col0 = vec_mladd(col0, quant0, zero16);
|
||||
col0 = vec_mladd(col0, quant0, pw_zero);
|
||||
|
||||
if (vec_all_eq(tmp1, zero16)) {
|
||||
if (vec_all_eq(tmp1, pw_zero)) {
|
||||
/* AC terms all zero */
|
||||
|
||||
col0 = vec_sl(col0, pass1_bits);
|
||||
@@ -289,13 +290,13 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
||||
quant6 = vec_ld(96, dct_table);
|
||||
quant7 = vec_ld(112, dct_table);
|
||||
|
||||
col1 = vec_mladd(col1, quant1, zero16);
|
||||
col2 = vec_mladd(col2, quant2, zero16);
|
||||
col3 = vec_mladd(col3, quant3, zero16);
|
||||
col4 = vec_mladd(col4, quant4, zero16);
|
||||
col5 = vec_mladd(col5, quant5, zero16);
|
||||
col6 = vec_mladd(col6, quant6, zero16);
|
||||
col7 = vec_mladd(col7, quant7, zero16);
|
||||
col1 = vec_mladd(col1, quant1, pw_zero);
|
||||
col2 = vec_mladd(col2, quant2, pw_zero);
|
||||
col3 = vec_mladd(col3, quant3, pw_zero);
|
||||
col4 = vec_mladd(col4, quant4, pw_zero);
|
||||
col5 = vec_mladd(col5, quant5, pw_zero);
|
||||
col6 = vec_mladd(col6, quant6, pw_zero);
|
||||
col7 = vec_mladd(col7, quant7, pw_zero);
|
||||
|
||||
DO_IDCT(col, 1);
|
||||
|
||||
|
||||
@@ -42,12 +42,13 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
DCTELEM * workspace)
|
||||
{
|
||||
JSAMPROW elemptr;
|
||||
|
||||
__vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7;
|
||||
__vector short out0, out1, out2, out3, out4, out5, out6, out7;
|
||||
|
||||
/* Constants */
|
||||
__vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) };
|
||||
__vector unsigned char zero = { __16X(0) };
|
||||
__vector unsigned char pb_zero = { __16X(0) };
|
||||
|
||||
LOAD_ROW(0);
|
||||
LOAD_ROW(1);
|
||||
@@ -58,14 +59,14 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
LOAD_ROW(6);
|
||||
LOAD_ROW(7);
|
||||
|
||||
out0 = (__vector short)vec_mergeh(zero, in0);
|
||||
out1 = (__vector short)vec_mergeh(zero, in1);
|
||||
out2 = (__vector short)vec_mergeh(zero, in2);
|
||||
out3 = (__vector short)vec_mergeh(zero, in3);
|
||||
out4 = (__vector short)vec_mergeh(zero, in4);
|
||||
out5 = (__vector short)vec_mergeh(zero, in5);
|
||||
out6 = (__vector short)vec_mergeh(zero, in6);
|
||||
out7 = (__vector short)vec_mergeh(zero, in7);
|
||||
out0 = (__vector short)vec_mergeh(pb_zero, in0);
|
||||
out1 = (__vector short)vec_mergeh(pb_zero, in1);
|
||||
out2 = (__vector short)vec_mergeh(pb_zero, in2);
|
||||
out3 = (__vector short)vec_mergeh(pb_zero, in3);
|
||||
out4 = (__vector short)vec_mergeh(pb_zero, in4);
|
||||
out5 = (__vector short)vec_mergeh(pb_zero, in5);
|
||||
out6 = (__vector short)vec_mergeh(pb_zero, in6);
|
||||
out7 = (__vector short)vec_mergeh(pb_zero, in7);
|
||||
|
||||
out0 = vec_sub(out0, pw_centerjsamp);
|
||||
out1 = vec_sub(out1, pw_centerjsamp);
|
||||
@@ -89,7 +90,8 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
|
||||
#define WORD_BIT 16
|
||||
|
||||
/* There is no AltiVec unsigned multiply instruction, hence this. */
|
||||
/* There is no AltiVec 16-bit unsigned multiply instruction, hence this.
|
||||
We basically need an unsigned equivalent of vec_madds(). */
|
||||
|
||||
#define MULTIPLY(vs0, vs1, out) { \
|
||||
tmpe = vec_mule((__vector unsigned short)vs0, \
|
||||
@@ -105,13 +107,11 @@ void
|
||||
jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM * divisors,
|
||||
DCTELEM * workspace)
|
||||
{
|
||||
__vector short row0, row1, row2, row3, row4, row5, row6, row7;
|
||||
__vector short row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s;
|
||||
__vector short corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7;
|
||||
__vector short recip0, recip1, recip2, recip3, recip4, recip5, recip6,
|
||||
recip7;
|
||||
__vector short scale0, scale1, scale2, scale3, scale4, scale5, scale6,
|
||||
scale7;
|
||||
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
|
||||
row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s,
|
||||
corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7,
|
||||
recip0, recip1, recip2, recip3, recip4, recip5, recip6, recip7,
|
||||
scale0, scale1, scale2, scale3, scale4, scale5, scale6, scale7;
|
||||
__vector unsigned int tmpe, tmpo;
|
||||
|
||||
/* Constants */
|
||||
|
||||
Reference in New Issue
Block a user