Make the formatting and naming of variables and constants more consistent

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1496 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2015-01-13 10:00:12 +00:00
parent 52a4ec6c8a
commit a6a24c270e
9 changed files with 229 additions and 225 deletions

View File

@@ -38,7 +38,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
__vector unsigned char rgb4 = {0};
#endif
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
__vector unsigned short y01, y23, cr01, cr23, cb01, cb23;
__vector unsigned short yl, yh, crl, crh, cbl, cbh;
__vector int y0, y1, y2, y3, cr0, cr1, cr2, cr3, cb0, cb1, cb2, cb3;
/* Constants */
@@ -49,7 +49,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
__vector unsigned short pw_f050_f000 = { __4X2(F_0_500, 0) };
__vector int pd_onehalf = { __4X(ONE_HALF) },
pd_onehalfm1_cj = { __4X(ONE_HALF - 1 + (CENTERJSAMPLE << SCALEBITS)) };
__vector unsigned char zero = { __16X(0) },
__vector unsigned char pb_zero = { __16X(0) },
shift_pack_index =
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
@@ -168,14 +168,14 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
* support unsigned vectors.
*/
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0);
bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0);
rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1);
bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1);
rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2);
bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2);
rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3);
bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3);
/* (Original)
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
@@ -203,11 +203,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
* descaling the 32-bit results (right-shifting by 16 bits) and then
* packing them.
*/
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
shift_pack_index);
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
shift_pack_index);
y = vec_pack(y01, y23);
yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
shift_pack_index);
yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
shift_pack_index);
y = vec_pack(yl, yh);
vec_st(y, 0, outptr0);
/* Calculate Cb values */
@@ -223,11 +223,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
(__vector unsigned int)cb2);
cb3 = (__vector int)vec_msum((__vector unsigned short)bg3, pw_f050_f000,
(__vector unsigned int)cb3);
cb01 = vec_perm((__vector unsigned short)cb0,
(__vector unsigned short)cb1, shift_pack_index);
cb23 = vec_perm((__vector unsigned short)cb2,
(__vector unsigned short)cb3, shift_pack_index);
cb = vec_pack(cb01, cb23);
cbl = vec_perm((__vector unsigned short)cb0,
(__vector unsigned short)cb1, shift_pack_index);
cbh = vec_perm((__vector unsigned short)cb2,
(__vector unsigned short)cb3, shift_pack_index);
cb = vec_pack(cbl, cbh);
vec_st(cb, 0, outptr1);
/* Calculate Cr values */
@@ -243,11 +243,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
(__vector unsigned int)cr2);
cr3 = (__vector int)vec_msum((__vector unsigned short)rg3, pw_f050_f000,
(__vector unsigned int)cr3);
cr01 = vec_perm((__vector unsigned short)cr0,
(__vector unsigned short)cr1, shift_pack_index);
cr23 = vec_perm((__vector unsigned short)cr2,
(__vector unsigned short)cr3, shift_pack_index);
cr = vec_pack(cr01, cr23);
crl = vec_perm((__vector unsigned short)cr0,
(__vector unsigned short)cr1, shift_pack_index);
crh = vec_perm((__vector unsigned short)cr2,
(__vector unsigned short)cr3, shift_pack_index);
cr = vec_pack(crl, crh);
vec_st(cr, 0, outptr2);
}
}

View File

@@ -39,14 +39,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
__vector unsigned char rgb4 = {0};
#endif
__vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
__vector unsigned short y01, y23;
__vector unsigned short yl, yh;
__vector int y0, y1, y2, y3;
/* Constants */
__vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) };
__vector int pd_onehalf = { __4X(ONE_HALF) };
__vector unsigned char zero = { __16X(0) },
__vector unsigned char pb_zero = { __16X(0) },
shift_pack_index =
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
@@ -163,14 +163,14 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
* support unsigned vectors.
*/
rg0 = (__vector signed short)vec_mergeh(zero, rgbg0);
bg0 = (__vector signed short)vec_mergel(zero, rgbg0);
rg1 = (__vector signed short)vec_mergeh(zero, rgbg1);
bg1 = (__vector signed short)vec_mergel(zero, rgbg1);
rg2 = (__vector signed short)vec_mergeh(zero, rgbg2);
bg2 = (__vector signed short)vec_mergel(zero, rgbg2);
rg3 = (__vector signed short)vec_mergeh(zero, rgbg3);
bg3 = (__vector signed short)vec_mergel(zero, rgbg3);
rg0 = (__vector signed short)vec_mergeh(pb_zero, rgbg0);
bg0 = (__vector signed short)vec_mergel(pb_zero, rgbg0);
rg1 = (__vector signed short)vec_mergeh(pb_zero, rgbg1);
bg1 = (__vector signed short)vec_mergel(pb_zero, rgbg1);
rg2 = (__vector signed short)vec_mergeh(pb_zero, rgbg2);
bg2 = (__vector signed short)vec_mergel(pb_zero, rgbg2);
rg3 = (__vector signed short)vec_mergeh(pb_zero, rgbg3);
bg3 = (__vector signed short)vec_mergel(pb_zero, rgbg3);
/* (Original)
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
@@ -194,11 +194,11 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
* descaling the 32-bit results (right-shifting by 16 bits) and then
* packing them.
*/
y01 = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
shift_pack_index);
y23 = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
shift_pack_index);
y = vec_pack(y01, y23);
yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
shift_pack_index);
yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
shift_pack_index);
y = vec_pack(yl, yh);
vec_st(y, 0, outptr);
}
}

View File

@@ -35,15 +35,16 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
int outrow, outcol;
JDIMENSION output_cols = width_blocks * DCTSIZE;
JSAMPROW inptr, outptr;
__vector unsigned char tmpa, tmpb, out;
__vector unsigned short tmpae, tmpao, tmpbe, tmpbo, outl, outh;
__vector unsigned char this0, next0, out;
__vector unsigned short this0e, this0o, next0e, next0o, outl, outh;
/* Constants */
__vector unsigned short bias = { __4X2(0, 1) },
one = { __8X(1) };
__vector unsigned short pw_bias = { __4X2(0, 1) },
pw_one = { __8X(1) };
__vector unsigned char even_odd_index =
{ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
zero = { __16X(0) };
pb_zero = { __16X(0) };
expand_right_edge(input_data, max_v_samp_factor, image_width,
output_cols * 2);
@@ -55,22 +56,22 @@ jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
for (outcol = output_cols; outcol > 0;
outcol -= 16, inptr += 32, outptr += 16) {
tmpa = vec_ld(0, inptr);
tmpa = vec_perm(tmpa, tmpa, even_odd_index);
tmpae = (__vector unsigned short)vec_mergeh(zero, tmpa);
tmpao = (__vector unsigned short)vec_mergel(zero, tmpa);
outl = vec_add(tmpae, tmpao);
outl = vec_add(outl, bias);
outl = vec_sr(outl, one);
this0 = vec_ld(0, inptr);
this0 = vec_perm(this0, this0, even_odd_index);
this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0);
this0o = (__vector unsigned short)vec_mergel(pb_zero, this0);
outl = vec_add(this0e, this0o);
outl = vec_add(outl, pw_bias);
outl = vec_sr(outl, pw_one);
if (outcol > 16) {
tmpb = vec_ld(16, inptr);
tmpb = vec_perm(tmpb, tmpb, even_odd_index);
tmpbe = (__vector unsigned short)vec_mergeh(zero, tmpb);
tmpbo = (__vector unsigned short)vec_mergel(zero, tmpb);
outh = vec_add(tmpbe, tmpbo);
outh = vec_add(outh, bias);
outh = vec_sr(outh, one);
next0 = vec_ld(16, inptr);
next0 = vec_perm(next0, next0, even_odd_index);
next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0);
next0o = (__vector unsigned short)vec_mergel(pb_zero, next0);
outh = vec_add(next0e, next0o);
outh = vec_add(outh, pw_bias);
outh = vec_sr(outh, pw_one);
} else
outh = vec_splat_u16(0);
@@ -90,16 +91,17 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
int inrow, outrow, outcol;
JDIMENSION output_cols = width_blocks * DCTSIZE;
JSAMPROW inptr0, inptr1, outptr;
__vector unsigned char tmp0a, tmp0b, tmp1a, tmp1b, out;
__vector unsigned short tmp0ae, tmp0ao, tmp0be, tmp0bo, tmp1ae, tmp1ao,
tmp1be, tmp1bo, out0l, out0h, out1l, out1h, outl, outh;
__vector unsigned char this0, next0, this1, next1, out;
__vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o,
next1e, next1o, out0l, out0h, out1l, out1h, outl, outh;
/* Constants */
__vector unsigned short bias = { __4X2(1, 2) },
two = { __8X(2) };
__vector unsigned short pw_bias = { __4X2(1, 2) },
pw_two = { __8X(2) };
__vector unsigned char even_odd_index =
{ 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
zero = { __16X(0) };
pb_zero = { __16X(0) };
expand_right_edge(input_data, max_v_samp_factor, image_width,
output_cols * 2);
@@ -107,45 +109,45 @@ jsimd_h2v2_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor,
for (inrow = 0, outrow = 0; outrow < v_samp_factor;
inrow += 2, outrow++) {
outptr = output_data[outrow];
inptr0 = input_data[inrow];
inptr1 = input_data[inrow + 1];
outptr = output_data[outrow];
for (outcol = output_cols; outcol > 0;
outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) {
tmp0a = vec_ld(0, inptr0);
tmp0a = vec_perm(tmp0a, tmp0a, even_odd_index);
tmp0ae = (__vector unsigned short)vec_mergeh(zero, tmp0a);
tmp0ao = (__vector unsigned short)vec_mergel(zero, tmp0a);
out0l = vec_add(tmp0ae, tmp0ao);
this0 = vec_ld(0, inptr0);
this0 = vec_perm(this0, this0, even_odd_index);
this0e = (__vector unsigned short)vec_mergeh(pb_zero, this0);
this0o = (__vector unsigned short)vec_mergel(pb_zero, this0);
out0l = vec_add(this0e, this0o);
tmp1a = vec_ld(0, inptr1);
tmp1a = vec_perm(tmp1a, tmp1a, even_odd_index);
tmp1ae = (__vector unsigned short)vec_mergeh(zero, tmp1a);
tmp1ao = (__vector unsigned short)vec_mergel(zero, tmp1a);
out1l = vec_add(tmp1ae, tmp1ao);
this1 = vec_ld(0, inptr1);
this1 = vec_perm(this1, this1, even_odd_index);
this1e = (__vector unsigned short)vec_mergeh(pb_zero, this1);
this1o = (__vector unsigned short)vec_mergel(pb_zero, this1);
out1l = vec_add(this1e, this1o);
outl = vec_add(out0l, out1l);
outl = vec_add(outl, bias);
outl = vec_sr(outl, two);
outl = vec_add(outl, pw_bias);
outl = vec_sr(outl, pw_two);
if (outcol > 16) {
tmp0b = vec_ld(16, inptr0);
tmp0b = vec_perm(tmp0b, tmp0b, even_odd_index);
tmp0be = (__vector unsigned short)vec_mergeh(zero, tmp0b);
tmp0bo = (__vector unsigned short)vec_mergel(zero, tmp0b);
out0h = vec_add(tmp0be, tmp0bo);
next0 = vec_ld(16, inptr0);
next0 = vec_perm(next0, next0, even_odd_index);
next0e = (__vector unsigned short)vec_mergeh(pb_zero, next0);
next0o = (__vector unsigned short)vec_mergel(pb_zero, next0);
out0h = vec_add(next0e, next0o);
tmp1b = vec_ld(16, inptr1);
tmp1b = vec_perm(tmp1b, tmp1b, even_odd_index);
tmp1be = (__vector unsigned short)vec_mergeh(zero, tmp1b);
tmp1bo = (__vector unsigned short)vec_mergel(zero, tmp1b);
out1h = vec_add(tmp1be, tmp1bo);
next1 = vec_ld(16, inptr1);
next1 = vec_perm(next1, next1, even_odd_index);
next1e = (__vector unsigned short)vec_mergeh(pb_zero, next1);
next1o = (__vector unsigned short)vec_mergel(pb_zero, next1);
out1h = vec_add(next1e, next1o);
outh = vec_add(out0h, out1h);
outh = vec_add(outh, bias);
outh = vec_sr(outh, two);
outh = vec_add(outh, pw_bias);
outh = vec_sr(outh, pw_two);
} else
outh = vec_splat_u16(0);

View File

@@ -37,7 +37,7 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
__vector unsigned char rgb3, out4;
#endif
__vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh,
crl, crh, rl, rh, gl, gh, bl, bh, g0s, g1s, g2s, g3s;
crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w;
__vector int g0, g1, g2, g3;
/* Constants
@@ -47,11 +47,10 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
__vector short pw_f0402 = { __8X(F_0_402 >> 1) },
pw_mf0228 = { __8X(-F_0_228 >> 1) },
pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) },
pw_one = { __8X(1) },
pw_255 = { __8X(255) },
pw_one = { __8X(1) }, pw_255 = { __8X(255) },
pw_cj = { __8X(CENTERJSAMPLE) };
__vector int pd_onehalf = { __4X(ONE_HALF) };
__vector unsigned char zero = { __16X(0) },
__vector unsigned char pb_zero = { __16X(0) },
shift_pack_index =
{ 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29};
@@ -70,18 +69,18 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
/* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
* support unsigned vectors.
*/
yl = (__vector signed short)vec_mergeh(zero, y);
yh = (__vector signed short)vec_mergel(zero, y);
yl = (__vector signed short)vec_mergeh(pb_zero, y);
yh = (__vector signed short)vec_mergel(pb_zero, y);
cb = vec_ld(0, inptr1);
cbl = (__vector signed short)vec_mergeh(zero, cb);
cbh = (__vector signed short)vec_mergel(zero, cb);
cbl = (__vector signed short)vec_mergeh(pb_zero, cb);
cbh = (__vector signed short)vec_mergel(pb_zero, cb);
cbl = vec_sub(cbl, pw_cj);
cbh = vec_sub(cbh, pw_cj);
cr = vec_ld(0, inptr2);
crl = (__vector signed short)vec_mergeh(zero, cr);
crh = (__vector signed short)vec_mergel(zero, cr);
crl = (__vector signed short)vec_mergeh(pb_zero, cr);
crh = (__vector signed short)vec_mergel(pb_zero, cr);
crl = vec_sub(crl, pw_cj);
crh = vec_sub(crh, pw_cj);
@@ -119,14 +118,14 @@ void jsimd_ycc_rgb_convert_altivec (JDIMENSION out_width, JSAMPIMAGE input_buf,
rl = vec_add(rl, yl);
rh = vec_add(rh, yh);
g0s = vec_mergeh(cbl, crl);
g1s = vec_mergel(cbl, crl);
g0 = vec_msums(g0s, pw_mf0344_f0285, pd_onehalf);
g1 = vec_msums(g1s, pw_mf0344_f0285, pd_onehalf);
g2s = vec_mergeh(cbh, crh);
g3s = vec_mergel(cbh, crh);
g2 = vec_msums(g2s, pw_mf0344_f0285, pd_onehalf);
g3 = vec_msums(g3s, pw_mf0344_f0285, pd_onehalf);
g0w = vec_mergeh(cbl, crl);
g1w = vec_mergel(cbl, crl);
g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf);
g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf);
g2w = vec_mergeh(cbh, crh);
g3w = vec_mergel(cbh, crh);
g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf);
g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf);
/* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
* each dword into a new 16-bit vector, which is the equivalent of
* descaling the 32-bit results (right-shifting by 16 bits) and then

View File

@@ -35,12 +35,13 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
JSAMPROW inptr, outptr;
int inrow, col;
__vector unsigned char block, last, next, lastblock, nextblock = {0}, out;
__vector short blocke, blocko, blockl, blockh, lastl, lasth, nextl, nexth,
outle, outhe, outlo, outho;
__vector unsigned char this0, last0, p_last0, next0 = {0}, p_next0,
out;
__vector short this0e, this0o, this0l, this0h, last0l, last0h,
next0l, next0h, outle, outhe, outlo, outho;
/* Constants */
__vector unsigned char pb_three = { __16X(3) }, pb_zero = { __16X(0) },
__vector unsigned char pb_zero = { __16X(0) }, pb_three = { __16X(3) },
last_index_col0 = {0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14},
last_index = {15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30},
next_index = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
@@ -52,44 +53,44 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
inptr = input_data[inrow];
outptr = output_data[inrow];
block = vec_ld(0, inptr);
last = vec_perm(block, block, last_index_col0);
lastblock = block;
this0 = vec_ld(0, inptr);
p_last0 = vec_perm(this0, this0, last_index_col0);
last0 = this0;
for (col = 0; col < downsampled_width;
col += 16, inptr += 16, outptr += 32) {
if (col > 0) {
last = vec_perm(lastblock, block, last_index);
lastblock = block;
p_last0 = vec_perm(last0, this0, last_index);
last0 = this0;
}
if (downsampled_width - col <= 16)
next = vec_perm(block, block, next_index_lastcol);
p_next0 = vec_perm(this0, this0, next_index_lastcol);
else {
nextblock = vec_ld(16, inptr);
next = vec_perm(block, nextblock, next_index);
next0 = vec_ld(16, inptr);
p_next0 = vec_perm(this0, next0, next_index);
}
blocke = (__vector short)vec_mule(block, pb_three);
blocko = (__vector short)vec_mulo(block, pb_three);
blockl = vec_mergeh(blocke, blocko);
blockh = vec_mergel(blocke, blocko);
this0e = (__vector short)vec_mule(this0, pb_three);
this0o = (__vector short)vec_mulo(this0, pb_three);
this0l = vec_mergeh(this0e, this0o);
this0h = vec_mergel(this0e, this0o);
lastl = (__vector short)vec_mergeh(pb_zero, last);
lasth = (__vector short)vec_mergel(pb_zero, last);
lastl = vec_add(lastl, pw_one);
lasth = vec_add(lasth, pw_one);
last0l = (__vector short)vec_mergeh(pb_zero, p_last0);
last0h = (__vector short)vec_mergel(pb_zero, p_last0);
last0l = vec_add(last0l, pw_one);
last0h = vec_add(last0h, pw_one);
nextl = (__vector short)vec_mergeh(pb_zero, next);
nexth = (__vector short)vec_mergel(pb_zero, next);
nextl = vec_add(nextl, pw_two);
nexth = vec_add(nexth, pw_two);
next0l = (__vector short)vec_mergeh(pb_zero, p_next0);
next0h = (__vector short)vec_mergel(pb_zero, p_next0);
next0l = vec_add(next0l, pw_two);
next0h = vec_add(next0h, pw_two);
outle = vec_add(blockl, lastl);
outhe = vec_add(blockh, lasth);
outlo = vec_add(blockl, nextl);
outho = vec_add(blockh, nexth);
outle = vec_add(this0l, last0l);
outhe = vec_add(this0h, last0h);
outlo = vec_add(this0l, next0l);
outho = vec_add(this0h, next0h);
outle = vec_sr(outle, (__vector unsigned short)pw_two);
outhe = vec_sr(outhe, (__vector unsigned short)pw_two);
outlo = vec_sr(outlo, (__vector unsigned short)pw_two);
@@ -102,7 +103,7 @@ jsimd_h2v1_fancy_upsample_altivec (int max_v_samp_factor,
(__vector unsigned char)outho, merge_pack_index);
vec_st(out, 16, outptr);
block = nextblock;
this0 = next0;
}
}
}
@@ -118,8 +119,8 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
JSAMPROW inptr_1, inptr0, inptr1, outptr0, outptr1;
int inrow, outrow, col;
__vector unsigned char block_1, block0, block1, out;
__vector short block_1l, block_1h, block0l, block0h, block1l, block1h,
__vector unsigned char this_1, this0, this1, out;
__vector short this_1l, this_1h, this0l, this0h, this1l, this1h,
lastcolsum_1h, lastcolsum1h,
p_lastcolsum_1l, p_lastcolsum_1h, p_lastcolsum1l, p_lastcolsum1h,
thiscolsum_1l, thiscolsum_1h, thiscolsum1l, thiscolsum1h,
@@ -147,26 +148,26 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
outptr0 = output_data[outrow++];
outptr1 = output_data[outrow++];
block0 = vec_ld(0, inptr0);
block0l = (__vector short)vec_mergeh(pb_zero, block0);
block0h = (__vector short)vec_mergel(pb_zero, block0);
block0l = vec_mladd(block0l, pw_three, pw_zero);
block0h = vec_mladd(block0h, pw_three, pw_zero);
this0 = vec_ld(0, inptr0);
this0l = (__vector short)vec_mergeh(pb_zero, this0);
this0h = (__vector short)vec_mergel(pb_zero, this0);
this0l = vec_mladd(this0l, pw_three, pw_zero);
this0h = vec_mladd(this0h, pw_three, pw_zero);
block_1 = vec_ld(0, inptr_1);
block_1l = (__vector short)vec_mergeh(pb_zero, block_1);
block_1h = (__vector short)vec_mergel(pb_zero, block_1);
thiscolsum_1l = vec_add(block0l, block_1l);
thiscolsum_1h = vec_add(block0h, block_1h);
this_1 = vec_ld(0, inptr_1);
this_1l = (__vector short)vec_mergeh(pb_zero, this_1);
this_1h = (__vector short)vec_mergel(pb_zero, this_1);
thiscolsum_1l = vec_add(this0l, this_1l);
thiscolsum_1h = vec_add(this0h, this_1h);
lastcolsum_1h = thiscolsum_1h;
p_lastcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1l, last_index_col0);
p_lastcolsum_1h = vec_perm(thiscolsum_1l, thiscolsum_1h, last_index);
block1 = vec_ld(0, inptr1);
block1l = (__vector short)vec_mergeh(pb_zero, block1);
block1h = (__vector short)vec_mergel(pb_zero, block1);
thiscolsum1l = vec_add(block0l, block1l);
thiscolsum1h = vec_add(block0h, block1h);
this1 = vec_ld(0, inptr1);
this1l = (__vector short)vec_mergeh(pb_zero, this1);
this1h = (__vector short)vec_mergel(pb_zero, this1);
thiscolsum1l = vec_add(this0l, this1l);
thiscolsum1h = vec_add(this0h, this1h);
lastcolsum1h = thiscolsum1h;
p_lastcolsum1l = vec_perm(thiscolsum1l, thiscolsum1l, last_index_col0);
p_lastcolsum1h = vec_perm(thiscolsum1l, thiscolsum1h, last_index);
@@ -191,25 +192,25 @@ jsimd_h2v2_fancy_upsample_altivec (int max_v_samp_factor,
p_nextcolsum1h = vec_perm(thiscolsum1h, thiscolsum1h,
next_index_lastcol);
} else {
block0 = vec_ld(16, inptr0);
block0l = (__vector short)vec_mergeh(pb_zero, block0);
block0h = (__vector short)vec_mergel(pb_zero, block0);
block0l = vec_mladd(block0l, pw_three, pw_zero);
block0h = vec_mladd(block0h, pw_three, pw_zero);
this0 = vec_ld(16, inptr0);
this0l = (__vector short)vec_mergeh(pb_zero, this0);
this0h = (__vector short)vec_mergel(pb_zero, this0);
this0l = vec_mladd(this0l, pw_three, pw_zero);
this0h = vec_mladd(this0h, pw_three, pw_zero);
block_1 = vec_ld(16, inptr_1);
block_1l = (__vector short)vec_mergeh(pb_zero, block_1);
block_1h = (__vector short)vec_mergel(pb_zero, block_1);
nextcolsum_1l = vec_add(block0l, block_1l);
nextcolsum_1h = vec_add(block0h, block_1h);
this_1 = vec_ld(16, inptr_1);
this_1l = (__vector short)vec_mergeh(pb_zero, this_1);
this_1h = (__vector short)vec_mergel(pb_zero, this_1);
nextcolsum_1l = vec_add(this0l, this_1l);
nextcolsum_1h = vec_add(this0h, this_1h);
p_nextcolsum_1l = vec_perm(thiscolsum_1l, thiscolsum_1h, next_index);
p_nextcolsum_1h = vec_perm(thiscolsum_1h, nextcolsum_1l, next_index);
block1 = vec_ld(16, inptr1);
block1l = (__vector short)vec_mergeh(pb_zero, block1);
block1h = (__vector short)vec_mergel(pb_zero, block1);
nextcolsum1l = vec_add(block0l, block1l);
nextcolsum1h = vec_add(block0h, block1h);
this1 = vec_ld(16, inptr1);
this1l = (__vector short)vec_mergeh(pb_zero, this1);
this1h = (__vector short)vec_mergel(pb_zero, this1);
nextcolsum1l = vec_add(this0l, this1l);
nextcolsum1h = vec_add(this0h, this1h);
p_nextcolsum1l = vec_perm(thiscolsum1l, thiscolsum1h, next_index);
p_nextcolsum1h = vec_perm(thiscolsum1h, nextcolsum1l, next_index);
}

View File

@@ -56,7 +56,7 @@
\
z1 = vec_add(tmp12, tmp13); \
z1 = vec_sl(z1, pre_multiply_scale_bits); \
z1 = vec_madds(z1, pw_0707, zero); \
z1 = vec_madds(z1, pw_0707, pw_zero); \
\
out2 = vec_add(tmp13, z1); \
out6 = vec_sub(tmp13, z1); \
@@ -70,13 +70,13 @@
tmp10 = vec_sl(tmp10, pre_multiply_scale_bits); \
tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \
z5 = vec_sub(tmp10, tmp12); \
z5 = vec_madds(z5, pw_0382, zero); \
z5 = vec_madds(z5, pw_0382, pw_zero); \
\
z2 = vec_madds(tmp10, pw_0541, z5); \
z4 = vec_madds(tmp12, pw_1306, z5); \
\
tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \
z3 = vec_madds(tmp11, pw_0707, zero); \
z3 = vec_madds(tmp11, pw_0707, pw_zero); \
\
z11 = vec_add(tmp7, z3); \
z13 = vec_sub(tmp7, z3); \
@@ -98,7 +98,7 @@ jsimd_fdct_ifast_altivec (DCTELEM *data)
out0, out1, out2, out3, out4, out5, out6, out7;
/* Constants */
__vector short zero = vec_splat_s16(0),
__vector short pw_zero = { __8X(0) },
pw_0382 = { __8X(F_0_382 << CONST_SHIFT) },
pw_0541 = { __8X(F_0_541 << CONST_SHIFT) },
pw_0707 = { __8X(F_0_707 << CONST_SHIFT) },

View File

@@ -54,7 +54,7 @@
\
tmp12 = vec_sub(in##2, in##6); \
tmp12 = vec_sl(tmp12, pre_multiply_scale_bits); \
tmp12 = vec_madds(tmp12, pw_F1414, zero); \
tmp12 = vec_madds(tmp12, pw_F1414, pw_zero); \
tmp12 = vec_sub(tmp12, tmp13); \
\
tmp0 = vec_add(tmp10, tmp13); \
@@ -73,7 +73,7 @@
\
tmp11 = vec_sub(z11, z13); \
tmp11 = vec_sl(tmp11, pre_multiply_scale_bits); \
tmp11 = vec_madds(tmp11, pw_F1414, zero); \
tmp11 = vec_madds(tmp11, pw_F1414, pw_zero); \
\
tmp7 = vec_add(z11, z13); \
\
@@ -88,9 +88,9 @@
*/ \
\
z5 = vec_add(z10s, z12s); \
z5 = vec_madds(z5, pw_F1847, zero); \
z5 = vec_madds(z5, pw_F1847, pw_zero); \
\
tmp10 = vec_madds(z12s, pw_F1082, zero); \
tmp10 = vec_madds(z12s, pw_F1082, pw_zero); \
tmp10 = vec_sub(tmp10, z5); \
tmp12 = vec_madds(z10s, pw_MF1613, z5); \
tmp12 = vec_sub(tmp12, z10); \
@@ -115,6 +115,8 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
JSAMPARRAY output_buf, JDIMENSION output_col)
{
short *dct_table = (short *)dct_table_;
int *outptr;
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
col0, col1, col2, col3, col4, col5, col6, col7,
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
@@ -122,10 +124,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
z5, z10, z10s, z11, z12s, z13,
out0, out1, out2, out3, out4, out5, out6, out7;
__vector signed char outb;
int *outptr;
/* Constants */
__vector short zero = { __8X(0) },
__vector short pw_zero = { __8X(0) },
pw_F1414 = { __8X(F_1_414 << CONST_SHIFT) },
pw_F1847 = { __8X(F_1_847 << CONST_SHIFT) },
pw_MF1613 = { __8X(-F_1_613 << CONST_SHIFT) },
@@ -154,9 +155,9 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
tmp1 = vec_or(tmp1, tmp3);
quant0 = vec_ld(0, dct_table);
col0 = vec_mladd(col0, quant0, zero);
col0 = vec_mladd(col0, quant0, pw_zero);
if (vec_all_eq(tmp1, zero)) {
if (vec_all_eq(tmp1, pw_zero)) {
/* AC terms all zero */
row0 = vec_splat(col0, 0);
@@ -178,13 +179,13 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
quant6 = vec_ld(96, dct_table);
quant7 = vec_ld(112, dct_table);
col1 = vec_mladd(col1, quant1, zero);
col2 = vec_mladd(col2, quant2, zero);
col3 = vec_mladd(col3, quant3, zero);
col4 = vec_mladd(col4, quant4, zero);
col5 = vec_mladd(col5, quant5, zero);
col6 = vec_mladd(col6, quant6, zero);
col7 = vec_mladd(col7, quant7, zero);
col1 = vec_mladd(col1, quant1, pw_zero);
col2 = vec_mladd(col2, quant2, pw_zero);
col3 = vec_mladd(col3, quant3, pw_zero);
col4 = vec_mladd(col4, quant4, pw_zero);
col5 = vec_mladd(col5, quant5, pw_zero);
col6 = vec_mladd(col6, quant6, pw_zero);
col7 = vec_mladd(col7, quant7, pw_zero);
DO_IDCT(col);

View File

@@ -61,10 +61,10 @@
in##26l = vec_mergeh(in##2, in##6); \
in##26h = vec_mergel(in##2, in##6); \
\
tmp3l = vec_msums(in##26l, pw_f130_f054, zero32); \
tmp3h = vec_msums(in##26h, pw_f130_f054, zero32); \
tmp2l = vec_msums(in##26l, pw_f054_mf130, zero32); \
tmp2h = vec_msums(in##26h, pw_f054_mf130, zero32); \
tmp3l = vec_msums(in##26l, pw_f130_f054, pd_zero); \
tmp3h = vec_msums(in##26h, pw_f130_f054, pd_zero); \
tmp2l = vec_msums(in##26l, pw_f054_mf130, pd_zero); \
tmp2h = vec_msums(in##26h, pw_f054_mf130, pd_zero); \
\
tmp0 = vec_add(in##0, in##4); \
tmp1 = vec_sub(in##0, in##4); \
@@ -111,10 +111,10 @@
z34l = vec_mergeh(z3, z4); \
z34h = vec_mergel(z3, z4); \
\
z3l = vec_msums(z34l, pw_mf078_f117, zero32); \
z3h = vec_msums(z34h, pw_mf078_f117, zero32); \
z4l = vec_msums(z34l, pw_f117_f078, zero32); \
z4h = vec_msums(z34h, pw_f117_f078, zero32); \
z3l = vec_msums(z34l, pw_mf078_f117, pd_zero); \
z3h = vec_msums(z34h, pw_mf078_f117, pd_zero); \
z4l = vec_msums(z34l, pw_f117_f078, pd_zero); \
z4h = vec_msums(z34h, pw_f117_f078, pd_zero); \
\
/* (Original) \
* z1 = tmp0 + tmp3; z2 = tmp1 + tmp2; \
@@ -210,6 +210,8 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
JSAMPARRAY output_buf, JDIMENSION output_col)
{
short *dct_table = (short *)dct_table_;
int *outptr;
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
col0, col1, col2, col3, col4, col5, col6, col7,
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
@@ -223,10 +225,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h,
out5l, out5h, out6l, out6h, out7l, out7h;
__vector signed char outb;
int *outptr;
/* Constants */
__vector short zero16 = { __8X(0) },
__vector short pw_zero = { __8X(0) },
pw_f130_f054 = { __4X2(F_0_541 + F_0_765, F_0_541) },
pw_f054_mf130 = { __4X2(F_0_541, F_0_541 - F_1_847) },
pw_mf078_f117 = { __4X2(F_1_175 - F_1_961, F_1_175) },
@@ -236,7 +237,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
pw_mf050_mf256 = { __4X2(F_2_053 - F_2_562, -F_2_562) },
pw_mf256_f050 = { __4X2(-F_2_562, F_3_072 - F_2_562) };
__vector unsigned short pass1_bits = { __8X(PASS1_BITS) };
__vector int zero32 = { __4X(0) },
__vector int pd_zero = { __4X(0) },
pd_descale_p1 = { __4X(1 << (DESCALE_P1 - 1)) },
pd_descale_p2 = { __4X(1 << (DESCALE_P2 - 1)) };
__vector unsigned int descale_p1 = { __4X(DESCALE_P1) },
@@ -263,9 +264,9 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
tmp1 = vec_or(tmp1, tmp3);
quant0 = vec_ld(0, dct_table);
col0 = vec_mladd(col0, quant0, zero16);
col0 = vec_mladd(col0, quant0, pw_zero);
if (vec_all_eq(tmp1, zero16)) {
if (vec_all_eq(tmp1, pw_zero)) {
/* AC terms all zero */
col0 = vec_sl(col0, pass1_bits);
@@ -289,13 +290,13 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
quant6 = vec_ld(96, dct_table);
quant7 = vec_ld(112, dct_table);
col1 = vec_mladd(col1, quant1, zero16);
col2 = vec_mladd(col2, quant2, zero16);
col3 = vec_mladd(col3, quant3, zero16);
col4 = vec_mladd(col4, quant4, zero16);
col5 = vec_mladd(col5, quant5, zero16);
col6 = vec_mladd(col6, quant6, zero16);
col7 = vec_mladd(col7, quant7, zero16);
col1 = vec_mladd(col1, quant1, pw_zero);
col2 = vec_mladd(col2, quant2, pw_zero);
col3 = vec_mladd(col3, quant3, pw_zero);
col4 = vec_mladd(col4, quant4, pw_zero);
col5 = vec_mladd(col5, quant5, pw_zero);
col6 = vec_mladd(col6, quant6, pw_zero);
col7 = vec_mladd(col7, quant7, pw_zero);
DO_IDCT(col, 1);

View File

@@ -42,12 +42,13 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
DCTELEM * workspace)
{
JSAMPROW elemptr;
__vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7;
__vector short out0, out1, out2, out3, out4, out5, out6, out7;
/* Constants */
__vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) };
__vector unsigned char zero = { __16X(0) };
__vector unsigned char pb_zero = { __16X(0) };
LOAD_ROW(0);
LOAD_ROW(1);
@@ -58,14 +59,14 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
LOAD_ROW(6);
LOAD_ROW(7);
out0 = (__vector short)vec_mergeh(zero, in0);
out1 = (__vector short)vec_mergeh(zero, in1);
out2 = (__vector short)vec_mergeh(zero, in2);
out3 = (__vector short)vec_mergeh(zero, in3);
out4 = (__vector short)vec_mergeh(zero, in4);
out5 = (__vector short)vec_mergeh(zero, in5);
out6 = (__vector short)vec_mergeh(zero, in6);
out7 = (__vector short)vec_mergeh(zero, in7);
out0 = (__vector short)vec_mergeh(pb_zero, in0);
out1 = (__vector short)vec_mergeh(pb_zero, in1);
out2 = (__vector short)vec_mergeh(pb_zero, in2);
out3 = (__vector short)vec_mergeh(pb_zero, in3);
out4 = (__vector short)vec_mergeh(pb_zero, in4);
out5 = (__vector short)vec_mergeh(pb_zero, in5);
out6 = (__vector short)vec_mergeh(pb_zero, in6);
out7 = (__vector short)vec_mergeh(pb_zero, in7);
out0 = vec_sub(out0, pw_centerjsamp);
out1 = vec_sub(out1, pw_centerjsamp);
@@ -89,7 +90,8 @@ jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
#define WORD_BIT 16
/* There is no AltiVec unsigned multiply instruction, hence this. */
/* There is no AltiVec 16-bit unsigned multiply instruction, hence this.
We basically need an unsigned equivalent of vec_madds(). */
#define MULTIPLY(vs0, vs1, out) { \
tmpe = vec_mule((__vector unsigned short)vs0, \
@@ -105,13 +107,11 @@ void
jsimd_quantize_altivec (JCOEFPTR coef_block, DCTELEM * divisors,
DCTELEM * workspace)
{
__vector short row0, row1, row2, row3, row4, row5, row6, row7;
__vector short row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s;
__vector short corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7;
__vector short recip0, recip1, recip2, recip3, recip4, recip5, recip6,
recip7;
__vector short scale0, scale1, scale2, scale3, scale4, scale5, scale6,
scale7;
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
row0s, row1s, row2s, row3s, row4s, row5s, row6s, row7s,
corr0, corr1, corr2, corr3, corr4, corr5, corr6, corr7,
recip0, recip1, recip2, recip3, recip4, recip5, recip6, recip7,
scale0, scale1, scale2, scale3, scale4, scale5, scale6, scale7;
__vector unsigned int tmpe, tmpo;
/* Constants */