Make the variable names less confusing -- "out" represents the output of the IDCT kernel, so use "final" (rather than "outb") to represent the packed data that will be stored to memory.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1506 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2015-01-15 08:51:31 +00:00
parent 2ab5ea6c48
commit c4e3c361d7
2 changed files with 70 additions and 68 deletions

View File

@@ -119,11 +119,12 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
col0, col1, col2, col3, col4, col5, col6, col7,
final0, final1, final2, final3, final4, final5, final6, final7,
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13,
z5, z10, z10s, z11, z12s, z13,
out0, out1, out2, out3, out4, out5, out6, out7;
__vector signed char outb;
__vector signed char final;
/* Constants */
__vector short pw_zero = { __8X(0) },
@@ -205,53 +206,53 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
out6 = vec_sra(out6, pass1_bits3);
out7 = vec_sra(out7, pass1_bits3);
TRANSPOSE(out, col);
TRANSPOSE(out, final);
outb = vec_packs(col0, col0);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final0, final0);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[0] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col1, col1);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final1, final1);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[1] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col2, col2);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final2, final2);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[2] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col3, col3);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final3, final3);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[3] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col4, col4);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final4, final4);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[4] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col5, col5);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final5, final5);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[5] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col6, col6);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final6, final6);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[6] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col7, col7);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final7, final7);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[7] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
}

View File

@@ -214,6 +214,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
__vector short row0, row1, row2, row3, row4, row5, row6, row7,
col0, col1, col2, col3, col4, col5, col6, col7,
final0, final1, final2, final3, final4, final5, final6, final7,
quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7,
tmp0, tmp1, tmp2, tmp3, z3, z4,
z34l, z34h, col71l, col71h, col26l, col26h, col53l, col53h,
@@ -224,7 +225,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
z3l, z3h, z4l, z4h,
out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h,
out5l, out5h, out6l, out6h, out7l, out7h;
__vector signed char outb;
__vector signed char final;
/* Constants */
__vector short pw_zero = { __8X(0) },
@@ -307,53 +308,53 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
DO_IDCT(row, 2);
TRANSPOSE(out, col);
TRANSPOSE(out, final);
outb = vec_packs(col0, col0);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final0, final0);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[0] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col1, col1);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final1, final1);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[1] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col2, col2);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final2, final2);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[2] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col3, col3);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final3, final3);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[3] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col4, col4);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final4, final4);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[4] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col5, col5);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final5, final5);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[5] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col6, col6);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final6, final6);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[6] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
outb = vec_packs(col7, col7);
outb = vec_add(outb, pb_centerjsamp);
final = vec_packs(final7, final7);
final = vec_add(final, pb_centerjsamp);
outptr = (int *)(output_buf[7] + output_col);
vec_ste((__vector int)outb, 0, outptr);
vec_ste((__vector int)outb, 4, outptr);
vec_ste((__vector int)final, 0, outptr);
vec_ste((__vector int)final, 4, outptr);
}