diff --git a/simd/jidctfst-altivec.c b/simd/jidctfst-altivec.c index fd7a2a34..9779a6ac 100644 --- a/simd/jidctfst-altivec.c +++ b/simd/jidctfst-altivec.c @@ -119,11 +119,12 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, __vector short row0, row1, row2, row3, row4, row5, row6, row7, col0, col1, col2, col3, col4, col5, col6, col7, + final0, final1, final2, final3, final4, final5, final6, final7, quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp10, tmp11, tmp12, tmp13, z5, z10, z10s, z11, z12s, z13, out0, out1, out2, out3, out4, out5, out6, out7; - __vector signed char outb; + __vector signed char final; /* Constants */ __vector short pw_zero = { __8X(0) }, @@ -205,53 +206,53 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block, out6 = vec_sra(out6, pass1_bits3); out7 = vec_sra(out7, pass1_bits3); - TRANSPOSE(out, col); + TRANSPOSE(out, final); - outb = vec_packs(col0, col0); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final0, final0); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[0] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col1, col1); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final1, final1); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[1] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col2, col2); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final2, final2); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[2] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col3, col3); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final3, final3); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[3] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col4, col4); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final4, final4); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[4] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col5, col5); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final5, final5); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[5] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col6, col6); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final6, final6); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[6] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col7, col7); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final7, final7); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[7] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); } diff --git a/simd/jidctint-altivec.c b/simd/jidctint-altivec.c index 7f0f8d0d..a7b6f00e 100644 --- a/simd/jidctint-altivec.c +++ b/simd/jidctint-altivec.c @@ -214,6 +214,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, __vector short row0, row1, row2, row3, row4, row5, row6, row7, col0, col1, col2, col3, col4, col5, col6, col7, + final0, final1, final2, final3, final4, final5, final6, final7, quant0, quant1, quant2, quant3, quant4, quant5, quant6, quant7, tmp0, tmp1, tmp2, tmp3, z3, z4, z34l, z34h, col71l, col71h, col26l, col26h, col53l, col53h, @@ -224,7 +225,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, z3l, z3h, z4l, z4h, out0l, out0h, out1l, out1h, out2l, out2h, out3l, out3h, out4l, out4h, out5l, out5h, out6l, out6h, out7l, out7h; - __vector signed char outb; + __vector signed char final; /* Constants */ __vector short pw_zero = { __8X(0) }, @@ -307,53 +308,53 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block, DO_IDCT(row, 2); - TRANSPOSE(out, col); + TRANSPOSE(out, final); - outb = vec_packs(col0, col0); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final0, final0); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[0] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col1, col1); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final1, final1); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[1] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col2, col2); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final2, final2); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[2] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col3, col3); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final3, final3); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[3] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col4, col4); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final4, final4); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[4] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col5, col5); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final5, final5); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[5] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col6, col6); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final6, final6); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[6] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); - outb = vec_packs(col7, col7); - outb = vec_add(outb, pb_centerjsamp); + final = vec_packs(final7, final7); + final = vec_add(final, pb_centerjsamp); outptr = (int *)(output_buf[7] + output_col); - vec_ste((__vector int)outb, 0, outptr); - vec_ste((__vector int)outb, 4, outptr); + vec_ste((__vector int)final, 0, outptr); + vec_ste((__vector int)final, 4, outptr); }