Simplify the code somewhat. It actually wasn't necessary to have both a "fast path" and a "medium path" -- they perform the same.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1486 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -70,27 +70,13 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
|||||||
__vector unsigned char unaligned_shift_index;
|
__vector unsigned char unaligned_shift_index;
|
||||||
int bytes = num_cols + offset;
|
int bytes = num_cols + offset;
|
||||||
|
|
||||||
if (bytes >= (RGB_PIXELSIZE + 1) * 16) {
|
if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) {
|
||||||
/* Fast path -- we have enough buffer space to load all vectors.
|
|
||||||
* Even if we don't need them all, this is faster than narrowing
|
|
||||||
* down which ones we need.
|
|
||||||
*/
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
|
||||||
rgb1 = vec_ld(16, inptr);
|
|
||||||
rgb2 = vec_ld(32, inptr);
|
|
||||||
rgb3 = vec_ld(48, inptr);
|
|
||||||
#if RGB_PIXELSIZE == 4
|
|
||||||
rgb4 = vec_ld(64, inptr);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
if (bytes & 15) {
|
|
||||||
/* Slow path to prevent buffer overread. Since there is no way to
|
/* Slow path to prevent buffer overread. Since there is no way to
|
||||||
* read a partial AltiVec register, overread would occur on the
|
* read a partial AltiVec register, overread would occur on the last
|
||||||
* last chunk of the last image row if the right edge is not on a
|
* chunk of the last image row if the right edge is not on a 16-byte
|
||||||
* 16-byte boundary. It could also occur on other rows if the
|
* boundary. It could also occur on other rows if the bytes
|
||||||
* bytes per row is low enough. Since we can't determine whether
|
* per row is low enough. Since we can't determine whether we're on
|
||||||
* we're on the last image row, we have to assume every row is the
|
* the last image row, we have to assume every row is the last.
|
||||||
* last.
|
|
||||||
*/
|
*/
|
||||||
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
||||||
rgb0 = vec_ld(0, tmpbuf);
|
rgb0 = vec_ld(0, tmpbuf);
|
||||||
@@ -99,28 +85,19 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
|||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
rgb3 = vec_ld(48, tmpbuf);
|
rgb3 = vec_ld(48, tmpbuf);
|
||||||
#endif
|
#endif
|
||||||
goto start; /* Skip permutation */
|
|
||||||
} else {
|
} else {
|
||||||
/* Medium path -- if the right edge is vector-aligned, then we can
|
/* Fast path */
|
||||||
* read full vectors (but with a lot of branches.)
|
|
||||||
*/
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
rgb0 = vec_ld(0, inptr);
|
||||||
if (bytes > 16) {
|
if (bytes > 16)
|
||||||
rgb1 = vec_ld(16, inptr);
|
rgb1 = vec_ld(16, inptr);
|
||||||
if (bytes > 32) {
|
if (bytes > 32)
|
||||||
rgb2 = vec_ld(32, inptr);
|
rgb2 = vec_ld(32, inptr);
|
||||||
if (bytes > 48) {
|
if (bytes > 48)
|
||||||
rgb3 = vec_ld(48, inptr);
|
rgb3 = vec_ld(48, inptr);
|
||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
if (bytes > 64)
|
if (bytes > 64)
|
||||||
rgb4 = vec_ld(64, inptr);
|
rgb4 = vec_ld(64, inptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unaligned_shift_index = vec_lvsl(0, inptr);
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
@@ -128,17 +105,9 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
|||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (num_cols >= RGB_PIXELSIZE * 16) {
|
if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) {
|
||||||
/* Fast path */
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
|
||||||
rgb1 = vec_ld(16, inptr);
|
|
||||||
rgb2 = vec_ld(32, inptr);
|
|
||||||
#if RGB_PIXELSIZE == 4
|
|
||||||
rgb3 = vec_ld(48, inptr);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
if (num_cols & 15) {
|
|
||||||
/* Slow path */
|
/* Slow path */
|
||||||
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
||||||
rgb0 = vec_ld(0, tmpbuf);
|
rgb0 = vec_ld(0, tmpbuf);
|
||||||
@@ -148,11 +117,11 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
|||||||
rgb3 = vec_ld(48, tmpbuf);
|
rgb3 = vec_ld(48, tmpbuf);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/* Medium path */
|
/* Fast path */
|
||||||
rgb0 = vec_ld(0, inptr);
|
rgb0 = vec_ld(0, inptr);
|
||||||
if (num_cols > 16) {
|
if (num_cols > 16)
|
||||||
rgb1 = vec_ld(16, inptr);
|
rgb1 = vec_ld(16, inptr);
|
||||||
if (num_cols > 32) {
|
if (num_cols > 32)
|
||||||
rgb2 = vec_ld(32, inptr);
|
rgb2 = vec_ld(32, inptr);
|
||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
if (num_cols > 48)
|
if (num_cols > 48)
|
||||||
@@ -160,11 +129,7 @@ void jsimd_rgb_ycc_convert_altivec (JDIMENSION img_width, JSAMPARRAY input_buf,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
start:
|
|
||||||
#if RGB_PIXELSIZE == 3
|
#if RGB_PIXELSIZE == 3
|
||||||
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
||||||
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
||||||
|
|||||||
@@ -65,27 +65,13 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
|||||||
__vector unsigned char unaligned_shift_index;
|
__vector unsigned char unaligned_shift_index;
|
||||||
int bytes = num_cols + offset;
|
int bytes = num_cols + offset;
|
||||||
|
|
||||||
if (bytes >= (RGB_PIXELSIZE + 1) * 16) {
|
if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) {
|
||||||
/* Fast path -- we have enough buffer space to load all vectors.
|
|
||||||
* Even if we don't need them all, this is faster than narrowing
|
|
||||||
* down which ones we need.
|
|
||||||
*/
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
|
||||||
rgb1 = vec_ld(16, inptr);
|
|
||||||
rgb2 = vec_ld(32, inptr);
|
|
||||||
rgb3 = vec_ld(48, inptr);
|
|
||||||
#if RGB_PIXELSIZE == 4
|
|
||||||
rgb4 = vec_ld(64, inptr);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
if (bytes & 15) {
|
|
||||||
/* Slow path to prevent buffer overread. Since there is no way to
|
/* Slow path to prevent buffer overread. Since there is no way to
|
||||||
* read a partial AltiVec register, overread would occur on the
|
* read a partial AltiVec register, overread would occur on the last
|
||||||
* last chunk of the last image row if the right edge is not on a
|
* chunk of the last image row if the right edge is not on a 16-byte
|
||||||
* 16-byte boundary. It could also occur on other rows if the
|
* boundary. It could also occur on other rows if the bytes
|
||||||
* bytes per row is low enough. Since we can't determine whether
|
* per row is low enough. Since we can't determine whether we're on
|
||||||
* we're on the last image row, we have to assume every row is the
|
* the last image row, we have to assume every row is the last.
|
||||||
* last.
|
|
||||||
*/
|
*/
|
||||||
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
||||||
rgb0 = vec_ld(0, tmpbuf);
|
rgb0 = vec_ld(0, tmpbuf);
|
||||||
@@ -94,28 +80,19 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
|||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
rgb3 = vec_ld(48, tmpbuf);
|
rgb3 = vec_ld(48, tmpbuf);
|
||||||
#endif
|
#endif
|
||||||
goto start; /* Skip permutation */
|
|
||||||
} else {
|
} else {
|
||||||
/* Medium path -- if the right edge is vector-aligned, then we can
|
/* Fast path */
|
||||||
* read full vectors (but with a lot of branches.)
|
|
||||||
*/
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
rgb0 = vec_ld(0, inptr);
|
||||||
if (bytes > 16) {
|
if (bytes > 16)
|
||||||
rgb1 = vec_ld(16, inptr);
|
rgb1 = vec_ld(16, inptr);
|
||||||
if (bytes > 32) {
|
if (bytes > 32)
|
||||||
rgb2 = vec_ld(32, inptr);
|
rgb2 = vec_ld(32, inptr);
|
||||||
if (bytes > 48) {
|
if (bytes > 48)
|
||||||
rgb3 = vec_ld(48, inptr);
|
rgb3 = vec_ld(48, inptr);
|
||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
if (bytes > 64)
|
if (bytes > 64)
|
||||||
rgb4 = vec_ld(64, inptr);
|
rgb4 = vec_ld(64, inptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unaligned_shift_index = vec_lvsl(0, inptr);
|
unaligned_shift_index = vec_lvsl(0, inptr);
|
||||||
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
|
||||||
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
|
||||||
@@ -123,17 +100,9 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
|||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
if (num_cols >= RGB_PIXELSIZE * 16) {
|
if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) {
|
||||||
/* Fast path */
|
|
||||||
rgb0 = vec_ld(0, inptr);
|
|
||||||
rgb1 = vec_ld(16, inptr);
|
|
||||||
rgb2 = vec_ld(32, inptr);
|
|
||||||
#if RGB_PIXELSIZE == 4
|
|
||||||
rgb3 = vec_ld(48, inptr);
|
|
||||||
#endif
|
|
||||||
} else {
|
|
||||||
if (num_cols & 15) {
|
|
||||||
/* Slow path */
|
/* Slow path */
|
||||||
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
|
||||||
rgb0 = vec_ld(0, tmpbuf);
|
rgb0 = vec_ld(0, tmpbuf);
|
||||||
@@ -143,11 +112,11 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
|||||||
rgb3 = vec_ld(48, tmpbuf);
|
rgb3 = vec_ld(48, tmpbuf);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
/* Medium path */
|
/* Fast path */
|
||||||
rgb0 = vec_ld(0, inptr);
|
rgb0 = vec_ld(0, inptr);
|
||||||
if (num_cols > 16) {
|
if (num_cols > 16)
|
||||||
rgb1 = vec_ld(16, inptr);
|
rgb1 = vec_ld(16, inptr);
|
||||||
if (num_cols > 32) {
|
if (num_cols > 32)
|
||||||
rgb2 = vec_ld(32, inptr);
|
rgb2 = vec_ld(32, inptr);
|
||||||
#if RGB_PIXELSIZE == 4
|
#if RGB_PIXELSIZE == 4
|
||||||
if (num_cols > 48)
|
if (num_cols > 48)
|
||||||
@@ -155,11 +124,7 @@ void jsimd_rgb_gray_convert_altivec (JDIMENSION img_width,
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
start:
|
|
||||||
#if RGB_PIXELSIZE == 3
|
#if RGB_PIXELSIZE == 3
|
||||||
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
/* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
|
||||||
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
* rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
|
||||||
|
|||||||
Reference in New Issue
Block a user