SIMD-optimized RGB-to-grayscale conversion for MIPS DSPr2
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1045 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -8,7 +8,7 @@ extended to support image scaling.
|
|||||||
|
|
||||||
[2] Added SIMD acceleration for performing color conversion, downsampling,
|
[2] Added SIMD acceleration for performing color conversion, downsampling,
|
||||||
and upsampling on DSPr2-capable MIPS platforms. This speeds up the compression
|
and upsampling on DSPr2-capable MIPS platforms. This speeds up the compression
|
||||||
of full-color JPEGs by 6-18% on such platforms and decompression by 3-12%.
|
of full-color JPEGs by 6-21% on such platforms and decompression by 6-17%.
|
||||||
|
|
||||||
[3] Added support for 4:1:1 subsampling to the TurboJPEG API. This is mainly
|
[3] Added support for 4:1:1 subsampling to the TurboJPEG API. This is mainly
|
||||||
included for compatibility, since 4:1:1 is not fully accelerated in
|
included for compatibility, since 4:1:1 is not fully accelerated in
|
||||||
|
|||||||
29
simd/jsimd.h
29
simd/jsimd.h
@@ -417,6 +417,35 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2
|
|||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows));
|
JDIMENSION output_row, int num_rows));
|
||||||
|
|
||||||
|
/*
 * RGB -> grayscale color conversion routines for MIPS DSPr2.
 * All seven prototypes below share one signature:
 *   (img_width, input_buf, output_buf, output_row, num_rows)
 * and differ only in the color-layout tag embedded in the name.
 *
 * NOTE(review): the assembly hunk of this change instantiates the extrgb,
 * extbgr, extrgbx, extbgrx, extxbgr and extxrgb variants only; no definition
 * of the plain jsimd_rgb_gray_convert_mips_dspr2 is visible here -- confirm
 * one exists elsewhere before calling it.
 */
EXTERN(void) jsimd_rgb_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgb_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgbx_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgr_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgrx_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxbgr_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxrgb_gray_convert_mips_dspr2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
|
||||||
EXTERN (void) jsimd_ycc_rgb_convert_mips_dspr2
|
EXTERN (void) jsimd_ycc_rgb_convert_mips_dspr2
|
||||||
JPP((JDIMENSION img_width,
|
JPP((JDIMENSION img_width,
|
||||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
|||||||
@@ -100,6 +100,18 @@ jsimd_can_rgb_ycc (void)
|
|||||||
/*
 * Report whether the MIPS DSPr2 RGB -> grayscale converter may be used.
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - BITS_IN_JSAMPLE is 8
 *   - sizeof(JDIMENSION) is 4
 *   - RGB_PIXELSIZE is 3 or 4
 *   - simd_support has the JSIMD_MIPS_DSPR2 bit set
 */
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_rgb_gray (void)
|
jsimd_can_rgb_gray (void)
|
||||||
{
|
{
|
||||||
|
/* Called before simd_support is tested below. */
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (BITS_IN_JSAMPLE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JDIMENSION) != 4)
|
||||||
|
return 0;
|
||||||
|
/* Only 3- and 4-byte pixels are accepted. */
if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
||||||
|
return 0;
|
||||||
|
/* All compile-time checks passed; the answer now depends on the DSPr2 flag. */
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||||
|
return 1;
|
||||||
|
|
||||||
/* Fallback: no accelerated path available. */
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,6 +179,40 @@ jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
|||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows)
|
JDIMENSION output_row, int num_rows)
|
||||||
{
|
{
|
||||||
|
void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||||
|
switch(cinfo->in_color_space)
|
||||||
|
{
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_RGBA:
|
||||||
|
mipsdspr2fct=jsimd_extrgbx_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
mipsdspr2fct=jsimd_extbgr_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_BGRA:
|
||||||
|
mipsdspr2fct=jsimd_extbgrx_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_ABGR:
|
||||||
|
mipsdspr2fct=jsimd_extxbgr_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
case JCS_EXT_ARGB:
|
||||||
|
mipsdspr2fct=jsimd_extxrgb_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
mipsdspr2fct=jsimd_extrgb_gray_convert_mips_dspr2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||||
|
mipsdspr2fct(cinfo->image_width, input_buf,
|
||||||
|
output_buf, output_row, num_rows);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
|
|||||||
@@ -247,6 +247,134 @@ GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3
|
|||||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
|
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
|
||||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
|
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
|
||||||
|
|
||||||
|
/*****************************************************************************/
|
||||||
|
/*
|
||||||
|
* jsimd_extrgb_gray_convert_mips_dspr2
|
||||||
|
* jsimd_extbgr_gray_convert_mips_dspr2
|
||||||
|
* jsimd_extrgbx_gray_convert_mips_dspr2
|
||||||
|
* jsimd_extbgrx_gray_convert_mips_dspr2
|
||||||
|
* jsimd_extxbgr_gray_convert_mips_dspr2
|
||||||
|
* jsimd_extxrgb_gray_convert_mips_dspr2
|
||||||
|
*
|
||||||
|
* Colorspace conversion RGB -> GRAY
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Generator for one RGB -> grayscale row converter.
 *   colorid    - spliced into the emitted symbol name
 *                jsimd_<colorid>_gray_convert_mips_dspr2
 *   pixel_size - number of bytes the input pointer advances per pixel
 *   r_offs, g_offs, b_offs - byte offsets of R, G and B inside one pixel
 *
 * Each output byte is computed as
 *   (0x4c8b * R + 0x9646 * G + 0x1d2f * B + 0x8000) >> 16
 * using the constants loaded into s0, s1, s2 and s7 below.
 */
.macro GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
|
||||||
|
|
||||||
|
/* Helper: load one pixel's R, G and B bytes into the three given registers,
 * then advance the input pointer register by pixel_size. */
.macro DO_RGB_TO_GRAY r, \
|
||||||
|
g, \
|
||||||
|
b, \
|
||||||
|
inptr
|
||||||
|
lbu \r, \r_offs(\inptr)
|
||||||
|
lbu \g, \g_offs(\inptr)
|
||||||
|
lbu \b, \b_offs(\inptr)
|
||||||
|
addiu \inptr, \pixel_size
|
||||||
|
.endm
|
||||||
|
|
||||||
|
LEAF_MIPS_DSPR2(jsimd_\colorid\()_gray_convert_mips_dspr2)
|
||||||
|
/*
|
||||||
|
* a0 - cinfo->image_width
|
||||||
|
* a1 - input_buf
|
||||||
|
* a2 - output_buf
|
||||||
|
* a3 - output_row
|
||||||
|
* 16(sp) - num_rows
|
||||||
|
*/
|
||||||
|
|
||||||
|
SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||||
|
|
||||||
|
li s0, 0x4c8b // s0 = FIX(0.29900)
|
||||||
|
li s1, 0x9646 // s1 = FIX(0.58700)
|
||||||
|
li s2, 0x1d2f // s2 = FIX(0.11400)
|
||||||
|
li s7, 0x8000 // s7 = FIX(0.50000)
|
||||||
|
// num_rows was at 16(sp) on entry; presumably SAVE_REGS_ON_STACK 32 lowered
// sp by 32, hence offset 48 -- confirm against the macro definition.
lw s6, 48(sp)
|
||||||
|
andi t7, a0, 3 // t7 = image_width % 4 (pixels left for the tail loop)
|
||||||
|
|
||||||
|
// ---- per-row loop --------------------------------------------------------
// NOTE(review): the row count is only tested at label 4, after the body has
// run, so one row is converted even when num_rows <= 0.
0:
|
||||||
|
addiu s6, -1 // s6 = rows still to do after this one
|
||||||
|
lw t0, 0(a1) // t0 = current input row pointer (*a1)
|
||||||
|
lw t1, 0(a2) // t1 = output_buf[0]
|
||||||
|
sll t3, a3, 2 // t3 = output_row * 4 (byte index into the row-pointer array)
|
||||||
|
lwx t1, t3(t1) // t1 = output_buf[0][output_row] = output write pointer
|
||||||
|
addiu a3, 1 // next output row for the following iteration
|
||||||
|
addu t9, t1, a0 // t9 = end of output row (one byte per pixel)
|
||||||
|
subu t8, t9, t7 // t8 = end of the part handled four pixels at a time
|
||||||
|
beq t1, t8, 2f // fewer than 4 pixels: skip the unrolled loop
|
||||||
|
nop
|
||||||
|
|
||||||
|
// ---- four pixels per iteration, alternating accumulators $ac0 / $ac1 -----
1:
|
||||||
|
DO_RGB_TO_GRAY t3, t4, t5, t0
|
||||||
|
DO_RGB_TO_GRAY s3, s4, s5, t0
|
||||||
|
|
||||||
|
mtlo s7, $ac0 // seed accumulator with the rounding constant
|
||||||
|
maddu $ac0, s2, t5
|
||||||
|
maddu $ac0, s1, t4
|
||||||
|
maddu $ac0, s0, t3
|
||||||
|
mtlo s7, $ac1
|
||||||
|
maddu $ac1, s2, s5
|
||||||
|
maddu $ac1, s1, s4
|
||||||
|
maddu $ac1, s0, s3
|
||||||
|
extr.w t6, $ac0, 16 // t6 = gray value of pixel 0
|
||||||
|
|
||||||
|
DO_RGB_TO_GRAY t3, t4, t5, t0
|
||||||
|
DO_RGB_TO_GRAY s3, s4, s5, t0
|
||||||
|
|
||||||
|
mtlo s7, $ac0
|
||||||
|
maddu $ac0, s2, t5
|
||||||
|
maddu $ac0, s1, t4
|
||||||
|
extr.w t2, $ac1, 16 // t2 = gray value of pixel 1 (read before $ac1 is reused)
|
||||||
|
maddu $ac0, s0, t3
|
||||||
|
mtlo s7, $ac1
|
||||||
|
maddu $ac1, s2, s5
|
||||||
|
maddu $ac1, s1, s4
|
||||||
|
maddu $ac1, s0, s3
|
||||||
|
extr.w t5, $ac0, 16 // t5 = gray value of pixel 2
|
||||||
|
sb t6, 0(t1)
|
||||||
|
sb t2, 1(t1)
|
||||||
|
extr.w t3, $ac1, 16 // t3 = gray value of pixel 3
|
||||||
|
addiu t1, 4 // output pointer already advanced: pixels 2 and 3 use -2 / -1
|
||||||
|
sb t5, -2(t1)
|
||||||
|
sb t3, -1(t1)
|
||||||
|
bne t1, t8, 1b
|
||||||
|
nop
|
||||||
|
|
||||||
|
2:
|
||||||
|
beqz t7, 4f // no leftover pixels in this row
|
||||||
|
nop
|
||||||
|
|
||||||
|
// ---- tail: remaining image_width % 4 pixels, one at a time ---------------
3:
|
||||||
|
DO_RGB_TO_GRAY t3, t4, t5, t0
|
||||||
|
|
||||||
|
mtlo s7, $ac0
|
||||||
|
maddu $ac0, s2, t5
|
||||||
|
maddu $ac0, s1, t4
|
||||||
|
maddu $ac0, s0, t3
|
||||||
|
extr.w t6, $ac0, 16
|
||||||
|
sb t6, 0(t1)
|
||||||
|
addiu t1, 1
|
||||||
|
bne t1, t9, 3b
|
||||||
|
nop
|
||||||
|
|
||||||
|
4:
|
||||||
|
bgtz s6, 0b // more rows to convert?
|
||||||
|
addiu a1, 4 // (branch delay slot) step a1 to the next input row pointer
|
||||||
|
|
||||||
|
RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||||
|
|
||||||
|
j ra
|
||||||
|
nop
|
||||||
|
END(jsimd_\colorid\()_gray_convert_mips_dspr2)
|
||||||
|
|
||||||
|
// Drop the helper so the next instantiation can define it again.
.purgem DO_RGB_TO_GRAY
|
||||||
|
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*
 * Emit one converter per input pixel layout. Columns, in macro-parameter
 * order: colorid, pixel_size (bytes per pixel), then the byte offsets of the
 * R, G and B components inside a pixel.
 */
/*------------------------------------------id -- pix R G B */
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
|
||||||
|
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
|
||||||
/*****************************************************************************/
|
/*****************************************************************************/
|
||||||
/*
|
/*
|
||||||
* jsimd_h2v2_fancy_upsample_mips_dspr2
|
* jsimd_h2v2_fancy_upsample_mips_dspr2
|
||||||
|
|||||||
Reference in New Issue
Block a user