SIMD-accelerated NULL convert routine for MIPS DSPr2

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1304 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2014-05-15 18:26:01 +00:00
parent a3c3bbc2f4
commit 1b3fd7eead
6 changed files with 201 additions and 23 deletions

View File

@@ -587,19 +587,24 @@ jinit_color_converter (j_compress_ptr cinfo)
if (rgb_red[cinfo->in_color_space] == 0 &&
rgb_green[cinfo->in_color_space] == 1 &&
rgb_blue[cinfo->in_color_space] == 2 &&
rgb_pixelsize[cinfo->in_color_space] == 3)
cconvert->pub.color_convert = null_convert;
else if (cinfo->in_color_space == JCS_RGB ||
cinfo->in_color_space == JCS_EXT_RGB ||
cinfo->in_color_space == JCS_EXT_RGBX ||
cinfo->in_color_space == JCS_EXT_BGR ||
cinfo->in_color_space == JCS_EXT_BGRX ||
cinfo->in_color_space == JCS_EXT_XBGR ||
cinfo->in_color_space == JCS_EXT_XRGB ||
cinfo->in_color_space == JCS_EXT_RGBA ||
cinfo->in_color_space == JCS_EXT_BGRA ||
cinfo->in_color_space == JCS_EXT_ABGR ||
cinfo->in_color_space == JCS_EXT_ARGB)
rgb_pixelsize[cinfo->in_color_space] == 3) {
#if defined(__mips__)
if (jsimd_c_can_null_convert())
cconvert->pub.color_convert = jsimd_c_null_convert;
else
#endif
cconvert->pub.color_convert = null_convert;
} else if (cinfo->in_color_space == JCS_RGB ||
cinfo->in_color_space == JCS_EXT_RGB ||
cinfo->in_color_space == JCS_EXT_RGBX ||
cinfo->in_color_space == JCS_EXT_BGR ||
cinfo->in_color_space == JCS_EXT_BGRX ||
cinfo->in_color_space == JCS_EXT_XBGR ||
cinfo->in_color_space == JCS_EXT_XRGB ||
cinfo->in_color_space == JCS_EXT_RGBA ||
cinfo->in_color_space == JCS_EXT_BGRA ||
cinfo->in_color_space == JCS_EXT_ABGR ||
cinfo->in_color_space == JCS_EXT_ARGB)
cconvert->pub.color_convert = rgb_rgb_convert;
else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -625,18 +630,28 @@ jinit_color_converter (j_compress_ptr cinfo)
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = rgb_ycc_convert;
}
} else if (cinfo->in_color_space == JCS_YCbCr)
cconvert->pub.color_convert = null_convert;
else
} else if (cinfo->in_color_space == JCS_YCbCr) {
#if defined(__mips__)
if (jsimd_c_can_null_convert())
cconvert->pub.color_convert = jsimd_c_null_convert;
else
#endif
cconvert->pub.color_convert = null_convert;
} else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
case JCS_CMYK:
if (cinfo->num_components != 4)
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
if (cinfo->in_color_space == JCS_CMYK)
cconvert->pub.color_convert = null_convert;
else
if (cinfo->in_color_space == JCS_CMYK) {
#if defined(__mips__)
if (jsimd_c_can_null_convert())
cconvert->pub.color_convert = jsimd_c_null_convert;
else
#endif
cconvert->pub.color_convert = null_convert;
} else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
@@ -646,9 +661,14 @@ jinit_color_converter (j_compress_ptr cinfo)
if (cinfo->in_color_space == JCS_CMYK) {
cconvert->pub.start_pass = rgb_ycc_start;
cconvert->pub.color_convert = cmyk_ycck_convert;
} else if (cinfo->in_color_space == JCS_YCCK)
cconvert->pub.color_convert = null_convert;
else
} else if (cinfo->in_color_space == JCS_YCCK) {
#if defined(__mips__)
if (jsimd_c_can_null_convert())
cconvert->pub.color_convert = jsimd_c_null_convert;
else
#endif
cconvert->pub.color_convert = null_convert;
} else
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
break;
@@ -656,7 +676,12 @@ jinit_color_converter (j_compress_ptr cinfo)
if (cinfo->jpeg_color_space != cinfo->in_color_space ||
cinfo->num_components != cinfo->input_components)
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
cconvert->pub.color_convert = null_convert;
#if defined(__mips__)
if (jsimd_c_can_null_convert())
cconvert->pub.color_convert = jsimd_c_null_convert;
else
#endif
cconvert->pub.color_convert = null_convert;
break;
}
}

View File

@@ -40,6 +40,7 @@
EXTERN(int) jsimd_can_rgb_ycc JPP((void));
EXTERN(int) jsimd_can_rgb_gray JPP((void));
EXTERN(int) jsimd_can_ycc_rgb JPP((void));
EXTERN(int) jsimd_c_can_null_convert JPP((void));
EXTERN(void) jsimd_rgb_ycc_convert
JPP((j_compress_ptr cinfo,
@@ -53,6 +54,10 @@ EXTERN(void) jsimd_ycc_rgb_convert
JPP((j_decompress_ptr cinfo,
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
EXTERN(void) jsimd_c_null_convert
JPP((j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows));
EXTERN(int) jsimd_can_h2v2_downsample JPP((void));
EXTERN(int) jsimd_can_h2v1_downsample JPP((void));

View File

@@ -36,6 +36,12 @@ jsimd_can_ycc_rgb (void)
return 0;
}
GLOBAL(int)
jsimd_c_can_null_convert (void)
{
return 0;
}
GLOBAL(void)
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -57,6 +63,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
{
}
GLOBAL(void)
jsimd_c_null_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows)
{
}
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{

View File

@@ -475,6 +475,11 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
JSAMPIMAGE input_buf, JDIMENSION input_row,
JSAMPARRAY output_buf, int num_rows));
EXTERN(void) jsimd_c_null_convert_mips_dspr2
JPP((JDIMENSION img_width, JSAMPARRAY input_buf,
JSAMPIMAGE output_buf, JDIMENSION output_row,
int num_rows, int num_components));
/* SIMD Downsample */
EXTERN(void) jsimd_h2v2_downsample_mmx
JPP((JDIMENSION image_width, int max_v_samp_factor,

View File

@@ -139,6 +139,22 @@ jsimd_can_ycc_rgb (void)
return 0;
}
GLOBAL(int)
jsimd_c_can_null_convert (void)
{
init_simd();
/* The code is optimised for these values only */
if (BITS_IN_JSAMPLE != 8)
return 0;
if (sizeof(JDIMENSION) != 4)
return 0;
if (simd_support & JSIMD_MIPS_DSPR2)
return 1;
return 0;
}
GLOBAL(void)
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
@@ -262,6 +278,16 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
input_row, output_buf, num_rows);
}
GLOBAL(void)
jsimd_c_null_convert (j_compress_ptr cinfo,
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
JDIMENSION output_row, int num_rows)
{
if (simd_support & JSIMD_MIPS_DSPR2)
jsimd_null_convert_compr_mips_dspr2 (cinfo->image_width, input_buf,
output_buf, output_row, num_rows, cinfo->num_components);
}
GLOBAL(int)
jsimd_can_h2v2_downsample (void)
{

View File

@@ -24,6 +24,110 @@
#include "jsimd_mips_dspr2_asm.h"
/*****************************************************************************/
LEAF_MIPS_DSPR2(jsimd_c_null_convert_mips_dspr2)
/*
* a0 - cinfo->image_width
* a1 - input_buf
* a2 - output_buf
* a3 - output_row
* 16(sp) - num_rows
* 20(sp) - cinfo->num_components
*
* Null conversion for compression
*/
SAVE_REGS_ON_STACK 8, s0, s1
lw t9, 24(sp) // t9 = num_rows
lw s0, 28(sp) // s0 = cinfo->num_components
andi t0, a0, 3 // t0 = cinfo->image_width & 3
beqz t0, 4f // no residual
nop
0:
addiu t9, t9, -1
bltz t9, 7f
li t1, 0
1:
sll t3, t1, 2
lwx t5, t3(a2) // t5 = outptr = output_buf[ci]
lw t2, 0(a1) // t2 = inptr = *input_buf
sll t4, a3, 2
lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row]
addu t2, t2, t1
addu s1, t5, a0
addu t6, t5, t0
2:
lbu t3, 0(t2)
addiu t5, t5, 1
sb t3, -1(t5)
bne t6, t5, 2b
addu t2, t2, s0
3:
lbu t3, 0(t2)
addu t4, t2, s0
addu t7, t4, s0
addu t8, t7, s0
addu t2, t8, s0
lbu t4, 0(t4)
lbu t7, 0(t7)
lbu t8, 0(t8)
addiu t5, t5, 4
sb t3, -4(t5)
sb t4, -3(t5)
sb t7, -2(t5)
bne s1, t5, 3b
sb t8, -1(t5)
addiu t1, t1, 1
bne t1, s0, 1b
nop
addiu a1, a1, 4
bgez t9, 0b
addiu a3, a3, 1
b 7f
nop
4:
addiu t9, t9, -1
bltz t9, 7f
li t1, 0
5:
sll t3, t1, 2
lwx t5, t3(a2) // t5 = outptr = output_buf[ci]
lw t2, 0(a1) // t2 = inptr = *input_buf
sll t4, a3, 2
lwx t5, t4(t5) // t5 = outptr = output_buf[ci][output_row]
addu t2, t2, t1
addu s1, t5, a0
addu t6, t5, t0
6:
lbu t3, 0(t2)
addu t4, t2, s0
addu t7, t4, s0
addu t8, t7, s0
addu t2, t8, s0
lbu t4, 0(t4)
lbu t7, 0(t7)
lbu t8, 0(t8)
addiu t5, t5, 4
sb t3, -4(t5)
sb t4, -3(t5)
sb t7, -2(t5)
bne s1, t5, 6b
sb t8, -1(t5)
addiu t1, t1, 1
bne t1, s0, 5b
nop
addiu a1, a1, 4
bgez t9, 4b
addiu a3, a3, 1
7:
RESTORE_REGS_FROM_STACK 8, s0, s1
j ra
nop
END(jsimd_c_null_convert_mips_dspr2)
/*****************************************************************************/
/*
* jsimd_extrgb_ycc_convert_mips_dspr2