SIMD support for performing upsampling using MIPS DSPr2 instructions
This commit is contained in:
@@ -602,6 +602,13 @@ EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2
|
||||
JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
||||
|
||||
/*
 * Prototypes for the MIPS DSPr2 assembly upsampling routines.
 * Both take the same argument list: the first argument is passed
 * cinfo->max_v_samp_factor and the second cinfo->output_width by the
 * jsimd_h2v2_upsample() / jsimd_h2v1_upsample() dispatchers; input_data is
 * the array of input rows and output_data_ptr points to the array of
 * output rows.
 */
EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
|
||||
JPP((int max_v_samp_factor, JDIMENSION output_width,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
||||
EXTERN(void) jsimd_h2v1_upsample_mips_dspr2
|
||||
JPP((int max_v_samp_factor, JDIMENSION output_width,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
||||
|
||||
/* SIMD Sample Conversion */
|
||||
EXTERN(void) jsimd_convsamp_mmx JPP((JSAMPARRAY sample_data,
|
||||
JDIMENSION start_col,
|
||||
|
||||
@@ -265,12 +265,32 @@ jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_upsample (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_upsample (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -280,6 +300,9 @@ jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
jsimd_h2v2_upsample_mips_dspr2(cinfo->max_v_samp_factor,
|
||||
cinfo->output_width, input_data, output_data_ptr);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
@@ -288,6 +311,9 @@ jsimd_h2v1_upsample (j_decompress_ptr cinfo,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
jsimd_h2v1_upsample_mips_dspr2(cinfo->max_v_samp_factor,
|
||||
cinfo->output_width, input_data, output_data_ptr);
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
|
||||
@@ -694,3 +694,147 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2)
|
||||
nop
|
||||
END(jsimd_h2v2_downsample_mips_dspr2)
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
/*
 * Horizontal 2x upsampling by sample replication: every input byte is
 * written twice to the corresponding output row.
 *
 * a0     - cinfo->max_v_samp_factor (number of rows to process)
 * a1     - cinfo->output_width
 * a2     - input_data
 * a3     - output_data_ptr
 *
 * Each row is handled in two phases:
 *   1: main loop, 8 input bytes -> 16 output bytes per iteration, covering
 *      (output_width & ~0xf) output bytes;
 *   2: tail loop, 1 input byte -> 2 output bytes per iteration, covering
 *      the remaining (output_width & 0xf) output bytes.
 *
 * The instruction following each branch/jump executes in its delay slot.
 */
    lw      t7, 0(a3)       // t7 = output_data
    andi    t8, a1, 0xf     // t8 = residual
    sll     t0, a0, 2       // t0 = rows * 4 (bytes of row pointers)
    beqz    a0, 4f
     addu   t9, t7, t0      // t9 = output_data end address
0:
    lw      t5, 0(t7)       // t5 = outptr
    lw      t6, 0(a2)       // t6 = inptr
    addu    t3, t5, a1      // t3 = outptr + output_width (end address)
    subu    t3, t8          // t3 = end address - residual
    // Skip the main loop when there is no full 16-byte group.  The test
    // must compare against outptr: t3 is an address and is never zero, and
    // entering the do-while loop with t5 == t3 would never terminate.
    beq     t5, t3, 2f
     move   t4, t8          // t4 = residual (tail loop counter)
1:
    ulw     t0, 0(t6)       // t0 = |P3|P2|P1|P0|
    ulw     t2, 4(t6)       // t2 = |P7|P6|P5|P4|
    srl     t1, t0, 16      // t1 = |X|X|P3|P2|
    ins     t0, t0, 16, 16  // t0 = |P1|P0|P1|P0|
    ins     t1, t1, 16, 16  // t1 = |P3|P2|P3|P2|
    ins     t0, t0, 8, 16   // t0 = |P1|P1|P0|P0|
    ins     t1, t1, 8, 16   // t1 = |P3|P3|P2|P2|
    usw     t0, 0(t5)
    usw     t1, 4(t5)
    srl     t0, t2, 16      // t0 = |X|X|P7|P6|
    ins     t2, t2, 16, 16  // t2 = |P5|P4|P5|P4|
    ins     t0, t0, 16, 16  // t0 = |P7|P6|P7|P6|
    ins     t2, t2, 8, 16   // t2 = |P5|P5|P4|P4|
    ins     t0, t0, 8, 16   // t0 = |P7|P7|P6|P6|
    usw     t2, 8(t5)
    usw     t0, 12(t5)
    addiu   t5, 16
    bne     t5, t3, 1b
     addiu  t6, 8
    beqz    t8, 3f          // no residual: row is done
     move   t4, t8
2:
    lbu     t1, 0(t6)       // replicate one input byte into two outputs
    sb      t1, 0(t5)
    sb      t1, 1(t5)
    addiu   t4, -2
    addiu   t6, 1
    bgtz    t4, 2b
     addiu  t5, 2
3:
    addiu   t7, 4           // next output row pointer
    bne     t9, t7, 0b
     addiu  a2, 4           // next input row pointer
4:
    j       ra
     nop
END(jsimd_h2v1_upsample_mips_dspr2)
|
||||
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v2_upsample_mips_dspr2)
/*
 * 2x horizontal and 2x vertical upsampling by sample replication: each
 * input row is pixel-doubled into one output row, and that output row is
 * then copied into the following output row.
 *
 * a0     - cinfo->max_v_samp_factor (number of output rows; consumed two
 *          at a time)
 * a1     - cinfo->output_width
 * a2     - input_data
 * a3     - output_data_ptr
 *
 * Per input row:
 *   1: main doubling loop, 8 input bytes -> 16 output bytes per iteration;
 *   2: tail doubling loop for the (output_width & 0xf) residual bytes;
 *   4: main copy loop, 16 bytes per iteration from outptr[0] to outptr[1];
 *   5: byte-wise copy of the residual.
 *
 * The instruction following each branch/jump executes in its delay slot.
 */
    lw      t7, 0(a3)       // t7 = output_data
    beqz    a0, 7f
     andi   t9, a1, 0xf     // t9 = residual
0:
    lw      t6, 0(a2)       // t6 = inptr
    lw      t5, 0(t7)       // t5 = outptr
    addu    t8, t5, a1      // t8 = outptr end address
    subu    t8, t9          // t8 = end address - residual
    // Skip the main loop when there is no full 16-byte group.  t8 is an
    // address (never zero), so it has to be compared with outptr; entering
    // the do-while loop with t5 == t8 would never terminate.
    beq     t5, t8, 2f
     move   t4, t9          // t4 = residual (tail loop counter)
1:
    ulw     t0, 0(t6)       // t0 = |P3|P2|P1|P0|
    srl     t1, t0, 16      // t1 = |X|X|P3|P2|
    ins     t0, t0, 16, 16  // t0 = |P1|P0|P1|P0|
    ins     t0, t0, 8, 16   // t0 = |P1|P1|P0|P0|
    ins     t1, t1, 16, 16  // t1 = |P3|P2|P3|P2|
    ins     t1, t1, 8, 16   // t1 = |P3|P3|P2|P2|
    ulw     t2, 4(t6)       // t2 = |P7|P6|P5|P4|
    usw     t0, 0(t5)
    usw     t1, 4(t5)
    srl     t3, t2, 16      // t3 = |X|X|P7|P6|
    ins     t2, t2, 16, 16  // t2 = |P5|P4|P5|P4|
    ins     t2, t2, 8, 16   // t2 = |P5|P5|P4|P4|
    ins     t3, t3, 16, 16  // t3 = |P7|P6|P7|P6|
    ins     t3, t3, 8, 16   // t3 = |P7|P7|P6|P6|
    usw     t2, 8(t5)
    usw     t3, 12(t5)
    addiu   t5, 16
    bne     t5, t8, 1b
     addiu  t6, 8
    beqz    t9, 3f          // no residual: doubling of this row is done
     move   t4, t9
2:
    lbu     t0, 0(t6)       // replicate one input byte into two outputs
    sb      t0, 0(t5)
    sb      t0, 1(t5)
    addiu   t4, -2
    addiu   t6, 1
    bgtz    t4, 2b
     addiu  t5, 2
3:
    // Duplicate the freshly written row into the next output row.  The row
    // pointers are read with lw, as at label 0 from the same array.
    lw      t6, 0(t7)       // t6 = outptr (source row)
    lw      t5, 4(t7)       // t5 = outptr[1] (destination row)
    addu    t4, t6, a1      // t4 = new end address
    subu    t8, t4, t9      // t8 = end address - residual
    // Same guard as above: compare pointers, not an address against zero.
    beq     t6, t8, 5f
     nop
4:
    ulw     t0, 0(t6)
    ulw     t1, 4(t6)
    ulw     t2, 8(t6)
    usw     t0, 0(t5)
    ulw     t0, 12(t6)
    usw     t1, 4(t5)
    usw     t2, 8(t5)
    usw     t0, 12(t5)
    addiu   t6, 16
    bne     t6, t8, 4b
     addiu  t5, 16
    beqz    t9, 6f          // no residual: copy of this row is done
     nop
5:
    lbu     t0, 0(t6)
    sb      t0, 0(t5)
    addiu   t6, 1
    bne     t6, t4, 5b
     addiu  t5, 1
6:
    addiu   t7, 8           // advance two output row pointers
    addiu   a0, -2          // two output rows produced
    bgtz    a0, 0b
     addiu  a2, 4           // next input row pointer
7:
    j       ra
     nop
END(jsimd_h2v2_upsample_mips_dspr2)
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
Reference in New Issue
Block a user