SIMD-accelerated h2v2 smooth downsampling routine for MIPS DSPr2
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1301 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -504,7 +504,10 @@ jinit_downsampler (j_compress_ptr cinfo)
|
||||
compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
|
||||
#ifdef INPUT_SMOOTHING_SUPPORTED
|
||||
if (cinfo->smoothing_factor) {
|
||||
downsample->methods[ci] = h2v2_smooth_downsample;
|
||||
if (jsimd_can_h2v2_smooth_downsample())
|
||||
downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
|
||||
else
|
||||
downsample->methods[ci] = h2v2_smooth_downsample;
|
||||
downsample->pub.need_context_rows = TRUE;
|
||||
} else {
|
||||
#endif
|
||||
|
||||
7
jsimd.h
7
jsimd.h
@@ -60,6 +60,13 @@ EXTERN(int) jsimd_can_h2v1_downsample JPP((void));
|
||||
EXTERN(void) jsimd_h2v2_downsample
|
||||
JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data));
|
||||
|
||||
EXTERN(int) jsimd_can_h2v2_smooth_downsample JPP((void));
|
||||
|
||||
EXTERN(void) jsimd_h2v2_smooth_downsample
|
||||
JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data));
|
||||
|
||||
EXTERN(void) jsimd_h2v1_downsample
|
||||
JPP((j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data));
|
||||
|
||||
12
jsimd_none.c
12
jsimd_none.c
@@ -69,12 +69,24 @@ jsimd_can_h2v1_downsample (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_smooth_downsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
|
||||
@@ -498,6 +498,11 @@ EXTERN(void) jsimd_h2v2_downsample_mips_dspr2
|
||||
JPP((JDIMENSION image_width, int max_v_samp_factor,
|
||||
JDIMENSION v_samp_factor, JDIMENSION width_blocks,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data));
|
||||
EXTERN(void) jsimd_h2v2_smooth_downsample_mips_dspr2
|
||||
JPP((JSAMPARRAY input_data, JSAMPARRAY output_data,
|
||||
JDIMENSION v_samp_factor, int max_v_samp_factor,
|
||||
int smoothing_factor, JDIMENSION width_blocks,
|
||||
JDIMENSION image_width));
|
||||
EXTERN(void) jsimd_h2v1_downsample_mips_dspr2
|
||||
JPP((JDIMENSION image_width, int max_v_samp_factor,
|
||||
JDIMENSION v_samp_factor, JDIMENSION width_blocks,
|
||||
|
||||
@@ -278,6 +278,24 @@ jsimd_can_h2v2_downsample (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_smooth_downsample (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
if(DCTSIZE != 8)
|
||||
return 0;
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_downsample (void)
|
||||
{
|
||||
@@ -304,6 +322,16 @@ jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
compptr->width_in_blocks, input_data, output_data);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
{
|
||||
jsimd_h2v2_smooth_downsample_mips_dspr2(input_data, output_data,
|
||||
compptr->v_samp_factor, cinfo->max_v_samp_factor,
|
||||
cinfo->smoothing_factor, compptr->width_in_blocks,
|
||||
cinfo->image_width);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
|
||||
@@ -1210,6 +1210,306 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_downsample_mips_dspr2)
|
||||
nop
|
||||
END(jsimd_h2v2_downsample_mips_dspr2)
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
/*
 * a0     - input_data
 * a1     - output_data
 * a2     - compptr->v_samp_factor
 * a3     - cinfo->max_v_samp_factor
 * 16(sp) - cinfo->smoothing_factor
 * 20(sp) - compptr->width_in_blocks
 * 24(sp) - cinfo->image_width
 *
 * Overview:
 *   1. If output_cols * 2 > image_width, every input row from
 *      input_data[-1] through input_data[max_v_samp_factor] is padded on the
 *      right by replicating its last pixel.
 *   2. For each output row, every output pixel is computed as
 *        (membersum * t6 + neighsum * t7), rounded and shifted right by 16
 *      where t6 = 16384 - smoothing_factor * 80 and
 *            t7 = smoothing_factor * 16.
 *      membersum is the sum of the 2x2 input pixels covered by the output
 *      pixel; neighsum is the weighted sum of the surrounding pixels
 *      (edge neighbours counted twice, corner neighbours once).
 *
 * Register usage inside the row loop:
 *   t4 = outrow, t5 = inrow, t8 = outptr,
 *   s2 = inptr0, t9 = inptr1, s0 = above_ptr, s1 = below_ptr,
 *   s7 = output_cols, s5 = end address of the current column loop.
 */

    .set at

    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    /* Stack arguments are read at their documented offset + 32. */
    lw              s7, 52(sp)          // compptr->width_in_blocks
    lw              s0, 56(sp)          // cinfo->image_width
    lw              s6, 48(sp)          // cinfo->smoothing_factor
    sll             s7, 3               // output_cols = width_in_blocks * DCTSIZE
    sll             v0, s7, 1
    subu            v0, v0, s0          // v0 = output_cols * 2 - image_width
    blez            v0, 2f              // nothing to pad
     move           v1, zero            // (delay slot) row counter = 0
    addiu           t0, a3, 2           // t0 = cinfo->max_v_samp_factor + 2
0:
    /* Pad row input_data[v1 - 1] on the right with its last pixel */
    addiu           t1, a0, -4
    sll             t2, v1, 2
    lwx             t1, t2(t1)          // t1 = input_data[v1 - 1]
    move            t3, v0              // t3 = number of columns to fill
    addu            t1, t1, s0          // t1 = row + image_width
    lbu             t2, -1(t1)          // t2 = last valid pixel of the row
1:
    addiu           t3, t3, -1
    sb              t2, 0(t1)
    bgtz            t3, 1b
     addiu          t1, t1, 1
    addiu           v1, v1, 1
    bne             v1, t0, 0b
     nop
2:
    li              v0, 80
    mul             v0, s6, v0
    li              v1, 16384
    move            t4, zero            // outrow = 0
    move            t5, zero            // inrow = 0
    subu            t6, v1, v0          // t6 = 16384 - smoothing_factor * 80
    sll             t7, s6, 4           // t7 = smoothing_factor * 16
3:
    /* Special case for first column: pretend column -1 is same as column 0 */
    sll             v0, t4, 2
    lwx             t8, v0(a1)          // outptr = output_data[outrow]
    sll             v1, t5, 2
    addiu           t9, v1, 4
    addiu           s0, v1, -4
    addiu           s1, v1, 8
    lwx             s2, v1(a0)          // inptr0 = input_data[inrow]
    lwx             t9, t9(a0)          // inptr1 = input_data[inrow+1]
    lwx             s0, s0(a0)          // above_ptr = input_data[inrow-1]
    lwx             s1, s1(a0)          // below_ptr = input_data[inrow+2]
    lh              v0, 0(s2)
    lh              v1, 0(t9)
    lh              t0, 0(s0)
    lh              t1, 0(s1)
    ins             v0, v1, 16, 16      // v0 = two pixels of inptr0 + two of inptr1
    ins             t0, t1, 16, 16      // t0 = two pixels of above + two of below
    raddu.w.qb      t2, v0              // membersum
    raddu.w.qb      s3, t0
    lbu             v0, 0(s2)
    lbu             v1, 2(s2)
    lbu             t0, 0(t9)
    lbu             t1, 2(t9)
    addu            v0, v0, v1
    mult            $ac1, t2, t6
    addu            t0, t0, t1
    lbu             t2, 2(s0)
    addu            t0, t0, v0
    lbu             t3, 2(s1)
    addu            s3, t0, s3
    lbu             v0, 0(s0)
    lbu             t0, 0(s1)
    sll             s3, s3, 1           // edge neighbours count twice
    addu            v0, v0, t2
    addu            t0, t0, t3
    addu            t0, t0, v0
    addu            s3, t0, s3          // neighsum
    madd            $ac1, s3, t7
    extr_r.w        v0, $ac1, 16
    addiu           t8, t8, 1
    addiu           s2, s2, 2
    addiu           t9, t9, 2
    addiu           s0, s0, 2
    addiu           s1, s1, 2
    sb              v0, -1(t8)
    addiu           s4, s7, -2
    and             s4, s4, 3           // s4 = (output_cols - 2) & 3
    addu            s5, s4, t8          // end address
4:
    /* One output pixel per iteration until the remaining count is a multiple of 4 */
    lh              v0, 0(s2)
    lh              v1, 0(t9)
    lh              t0, 0(s0)
    lh              t1, 0(s1)
    ins             v0, v1, 16, 16
    ins             t0, t1, 16, 16
    raddu.w.qb      t2, v0
    raddu.w.qb      s3, t0
    lbu             v0, -1(s2)
    lbu             v1, 2(s2)
    lbu             t0, -1(t9)
    lbu             t1, 2(t9)
    addu            v0, v0, v1
    mult            $ac1, t2, t6
    addu            t0, t0, t1
    lbu             t2, 2(s0)
    addu            t0, t0, v0
    lbu             t3, 2(s1)
    addu            s3, t0, s3
    lbu             v0, -1(s0)
    lbu             t0, -1(s1)
    sll             s3, s3, 1
    addu            v0, v0, t2
    addu            t0, t0, t3
    addu            t0, t0, v0
    addu            s3, t0, s3
    madd            $ac1, s3, t7
    extr_r.w        t2, $ac1, 16
    addiu           t8, t8, 1
    addiu           s2, s2, 2
    addiu           t9, t9, 2
    addiu           s0, s0, 2
    sb              t2, -1(t8)
    bne             s5, t8, 4b
     addiu          s1, s1, 2
    addiu           s5, s7, -2
    subu            s5, s5, s4
    addu            s5, s5, t8          // end address
5:
    /* Main loop: four output pixels per iteration */
    lh              v0, 0(s2)
    lh              v1, 0(t9)
    lh              t0, 0(s0)
    lh              t1, 0(s1)
    ins             v0, v1, 16, 16
    ins             t0, t1, 16, 16
    raddu.w.qb      t2, v0
    raddu.w.qb      s3, t0
    lbu             v0, -1(s2)
    lbu             v1, 2(s2)
    lbu             t0, -1(t9)
    lbu             t1, 2(t9)
    addu            v0, v0, v1
    mult            $ac1, t2, t6
    addu            t0, t0, t1
    lbu             t2, 2(s0)
    addu            t0, t0, v0
    lbu             t3, 2(s1)
    addu            s3, t0, s3
    lbu             v0, -1(s0)
    lbu             t0, -1(s1)
    sll             s3, s3, 1
    addu            v0, v0, t2
    addu            t0, t0, t3
    lh              v1, 2(t9)
    addu            t0, t0, v0
    lh              v0, 2(s2)
    addu            s3, t0, s3
    lh              t0, 2(s0)
    lh              t1, 2(s1)
    madd            $ac1, s3, t7
    extr_r.w        t2, $ac1, 16
    ins             t0, t1, 16, 16
    ins             v0, v1, 16, 16
    raddu.w.qb      s3, t0
    lbu             v1, 4(s2)
    lbu             t0, 1(t9)
    lbu             t1, 4(t9)
    sb              t2, 0(t8)
    raddu.w.qb      t3, v0
    lbu             v0, 1(s2)
    addu            t0, t0, t1
    mult            $ac1, t3, t6
    addu            v0, v0, v1
    lbu             t2, 4(s0)
    addu            t0, t0, v0
    lbu             v0, 1(s0)
    addu            s3, t0, s3
    lbu             t0, 1(s1)
    lbu             t3, 4(s1)
    addu            v0, v0, t2
    sll             s3, s3, 1
    addu            t0, t0, t3
    lh              v1, 4(t9)
    addu            t0, t0, v0
    lh              v0, 4(s2)
    addu            s3, t0, s3
    lh              t0, 4(s0)
    lh              t1, 4(s1)
    madd            $ac1, s3, t7
    extr_r.w        t2, $ac1, 16
    ins             t0, t1, 16, 16
    ins             v0, v1, 16, 16
    raddu.w.qb      s3, t0
    lbu             v1, 6(s2)
    lbu             t0, 3(t9)
    lbu             t1, 6(t9)
    sb              t2, 1(t8)
    raddu.w.qb      t3, v0
    lbu             v0, 3(s2)
    addu            t0, t0, t1
    mult            $ac1, t3, t6
    addu            v0, v0, v1
    lbu             t2, 6(s0)
    addu            t0, t0, v0
    lbu             v0, 3(s0)
    addu            s3, t0, s3
    lbu             t0, 3(s1)
    lbu             t3, 6(s1)
    addu            v0, v0, t2
    sll             s3, s3, 1
    addu            t0, t0, t3
    lh              v1, 6(t9)
    addu            t0, t0, v0
    lh              v0, 6(s2)
    addu            s3, t0, s3
    lh              t0, 6(s0)
    lh              t1, 6(s1)
    madd            $ac1, s3, t7
    extr_r.w        t3, $ac1, 16
    ins             t0, t1, 16, 16
    ins             v0, v1, 16, 16
    raddu.w.qb      s3, t0
    lbu             v1, 8(s2)
    lbu             t0, 5(t9)
    lbu             t1, 8(t9)
    sb              t3, 2(t8)
    raddu.w.qb      t2, v0
    lbu             v0, 5(s2)
    addu            t0, t0, t1
    mult            $ac1, t2, t6
    addu            v0, v0, v1
    lbu             t2, 8(s0)
    addu            t0, t0, v0
    lbu             v0, 5(s0)
    addu            s3, t0, s3
    lbu             t0, 5(s1)
    lbu             t3, 8(s1)
    addu            v0, v0, t2
    sll             s3, s3, 1
    addu            t0, t0, t3
    addiu           t8, t8, 4
    addu            t0, t0, v0
    addiu           s2, s2, 8
    addu            s3, t0, s3
    addiu           t9, t9, 8
    madd            $ac1, s3, t7
    extr_r.w        t1, $ac1, 16
    addiu           s0, s0, 8
    addiu           s1, s1, 8
    bne             s5, t8, 5b
     sb             t1, -1(t8)
    /* Special case for last column */
    lh              v0, 0(s2)
    lh              v1, 0(t9)
    lh              t0, 0(s0)
    lh              t1, 0(s1)
    ins             v0, v1, 16, 16
    ins             t0, t1, 16, 16
    raddu.w.qb      t2, v0
    raddu.w.qb      s3, t0
    lbu             v0, -1(s2)
    lbu             v1, 1(s2)
    lbu             t0, -1(t9)
    lbu             t1, 1(t9)
    addu            v0, v0, v1
    mult            $ac1, t2, t6
    addu            t0, t0, t1
    lbu             t2, 1(s0)
    addu            t0, t0, v0
    lbu             t3, 1(s1)
    addu            s3, t0, s3
    lbu             v0, -1(s0)
    lbu             t0, -1(s1)
    sll             s3, s3, 1
    addu            v0, v0, t2
    addu            t0, t0, t3
    addu            t0, t0, v0
    addu            s3, t0, s3
    madd            $ac1, s3, t7
    extr_r.w        t0, $ac1, 16
    addiu           t4, t4, 1           // outrow++
    sb              t0, 0(t8)
    bne             t4, a2, 3b          // until outrow == v_samp_factor
     /*
      * inrow advances by exactly two input rows per output row.  The row
      * loads at label 3 use inrow-1 .. inrow+2, so a larger step would skip
      * rows whenever v_samp_factor > 1.
      */
     addiu          t5, t5, 2           // (delay slot) inrow += 2

    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    j               ra
     nop

END(jsimd_h2v2_smooth_downsample_mips_dspr2)
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
|
||||
/*
|
||||
* a0 - cinfo->max_v_samp_factor
|
||||
|
||||
Reference in New Issue
Block a user