SIMD-accelerated int upsample routine for MIPS DSPr2

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1315 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
DRC
2014-05-18 20:04:47 +00:00
parent c728cfd8f2
commit 5ef463056a
9 changed files with 169 additions and 1 deletions

View File

@@ -6,6 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2009-2012, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains input colorspace conversion routines.

View File

@@ -5,6 +5,7 @@
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2014, MIPS Technologies, Inc., California
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains downsampling routines.

View File

@@ -7,6 +7,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, D. R. Commander.
* Copyright (C) 2013, MIPS Technologies, Inc., California
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains the inverse-DCT management logic.

View File

@@ -6,6 +6,7 @@
* libjpeg-turbo Modifications:
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
* Copyright (C) 2010, D. R. Commander.
* Copyright (C) 2014, MIPS Technologies, Inc., California
* For conditions of distribution and use, see the accompanying README file.
*
* This file contains upsampling routines.
@@ -482,7 +483,12 @@ jinit_upsampler (j_decompress_ptr cinfo)
} else if ((h_out_group % h_in_group) == 0 &&
(v_out_group % v_in_group) == 0) {
/* Generic integral-factors upsampling method */
upsample->methods[ci] = int_upsample;
#if defined(__mips__)
if (jsimd_can_int_upsample())
upsample->methods[ci] = jsimd_int_upsample;
else
#endif
upsample->methods[ci] = int_upsample;
upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
} else

View File

@@ -47,6 +47,7 @@ EXTERN(void) jsimd_h2v1_downsample
EXTERN(int) jsimd_can_h2v2_upsample (void);
EXTERN(int) jsimd_can_h2v1_upsample (void);
/* Returns nonzero when jsimd_int_upsample() may be used in place of the
 * plain C integral-factor upsampler. */
EXTERN(int) jsimd_can_int_upsample (void);
EXTERN(void) jsimd_h2v2_upsample
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
@@ -54,6 +55,9 @@ EXTERN(void) jsimd_h2v2_upsample
EXTERN(void) jsimd_h2v1_upsample
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
/* SIMD counterpart of the generic integral-factors upsampling method.  It has
 * the same signature as the other per-component upsample methods so that
 * jinit_upsampler() can store it in upsample->methods[ci]; it is selected
 * only when jsimd_can_int_upsample() returns nonzero. */
EXTERN(void) jsimd_int_upsample
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);

View File

@@ -118,6 +118,18 @@ jsimd_can_h2v1_upsample (void)
return 0;
}
/* Reports whether a SIMD integral-factor upsampler may be used.  This
 * implementation always answers "no" (0), so callers keep the plain C
 * int_upsample method. */
GLOBAL(int)
jsimd_can_int_upsample (void)
{
return 0;
}
/* Placeholder so that the jsimd_int_upsample symbol exists; the body is
 * deliberately empty and writes no output samples.
 * NOTE(review): presumably never called, because jsimd_can_int_upsample()
 * in this file returns 0 -- confirm no caller bypasses that check. */
GLOBAL(void)
jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
{
}
GLOBAL(void)
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,

View File

@@ -339,6 +339,12 @@ EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
(int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr);
/* MIPS DSPr2 assembly routine for integral-factor upsampling.
 *   h_expand, v_expand - horizontal / vertical replication factors
 *   input_data         - table of input row pointers
 *   output_data_ptr    - address of the table of output row pointers
 *   output_width       - number of samples written to each output row
 *   max_v_samp_factor  - number of output rows produced per call
 * The last two arguments are read from the stack by the assembly code, so
 * the parameter order must not be changed without updating it. */
EXTERN(void) jsimd_int_upsample_mips_dspr2
(UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr, JDIMENSION output_width,
int max_v_samp_factor);
/* Fancy Upsampling */
EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
(int max_v_samp_factor, JDIMENSION downsampled_width,

View File

@@ -85,6 +85,25 @@ static const int mips_idct_ifast_coefs[4] = {
0xAC60AC60 // FIX(-2.613125930 / 4) = -21407 = 0xAC61
};
/* The following struct is borrowed from jdsample.c */
/* NOTE(review): jsimd_int_upsample() casts cinfo->upsample to this type, so
 * the declaration presumably has to stay field-for-field identical to the
 * one in jdsample.c -- confirm whenever either copy is changed. */

/* Signature shared by all per-component upsampling methods. */
typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
JSAMPARRAY input_data,
JSAMPARRAY * output_data_ptr);
typedef struct {
struct jpeg_upsampler pub;
JSAMPARRAY color_buf[MAX_COMPONENTS];
/* Upsampling routine chosen for each component */
upsample1_ptr methods[MAX_COMPONENTS];
int next_row_out;
JDIMENSION rows_to_go;
int rowgroup_height[MAX_COMPONENTS];
/* Per-component horizontal and vertical expansion factors; these are the
 * only fields read by the SIMD wrapper jsimd_int_upsample(). */
UINT8 h_expand[MAX_COMPONENTS];
UINT8 v_expand[MAX_COMPONENTS];
} my_upsampler;
typedef my_upsampler * my_upsample_ptr;
GLOBAL(int)
jsimd_can_rgb_ycc (void)
{
@@ -415,6 +434,23 @@ jsimd_can_h2v1_upsample (void)
return 0;
}
/*
 * Tell the caller whether the DSPr2 integral-factor upsampler may be used.
 * Returns 1 when it may, 0 otherwise.
 */
GLOBAL(int)
jsimd_can_int_upsample (void)
{
  init_simd();

  /* The assembly routine handles only 8-bit samples and a 4-byte
   * JDIMENSION; refuse any other build configuration. */
  if (BITS_IN_JSAMPLE != 8 || sizeof(JDIMENSION) != 4)
    return 0;

  return (simd_support & JSIMD_MIPS_DSPR2) ? 1 : 0;
}
GLOBAL(void)
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
jpeg_component_info * compptr,
@@ -439,6 +475,19 @@ jsimd_h2v1_upsample (j_decompress_ptr cinfo,
output_data_ptr);
}
/*
 * Upsample one component by integral factors using the DSPr2 routine.
 * The expansion factors for the component are taken from the upsampler
 * state attached to cinfo and handed to the assembly code together with
 * the output geometry.
 */
GLOBAL(void)
jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
                    JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
{
  my_upsample_ptr state = (my_upsample_ptr) cinfo->upsample;
  const int ci = compptr->component_index;
  const UINT8 h_factor = state->h_expand[ci];
  const UINT8 v_factor = state->v_expand[ci];

  jsimd_int_upsample_mips_dspr2(h_factor, v_factor, input_data,
                                output_data_ptr, cinfo->output_width,
                                cinfo->max_v_samp_factor);
}
GLOBAL(int)
jsimd_can_h2v2_fancy_upsample (void)
{

View File

@@ -1613,6 +1613,94 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
nop
END(jsimd_h2v2_smooth_downsample_mips_dspr2)
/*****************************************************************************/
LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2)
/*
 * Integral-factor upsampling.  For every input row, one output row is
 * generated by storing each input sample h_expand times in succession; that
 * row is then duplicated until v_expand output rows exist.  Processing
 * continues until max_v_samp_factor output rows have been produced.
 *
 * a0     - upsample->h_expand[compptr->component_index]
 * a1     - upsample->v_expand[compptr->component_index]
 * a2     - input_data
 * a3     - output_data_ptr (dead after the first load; reused below as the
 *          cursor into the output row table while duplicating rows)
 * 16(sp) - cinfo->output_width
 * 20(sp) - cinfo->max_v_samp_factor
 *
 * Both row tables are arrays of 4-byte pointers, so every row index is
 * scaled by 4 before it is added to a table base.  s0 is never modified:
 * output rows are always addressed as output_data + outrow * 4.
 */

    .set at

    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3

    lw      s0, 0(a3)       // s0 = output_data (table base, kept constant)
    lw      s1, 32(sp)      // s1 = cinfo->output_width
    lw      s2, 36(sp)      // s2 = cinfo->max_v_samp_factor
    li      t6, 0           // t6 = inrow * 4 (byte offset into input_data)
    beqz    s2, 10f
     li     s3, 0           // s3 = outrow (row index, not a byte offset)
0:
    sll     t7, s3, 2       // t7 = outrow * 4
    addu    t0, a2, t6      // t0 = &input_data[inrow]
    addu    t7, s0, t7      // t7 = &output_data[outrow]
    lw      t3, 0(t0)       // t3 = inptr
    lw      t8, 0(t7)       // t8 = outptr
    beqz    s1, 9f          // zero-width row: nothing to expand or duplicate
     addu   t5, t8, s1      // t5 = outend
1:
    lbu     t2, 0(t3)       // t2 = invalue = *inptr++
    addiu   t3, 1
    beqz    a0, 3f
     move   t0, a0          // t0 = h_expand
2:
    sb      t2, 0(t8)       // store the sample h_expand times
    addiu   t0, -1
    bgtz    t0, 2b
     addiu  t8, 1
3:
    bgt     t5, t8, 1b      // continue while outptr < outend
     nop
4:
    addiu   t9, a1, -1      // t9 = v_expand - 1 = rows still to duplicate
    blez    t9, 9f
     sll    t0, s3, 2       // (delay slot) t0 = outrow * 4
    addu    a3, s0, t0      // a3 = &output_data[outrow]
5:
    lw      t3, 0(a3)       // t3 = source: the most recently completed row
    lw      t4, 4(a3)       // t4 = destination: the row directly after it
    subu    t0, s1, 0xF
    blez    t0, 7f          // fewer than 16 bytes: byte loop only
     addu   t5, t3, s1      // t5 = end address
    andi    t7, s1, 0xF     // t7 = residual
    subu    t8, t5, t7      // t8 = end of the 16-byte-chunk region
6:
    ulw     t0, 0(t3)       // copy 16 bytes per pass; rows may be unaligned
    ulw     t1, 4(t3)
    ulw     t2, 8(t3)
    usw     t0, 0(t4)
    ulw     t0, 12(t3)
    usw     t1, 4(t4)
    usw     t2, 8(t4)
    usw     t0, 12(t4)
    addiu   t3, 16
    bne     t3, t8, 6b
     addiu  t4, 16
    beqz    t7, 8f
     nop
7:
    lbu     t0, 0(t3)       // copy the remaining bytes one at a time
    sb      t0, 0(t4)
    addiu   t3, 1
    bne     t3, t5, 7b
     addiu  t4, 1
8:
    addiu   t9, -1
    bgtz    t9, 5b
     addiu  a3, 4           // advance one table slot: next copy is row k -> k+1
9:
    addu    s3, s3, a1      // outrow += v_expand
    bne     s3, s2, 0b
     addiu  t6, 4           // inrow++ (scaled by the pointer size)
10:
    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3

    j       ra
     nop
END(jsimd_int_upsample_mips_dspr2)
/*****************************************************************************/
LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
/*