SIMD-accelerated int upsample routine for MIPS DSPr2
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2009-2012, D. R. Commander.
|
||||
* Copyright (C) 2014, MIPS Technologies, Inc., California
|
||||
* For conditions of distribution and use, see the accompanying README file.
|
||||
*
|
||||
* This file contains input colorspace conversion routines.
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2014, MIPS Technologies, Inc., California
|
||||
* For conditions of distribution and use, see the accompanying README file.
|
||||
*
|
||||
* This file contains downsampling routines.
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2010, D. R. Commander.
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California
|
||||
* For conditions of distribution and use, see the accompanying README file.
|
||||
*
|
||||
* This file contains the inverse-DCT management logic.
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* libjpeg-turbo Modifications:
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright (C) 2010, D. R. Commander.
|
||||
* Copyright (C) 2014, MIPS Technologies, Inc., California
|
||||
* For conditions of distribution and use, see the accompanying README file.
|
||||
*
|
||||
* This file contains upsampling routines.
|
||||
@@ -482,7 +483,12 @@ jinit_upsampler (j_decompress_ptr cinfo)
|
||||
} else if ((h_out_group % h_in_group) == 0 &&
|
||||
(v_out_group % v_in_group) == 0) {
|
||||
/* Generic integral-factors upsampling method */
|
||||
upsample->methods[ci] = int_upsample;
|
||||
#if defined(__mips__)
|
||||
if (jsimd_can_int_upsample())
|
||||
upsample->methods[ci] = jsimd_int_upsample;
|
||||
else
|
||||
#endif
|
||||
upsample->methods[ci] = int_upsample;
|
||||
upsample->h_expand[ci] = (UINT8) (h_out_group / h_in_group);
|
||||
upsample->v_expand[ci] = (UINT8) (v_out_group / v_in_group);
|
||||
} else
|
||||
|
||||
4
jsimd.h
4
jsimd.h
@@ -47,6 +47,7 @@ EXTERN(void) jsimd_h2v1_downsample
|
||||
|
||||
EXTERN(int) jsimd_can_h2v2_upsample (void);
|
||||
EXTERN(int) jsimd_can_h2v1_upsample (void);
|
||||
/* Returns nonzero when a SIMD implementation of the generic integral-factor
 * upsampler (jsimd_int_upsample) may be used, zero otherwise. */
EXTERN(int) jsimd_can_int_upsample (void);
|
||||
|
||||
EXTERN(void) jsimd_h2v2_upsample
|
||||
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
@@ -54,6 +55,9 @@ EXTERN(void) jsimd_h2v2_upsample
|
||||
EXTERN(void) jsimd_h2v1_upsample
|
||||
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
||||
/* SIMD integral-factor upsampler.  It takes the same four arguments as
 * jsimd_h2v1_upsample/jsimd_h2v2_upsample; jinit_upsampler() installs it as a
 * component's upsample method only after jsimd_can_int_upsample() has
 * returned nonzero. */
EXTERN(void) jsimd_int_upsample
|
||||
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
||||
|
||||
EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
|
||||
EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
|
||||
|
||||
12
jsimd_none.c
12
jsimd_none.c
@@ -118,6 +118,18 @@ jsimd_can_h2v1_upsample (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_int_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
|
||||
@@ -339,6 +339,12 @@ EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
|
||||
(int max_v_samp_factor, JDIMENSION output_width, JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr);
|
||||
|
||||
/* MIPS DSPr2 assembly implementation of integral-factor upsampling.
 * The parameter order must stay in step with the assembly routine, which
 * reads h_expand, v_expand, input_data and output_data_ptr from a0-a3 and
 * output_width and max_v_samp_factor from the caller's stack area. */
EXTERN(void) jsimd_int_upsample_mips_dspr2
|
||||
(UINT8 h_expand, UINT8 v_expand, JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr, JDIMENSION output_width,
|
||||
int max_v_samp_factor);
|
||||
|
||||
|
||||
/* Fancy Upsampling */
|
||||
EXTERN(void) jsimd_h2v1_fancy_upsample_mmx
|
||||
(int max_v_samp_factor, JDIMENSION downsampled_width,
|
||||
|
||||
@@ -85,6 +85,25 @@ static const int mips_idct_ifast_coefs[4] = {
|
||||
0xAC60AC60 // FIX(-2.613125930 / 4) = -21407 = 0xAC61
|
||||
};
|
||||
|
||||
/*
 * The following declarations are borrowed from jdsample.c so that
 * jsimd_int_upsample() can read the per-component expansion factors out of
 * the upsampler's private state.
 *
 * NOTE(review): this is a hand-made copy of a private type.  The member order
 * and types presumably have to match the definition in jdsample.c exactly,
 * because cinfo->upsample is cast to my_upsample_ptr; confirm whenever
 * jdsample.c changes.
 */
|
||||
/* Signature shared by the per-component upsampling methods. */
typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr);
|
||||
|
||||
typedef struct {
|
||||
/* NOTE(review): presumably the public upsampler fields that cinfo->upsample
 * points at; keep as the first member -- confirm against jdsample.c. */
struct jpeg_upsampler pub;
|
||||
JSAMPARRAY color_buf[MAX_COMPONENTS];
|
||||
/* Per-component upsample routine, assigned in jinit_upsampler(). */
upsample1_ptr methods[MAX_COMPONENTS];
|
||||
int next_row_out;
|
||||
JDIMENSION rows_to_go;
|
||||
int rowgroup_height[MAX_COMPONENTS];
|
||||
/* Integral expansion factors for int_upsample: jinit_upsampler() stores
 * h_out_group / h_in_group and v_out_group / v_in_group here, and
 * jsimd_int_upsample() passes them on to the DSPr2 routine. */
UINT8 h_expand[MAX_COMPONENTS];
|
||||
UINT8 v_expand[MAX_COMPONENTS];
|
||||
} my_upsampler;
|
||||
|
||||
typedef my_upsampler * my_upsample_ptr;
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_rgb_ycc (void)
|
||||
{
|
||||
@@ -415,6 +434,23 @@ jsimd_can_h2v1_upsample (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_int_upsample (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
@@ -439,6 +475,19 @@ jsimd_h2v1_upsample (j_decompress_ptr cinfo,
|
||||
output_data_ptr);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_int_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
|
||||
|
||||
jsimd_int_upsample_mips_dspr2(upsample->h_expand[compptr->component_index],
|
||||
upsample->v_expand[compptr->component_index],
|
||||
input_data, output_data_ptr,
|
||||
cinfo->output_width,
|
||||
cinfo->max_v_samp_factor);
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_fancy_upsample (void)
|
||||
{
|
||||
|
||||
@@ -1613,6 +1613,94 @@ LEAF_MIPS_DSPR2(jsimd_h2v2_smooth_downsample_mips_dspr2)
|
||||
nop
|
||||
|
||||
END(jsimd_h2v2_smooth_downsample_mips_dspr2)
|
||||
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_int_upsample_mips_dspr2)
/*
 * Integral-factor upsampling.  For every input row, each sample is written
 * h_expand times in a row of output; that output row is then duplicated into
 * the following (v_expand - 1) output rows.  Processing continues until
 * max_v_samp_factor output rows have been produced.
 *
 * a0     - upsample->h_expand[compptr->component_index]
 * a1     - upsample->v_expand[compptr->component_index]
 * a2     - input_data
 * a3     - output_data_ptr
 * 16(sp) - cinfo->output_width
 * 20(sp) - cinfo->max_v_samp_factor
 *
 * The row indices (inrow/outrow) and the loop bound are kept as byte offsets
 * into the row-pointer arrays, i.e. scaled by the 4-byte pointer size that
 * the lw accesses below already assume, so that every row-pointer load is
 * word aligned and addresses the intended element.  s0 always holds the
 * unmodified output_data base; a3 is reused as the cursor that walks the
 * output row pointers while rows are being duplicated.
 */
    .set at

    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3

    lw      s0, 0(a3)       // s0 = output_data (base, never modified)
    lw      s1, 32(sp)      // s1 = cinfo->output_width
    lw      s2, 36(sp)      // s2 = cinfo->max_v_samp_factor
    li      t6, 0           // t6 = inrow  (byte offset into input_data)
    li      s3, 0           // s3 = outrow (byte offset into output_data)
    blez    s2, 10f         // no output rows requested
     sll    v1, a1, 2       // v1 = v_expand * 4 = outrow step in bytes
    sll     s2, s2, 2       // s2 = max_v_samp_factor * 4 = end offset
0:
    addu    t0, a2, t6      // t0 = &input_data[inrow]
    addu    a3, s0, s3      // a3 = &output_data[outrow]
    lw      t3, 0(t0)       // t3 = inptr
    lw      t8, 0(a3)       // t8 = outptr
    beqz    s1, 9f          // zero width: nothing to expand or duplicate
     addu   t5, t8, s1      // t5 = outend
1:
    lbu     t2, 0(t3)       // t2 = invalue = *inptr++ (samples are unsigned)
    addiu   t3, 1
    beqz    a0, 3f
     move   t0, a0          // t0 = h_expand
2:
    sb      t2, 0(t8)       // *outptr++ = invalue, h_expand times
    addiu   t0, -1
    bgtz    t0, 2b
     addiu  t8, 1
3:
    bgt     t5, t8, 1b      // while (outptr < outend)
     nop
4:
    addiu   t9, a1, -1      // t9 = v_expand - 1 = rows still to duplicate
    blez    t9, 9f
     nop
5:
    lw      t3, 0(a3)       // t3 = source row (already filled)
    lw      t4, 4(a3)       // t4 = destination row (the one after it)
    subu    t0, s1, 0xF
    blez    t0, 7f          // fewer than 16 bytes: byte loop only
     addu   t5, t3, s1      // t5 = end address of source row
    andi    t7, s1, 0xF     // t7 = residual bytes after 16-byte chunks
    subu    t8, t5, t7      // t8 = end of the 16-byte-chunk region
6:
    ulw     t0, 0(t3)
    ulw     t1, 4(t3)
    ulw     t2, 8(t3)
    usw     t0, 0(t4)
    ulw     t0, 12(t3)
    usw     t1, 4(t4)
    usw     t2, 8(t4)
    usw     t0, 12(t4)
    addiu   t3, 16
    bne     t3, t8, 6b
     addiu  t4, 16
    beqz    t7, 8f          // width was a multiple of 16
     nop
7:
    lbu     t0, 0(t3)       // copy remaining bytes one at a time
    sb      t0, 0(t4)
    addiu   t3, 1
    bne     t3, t5, 7b
     addiu  t4, 1
8:
    addiu   t9, -1
    bgtz    t9, 5b
     addiu  a3, 4           // step the cursor by one row pointer
9:
    addu    s3, s3, v1      // outrow += v_expand
    blt     s3, s2, 0b      // while (outrow < max_v_samp_factor)
     addiu  t6, 4           // inrow++
10:
    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3

    j       ra
     nop
END(jsimd_int_upsample_mips_dspr2)
|
||||
|
||||
/*****************************************************************************/
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v1_upsample_mips_dspr2)
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user