SIMD-accelerated merged upsampling routines for MIPS DSPr2
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1297 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
57
simd/jsimd.h
57
simd/jsimd.h
@@ -631,6 +631,63 @@ EXTERN(void) jsimd_h2v2_fancy_upsample_mips_dspr2
|
||||
JPP((int max_v_samp_factor, JDIMENSION downsampled_width,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
||||
|
||||
/* NOTE(review): the assembly generators visible in this change only emit
 * jsimd_h2v2_<colorid>_merged_upsample_mips_dspr2 symbols, and the C
 * dispatcher falls back to the extrgb variant by default.  No definition of
 * this un-suffixed name is visible here - confirm that one exists. */
EXTERN(void) jsimd_h2v2_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extrgb_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extbgr_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
/* NOTE(review): the assembly generators visible in this change only emit
 * jsimd_h2v1_<colorid>_merged_upsample_mips_dspr2 symbols, and the C
 * dispatcher falls back to the extrgb variant by default.  No definition of
 * this un-suffixed name is visible here - confirm that one exists. */
EXTERN(void) jsimd_h2v1_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extrgb_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extbgr_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
EXTERN(void) jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2
|
||||
JPP((JDIMENSION output_width, JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf,
|
||||
JSAMPLE* range));
|
||||
|
||||
EXTERN(void) jsimd_h2v2_upsample_mips_dspr2
|
||||
JPP((int max_v_samp_factor, JDIMENSION output_width,
|
||||
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr));
|
||||
|
||||
@@ -425,12 +425,28 @@ jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_merged_upsample (void)
|
||||
{
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_merged_upsample (void)
|
||||
{
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -440,6 +456,39 @@ jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
|
||||
JDIMENSION in_row_group_ctr,
|
||||
JSAMPARRAY output_buf)
|
||||
{
|
||||
void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *);
|
||||
|
||||
switch(cinfo->out_color_space)
|
||||
{
|
||||
case JCS_EXT_RGB:
|
||||
mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_RGBX:
|
||||
case JCS_EXT_RGBA:
|
||||
mipsdspr2fct=jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGR:
|
||||
mipsdspr2fct=jsimd_h2v2_extbgr_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGRX:
|
||||
case JCS_EXT_BGRA:
|
||||
mipsdspr2fct=jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XBGR:
|
||||
case JCS_EXT_ABGR:
|
||||
mipsdspr2fct=jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XRGB:
|
||||
case JCS_EXT_ARGB:
|
||||
mipsdspr2fct=jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
default:
|
||||
mipsdspr2fct=jsimd_h2v2_extrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
}
|
||||
|
||||
mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr,
|
||||
output_buf, cinfo->sample_range_limit);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
@@ -448,6 +497,39 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
|
||||
JDIMENSION in_row_group_ctr,
|
||||
JSAMPARRAY output_buf)
|
||||
{
|
||||
void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, JSAMPLE *);
|
||||
|
||||
switch(cinfo->out_color_space)
|
||||
{
|
||||
case JCS_EXT_RGB:
|
||||
mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_RGBX:
|
||||
case JCS_EXT_RGBA:
|
||||
mipsdspr2fct=jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGR:
|
||||
mipsdspr2fct=jsimd_h2v1_extbgr_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGRX:
|
||||
case JCS_EXT_BGRA:
|
||||
mipsdspr2fct=jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XBGR:
|
||||
case JCS_EXT_ABGR:
|
||||
mipsdspr2fct=jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XRGB:
|
||||
case JCS_EXT_ARGB:
|
||||
mipsdspr2fct=jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
default:
|
||||
mipsdspr2fct=jsimd_h2v1_extrgb_merged_upsample_mips_dspr2;
|
||||
break;
|
||||
}
|
||||
|
||||
mipsdspr2fct(cinfo->output_width, input_buf, in_row_group_ctr,
|
||||
output_buf, cinfo->sample_range_limit);
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* MIPS DSPr2 optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California.
|
||||
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California.
|
||||
* All rights reserved.
|
||||
* Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
|
||||
* Darko Laus (darko.laus@imgtec.com)
|
||||
@@ -376,6 +376,393 @@ GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
|
||||
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
|
||||
GENERATE_JSIMD_RGB_GRAY_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* jsimd_h2v2_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extrgb_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extrgbx_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extbgr_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extbgrx_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extxbgr_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v2_extxrgb_merged_upsample_mips_dspr2
|
||||
*
|
||||
* Merged h2v2 upsample routines
|
||||
*/
|
||||
.macro GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \
|
||||
pixel_size, \
|
||||
r1_offs, \
|
||||
g1_offs, \
|
||||
b1_offs, \
|
||||
a1_offs, \
|
||||
r2_offs, \
|
||||
g2_offs, \
|
||||
b2_offs, \
|
||||
a2_offs
|
||||
|
||||
/*
 * Store two converted pixels at outptr, then advance outptr by pixel_size.
 * scratch0..scratch2 hold the R, G, B bytes of the first pixel and
 * scratch3..scratch5 those of the second; each byte goes to the offset given
 * by the enclosing generator's r/g/b offset arguments.
 * When pixel_size == 8 the bytes at a1_offs and a2_offs are set to 0xFF;
 * scratch0 is reused for that constant, so its old value is lost.
 */
.macro STORE_H2V2_2_PIXELS scratch0 \
|
||||
scratch1 \
|
||||
scratch2 \
|
||||
scratch3 \
|
||||
scratch4 \
|
||||
scratch5 \
|
||||
outptr
|
||||
sb \scratch0, \r1_offs(\outptr) // first pixel: R
|
||||
sb \scratch1, \g1_offs(\outptr) // first pixel: G
|
||||
sb \scratch2, \b1_offs(\outptr) // first pixel: B
|
||||
sb \scratch3, \r2_offs(\outptr) // second pixel: R
|
||||
sb \scratch4, \g2_offs(\outptr) // second pixel: G
|
||||
sb \scratch5, \b2_offs(\outptr) // second pixel: B
|
||||
.if (\pixel_size == 8)
|
||||
li \scratch0, 0xFF // scratch0 already stored above, safe to reuse
|
||||
sb \scratch0, \a1_offs(\outptr)
|
||||
sb \scratch0, \a2_offs(\outptr)
|
||||
.endif
|
||||
addiu \outptr, \pixel_size // pixel_size covers both pixels just written
|
||||
.endm
|
||||
|
||||
/*
 * Store a single converted pixel (R, G, B in scratch0..scratch2) at outptr.
 * outptr is not advanced.  When pixel_size == 8 the byte at a1_offs is set to
 * 0xFF using the fixed register t0, which is therefore clobbered.
 */
.macro STORE_H2V2_1_PIXEL scratch0 \
|
||||
scratch1 \
|
||||
scratch2 \
|
||||
outptr
|
||||
sb \scratch0, \r1_offs(\outptr) // R
|
||||
sb \scratch1, \g1_offs(\outptr) // G
|
||||
sb \scratch2, \b1_offs(\outptr) // B
|
||||
|
||||
.if (\pixel_size == 8)
|
||||
li t0, 0xFF // hard-coded t0, not a macro argument
|
||||
sb t0, \a1_offs(\outptr)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2)
|
||||
/*
|
||||
* a0 - cinfo->output_width
|
||||
* a1 - input_buf
|
||||
* a2 - in_row_group_ctr
|
||||
* a3 - output_buf
|
||||
* 16(sp) - cinfo->sample_range_limit
|
||||
*
* Converts one row group: the two rows input_buf[0][2*ctr] and
* input_buf[0][2*ctr + 1] share one input_buf[1] row (cb) and one
* input_buf[2] row (cr); results go to output_buf[0] and output_buf[1].
* The main loop handles two horizontally adjacent pixels of both rows per
* cb/cr sample; an odd output_width is finished by the code after label 2.
*/
|
||||
|
||||
SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
|
||||
|
||||
lw t9, 56(sp) // cinfo->sample_range_limit (16(sp) at entry; presumably 56 because the save above moved sp by 40 - TODO confirm)
|
||||
lw v0, 0(a1) // v0 = input_buf[0]
|
||||
lw v1, 4(a1) // v1 = input_buf[1]
|
||||
lw t0, 8(a1) // t0 = input_buf[2]
|
||||
sll t1, a2, 3 // t1 = in_row_group_ctr * 8: byte offset of row pointer 2*ctr
|
||||
addiu t2, t1, 4 // t2 = byte offset of row pointer 2*ctr + 1
|
||||
sll t3, a2, 2 // t3 = in_row_group_ctr * 4: byte offset of row pointer ctr
|
||||
lw t4, 0(a3) // t4 = output_buf[0]
|
||||
lwx t1, t1(v0) // t1 = input_buf[0][in_row_group_ctr*2]
|
||||
lwx t2, t2(v0) // t2 = input_buf[0][in_row_group_ctr*2 + 1]
|
||||
lwx t5, t3(v1) // t5 = input_buf[1][in_row_group_ctr]
|
||||
lwx t6, t3(t0) // t6 = input_buf[2][in_row_group_ctr]
|
||||
lw t7, 4(a3) // t7 = output_buf[1]
|
||||
li s1, 0xe6ea // seed value; s1 is overwritten with a real constant below
|
||||
addiu t8, s1, 0x7fff // t8 = 0x166e9 [FIX(1.40200)]
|
||||
addiu s0, t8, 0x5eb9 // s0 = 0x1c5a2 [FIX(1.77200)]
|
||||
addiu s1, zero, 0xa7e6 // s1 = 0xffffa7e6 [-FIX(0.34414)]
|
||||
xori s2, s1, 0xeec8 // s2 = 0xffff492e [-FIX(0.71414)]
|
||||
srl t3, a0, 1 // t3 = output_width / 2 = number of pixel pairs
|
||||
blez t3, 2f // no complete pair: skip the main loop
|
||||
addu t0, t5, t3 // t0 = end address
|
||||
1:
|
||||
lbu t3, 0(t5) // t3 = cb
|
||||
lbu s3, 0(t6) // s3 = cr
|
||||
addiu t5, t5, 1
|
||||
addiu t3, t3, -128 // (cb - 128)
|
||||
addiu s3, s3, -128 // (cr - 128)
|
||||
mult $ac1, s1, t3 // ac1 = -FIX(0.34414) * cb
|
||||
madd $ac1, s2, s3 // ac1 += -FIX(0.71414) * cr
|
||||
sll s3, s3, 15
|
||||
sll t3, t3, 15
|
||||
mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS
|
||||
extr_r.w s5, $ac1, 16 // s5 = green term: ac1 >> 16 with rounding
|
||||
mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS
|
||||
lbu v0, 0(t1) // y of the first pixel, row 2*ctr
|
||||
addiu t6, t6, 1
|
||||
addiu t1, t1, 2
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu AT, 0(t3) // range-limited R of the first pixel
|
||||
lbu s7, 0(s3) // range-limited G of the first pixel
|
||||
lbu ra, 0(v1) // range-limited B; ra is used as scratch (it is in the save/restore list)
|
||||
lbu v0, -1(t1) // y of the second pixel (t1 was already advanced by 2)
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu t3, 0(t3)
|
||||
lbu s3, 0(s3)
|
||||
lbu v1, 0(v1)
|
||||
lbu v0, 0(t2) // y of the first pixel, row 2*ctr + 1
|
||||
|
||||
STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t4
|
||||
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu AT, 0(t3)
|
||||
lbu s7, 0(s3)
|
||||
lbu ra, 0(v1)
|
||||
lbu v0, 1(t2) // y of the second pixel, row 2*ctr + 1
|
||||
addiu t2, t2, 2
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu t3, 0(t3)
|
||||
lbu s3, 0(s3)
|
||||
lbu v1, 0(v1)
|
||||
|
||||
STORE_H2V2_2_PIXELS AT, s7, ra, t3, s3, v1, t7
|
||||
|
||||
bne t0, t5, 1b // loop until the cb pointer reaches the end address
|
||||
nop
|
||||
2:
|
||||
andi t0, a0, 1 // nonzero when output_width is odd
|
||||
beqz t0, 4f // even width: nothing left to do
|
||||
lbu t3, 0(t5) // cb of the last column; NOTE(review): sits directly after the branch, so presumably runs in its delay slot even when the branch is taken - confirm
|
||||
lbu s3, 0(t6)
|
||||
addiu t3, t3, -128 // (cb - 128)
|
||||
addiu s3, s3, -128 // (cr - 128)
|
||||
mult $ac1, s1, t3
|
||||
madd $ac1, s2, s3
|
||||
sll s3, s3, 15
|
||||
sll t3, t3, 15
|
||||
lbu v0, 0(t1) // y of the last pixel, row 2*ctr
|
||||
extr_r.w s5, $ac1, 16
|
||||
mulq_rs.w s4, t8, s3 // s4 = (C1 * cr + ONE_HALF)>> SCALEBITS
|
||||
mulq_rs.w s6, s0, t3 // s6 = (C2 * cb + ONE_HALF)>> SCALEBITS
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu t3, 0(t3)
|
||||
lbu s3, 0(s3)
|
||||
lbu v1, 0(v1)
|
||||
lbu v0, 0(t2) // y of the last pixel, row 2*ctr + 1
|
||||
|
||||
STORE_H2V2_1_PIXEL t3, s3, v1, t4
|
||||
|
||||
addu t3, v0, s4 // y+cred
|
||||
addu s3, v0, s5 // y+cgreen
|
||||
addu v1, v0, s6 // y+cblue
|
||||
addu t3, t9, t3 // y+cred
|
||||
addu s3, t9, s3 // y+cgreen
|
||||
addu v1, t9, v1 // y+cblue
|
||||
lbu t3, 0(t3)
|
||||
lbu s3, 0(s3)
|
||||
lbu v1, 0(v1)
|
||||
|
||||
STORE_H2V2_1_PIXEL t3, s3, v1, t7
|
||||
4:
|
||||
RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
|
||||
|
||||
j ra
|
||||
nop
|
||||
|
||||
END(jsimd_h2v2_\colorid\()_merged_upsample_mips_dspr2)
|
||||
|
||||
.purgem STORE_H2V2_1_PIXEL
|
||||
.purgem STORE_H2V2_2_PIXELS
|
||||
.endm
|
||||
|
||||
/*
 * One routine is generated per output pixel layout.  "pix" is the number of
 * bytes written for a pair of pixels (the two-pixel store macro advances the
 * output pointer by it), and R1..A2 are byte offsets inside that pair.
 * The A1/A2 columns are only read when pix == 8, so their values in the
 * 6-byte rows have no effect on the generated code.
 */
/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
|
||||
GENERATE_H2V2_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* jsimd_h2v1_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extrgb_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extrgbx_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extbgr_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extbgrx_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extxbgr_merged_upsample_mips_dspr2
|
||||
* jsimd_h2v1_extxrgb_merged_upsample_mips_dspr2
|
||||
*
|
||||
* Merged h2v1 upsample routines
|
||||
*/
|
||||
|
||||
.macro GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 colorid, \
|
||||
pixel_size, \
|
||||
r1_offs, \
|
||||
g1_offs, \
|
||||
b1_offs, \
|
||||
a1_offs, \
|
||||
r2_offs, \
|
||||
g2_offs, \
|
||||
b2_offs, \
|
||||
a2_offs
|
||||
|
||||
/*
 * Store two converted pixels at outptr, then advance outptr by pixel_size.
 * scratch0..scratch2 hold the R, G, B bytes of the first pixel and
 * scratch3..scratch5 those of the second.
 * When pixel_size == 8 the bytes at a1_offs and a2_offs are set to 0xFF via
 * the fixed register t0, so t0 is clobbered by this macro.
 */
.macro STORE_H2V1_2_PIXELS scratch0 \
|
||||
scratch1 \
|
||||
scratch2 \
|
||||
scratch3 \
|
||||
scratch4 \
|
||||
scratch5 \
|
||||
outptr
|
||||
sb \scratch0, \r1_offs(\outptr) // first pixel: R
|
||||
sb \scratch1, \g1_offs(\outptr) // first pixel: G
|
||||
sb \scratch2, \b1_offs(\outptr) // first pixel: B
|
||||
sb \scratch3, \r2_offs(\outptr) // second pixel: R
|
||||
sb \scratch4, \g2_offs(\outptr) // second pixel: G
|
||||
sb \scratch5, \b2_offs(\outptr) // second pixel: B
|
||||
.if (\pixel_size == 8)
|
||||
li t0, 0xFF // hard-coded t0, not a macro argument
|
||||
sb t0, \a1_offs(\outptr)
|
||||
sb t0, \a2_offs(\outptr)
|
||||
.endif
|
||||
addiu \outptr, \pixel_size // pixel_size covers both pixels just written
|
||||
.endm
|
||||
|
||||
/*
 * Store a single converted pixel (R, G, B in scratch0..scratch2) at outptr.
 * outptr is not advanced.  When pixel_size == 8 the byte at a1_offs is set to
 * 0xFF using the fixed register t0, which is therefore clobbered.
 */
.macro STORE_H2V1_1_PIXEL scratch0 \
|
||||
scratch1 \
|
||||
scratch2 \
|
||||
outptr
|
||||
sb \scratch0, \r1_offs(\outptr) // R
|
||||
sb \scratch1, \g1_offs(\outptr) // G
|
||||
sb \scratch2, \b1_offs(\outptr) // B
|
||||
.if (\pixel_size == 8)
|
||||
li t0, 0xFF // hard-coded t0, not a macro argument
|
||||
sb t0, \a1_offs(\outptr)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
LEAF_MIPS_DSPR2(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2)
|
||||
/*
|
||||
* a0 - cinfo->output_width
|
||||
* a1 - input_buf
|
||||
* a2 - in_row_group_ctr
|
||||
* a3 - output_buf
|
||||
* 16(sp) - range_limit
|
||||
*
* Converts one row: row in_row_group_ctr of each of the three input
* components is read and the result is written to output_buf[0].
* The main loop handles two horizontally adjacent pixels per cb/cr sample;
* an odd output_width is finished by the single-pixel code after label 2.
*/
|
||||
|
||||
SAVE_REGS_ON_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
|
||||
|
||||
li t0, 0xe6ea // seed value used to build the constants below
|
||||
lw t1, 0(a1) // t1 = input_buf[0]
|
||||
lw t2, 4(a1) // t2 = input_buf[1]
|
||||
lw t3, 8(a1) // t3 = input_buf[2]
|
||||
lw t8, 56(sp) // t8 = range_limit (16(sp) at entry; presumably 56 because the save above moved sp by 40 - TODO confirm)
|
||||
addiu s1, t0, 0x7fff // s1 = 0x166e9 [FIX(1.40200)]
|
||||
addiu s2, s1, 0x5eb9 // s2 = 0x1c5a2 [FIX(1.77200)]
|
||||
addiu s0, t0, 0x9916 // s0 = 0x8000 (added as ONE_HALF before the >> 16 in the odd-pixel tail)
|
||||
addiu s4, zero, 0xa7e6 // s4 = 0xffffa7e6 [-FIX(0.34414)]
|
||||
xori s3, s4, 0xeec8 // s3 = 0xffff492e [-FIX(0.71414)]
|
||||
srl t0, a0, 1 // t0 = output_width / 2 = number of pixel pairs
|
||||
sll t4, a2, 2 // t4 = in_row_group_ctr * 4: byte offset of the row pointer
|
||||
lwx s5, t4(t1) // s5 = inptr0
|
||||
lwx s6, t4(t2) // s6 = inptr1
|
||||
lwx s7, t4(t3) // s7 = inptr2
|
||||
lw t7, 0(a3) // t7 = outptr
|
||||
blez t0, 2f // no complete pair: skip the main loop
|
||||
addu t9, s6, t0 // t9 = end address
|
||||
1:
|
||||
lbu t2, 0(s6) // t2 = cb
|
||||
lbu t0, 0(s7) // t0 = cr
|
||||
lbu t1, 0(s5) // t1 = y
|
||||
addiu t2, t2, -128 // t2 = cb - 128
|
||||
addiu t0, t0, -128 // t0 = cr - 128
|
||||
mult $ac1, s4, t2 // ac1 = -FIX(0.34414) * cb
|
||||
madd $ac1, s3, t0 // ac1 += -FIX(0.71414) * cr
|
||||
sll t0, t0, 15
|
||||
sll t2, t2, 15
|
||||
mulq_rs.w t0, s1, t0 // t0 = (C1*cr + ONE_HALF)>> SCALEBITS
|
||||
extr_r.w t5, $ac1, 16 // t5 = green term: ac1 >> 16 with rounding
|
||||
mulq_rs.w t6, s2, t2 // t6 = (C2*cb + ONE_HALF)>> SCALEBITS
|
||||
addiu s7, s7, 1
|
||||
addiu s6, s6, 1
|
||||
addu t2, t1, t0 // t2 = y + cred
|
||||
addu t3, t1, t5 // t3 = y + cgreen
|
||||
addu t4, t1, t6 // t4 = y + cblue
|
||||
addu t2, t8, t2
|
||||
addu t3, t8, t3
|
||||
addu t4, t8, t4
|
||||
lbu t1, 1(s5) // t1 = y of the second pixel
|
||||
lbu v0, 0(t2) // range-limited R of the first pixel
|
||||
lbu v1, 0(t3) // range-limited G of the first pixel
|
||||
lbu ra, 0(t4) // range-limited B; ra is used as scratch (it is in the save/restore list)
|
||||
addu t2, t1, t0
|
||||
addu t3, t1, t5
|
||||
addu t4, t1, t6
|
||||
addu t2, t8, t2
|
||||
addu t3, t8, t3
|
||||
addu t4, t8, t4
|
||||
lbu t2, 0(t2)
|
||||
lbu t3, 0(t3)
|
||||
lbu t4, 0(t4)
|
||||
|
||||
STORE_H2V1_2_PIXELS v0, v1, ra, t2, t3, t4, t7
|
||||
|
||||
bne t9, s6, 1b // loop until the cb pointer reaches the end address
|
||||
addiu s5, s5, 2 // step the y pointer past both pixels
|
||||
2:
|
||||
andi t0, a0, 1 // nonzero when output_width is odd
|
||||
beqz t0, 4f // even width: nothing left to do
|
||||
nop
|
||||
3:
|
||||
lbu t2, 0(s6)
|
||||
lbu t0, 0(s7)
|
||||
lbu t1, 0(s5)
|
||||
addiu t2, t2, -128 //(cb - 128)
|
||||
addiu t0, t0, -128 //(cr - 128)
|
||||
mul t3, s4, t2 // t3 = -FIX(0.34414) * cb
|
||||
mul t4, s3, t0 // t4 = -FIX(0.71414) * cr
|
||||
sll t0, t0, 15
|
||||
sll t2, t2, 15
|
||||
mulq_rs.w t0, s1, t0 // (C1*cr + ONE_HALF)>> SCALEBITS
|
||||
mulq_rs.w t6, s2, t2 // (C2*cb + ONE_HALF)>> SCALEBITS
|
||||
addu t3, t3, s0 // add ONE_HALF (0x8000) for rounding
|
||||
addu t3, t4, t3
|
||||
sra t5, t3, 16 // (C4*cb + ONE_HALF + C3*cr)>> SCALEBITS
|
||||
addu t2, t1, t0 // y + cred
|
||||
addu t3, t1, t5 // y + cgreen
|
||||
addu t4, t1, t6 // y + cblue
|
||||
addu t2, t8, t2
|
||||
addu t3, t8, t3
|
||||
addu t4, t8, t4
|
||||
lbu t2, 0(t2)
|
||||
lbu t3, 0(t3)
|
||||
lbu t4, 0(t4)
|
||||
|
||||
STORE_H2V1_1_PIXEL t2, t3, t4, t7
|
||||
4:
|
||||
RESTORE_REGS_FROM_STACK 40, s0, s1, s2, s3, s4, s5, s6, s7, ra
|
||||
|
||||
j ra
|
||||
nop
|
||||
|
||||
END(jsimd_h2v1_\colorid\()_merged_upsample_mips_dspr2)
|
||||
|
||||
.purgem STORE_H2V1_1_PIXEL
|
||||
.purgem STORE_H2V1_2_PIXELS
|
||||
.endm
|
||||
|
||||
/*
 * One routine is generated per output pixel layout.  "pix" is the number of
 * bytes written for a pair of pixels (the two-pixel store macro advances the
 * output pointer by it), and R1..A2 are byte offsets inside that pair.
 * The A1/A2 columns are only read when pix == 8, so their values in the
 * 6-byte rows have no effect on the generated code.
 */
/*-----------------------------------------id -- pix R1 G1 B1 A1 R2 G2 B2 A2 */
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgb, 6, 0, 1, 2, 6, 3, 4, 5, 6
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgr, 6, 2, 1, 0, 3, 5, 4, 3, 6
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extrgbx, 8, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extbgrx, 8, 2, 1, 0, 3, 6, 5, 4, 7
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxbgr, 8, 3, 2, 1, 0, 7, 6, 5, 4
|
||||
GENERATE_H2V1_MERGED_UPSAMPLE_MIPS_DSPR2 extxrgb, 8, 1, 2, 3, 0, 5, 6, 7, 4
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* jsimd_h2v2_fancy_upsample_mips_dspr2
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user