Files
mozjpeg/simd/jsimd_mips_dspr2.S

251 lines
7.4 KiB
ArmAsm

/*
* MIPS DSPr2 optimizations for libjpeg-turbo
*
* Copyright (C) 2013, MIPS Technologies, Inc., California.
* All rights reserved.
* Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
* Darko Laus (darko.laus@imgtec.com)
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#include "jsimd_mips_dspr2_asm.h"
/*****************************************************************************/
/*
* jsimd_extrgb_ycc_convert_mips_dspr2
* jsimd_extbgr_ycc_convert_mips_dspr2
* jsimd_extrgbx_ycc_convert_mips_dspr2
* jsimd_extbgrx_ycc_convert_mips_dspr2
* jsimd_extxbgr_ycc_convert_mips_dspr2
* jsimd_extxrgb_ycc_convert_mips_dspr2
*
* Colorspace conversion RGB -> YCbCr
*/
.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs

/*
 * Helper: load the R, G and B bytes of one pixel from inptr using the
 * byte offsets given to the generator, then advance inptr by pixel_size.
 */
.macro DO_RGB_TO_YCC r, \
                     g, \
                     b, \
                     inptr
    lbu     \r, \r_offs(\inptr)
    lbu     \g, \g_offs(\inptr)
    lbu     \b, \b_offs(\inptr)
    addiu   \inptr, \pixel_size
.endm

LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2)
/*
 * a0     - cinfo->image_width
 * a1     - input_buf
 * a2     - output_buf
 * a3     - output_row
 * 16(sp) - num_rows
 *
 * Register use inside the loops:
 *   s0-s7 - fixed-point coefficients with 16 fraction bits
 *   t8    - initial value of the Cb and Cr accumulators
 *   t7    - rows still to convert
 *   t6    - input pixel pointer
 *   t0-t2 - Y, Cb and Cr output pointers
 *   t9    - end of the Cr output row (inner loop bound)
 *   t3-t5 - R, G, B after the load, then Y, Cb, Cr
 *
 * Negative coefficients are kept as wrapped 32-bit values. maddu is an
 * unsigned multiply-add, but only bits 16..23 of each accumulator reach
 * memory (extr.w by 16, then sb), and those bits do not depend on the
 * signedness of the multiply or on the uninitialized HI halves.
 *
 * Returns without loading or storing any pixel when image_width is 0 or
 * num_rows <= 0. Both loops are bottom-tested, so without these guards a
 * request for zero rows would still convert one row, and a zero width
 * would never meet the end-address test.
 *
 * Branch delay slots are filled by hand (extra leading space).
 */

    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    lw      t7, 48(sp)        // t7 = num_rows (16(sp) on entry; presumably
                              // sp was lowered by 32 by the save above)
    beqz    a0, 2f            // image_width == 0: nothing to convert
     nop
    blez    t7, 2f            // num_rows <= 0: nothing to convert
     nop
    li      s0, 0x4c8b        // FIX(0.29900)
    li      s1, 0x9646        // FIX(0.58700)
    li      s2, 0x1d2f        // FIX(0.11400)
    li      s3, 0xffffd4cd    // -FIX(0.16874)
    li      s4, 0xffffab33    // -FIX(0.33126)
    li      s5, 0x8000        // FIX(0.50000)
    li      s6, 0xffff94d1    // -FIX(0.41869)
    li      s7, 0xffffeb2f    // -FIX(0.08131)
    li      t8, 0x807fff      // CBCR_OFFSET + ONE_HALF-1

0:                            // ---- per-row loop ----
    addiu   t7, -1            // --num_rows
    lw      t6, 0(a1)         // t6 = input_buf[0]
    lw      t0, 0(a2)
    lw      t1, 4(a2)
    lw      t2, 8(a2)
    sll     t3, a3, 2         // byte index of output_row in a pointer array
    lwx     t0, t3(t0)        // t0 = output_buf[0][output_row]
    lwx     t1, t3(t1)        // t1 = output_buf[1][output_row]
    lwx     t2, t3(t2)        // t2 = output_buf[2][output_row]

    addu    t9, t2, a0        // t9 = end address
    addiu   a3, 1             // ++output_row for the next pass

1:                            // ---- per-pixel loop ----
    DO_RGB_TO_YCC t3, t4, t5, t6

    mtlo    s5, $ac0          // Y  accumulator starts at ONE_HALF
    mtlo    t8, $ac1          // Cb accumulator starts at offset + rounding
    mtlo    t8, $ac2          // Cr accumulator starts at offset + rounding
    maddu   $ac0, s2, t5      // Y  += 0.11400 * B
    maddu   $ac1, s5, t5      // Cb += 0.50000 * B
    maddu   $ac2, s5, t3      // Cr += 0.50000 * R
    maddu   $ac0, s0, t3      // Y  += 0.29900 * R
    maddu   $ac1, s3, t3      // Cb -= 0.16874 * R
    maddu   $ac2, s6, t4      // Cr -= 0.41869 * G
    maddu   $ac0, s1, t4      // Y  += 0.58700 * G
    maddu   $ac1, s4, t4      // Cb -= 0.33126 * G
    maddu   $ac2, s7, t5      // Cr -= 0.08131 * B
    extr.w  t3, $ac0, 16      // drop the 16 fraction bits
    extr.w  t4, $ac1, 16
    extr.w  t5, $ac2, 16
    sb      t3, 0(t0)         // store Y
    sb      t4, 0(t1)         // store Cb
    sb      t5, 0(t2)         // store Cr
    addiu   t0, 1
    addiu   t2, 1
    bne     t2, t9, 1b        // more pixels in this row?
     addiu  t1, 1             // (delay slot) advance the Cb pointer
    bgtz    t7, 0b            // more rows?
     addiu  a1, 4             // (delay slot) ++input_buf

2:                            // common exit, also used by the guards
    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    j ra
     nop
END(jsimd_\colorid\()_ycc_convert_mips_dspr2)

.purgem DO_RGB_TO_YCC

.endm
/*
 * Instantiate one RGB -> YCbCr converter per input pixel layout.
 * Arguments, in order: colorid (spliced into the function name),
 * pixel_size (bytes per pixel), then the byte offsets of R, G and B
 * inside one pixel.
 */
/*------------------------------------------id -- pix R G B */
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
/*****************************************************************************/
/*
* jsimd_ycc_extrgb_convert_mips_dspr2
* jsimd_ycc_extbgr_convert_mips_dspr2
* jsimd_ycc_extrgbx_convert_mips_dspr2
* jsimd_ycc_extbgrx_convert_mips_dspr2
* jsimd_ycc_extxbgr_convert_mips_dspr2
* jsimd_ycc_extxrgb_convert_mips_dspr2
*
* Colorspace conversion YCbCr -> RGB
*/
.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs

/*
 * Helper: store three component bytes at the R, G and B offsets given to
 * the generator, then advance outptr by pixel_size. For 4-byte pixels the
 * remaining byte at a_offs is set to 0xFF.
 *
 * NOTE: the 4-byte variant overwrites t0 after the three stores, so t0
 * must not be expected to survive this macro.
 */
.macro STORE_YCC_TO_RGB scratch0 \
                        scratch1 \
                        scratch2 \
                        outptr
    sb       \scratch0, \r_offs(\outptr)
    sb       \scratch1, \g_offs(\outptr)
    sb       \scratch2, \b_offs(\outptr)
.if (\pixel_size == 4)
    li       t0, 0xFF
    sb       t0, \a_offs(\outptr)
.endif
    addiu    \outptr, \pixel_size
.endm

LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2)
/*
 * a0     - cinfo->image_width
 * a1     - input_buf
 * a2     - input_row
 * a3     - output_buf
 * 16(sp) - num_rows
 *
 * Register use inside the loops:
 *   t3    - ONE_HALF rounding term for the green sum
 *   t4-t7 - fixed-point coefficients with 16 fraction bits
 *   t8    - 128 replicated into both halfwords (paired clamp bias)
 *   s1    - rows still to convert
 *   s0    - output pixel pointer
 *   s2-s4 - Y, Cb and Cr input pointers
 *   t9    - end of the Y input row (inner loop bound)
 *   s5-s7 - y, cb, cr of the current pixel
 *   t0-t2 - scratch: blue, red and green while they are being built
 *
 * Clamping to 0..255 is done by subtracting 128, saturating to a signed
 * byte with a saturating left shift followed by an arithmetic right
 * shift, and adding 128 back. Red and green share one register as two
 * halfwords; blue is handled as a full word.
 *
 * Returns without loading or storing any pixel when image_width is 0 or
 * num_rows <= 0. Both loops are bottom-tested, so without these guards a
 * request for zero rows would still convert one row, and a zero width
 * would never meet the end-address test.
 *
 * Branch delay slots are filled by hand (extra leading space).
 */

    SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    lw         s1, 48(sp)      // s1 = num_rows (16(sp) on entry; presumably
                               // sp was lowered by 32 by the save above)
    beqz       a0, 2f          // image_width == 0: nothing to convert
     nop
    blez       s1, 2f          // num_rows <= 0: nothing to convert
     nop
    li         t3, 0x8000      // ONE_HALF
    li         t4, 0x166e9     // FIX(1.40200)
    li         t5, 0x1c5a2     // FIX(1.77200)
    li         t6, 0xffff492e  // -FIX(0.71414)
    li         t7, 0xffffa7e6  // -FIX(0.34414)
    repl.ph    t8, 128         // t8 = 128 in each halfword

0:                             // ---- per-row loop ----
    lw         s0, 0(a3)       // s0 = output_buf[0]
    lw         t0, 0(a1)
    lw         t1, 4(a1)
    lw         t2, 8(a1)
    sll        s5, a2, 2       // byte index of input_row in a pointer array
    addiu      s1, -1          // --num_rows
    lwx        s2, s5(t0)      // s2 = input_buf[0][input_row]
    lwx        s3, s5(t1)      // s3 = input_buf[1][input_row]
    lwx        s4, s5(t2)      // s4 = input_buf[2][input_row]
    addu       t9, s2, a0      // t9 = end address of the Y row
    addiu      a2, 1           // ++input_row for the next pass

1:                             // ---- per-pixel loop ----
    lbu        s7, 0(s4)       // cr
    lbu        s6, 0(s3)       // cb
    lbu        s5, 0(s2)       // y
    addiu      s2, 1
    addiu      s4, 1
    addiu      s7, -128        // cr - 128
    addiu      s6, -128        // cb - 128
    mul        t2, t7, s6      // -0.34414 * (cb - 128)
    mul        t0, t6, s7      // Crgtab[cr]
    sll        s7, 15          // pre-scale for the fractional multiply
    mulq_rs.w  t1, t4, s7      // Crrtab[cr]
    sll        s6, 15          // pre-scale for the fractional multiply
    addu       t2, t3          // Cbgtab[cb]
    addu       t2, t0          // green term before the final shift

    mulq_rs.w  t0, t5, s6      // Cbbtab[cb]
    sra        t2, 16          // drop the 16 fraction bits
    addu       t1, s5          // red   = y + Crrtab[cr]
    addu       t2, s5          // add y
    ins        t2, t1, 16, 16  // pack red into the upper halfword
    subu.ph    t2, t2, t8      // bias both halfwords by -128
    addu       t0, s5          // blue  = y + Cbbtab[cb]
    shll_s.ph  t2, t2, 8       // saturating shift clamps red and green
    subu       t0, 128         // bias blue by -128
    shra.ph    t2, t2, 8
    shll_s.w   t0, t0, 24      // saturating shift clamps blue
    addu.ph    t2, t2, t8      // clip & store
    sra        t0, t0, 24
    sra        t1, t2, 16      // t1 = red (green stays in the low byte of t2)
    addiu      t0, 128         // remove the bias from blue

    STORE_YCC_TO_RGB t1, t2, t0, s0

    bne        s2, t9, 1b      // more pixels in this row?
     addiu     s3, 1           // (delay slot) advance the Cb pointer
    bgtz       s1, 0b          // more rows?
     addiu     a3, 4           // (delay slot) ++output_buf

2:                             // common exit, also used by the guards
    RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7

    j ra
     nop
END(jsimd_ycc_\colorid\()_convert_mips_dspr2)

.purgem STORE_YCC_TO_RGB

.endm
/*
 * Instantiate one YCbCr -> RGB converter per output pixel layout.
 * Arguments, in order: colorid (spliced into the function name),
 * pixel_size (bytes per pixel), then the byte offsets of R, G, B and
 * of the fourth byte (A) inside one pixel. The A offset is only used
 * when pixel_size is 4.
 */
/*------------------------------------------id -- pix R G B A */
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
/*****************************************************************************/