SIMD support for performing color conversion using MIPS DSPr2 instructions
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@993 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -6,6 +6,10 @@ line padding (previously, it only supported 4-byte padding, which was
|
||||
compatible with X Video.) Also, the decompress-to-YUV function has been
|
||||
extended to support image scaling.
|
||||
|
||||
[2] Added SIMD acceleration for performing color conversion on DSPr2-capable
|
||||
MIPS platforms. This speeds up the compression of full-color JPEGs by 6-17%
|
||||
on such platforms and decompression by 3-5%.
|
||||
|
||||
|
||||
1.3.0
|
||||
=====
|
||||
|
||||
30
acinclude.m4
30
acinclude.m4
@@ -180,3 +180,33 @@ AC_DEFUN([AC_CHECK_COMPATIBLE_ARM_ASSEMBLER_IFELSE],[
|
||||
$2
|
||||
fi
|
||||
])
|
||||
|
||||
# AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE
# --------------------------
# Test whether the toolchain accepts MIPS DSPr2 instructions.
#
# A small C program whose inline assembly uses the DSPr2 instruction
# precr.qb.ph is compiled with CFLAGS temporarily set to
# "$CCASFLAGS -mdspr2".  The first macro argument is run when the program
# compiles and the second one when it does not.  The caller's CFLAGS value
# is saved before the probe and restored afterwards.
#
# The probe is wrapped in AC_LANG_SOURCE so that the generated conftest
# file also receives the definitions accumulated in confdefs.h, which is
# the input form AC_COMPILE_IFELSE expects.
AC_DEFUN([AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE],[
  have_mips_dspr2=no
  ac_save_CFLAGS="$CFLAGS"
  CFLAGS="$CCASFLAGS -mdspr2"

  AC_COMPILE_IFELSE([AC_LANG_SOURCE([[

  int main ()
  {
    int c = 0, a = 0, b = 0;
    __asm__ __volatile__ (
        "precr.qb.ph %[c], %[a], %[b] \n\t"
        : [c] "=r" (c)
        : [a] "r" (a), [b] "r" (b)
    );
    return c;
  }
  ]])], have_mips_dspr2=yes)
  CFLAGS="$ac_save_CFLAGS"

  if test "x$have_mips_dspr2" = "xyes" ; then
    $1
  else
    $2
  fi
])
|
||||
|
||||
11
configure.ac
11
configure.ac
@@ -425,6 +425,16 @@ if test "x${with_simd}" != "xno"; then
|
||||
with_simd=no
|
||||
AC_MSG_WARN([SIMD support can't be enabled. Performance will suffer.])])
|
||||
;;
|
||||
mipsel*)
|
||||
AC_MSG_RESULT([yes (mipsel)])
|
||||
AC_MSG_CHECKING([if the assembler is GNU-compatible and can be used])
|
||||
AC_CHECK_COMPATIBLE_MIPSEL_ASSEMBLER_IFELSE(
|
||||
[AC_MSG_RESULT([yes])
|
||||
simd_arch=mipsel],
|
||||
[AC_MSG_RESULT([no])
|
||||
with_simd=no
|
||||
AC_MSG_WARN([SIMD support can't be enabled. Performance will suffer.])])
|
||||
;;
|
||||
*)
|
||||
AC_MSG_RESULT([no ("$host_cpu")])
|
||||
AC_MSG_WARN([SIMD support not available for this CPU. Performance will suffer.])
|
||||
@@ -444,6 +454,7 @@ AM_CONDITIONAL([WITH_SSE_FLOAT_DCT], [test "x$simd_arch" = "xx86_64" -o "x$simd_
|
||||
AM_CONDITIONAL([SIMD_I386], [test "x$simd_arch" = "xi386"])
|
||||
AM_CONDITIONAL([SIMD_X86_64], [test "x$simd_arch" = "xx86_64"])
|
||||
AM_CONDITIONAL([SIMD_ARM], [test "x$simd_arch" = "xarm"])
|
||||
AM_CONDITIONAL([SIMD_MIPSEL], [test "x$simd_arch" = "xmipsel"])
|
||||
AM_CONDITIONAL([X86_64], [test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xamd64"])
|
||||
AM_CONDITIONAL([WITH_TURBOJPEG], [test "x$with_turbojpeg" != "xno"])
|
||||
|
||||
|
||||
@@ -58,6 +58,12 @@ libsimd_la_SOURCES = jsimd_arm.c jsimd_arm_neon.S
|
||||
|
||||
endif
|
||||
|
||||
if SIMD_MIPSEL
|
||||
|
||||
libsimd_la_SOURCES = jsimd_mips.c jsimd_mips_dspr2.S
|
||||
|
||||
endif
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)
|
||||
|
||||
.asm.lo:
|
||||
|
||||
60
simd/jsimd.h
60
simd/jsimd.h
@@ -3,6 +3,7 @@
|
||||
*
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright 2011 D. R. Commander
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California
|
||||
*
|
||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -18,6 +19,7 @@
|
||||
#define JSIMD_SSE 0x04
|
||||
#define JSIMD_SSE2 0x08
|
||||
#define JSIMD_ARM_NEON 0x10
|
||||
#define JSIMD_MIPS_DSPR2 0x20
|
||||
|
||||
/* Short forms of external names for systems with brain-damaged linkers. */
|
||||
|
||||
@@ -386,6 +388,64 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_neon
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
|
||||
EXTERN(void) jsimd_rgb_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extrgb_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extrgbx_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extbgr_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extbgrx_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extxbgr_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
EXTERN(void) jsimd_extxrgb_ycc_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows));
|
||||
|
||||
EXTERN (void) jsimd_ycc_rgb_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extrgb_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extrgbx_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extbgr_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extbgrx_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extxbgr_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
EXTERN(void) jsimd_ycc_extxrgb_convert_mips_dspr2
|
||||
JPP((JDIMENSION img_width,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows));
|
||||
|
||||
/* SIMD Downsample */
|
||||
EXTERN(void) jsimd_h2v2_downsample_mmx
|
||||
JPP((JDIMENSION image_width, int max_v_samp_factor,
|
||||
|
||||
465
simd/jsimd_mips.c
Normal file
465
simd/jsimd_mips.c
Normal file
@@ -0,0 +1,465 @@
|
||||
/*
|
||||
* jsimd_mips.c
|
||||
*
|
||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
* Copyright 2009-2011 D. R. Commander
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California
|
||||
*
|
||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
* For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||
*
|
||||
* This file contains the interface between the "normal" portions
|
||||
* of the library and the SIMD implementations when running on
|
||||
* MIPS architecture.
|
||||
*
|
||||
* Based on the stubs from 'jsimd_none.c'
|
||||
*/
|
||||
|
||||
#define JPEG_INTERNALS
|
||||
#include "../jinclude.h"
|
||||
#include "../jpeglib.h"
|
||||
#include "../jsimd.h"
|
||||
#include "../jdct.h"
|
||||
#include "../jsimddct.h"
|
||||
#include "jsimd.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
static unsigned int simd_support = ~0;
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
LOCAL(int)
|
||||
parse_proc_cpuinfo(const char* search_string)
|
||||
{
|
||||
const char* file_name = "/proc/cpuinfo";
|
||||
char cpuinfo_line[256];
|
||||
FILE* f = NULL;
|
||||
simd_support = 0;
|
||||
|
||||
if ((f = fopen(file_name, "r")) != NULL) {
|
||||
while (fgets(cpuinfo_line, sizeof(cpuinfo_line), f) != NULL) {
|
||||
if (strstr(cpuinfo_line, search_string) != NULL) {
|
||||
fclose(f);
|
||||
simd_support |= JSIMD_MIPS_DSPR2;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
/* Did not find string in the proc file, or not Linux ELF. */
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Check what SIMD accelerations are supported.
|
||||
*
|
||||
* FIXME: This code is racy under a multi-threaded environment.
|
||||
*/
|
||||
LOCAL(void)
|
||||
init_simd (void)
|
||||
{
|
||||
if (simd_support != ~0U)
|
||||
return;
|
||||
|
||||
simd_support = 0;
|
||||
|
||||
#if defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
simd_support |= JSIMD_MIPS_DSPR2;
|
||||
#elif defined(__linux__)
|
||||
/* We still have a chance to use MIPS DSPR2 regardless of globally used
|
||||
* -mdspr2 options passed to gcc by performing runtime detection via
|
||||
* /proc/cpuinfo parsing on linux */
|
||||
if (!parse_proc_cpuinfo("MIPS 74K"))
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_rgb_ycc (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
||||
return 0;
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_rgb_gray (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_ycc_rgb (void)
|
||||
{
|
||||
init_simd();
|
||||
|
||||
/* The code is optimised for these values only */
|
||||
if (BITS_IN_JSAMPLE != 8)
|
||||
return 0;
|
||||
if (sizeof(JDIMENSION) != 4)
|
||||
return 0;
|
||||
if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
|
||||
return 0;
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows)
|
||||
{
|
||||
void (*mipsdspr2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||
switch(cinfo->in_color_space)
|
||||
{
|
||||
case JCS_EXT_RGB:
|
||||
mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_RGBX:
|
||||
case JCS_EXT_RGBA:
|
||||
mipsdspr2fct=jsimd_extrgbx_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGR:
|
||||
mipsdspr2fct=jsimd_extbgr_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGRX:
|
||||
case JCS_EXT_BGRA:
|
||||
mipsdspr2fct=jsimd_extbgrx_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XBGR:
|
||||
case JCS_EXT_ABGR:
|
||||
mipsdspr2fct=jsimd_extxbgr_ycc_convert_mips_dspr2;
|
||||
|
||||
break;
|
||||
case JCS_EXT_XRGB:
|
||||
case JCS_EXT_ARGB:
|
||||
mipsdspr2fct=jsimd_extxrgb_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
default:
|
||||
mipsdspr2fct=jsimd_extrgb_ycc_convert_mips_dspr2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
mipsdspr2fct(cinfo->image_width, input_buf,
|
||||
output_buf, output_row, num_rows);
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_rgb_gray_convert (j_compress_ptr cinfo,
|
||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||
JDIMENSION output_row, int num_rows)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||
JSAMPARRAY output_buf, int num_rows)
|
||||
{
|
||||
void (*mipsdspr2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
|
||||
|
||||
switch(cinfo->out_color_space)
|
||||
{
|
||||
case JCS_EXT_RGB:
|
||||
mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_RGBX:
|
||||
case JCS_EXT_RGBA:
|
||||
mipsdspr2fct=jsimd_ycc_extrgbx_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGR:
|
||||
mipsdspr2fct=jsimd_ycc_extbgr_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_BGRX:
|
||||
case JCS_EXT_BGRA:
|
||||
mipsdspr2fct=jsimd_ycc_extbgrx_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XBGR:
|
||||
case JCS_EXT_ABGR:
|
||||
mipsdspr2fct=jsimd_ycc_extxbgr_convert_mips_dspr2;
|
||||
break;
|
||||
case JCS_EXT_XRGB:
|
||||
case JCS_EXT_ARGB:
|
||||
mipsdspr2fct=jsimd_ycc_extxrgb_convert_mips_dspr2;
|
||||
break;
|
||||
default:
|
||||
mipsdspr2fct=jsimd_ycc_extrgb_convert_mips_dspr2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (simd_support & JSIMD_MIPS_DSPR2)
|
||||
mipsdspr2fct(cinfo->output_width, input_buf,
|
||||
input_row, output_buf, num_rows);
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_downsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_downsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data, JSAMPARRAY output_data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_fancy_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_fancy_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
|
||||
jpeg_component_info * compptr,
|
||||
JSAMPARRAY input_data,
|
||||
JSAMPARRAY * output_data_ptr)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v2_merged_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_h2v1_merged_upsample (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
|
||||
JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr,
|
||||
JSAMPARRAY output_buf)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
|
||||
JSAMPIMAGE input_buf,
|
||||
JDIMENSION in_row_group_ctr,
|
||||
JSAMPARRAY output_buf)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_convsamp (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_convsamp_float (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
DCTELEM * workspace)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
|
||||
FAST_FLOAT * workspace)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_fdct_islow (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_fdct_ifast (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_fdct_float (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_fdct_islow (DCTELEM * data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_fdct_ifast (DCTELEM * data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_fdct_float (FAST_FLOAT * data)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_quantize (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_quantize_float (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
|
||||
DCTELEM * workspace)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
|
||||
FAST_FLOAT * workspace)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_idct_2x2 (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_idct_4x4 (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||
JDIMENSION output_col)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||
JDIMENSION output_col)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_idct_islow (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_idct_ifast (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(int)
|
||||
jsimd_can_idct_float (void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||
JDIMENSION output_col)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||
JDIMENSION output_col)
|
||||
{
|
||||
}
|
||||
|
||||
GLOBAL(void)
|
||||
jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
||||
JCOEFPTR coef_block, JSAMPARRAY output_buf,
|
||||
JDIMENSION output_col)
|
||||
{
|
||||
}
|
||||
250
simd/jsimd_mips_dspr2.S
Normal file
250
simd/jsimd_mips_dspr2.S
Normal file
@@ -0,0 +1,250 @@
|
||||
/*
|
||||
* MIPS DSPr2 optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California.
|
||||
* All rights reserved.
|
||||
* Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
|
||||
* Darko Laus (darko.laus@imgtec.com)
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#include "jsimd_mips_dspr2_asm.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* jsimd_extrgb_ycc_convert_mips_dspr2
|
||||
* jsimd_extbgr_ycc_convert_mips_dspr2
|
||||
* jsimd_extrgbx_ycc_convert_mips_dspr2
|
||||
* jsimd_extbgrx_ycc_convert_mips_dspr2
|
||||
* jsimd_extxbgr_ycc_convert_mips_dspr2
|
||||
* jsimd_extxrgb_ycc_convert_mips_dspr2
|
||||
*
|
||||
* Colorspace conversion RGB -> YCbCr
|
||||
*/
|
||||
|
||||
.macro GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs
|
||||
|
||||
.macro DO_RGB_TO_YCC r, \
|
||||
g, \
|
||||
b, \
|
||||
inptr
|
||||
lbu \r, \r_offs(\inptr)
|
||||
lbu \g, \g_offs(\inptr)
|
||||
lbu \b, \b_offs(\inptr)
|
||||
addiu \inptr, \pixel_size
|
||||
.endm
|
||||
|
||||
LEAF_MIPS_DSPR2(jsimd_\colorid\()_ycc_convert_mips_dspr2)
|
||||
/*
|
||||
* a0 - cinfo->image_width
|
||||
* a1 - input_buf
|
||||
* a2 - output_buf
|
||||
* a3 - output_row
|
||||
* 16(sp) - num_rows
|
||||
*/
|
||||
|
||||
SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||
|
||||
lw t7, 48(sp) // t7 = num_rows
|
||||
li s0, 0x4c8b // FIX(0.29900)
|
||||
li s1, 0x9646 // FIX(0.58700)
|
||||
li s2, 0x1d2f // FIX(0.11400)
|
||||
li s3, 0xffffd4cd // -FIX(0.16874)
|
||||
li s4, 0xffffab33 // -FIX(0.33126)
|
||||
li s5, 0x8000 // FIX(0.50000)
|
||||
li s6, 0xffff94d1 // -FIX(0.41869)
|
||||
li s7, 0xffffeb2f // -FIX(0.08131)
|
||||
li t8, 0x807fff // CBCR_OFFSET + ONE_HALF-1
|
||||
|
||||
0:
|
||||
addiu t7, -1 // --num_rows
|
||||
lw t6, 0(a1) // t6 = input_buf[0]
|
||||
lw t0, 0(a2)
|
||||
lw t1, 4(a2)
|
||||
lw t2, 8(a2)
|
||||
sll t3, a3, 2
|
||||
lwx t0, t3(t0) // t0 = output_buf[0][output_row]
|
||||
lwx t1, t3(t1) // t1 = output_buf[1][output_row]
|
||||
lwx t2, t3(t2) // t2 = output_buf[2][output_row]
|
||||
|
||||
addu t9, t2, a0 // t9 = end address
|
||||
addiu a3, 1
|
||||
|
||||
1:
|
||||
DO_RGB_TO_YCC t3, t4, t5, t6
|
||||
|
||||
mtlo s5, $ac0
|
||||
mtlo t8, $ac1
|
||||
mtlo t8, $ac2
|
||||
maddu $ac0, s2, t5
|
||||
maddu $ac1, s5, t5
|
||||
maddu $ac2, s5, t3
|
||||
maddu $ac0, s0, t3
|
||||
maddu $ac1, s3, t3
|
||||
maddu $ac2, s6, t4
|
||||
maddu $ac0, s1, t4
|
||||
maddu $ac1, s4, t4
|
||||
maddu $ac2, s7, t5
|
||||
extr.w t3, $ac0, 16
|
||||
extr.w t4, $ac1, 16
|
||||
extr.w t5, $ac2, 16
|
||||
sb t3, 0(t0)
|
||||
sb t4, 0(t1)
|
||||
sb t5, 0(t2)
|
||||
addiu t0, 1
|
||||
addiu t2, 1
|
||||
bne t2, t9, 1b
|
||||
addiu t1, 1
|
||||
bgtz t7, 0b
|
||||
addiu a1, 4
|
||||
|
||||
RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||
|
||||
j ra
|
||||
nop
|
||||
END(jsimd_\colorid\()_ycc_convert_mips_dspr2)
|
||||
|
||||
.purgem DO_RGB_TO_YCC
|
||||
|
||||
.endm
|
||||
|
||||
/*------------------------------------------id -- pix R G B */
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1
|
||||
GENERATE_JSIMD_RGB_YCC_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3
|
||||
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
* jsimd_ycc_extrgb_convert_mips_dspr2
|
||||
* jsimd_ycc_extbgr_convert_mips_dspr2
|
||||
* jsimd_ycc_extrgbx_convert_mips_dspr2
|
||||
* jsimd_ycc_extbgrx_convert_mips_dspr2
|
||||
* jsimd_ycc_extxbgr_convert_mips_dspr2
|
||||
* jsimd_ycc_extxrgb_convert_mips_dspr2
|
||||
*
|
||||
* Colorspace conversion YCbCr -> RGB
|
||||
*/
|
||||
|
||||
.macro GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 colorid, pixel_size, r_offs, g_offs, b_offs, a_offs
|
||||
|
||||
.macro STORE_YCC_TO_RGB scratch0 \
|
||||
scratch1 \
|
||||
scratch2 \
|
||||
outptr
|
||||
sb \scratch0, \r_offs(\outptr)
|
||||
sb \scratch1, \g_offs(\outptr)
|
||||
sb \scratch2, \b_offs(\outptr)
|
||||
.if (\pixel_size == 4)
|
||||
li t0, 0xFF
|
||||
sb t0, \a_offs(\outptr)
|
||||
.endif
|
||||
addiu \outptr, \pixel_size
|
||||
.endm
|
||||
|
||||
LEAF_MIPS_DSPR2(jsimd_ycc_\colorid\()_convert_mips_dspr2)
|
||||
/*
|
||||
* a0 - cinfo->output_width
|
||||
* a1 - input_buf
|
||||
* a2 - input_row
|
||||
* a3 - output_buf
|
||||
* 16(sp) - num_rows
|
||||
*/
|
||||
|
||||
SAVE_REGS_ON_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||
|
||||
lw s1, 48(sp)
|
||||
li t3, 0x8000
|
||||
li t4, 0x166e9 // FIX(1.40200)
|
||||
li t5, 0x1c5a2 // FIX(1.77200)
|
||||
li t6, 0xffff492e // -FIX(0.71414)
|
||||
li t7, 0xffffa7e6 // -FIX(0.34414)
|
||||
repl.ph t8, 128
|
||||
|
||||
0:
|
||||
lw s0, 0(a3)
|
||||
lw t0, 0(a1)
|
||||
lw t1, 4(a1)
|
||||
lw t2, 8(a1)
|
||||
sll s5, a2, 2
|
||||
addiu s1, -1
|
||||
lwx s2, s5(t0)
|
||||
lwx s3, s5(t1)
|
||||
lwx s4, s5(t2)
|
||||
addu t9, s2, a0
|
||||
addiu a2, 1
|
||||
|
||||
1:
|
||||
lbu s7, 0(s4) // cr
|
||||
lbu s6, 0(s3) // cb
|
||||
lbu s5, 0(s2) // y
|
||||
addiu s2, 1
|
||||
addiu s4, 1
|
||||
addiu s7, -128
|
||||
addiu s6, -128
|
||||
mul t2, t7, s6
|
||||
mul t0, t6, s7 // Crgtab[cr]
|
||||
sll s7, 15
|
||||
mulq_rs.w t1, t4, s7 // Crrtab[cr]
|
||||
sll s6, 15
|
||||
addu t2, t3 // Cbgtab[cb]
|
||||
addu t2, t0
|
||||
|
||||
mulq_rs.w t0, t5, s6 // Cbbtab[cb]
|
||||
sra t2, 16
|
||||
addu t1, s5
|
||||
addu t2, s5 // add y
|
||||
ins t2, t1, 16, 16
|
||||
subu.ph t2, t2, t8
|
||||
addu t0, s5
|
||||
shll_s.ph t2, t2, 8
|
||||
subu t0, 128
|
||||
shra.ph t2, t2, 8
|
||||
shll_s.w t0, t0, 24
|
||||
addu.ph t2, t2, t8 // clip & store
|
||||
sra t0, t0, 24
|
||||
sra t1, t2, 16
|
||||
addiu t0, 128
|
||||
|
||||
STORE_YCC_TO_RGB t1, t2, t0, s0
|
||||
|
||||
bne s2, t9, 1b
|
||||
addiu s3, 1
|
||||
bgtz s1, 0b
|
||||
addiu a3, 4
|
||||
|
||||
RESTORE_REGS_FROM_STACK 32, s0, s1, s2, s3, s4, s5, s6, s7
|
||||
|
||||
j ra
|
||||
nop
|
||||
END(jsimd_ycc_\colorid\()_convert_mips_dspr2)
|
||||
|
||||
.purgem STORE_YCC_TO_RGB
|
||||
|
||||
.endm
|
||||
|
||||
/*------------------------------------------id -- pix R G B A */
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgb, 3, 0, 1, 2, 3
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgr, 3, 2, 1, 0, 3
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extrgbx, 4, 0, 1, 2, 3
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extbgrx, 4, 2, 1, 0, 3
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxbgr, 4, 3, 2, 1, 0
|
||||
GENERATE_JSIMD_YCC_RGB_CONVERT_MIPS_DSPR2 extxrgb, 4, 1, 2, 3, 0
|
||||
|
||||
/*****************************************************************************/
|
||||
252
simd/jsimd_mips_dspr2_asm.h
Normal file
252
simd/jsimd_mips_dspr2_asm.h
Normal file
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
* MIPS DSPr2 optimizations for libjpeg-turbo
|
||||
*
|
||||
* Copyright (C) 2013, MIPS Technologies, Inc., California.
|
||||
* All rights reserved.
|
||||
* Authors: Teodora Novkovic (teodora.novkovic@imgtec.com)
|
||||
* Darko Laus (darko.laus@imgtec.com)
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
#define zero $0
|
||||
#define AT $1
|
||||
#define v0 $2
|
||||
#define v1 $3
|
||||
#define a0 $4
|
||||
#define a1 $5
|
||||
#define a2 $6
|
||||
#define a3 $7
|
||||
#define t0 $8
|
||||
#define t1 $9
|
||||
#define t2 $10
|
||||
#define t3 $11
|
||||
#define t4 $12
|
||||
#define t5 $13
|
||||
#define t6 $14
|
||||
#define t7 $15
|
||||
#define s0 $16
|
||||
#define s1 $17
|
||||
#define s2 $18
|
||||
#define s3 $19
|
||||
#define s4 $20
|
||||
#define s5 $21
|
||||
#define s6 $22
|
||||
#define s7 $23
|
||||
#define t8 $24
|
||||
#define t9 $25
|
||||
#define k0 $26
|
||||
#define k1 $27
|
||||
#define gp $28
|
||||
#define sp $29
|
||||
#define fp $30
|
||||
#define s8 $30
|
||||
#define ra $31
|
||||
|
||||
/*
|
||||
* LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
|
||||
*/
|
||||
#define LEAF_MIPS32R2(symbol) \
|
||||
.globl symbol; \
|
||||
.align 2; \
|
||||
.type symbol, @function; \
|
||||
.ent symbol, 0; \
|
||||
symbol: .frame sp, 0, ra; \
|
||||
.set push; \
|
||||
.set arch=mips32r2; \
|
||||
.set noreorder; \
|
||||
.set noat;
|
||||
|
||||
/*
|
||||
* LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
|
||||
*/
|
||||
#define LEAF_MIPS_DSPR2(symbol) \
|
||||
LEAF_MIPS32R2(symbol) \
|
||||
.set dspr2;
|
||||
|
||||
/*
|
||||
* END - mark end of function
|
||||
*/
|
||||
#define END(function) \
|
||||
.set pop; \
|
||||
.end function; \
|
||||
.size function,.-function
|
||||
|
||||
/*
|
||||
* Checks if stack offset is big enough for storing/restoring regs_num
|
||||
* number of registers to/from stack. Stack offset must be greater than
|
||||
* or equal to the number of bytes needed for storing registers (regs_num*4).
|
||||
* Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
|
||||
* reserved for input arguments of the function, already passed in a0-a3),
|
||||
* stack size can be further optimized by utilizing this space.
|
||||
*/
|
||||
.macro CHECK_STACK_OFFSET regs_num, stack_offset
|
||||
.if \stack_offset < \regs_num * 4 - 16
|
||||
.error "Stack offset too small."
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Saves set of registers on stack. Maximum number of registers that
|
||||
* can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
|
||||
* Stack offset is number of bytes that are added to stack pointer (sp)
|
||||
* before registers are pushed in order to provide enough space on stack
|
||||
* (offset must be multiple of 4, and must be big enough, as described by
|
||||
* CHECK_STACK_OFFSET macro). This macro is intended to be used in
|
||||
* combination with RESTORE_REGS_FROM_STACK macro. Example:
|
||||
* SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
|
||||
* RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
|
||||
*/
|
||||
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
|
||||
r2 = 0, r3 = 0, r4 = 0, \
|
||||
r5 = 0, r6 = 0, r7 = 0, \
|
||||
r8 = 0, r9 = 0, r10 = 0, \
|
||||
r11 = 0, r12 = 0, r13 = 0, \
|
||||
r14 = 0
|
||||
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
|
||||
.error "Stack offset must be pozitive and multiple of 4."
|
||||
.endif
|
||||
.if \stack_offset != 0
|
||||
addiu sp, sp, -\stack_offset
|
||||
.endif
|
||||
sw \r1, 0(sp)
|
||||
.if \r2 != 0
|
||||
sw \r2, 4(sp)
|
||||
.endif
|
||||
.if \r3 != 0
|
||||
sw \r3, 8(sp)
|
||||
.endif
|
||||
.if \r4 != 0
|
||||
sw \r4, 12(sp)
|
||||
.endif
|
||||
.if \r5 != 0
|
||||
CHECK_STACK_OFFSET 5, \stack_offset
|
||||
sw \r5, 16(sp)
|
||||
.endif
|
||||
.if \r6 != 0
|
||||
CHECK_STACK_OFFSET 6, \stack_offset
|
||||
sw \r6, 20(sp)
|
||||
.endif
|
||||
.if \r7 != 0
|
||||
CHECK_STACK_OFFSET 7, \stack_offset
|
||||
sw \r7, 24(sp)
|
||||
.endif
|
||||
.if \r8 != 0
|
||||
CHECK_STACK_OFFSET 8, \stack_offset
|
||||
sw \r8, 28(sp)
|
||||
.endif
|
||||
.if \r9 != 0
|
||||
CHECK_STACK_OFFSET 9, \stack_offset
|
||||
sw \r9, 32(sp)
|
||||
.endif
|
||||
.if \r10 != 0
|
||||
CHECK_STACK_OFFSET 10, \stack_offset
|
||||
sw \r10, 36(sp)
|
||||
.endif
|
||||
.if \r11 != 0
|
||||
CHECK_STACK_OFFSET 11, \stack_offset
|
||||
sw \r11, 40(sp)
|
||||
.endif
|
||||
.if \r12 != 0
|
||||
CHECK_STACK_OFFSET 12, \stack_offset
|
||||
sw \r12, 44(sp)
|
||||
.endif
|
||||
.if \r13 != 0
|
||||
CHECK_STACK_OFFSET 13, \stack_offset
|
||||
sw \r13, 48(sp)
|
||||
.endif
|
||||
.if \r14 != 0
|
||||
CHECK_STACK_OFFSET 14, \stack_offset
|
||||
sw \r14, 52(sp)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Restores set of registers from stack. Maximum number of registers that
|
||||
* can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
|
||||
* Stack offset is number of bytes that are added to stack pointer (sp)
|
||||
* after registers are restored (offset must be multiple of 4, and must
|
||||
* be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
|
||||
* intended to be used in combination with SAVE_REGS_ON_STACK macro.
|
||||
* Example:
|
||||
* SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
|
||||
* RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
|
||||
*/
|
||||
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
|
||||
r2 = 0, r3 = 0, r4 = 0, \
|
||||
r5 = 0, r6 = 0, r7 = 0, \
|
||||
r8 = 0, r9 = 0, r10 = 0, \
|
||||
r11 = 0, r12 = 0, r13 = 0, \
|
||||
r14 = 0
|
||||
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
|
||||
.error "Stack offset must be pozitive and multiple of 4."
|
||||
.endif
|
||||
lw \r1, 0(sp)
|
||||
.if \r2 != 0
|
||||
lw \r2, 4(sp)
|
||||
.endif
|
||||
.if \r3 != 0
|
||||
lw \r3, 8(sp)
|
||||
.endif
|
||||
.if \r4 != 0
|
||||
lw \r4, 12(sp)
|
||||
.endif
|
||||
.if \r5 != 0
|
||||
CHECK_STACK_OFFSET 5, \stack_offset
|
||||
lw \r5, 16(sp)
|
||||
.endif
|
||||
.if \r6 != 0
|
||||
CHECK_STACK_OFFSET 6, \stack_offset
|
||||
lw \r6, 20(sp)
|
||||
.endif
|
||||
.if \r7 != 0
|
||||
CHECK_STACK_OFFSET 7, \stack_offset
|
||||
lw \r7, 24(sp)
|
||||
.endif
|
||||
.if \r8 != 0
|
||||
CHECK_STACK_OFFSET 8, \stack_offset
|
||||
lw \r8, 28(sp)
|
||||
.endif
|
||||
.if \r9 != 0
|
||||
CHECK_STACK_OFFSET 9, \stack_offset
|
||||
lw \r9, 32(sp)
|
||||
.endif
|
||||
.if \r10 != 0
|
||||
CHECK_STACK_OFFSET 10, \stack_offset
|
||||
lw \r10, 36(sp)
|
||||
.endif
|
||||
.if \r11 != 0
|
||||
CHECK_STACK_OFFSET 11, \stack_offset
|
||||
lw \r11, 40(sp)
|
||||
.endif
|
||||
.if \r12 != 0
|
||||
CHECK_STACK_OFFSET 12, \stack_offset
|
||||
lw \r12, 44(sp)
|
||||
.endif
|
||||
.if \r13 != 0
|
||||
CHECK_STACK_OFFSET 13, \stack_offset
|
||||
lw \r13, 48(sp)
|
||||
.endif
|
||||
.if \r14 != 0
|
||||
CHECK_STACK_OFFSET 14, \stack_offset
|
||||
lw \r14, 52(sp)
|
||||
.endif
|
||||
.if \stack_offset != 0
|
||||
addiu sp, sp, \stack_offset
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
||||
Reference in New Issue
Block a user