ARM NEON SIMD support for YCC-to-RGB565 conversion, and optimizations to the existing YCC-to-RGB conversion code:
-----aee36252be.patch From aee36252be20054afce371a92406fc66ba6627b5 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@gmail.com> Date: Wed, 13 Aug 2014 03:50:22 +0300 Subject: [PATCH] ARM: Faster NEON yuv->rgb conversion for Krait and Cortex-A15 The older code was developed and tested only on ARM Cortex-A8 and ARM Cortex-A9. Tuning it for newer ARM processors can introduce some speed-up (up to 20%). The performance of the inner loop (conversion of 8 pixels) improves from ~27 cycles down to ~22 cycles on Qualcomm Krait 300, and from ~20 cycles down to ~18 cycles on ARM Cortex-A15. The performance remains exactly the same on ARM Cortex-A7 (~58 cycles), ARM Cortex-A8 (~25 cycles) and ARM Cortex-A9 (~30 cycles) processors. Also use larger indentation in the source code for separating two independent instruction streams. -----a5efdbf22c.patch From a5efdbf22ce9c1acd4b14a353cec863c2c57557e Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@gmail.com> Date: Wed, 13 Aug 2014 07:23:09 +0300 Subject: [PATCH] ARM: NEON optimized yuv->rgb565 conversion The performance of the inner loop (conversion of 8 pixels): * ARM Cortex-A7: ~55 cycles * ARM Cortex-A8: ~28 cycles * ARM Cortex-A9: ~32 cycles * ARM Cortex-A15: ~20 cycles * Qualcomm Krait: ~24 cycles Based on the Linaro rgb565 patch from https://sourceforge.net/p/libjpeg-turbo/patches/24/ but implements better instruction scheduling. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1385 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -68,7 +68,8 @@ accuracy than the slow integer DCT/IDCT algorithms, and they are quite a bit
|
|||||||
slower.
|
slower.
|
||||||
|
|
||||||
[8] Added a new output colorspace (JCS_RGB565) to the libjpeg API that allows
|
[8] Added a new output colorspace (JCS_RGB565) to the libjpeg API that allows
|
||||||
for decompressing JPEG images into RGB565 (16-bit) pixels.
|
for decompressing JPEG images into RGB565 (16-bit) pixels. If dithering is not
|
||||||
|
used, then this code path is SIMD-accelerated on ARM platforms.
|
||||||
|
|
||||||
[9] Numerous obsolete features, such as support for non-ANSI compilers and
|
[9] Numerous obsolete features, such as support for non-ANSI compilers and
|
||||||
support for the MS-DOS memory model, were removed from the libjpeg code,
|
support for the MS-DOS memory model, were removed from the libjpeg code,
|
||||||
|
|||||||
10
jdcolor.c
10
jdcolor.c
@@ -6,7 +6,7 @@
|
|||||||
* Modified 2011 by Guido Vollbeding.
|
* Modified 2011 by Guido Vollbeding.
|
||||||
* libjpeg-turbo Modifications:
|
* libjpeg-turbo Modifications:
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright (C) 2009, 2011-2012, D. R. Commander.
|
* Copyright (C) 2009, 2011-2012, 2014, D. R. Commander.
|
||||||
* Copyright (C) 2013, Linaro Limited.
|
* Copyright (C) 2013, Linaro Limited.
|
||||||
* For conditions of distribution and use, see the accompanying README file.
|
* For conditions of distribution and use, see the accompanying README file.
|
||||||
*
|
*
|
||||||
@@ -657,8 +657,12 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
|
|||||||
cinfo->out_color_components = 3;
|
cinfo->out_color_components = 3;
|
||||||
if (cinfo->dither_mode == JDITHER_NONE) {
|
if (cinfo->dither_mode == JDITHER_NONE) {
|
||||||
if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
||||||
cconvert->pub.color_convert = ycc_rgb565_convert;
|
if (jsimd_can_ycc_rgb565())
|
||||||
build_ycc_rgb_table(cinfo);
|
cconvert->pub.color_convert = jsimd_ycc_rgb565_convert;
|
||||||
|
else {
|
||||||
|
cconvert->pub.color_convert = ycc_rgb565_convert;
|
||||||
|
build_ycc_rgb_table(cinfo);
|
||||||
|
}
|
||||||
} else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
} else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
||||||
cconvert->pub.color_convert = gray_rgb565_convert;
|
cconvert->pub.color_convert = gray_rgb565_convert;
|
||||||
} else if (cinfo->jpeg_color_space == JCS_RGB) {
|
} else if (cinfo->jpeg_color_space == JCS_RGB) {
|
||||||
|
|||||||
6
jsimd.h
6
jsimd.h
@@ -2,7 +2,7 @@
|
|||||||
* jsimd.h
|
* jsimd.h
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2011 D. R. Commander
|
* Copyright 2011, 2014 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -13,6 +13,7 @@
|
|||||||
EXTERN(int) jsimd_can_rgb_ycc (void);
|
EXTERN(int) jsimd_can_rgb_ycc (void);
|
||||||
EXTERN(int) jsimd_can_rgb_gray (void);
|
EXTERN(int) jsimd_can_rgb_gray (void);
|
||||||
EXTERN(int) jsimd_can_ycc_rgb (void);
|
EXTERN(int) jsimd_can_ycc_rgb (void);
|
||||||
|
EXTERN(int) jsimd_can_ycc_rgb565 (void);
|
||||||
EXTERN(int) jsimd_c_can_null_convert (void);
|
EXTERN(int) jsimd_c_can_null_convert (void);
|
||||||
|
|
||||||
EXTERN(void) jsimd_rgb_ycc_convert
|
EXTERN(void) jsimd_rgb_ycc_convert
|
||||||
@@ -24,6 +25,9 @@ EXTERN(void) jsimd_rgb_gray_convert
|
|||||||
EXTERN(void) jsimd_ycc_rgb_convert
|
EXTERN(void) jsimd_ycc_rgb_convert
|
||||||
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
JSAMPARRAY output_buf, int num_rows);
|
JSAMPARRAY output_buf, int num_rows);
|
||||||
|
EXTERN(void) jsimd_ycc_rgb565_convert
|
||||||
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows);
|
||||||
EXTERN(void) jsimd_c_null_convert
|
EXTERN(void) jsimd_c_null_convert
|
||||||
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows);
|
JDIMENSION output_row, int num_rows);
|
||||||
|
|||||||
15
jsimd_none.c
15
jsimd_none.c
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_none.c
|
* jsimd_none.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011 D. R. Commander
|
* Copyright 2009-2011, 2014 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -36,6 +36,12 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_c_can_null_convert (void)
|
jsimd_c_can_null_convert (void)
|
||||||
{
|
{
|
||||||
@@ -63,6 +69,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_c_null_convert (j_compress_ptr cinfo,
|
jsimd_c_null_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2011 D. R. Commander
|
* Copyright 2011 D. R. Commander
|
||||||
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California
|
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California
|
||||||
|
* Copyright (C) 2014 Linaro Limited
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -249,6 +250,9 @@ EXTERN(void) jsimd_ycc_extxbgr_convert_neon
|
|||||||
EXTERN(void) jsimd_ycc_extxrgb_convert_neon
|
EXTERN(void) jsimd_ycc_extxrgb_convert_neon
|
||||||
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
JSAMPARRAY output_buf, int num_rows);
|
JSAMPARRAY output_buf, int num_rows);
|
||||||
|
EXTERN(void) jsimd_ycc_rgb565_convert_neon
|
||||||
|
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows);
|
||||||
|
|
||||||
EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2
|
EXTERN(void) jsimd_ycc_rgb_convert_mips_dspr2
|
||||||
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
(JDIMENSION out_width, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_arm.c
|
* jsimd_arm.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011, 2013 D. R. Commander
|
* Copyright 2009-2011, 2013-2014 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -175,6 +175,23 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
init_simd();
|
||||||
|
|
||||||
|
/* The code is optimised for these values only */
|
||||||
|
if (BITS_IN_JSAMPLE != 8)
|
||||||
|
return 0;
|
||||||
|
if (sizeof(JDIMENSION) != 4)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (simd_support & JSIMD_ARM_NEON)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
@@ -251,7 +268,7 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
case JCS_EXT_ARGB:
|
case JCS_EXT_ARGB:
|
||||||
neonfct=jsimd_ycc_extxrgb_convert_neon;
|
neonfct=jsimd_ycc_extxrgb_convert_neon;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
neonfct=jsimd_ycc_extrgb_convert_neon;
|
neonfct=jsimd_ycc_extrgb_convert_neon;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -260,6 +277,16 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
if (simd_support & JSIMD_ARM_NEON)
|
||||||
|
jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
|
||||||
|
output_buf, num_rows);
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_h2v2_downsample (void)
|
jsimd_can_h2v2_downsample (void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -95,6 +95,12 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
@@ -148,6 +154,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_h2v2_downsample (void)
|
jsimd_can_h2v2_downsample (void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
* Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
|
* Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies).
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
* Author: Siarhei Siamashka <siarhei.siamashka@nokia.com>
|
||||||
|
* Copyright (C) 2014 Linaro Limited. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* This software is provided 'as-is', without any express or implied
|
* This software is provided 'as-is', without any express or implied
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
@@ -1346,6 +1347,19 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
.else
|
.else
|
||||||
.error unsupported macroblock size
|
.error unsupported macroblock size
|
||||||
.endif
|
.endif
|
||||||
|
.elseif \bpp == 16
|
||||||
|
.if \size == 8
|
||||||
|
vst1.16 {q15}, [RGB]!
|
||||||
|
.elseif \size == 4
|
||||||
|
vst1.16 {d30}, [RGB]!
|
||||||
|
.elseif \size == 2
|
||||||
|
vst1.16 {d31[0]}, [RGB]!
|
||||||
|
vst1.16 {d31[1]}, [RGB]!
|
||||||
|
.elseif \size == 1
|
||||||
|
vst1.16 {d31[2]}, [RGB]!
|
||||||
|
.else
|
||||||
|
.error unsupported macroblock size
|
||||||
|
.endif
|
||||||
.else
|
.else
|
||||||
.error unsupported bpp
|
.error unsupported bpp
|
||||||
.endif
|
.endif
|
||||||
@@ -1377,44 +1391,71 @@ asm_function jsimd_idct_2x2_neon
|
|||||||
vrshrn.s32 d25, q13, #14
|
vrshrn.s32 d25, q13, #14
|
||||||
vrshrn.s32 d28, q14, #14
|
vrshrn.s32 d28, q14, #14
|
||||||
vrshrn.s32 d29, q15, #14
|
vrshrn.s32 d29, q15, #14
|
||||||
vaddw.u8 q10, q10, d0
|
vaddw.u8 q11, q10, d0
|
||||||
vaddw.u8 q12, q12, d0
|
vaddw.u8 q12, q12, d0
|
||||||
vaddw.u8 q14, q14, d0
|
vaddw.u8 q14, q14, d0
|
||||||
vqmovun.s16 d1\g_offs, q10
|
.if \bpp != 16
|
||||||
|
vqmovun.s16 d1\g_offs, q11
|
||||||
vqmovun.s16 d1\r_offs, q12
|
vqmovun.s16 d1\r_offs, q12
|
||||||
vqmovun.s16 d1\b_offs, q14
|
vqmovun.s16 d1\b_offs, q14
|
||||||
|
.else /* rgb565 */
|
||||||
|
vqshlu.s16 q13, q11, #8
|
||||||
|
vqshlu.s16 q15, q12, #8
|
||||||
|
vqshlu.s16 q14, q14, #8
|
||||||
|
vsri.u16 q15, q13, #5
|
||||||
|
vsri.u16 q15, q14, #11
|
||||||
|
.endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro do_yuv_to_rgb_stage2_store_load_stage1
|
.macro do_yuv_to_rgb_stage2_store_load_stage1
|
||||||
vld1.8 {d4}, [U, :64]!
|
/* "do_yuv_to_rgb_stage2" and "store" */
|
||||||
vrshrn.s32 d20, q10, #15
|
vrshrn.s32 d20, q10, #15
|
||||||
|
/* "load" and "do_yuv_to_rgb_stage1" */
|
||||||
|
pld [U, #64]
|
||||||
vrshrn.s32 d21, q11, #15
|
vrshrn.s32 d21, q11, #15
|
||||||
|
pld [V, #64]
|
||||||
vrshrn.s32 d24, q12, #14
|
vrshrn.s32 d24, q12, #14
|
||||||
vrshrn.s32 d25, q13, #14
|
vrshrn.s32 d25, q13, #14
|
||||||
|
vld1.8 {d4}, [U, :64]!
|
||||||
vrshrn.s32 d28, q14, #14
|
vrshrn.s32 d28, q14, #14
|
||||||
vld1.8 {d5}, [V, :64]!
|
vld1.8 {d5}, [V, :64]!
|
||||||
vrshrn.s32 d29, q15, #14
|
vrshrn.s32 d29, q15, #14
|
||||||
vaddw.u8 q10, q10, d0
|
|
||||||
vaddw.u8 q12, q12, d0
|
|
||||||
vaddw.u8 q14, q14, d0
|
|
||||||
vqmovun.s16 d1\g_offs, q10
|
|
||||||
vld1.8 {d0}, [Y, :64]!
|
|
||||||
vqmovun.s16 d1\r_offs, q12
|
|
||||||
pld [U, #64]
|
|
||||||
pld [V, #64]
|
|
||||||
pld [Y, #64]
|
|
||||||
vqmovun.s16 d1\b_offs, q14
|
|
||||||
vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
|
vaddw.u8 q3, q1, d4 /* q3 = u - 128 */
|
||||||
vaddw.u8 q4, q1, d5 /* q2 = v - 128 */
|
vaddw.u8 q4, q1, d5 /* q2 = v - 128 */
|
||||||
do_store \bpp, 8
|
vaddw.u8 q11, q10, d0
|
||||||
vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
|
vmull.s16 q10, d6, d1[1] /* multiply by -11277 */
|
||||||
vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */
|
vmlal.s16 q10, d8, d1[2] /* multiply by -23401 */
|
||||||
|
vaddw.u8 q12, q12, d0
|
||||||
|
vaddw.u8 q14, q14, d0
|
||||||
|
.if \bpp != 16 /**************** rgb24/rgb32 *********************************/
|
||||||
|
vqmovun.s16 d1\g_offs, q11
|
||||||
|
pld [Y, #64]
|
||||||
|
vqmovun.s16 d1\r_offs, q12
|
||||||
|
vld1.8 {d0}, [Y, :64]!
|
||||||
|
vqmovun.s16 d1\b_offs, q14
|
||||||
vmull.s16 q11, d7, d1[1] /* multiply by -11277 */
|
vmull.s16 q11, d7, d1[1] /* multiply by -11277 */
|
||||||
vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */
|
vmlal.s16 q11, d9, d1[2] /* multiply by -23401 */
|
||||||
|
do_store \bpp, 8
|
||||||
vmull.s16 q12, d8, d1[0] /* multiply by 22971 */
|
vmull.s16 q12, d8, d1[0] /* multiply by 22971 */
|
||||||
vmull.s16 q13, d9, d1[0] /* multiply by 22971 */
|
vmull.s16 q13, d9, d1[0] /* multiply by 22971 */
|
||||||
vmull.s16 q14, d6, d1[3] /* multiply by 29033 */
|
vmull.s16 q14, d6, d1[3] /* multiply by 29033 */
|
||||||
vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
|
vmull.s16 q15, d7, d1[3] /* multiply by 29033 */
|
||||||
|
.else /**************************** rgb565 ***********************************/
|
||||||
|
vqshlu.s16 q13, q11, #8
|
||||||
|
pld [Y, #64]
|
||||||
|
vqshlu.s16 q15, q12, #8
|
||||||
|
vqshlu.s16 q14, q14, #8
|
||||||
|
vld1.8 {d0}, [Y, :64]!
|
||||||
|
vmull.s16 q11, d7, d1[1]
|
||||||
|
vmlal.s16 q11, d9, d1[2]
|
||||||
|
vsri.u16 q15, q13, #5
|
||||||
|
vmull.s16 q12, d8, d1[0]
|
||||||
|
vsri.u16 q15, q14, #11
|
||||||
|
vmull.s16 q13, d9, d1[0]
|
||||||
|
vmull.s16 q14, d6, d1[3]
|
||||||
|
do_store \bpp, 8
|
||||||
|
vmull.s16 q15, d7, d1[3]
|
||||||
|
.endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro do_yuv_to_rgb
|
.macro do_yuv_to_rgb
|
||||||
@@ -1556,6 +1597,7 @@ generate_jsimd_ycc_rgb_convert_neon extrgbx, 32, 0, 1, 2
|
|||||||
generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0
|
generate_jsimd_ycc_rgb_convert_neon extbgrx, 32, 2, 1, 0
|
||||||
generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1
|
generate_jsimd_ycc_rgb_convert_neon extxbgr, 32, 3, 2, 1
|
||||||
generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3
|
generate_jsimd_ycc_rgb_convert_neon extxrgb, 32, 1, 2, 3
|
||||||
|
generate_jsimd_ycc_rgb_convert_neon rgb565, 16, 0, 0, 0
|
||||||
|
|
||||||
.purgem do_load
|
.purgem do_load
|
||||||
.purgem do_store
|
.purgem do_store
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_i386.c
|
* jsimd_i386.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011, 2013 D. R. Commander
|
* Copyright 2009-2011, 2013-2014 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -130,6 +130,12 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
@@ -280,6 +286,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
mmxfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_h2v2_downsample (void)
|
jsimd_can_h2v2_downsample (void)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_mips.c
|
* jsimd_mips.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011 D. R. Commander
|
* Copyright 2009-2011, 2014 D. R. Commander
|
||||||
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California
|
* Copyright (C) 2013-2014, MIPS Technologies, Inc., California
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
@@ -161,6 +161,12 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_c_can_null_convert (void)
|
jsimd_c_can_null_convert (void)
|
||||||
{
|
{
|
||||||
@@ -299,6 +305,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
num_rows);
|
num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_c_null_convert (j_compress_ptr cinfo,
|
jsimd_c_null_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* jsimd_x86_64.c
|
* jsimd_x86_64.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright 2009-2011 D. R. Commander
|
* Copyright 2009-2011, 2014 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -80,6 +80,12 @@ jsimd_can_ycc_rgb (void)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(int)
|
||||||
|
jsimd_can_ycc_rgb565 (void)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(void)
|
GLOBAL(void)
|
||||||
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
@@ -194,6 +200,13 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GLOBAL(void)
|
||||||
|
jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
GLOBAL(int)
|
GLOBAL(int)
|
||||||
jsimd_can_h2v2_downsample (void)
|
jsimd_can_h2v2_downsample (void)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user