-----aee36252be.patch From aee36252be20054afce371a92406fc66ba6627b5 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@gmail.com> Date: Wed, 13 Aug 2014 03:50:22 +0300 Subject: [PATCH] ARM: Faster NEON yuv->rgb conversion for Krait and Cortex-A15 The older code was developed and tested only on ARM Cortex-A8 and ARM Cortex-A9. Tuning it for newer ARM processors can introduce some speed-up (up to 20%). The performance of the inner loop (conversion of 8 pixels) improves from ~27 cycles down to ~22 cycles on Qualcomm Krait 300, and from ~20 cycles down to ~18 cycles on ARM Cortex-A15. The performance remains exactly the same on ARM Cortex-A7 (~58 cycles), ARM Cortex-A8 (~25 cycles) and ARM Cortex-A9 (~30 cycles) processors. Also use larger indentation in the source code for separating two independent instruction streams. -----a5efdbf22c.patch From a5efdbf22ce9c1acd4b14a353cec863c2c57557e Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@gmail.com> Date: Wed, 13 Aug 2014 07:23:09 +0300 Subject: [PATCH] ARM: NEON optimized yuv->rgb565 conversion The performance of the inner loop (conversion of 8 pixels): * ARM Cortex-A7: ~55 cycles * ARM Cortex-A8: ~28 cycles * ARM Cortex-A9: ~32 cycles * ARM Cortex-A15: ~20 cycles * Qualcomm Krait: ~24 cycles Based on the Linaro rgb565 patch from https://sourceforge.net/p/libjpeg-turbo/patches/24/ but implements better instruction scheduling. git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1385 632fc199-4ca6-4c93-a231-07263d6284db
85 lines
3.3 KiB
C
85 lines
3.3 KiB
C
/*
 * jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2011, 2014 D. R. Commander
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 */

EXTERN(int) jsimd_can_rgb_ycc (void);
|
|
EXTERN(int) jsimd_can_rgb_gray (void);
|
|
EXTERN(int) jsimd_can_ycc_rgb (void);
|
|
EXTERN(int) jsimd_can_ycc_rgb565 (void);
|
|
EXTERN(int) jsimd_c_can_null_convert (void);
|
|
|
|
EXTERN(void) jsimd_rgb_ycc_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
EXTERN(void) jsimd_rgb_gray_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
EXTERN(void) jsimd_ycc_rgb_convert
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
EXTERN(void) jsimd_ycc_rgb565_convert
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
EXTERN(void) jsimd_c_null_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_downsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_downsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_downsample
|
|
(j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_smooth_downsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_smooth_downsample
|
|
(j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
EXTERN(void) jsimd_h2v1_downsample
|
|
(j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_upsample (void);
|
|
EXTERN(int) jsimd_can_int_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_h2v1_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_int_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_fancy_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_h2v1_fancy_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_merged_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_merged_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_merged_upsample
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
|
|
EXTERN(void) jsimd_h2v1_merged_upsample
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
|