Full-color compression speedups relative to libjpeg-turbo 1.4.2:

  2.8 GHz Intel Xeon W3530, Linux, 64-bit: 2.2-18% (avg. 9.5%)
  2.8 GHz Intel Xeon W3530, Linux, 32-bit: 10-25% (avg. 17%)
  2.3 GHz AMD A10-4600M APU, Linux, 64-bit: 4.9-17% (avg. 11%)
  2.3 GHz AMD A10-4600M APU, Linux, 32-bit: 8.8-19% (avg. 15%)
  3.0 GHz Intel Core i7, OS X, 64-bit: 3.5-16% (avg. 10%)
  3.0 GHz Intel Core i7, OS X, 32-bit: 4.8-14% (avg. 11%)
  2.6 GHz AMD Athlon 64 X2 5050e: Performance-neutral (give or take a few percent)

Full-color compression speedups relative to IPP:

  2.8 GHz Intel Xeon W3530, Linux, 64-bit: 4.8-34% (avg. 19%)
  2.8 GHz Intel Xeon W3530, Linux, 32-bit: -19%-7.0% (avg. -7.0%)

Refer to #42 for discussion. Numerous other approaches were attempted, but this one proved to be the most performant across all platforms.

This commit also fixes #3 (works around, really-- the clang-compiled version of jchuff.c still performs 20% worse than its GCC-compiled counterpart, but that code is now bypassed by the new SSE2 Huffman algorithm.)

Based on: 2cb4d4133036c94e050d
94 lines
3.6 KiB
C
94 lines
3.6 KiB
C
/*
 * jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright 2011, 2014 D. R. Commander
 * Copyright 2015 Matthieu Darbois
 *
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * For conditions of distribution and use, see copyright notice in jsimdext.inc
 *
 */
|
|
|
|
#include "jchuff.h" /* Declarations shared with jcphuff.c */
|
|
|
|
EXTERN(int) jsimd_can_rgb_ycc (void);
|
|
EXTERN(int) jsimd_can_rgb_gray (void);
|
|
EXTERN(int) jsimd_can_ycc_rgb (void);
|
|
EXTERN(int) jsimd_can_ycc_rgb565 (void);
|
|
EXTERN(int) jsimd_c_can_null_convert (void);
|
|
|
|
EXTERN(void) jsimd_rgb_ycc_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
EXTERN(void) jsimd_rgb_gray_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
EXTERN(void) jsimd_ycc_rgb_convert
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
EXTERN(void) jsimd_ycc_rgb565_convert
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
EXTERN(void) jsimd_c_null_convert
|
|
(j_compress_ptr cinfo, JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_downsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_downsample (void);
|
|
|
|
/*
 * SIMD downsampling entry point for the compressor.
 *
 * Takes the compression object, the component descriptor, and the input and
 * output sample arrays; returns nothing.
 *
 * NOTE(review): "h2v2" presumably denotes 2:1 horizontal and 2:1 vertical
 * subsampling -- confirm against the implementation.
 */
EXTERN(void) jsimd_h2v2_downsample
        (j_compress_ptr cinfo, jpeg_component_info *compptr,
         JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_smooth_downsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_smooth_downsample
|
|
(j_compress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
/*
 * SIMD downsampling entry point for the compressor.
 *
 * Takes the compression object, the component descriptor, and the input and
 * output sample arrays; returns nothing.
 *
 * NOTE(review): "h2v1" presumably denotes 2:1 horizontal subsampling with no
 * vertical subsampling -- confirm against the implementation.
 */
EXTERN(void) jsimd_h2v1_downsample
        (j_compress_ptr cinfo, jpeg_component_info *compptr,
         JSAMPARRAY input_data, JSAMPARRAY output_data);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_upsample (void);
|
|
EXTERN(int) jsimd_can_int_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_h2v1_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_int_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_fancy_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_fancy_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_fancy_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
EXTERN(void) jsimd_h2v1_fancy_upsample
|
|
(j_decompress_ptr cinfo, jpeg_component_info * compptr,
|
|
JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);
|
|
|
|
EXTERN(int) jsimd_can_h2v2_merged_upsample (void);
|
|
EXTERN(int) jsimd_can_h2v1_merged_upsample (void);
|
|
|
|
EXTERN(void) jsimd_h2v2_merged_upsample
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
|
|
EXTERN(void) jsimd_h2v1_merged_upsample
|
|
(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf);
|
|
|
|
EXTERN(int) jsimd_can_huff_encode_one_block (void);
|
|
|
|
EXTERN(JOCTET*) jsimd_huff_encode_one_block
|
|
(void * state, JOCTET *buffer, JCOEFPTR block, int last_dc_val,
|
|
c_derived_tbl *dctbl, c_derived_tbl *actbl);
|