This commit adds C and SSE2 optimizations for the encode_mcu_AC_first() function used in progressive Huffman encoding.

The image used for testing can be retrieved from this page: https://blog.cloudflare.com/doubling-the-speed-of-jpegtran

All timings were done on `Intel(R) Core(TM) i7-4870HQ CPU @ 2.50GHz`.
- clang version is `Apple LLVM version 9.0.0 (clang-900.0.39.2)`
- gcc-5 version is `gcc-5 (Homebrew GCC 5.5.0) 5.5.0`
- gcc-7 version is `gcc-7 (Homebrew GCC 7.2.0) 7.2.0`

Here are the results in comparison to libjpeg-turbo@293263c using `time ./jpegtran -outfile /dev/null -progressive -optimise -copy none print_poster_0025.jpg`:

C
- clang x86_64: +19%
- gcc-5 x86_64: +80%
- gcc-7 x86_64: +57%
- clang i386: +5%
- gcc-5 i386: +59%
- gcc-7 i386: +51%

SSE2
- clang x86_64: +79%
- gcc-5 x86_64: +158%
- gcc-7 x86_64: +122%
- clang i386: +71%
- gcc-5 i386: +134%
- gcc-7 i386: +135%

Discussion in libjpeg-turbo/libjpeg-turbo#46
118 lines
5.4 KiB
C
118 lines
5.4 KiB
C
/*
|
|
* jsimd.h
|
|
*
|
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
|
* Copyright (C) 2011, 2014, D. R. Commander.
|
|
* Copyright (C) 2015-2016, 2018, Matthieu Darbois.
|
|
*
|
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
|
* For conditions of distribution and use, see copyright notice in jsimdext.inc
|
|
*
|
|
*/
|
|
|
|
#include "jchuff.h" /* Declarations shared with jcphuff.c */
|
|
|
|
/*
 * Colorspace conversion.
 *
 * The five query functions below take no arguments and return int.
 * NOTE(review): presumably a nonzero result means the matching conversion
 * routine declared further down may be called -- confirm against the
 * implementation, which is not visible from this header.
 */
EXTERN(int) jsimd_can_rgb_ycc(void);
|
|
EXTERN(int) jsimd_can_rgb_gray(void);
|
|
EXTERN(int) jsimd_can_ycc_rgb(void);
|
|
EXTERN(int) jsimd_can_ycc_rgb565(void);
|
|
EXTERN(int) jsimd_c_can_null_convert(void);
|
|
|
|
/*
 * Compression-side conversion (takes a j_compress_ptr): input is a JSAMPARRAY,
 * output is a JSAMPIMAGE.  Assumes output_row is the first destination row and
 * num_rows the number of rows to convert -- TODO confirm against the
 * definition.
 */
EXTERN(void) jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
/* Same parameter list as jsimd_rgb_ycc_convert(). */
EXTERN(void) jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
JSAMPIMAGE output_buf,
|
|
JDIMENSION output_row, int num_rows);
|
|
/*
 * Decompression-side conversion (takes a j_decompress_ptr): input is a
 * JSAMPIMAGE, output is a JSAMPARRAY.  Assumes input_row is the first source
 * row and num_rows the number of rows to convert -- TODO confirm.
 */
EXTERN(void) jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,
|
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
/* Same parameter list as jsimd_ycc_rgb_convert(). */
EXTERN(void) jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,
|
|
JSAMPIMAGE input_buf,
|
|
JDIMENSION input_row,
|
|
JSAMPARRAY output_buf, int num_rows);
|
|
/*
 * Compression-side routine with the same parameter list as
 * jsimd_rgb_ycc_convert().  NOTE(review): the name suggests a pass-through
 * with no colorspace change -- confirm against the definition.
 */
EXTERN(void) jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
|
|
JSAMPIMAGE output_buf, JDIMENSION output_row,
|
|
int num_rows);
|
|
|
|
/*
 * Downsampling (compression side).
 *
 * Query functions take no arguments and return int.  NOTE(review): presumably
 * nonzero means the matching routine below may be called -- confirm against
 * the implementation.
 */
EXTERN(int) jsimd_can_h2v2_downsample(void);
|
|
EXTERN(int) jsimd_can_h2v1_downsample(void);
|
|
|
|
/*
 * Takes the compression object, a component descriptor, and separate input
 * and output JSAMPARRAYs.  NOTE(review): "h2v2" presumably denotes a 2:1
 * horizontal and 2:1 vertical reduction -- confirm.
 */
EXTERN(void) jsimd_h2v2_downsample(j_compress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY output_data);
|
|
|
|
/* Query for jsimd_h2v2_smooth_downsample(); no arguments, returns int. */
EXTERN(int) jsimd_can_h2v2_smooth_downsample(void);
|
|
|
|
/*
 * Same parameter list as jsimd_h2v2_downsample().  How the "smooth" variant
 * differs is not visible from this header.
 */
EXTERN(void) jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY output_data);
|
|
|
|
/*
 * Same parameter list as jsimd_h2v2_downsample().  NOTE(review): "h2v1"
 * presumably denotes a 2:1 horizontal reduction only -- confirm.
 */
EXTERN(void) jsimd_h2v1_downsample(j_compress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY output_data);
|
|
|
|
/*
 * Upsampling (decompression side).
 *
 * Query functions take no arguments and return int.  NOTE(review): presumably
 * nonzero means the matching routine below may be called -- confirm against
 * the implementation.
 */
EXTERN(int) jsimd_can_h2v2_upsample(void);
|
|
EXTERN(int) jsimd_can_h2v1_upsample(void);
|
|
EXTERN(int) jsimd_can_int_upsample(void);
|
|
|
|
/*
 * Takes the decompression object, a component descriptor, an input
 * JSAMPARRAY, and a pointer to the output JSAMPARRAY (note the extra level of
 * indirection on output_data_ptr compared with input_data).
 */
EXTERN(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY *output_data_ptr);
|
|
/* Same parameter list as jsimd_h2v2_upsample(). */
EXTERN(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY *output_data_ptr);
|
|
/*
 * Same parameter list as jsimd_h2v2_upsample().  NOTE(review): "int"
 * presumably refers to integer scale factors -- confirm against the
 * definition.
 */
EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY *output_data_ptr);
|
|
|
|
/*
 * "Fancy" upsampling (decompression side).
 *
 * Query functions take no arguments and return int.  NOTE(review): presumably
 * nonzero means the matching routine below may be called -- confirm against
 * the implementation.
 */
EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
|
|
EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
|
|
|
|
/*
 * Takes the decompression object, a component descriptor, an input
 * JSAMPARRAY, and a pointer to the output JSAMPARRAY.  How the "fancy"
 * variant differs from the plain upsampler is not visible from this header.
 */
EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY *output_data_ptr);
|
|
/* Same parameter list as jsimd_h2v2_fancy_upsample(). */
EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
|
|
jpeg_component_info *compptr,
|
|
JSAMPARRAY input_data,
|
|
JSAMPARRAY *output_data_ptr);
|
|
|
|
/*
 * Merged upsampling (decompression side).
 *
 * Query functions take no arguments and return int.  NOTE(review): presumably
 * nonzero means the matching routine below may be called -- confirm against
 * the implementation.
 */
EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
|
|
EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
|
|
|
|
/*
 * Unlike the per-component upsamplers, this takes a whole JSAMPIMAGE as input
 * and no jpeg_component_info pointer; output is a JSAMPARRAY.
 * NOTE(review): "merged" presumably means upsampling combined with color
 * conversion, and in_row_group_ctr presumably selects the input row group --
 * confirm against the definition.
 */
EXTERN(void) jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,
|
|
JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr,
|
|
JSAMPARRAY output_buf);
|
|
/* Same parameter list as jsimd_h2v2_merged_upsample(). */
EXTERN(void) jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,
|
|
JSAMPIMAGE input_buf,
|
|
JDIMENSION in_row_group_ctr,
|
|
JSAMPARRAY output_buf);
|
|
|
|
/*
 * Query for jsimd_huff_encode_one_block(); no arguments, returns int.
 * NOTE(review): presumably nonzero means the routine may be called -- confirm.
 */
EXTERN(int) jsimd_can_huff_encode_one_block(void);
|
|
|
|
/*
 * Huffman-encodes one coefficient block.
 *
 * state is passed as an opaque void pointer; its real type is not visible
 * from this header.  dctbl and actbl are c_derived_tbl pointers (presumably
 * declared in jchuff.h, which this header includes -- confirm).
 * NOTE(review): the returned JOCTET pointer is presumably the updated write
 * position within buffer, and last_dc_val the previous block's DC value --
 * confirm against the caller.
 */
EXTERN(JOCTET *) jsimd_huff_encode_one_block(void *state, JOCTET *buffer,
|
|
JCOEFPTR block, int last_dc_val,
|
|
c_derived_tbl *dctbl,
|
|
c_derived_tbl *actbl);
|
|
|
|
/*
 * Query for jsimd_encode_mcu_AC_first_prepare(); no arguments, returns int.
 * NOTE(review): presumably nonzero means the routine may be called -- confirm.
 */
EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
|
|
|
|
/*
 * Preparation step for encode_mcu_AC_first() (progressive Huffman encoding).
 *
 * block and jpeg_natural_order_start are const-qualified inputs; values and
 * zerobits are writable and therefore presumably outputs.  Returns nothing.
 * NOTE(review): Sl and Al are presumably the number of coefficients to
 * process and the successive-approximation shift -- confirm against
 * jcphuff.c.
 */
EXTERN(void) jsimd_encode_mcu_AC_first_prepare
|
|
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
|
|
JCOEF *values, size_t *zerobits);
|
|
|
|
/*
 * Query for jsimd_encode_mcu_AC_refine_prepare(); no arguments, returns int.
 * NOTE(review): presumably nonzero means the routine may be called -- confirm.
 */
EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
|
|
|
|
/*
 * Counterpart of jsimd_encode_mcu_AC_first_prepare() with the same input
 * parameters (block, jpeg_natural_order_start, Sl, Al), but it returns int
 * rather than void and writes to absvalues and bits.
 * NOTE(review): the meaning of the int result is not visible from this
 * header; presumably it is a coefficient position used by the refinement
 * pass -- confirm against jcphuff.c.
 */
EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
|
|
(const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
|
|
JCOEF *absvalues, size_t *bits);
|