Greatly improve performance of Huffman decoding
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@64 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
241
jdhuff.c
241
jdhuff.c
@@ -14,6 +14,21 @@
|
|||||||
* storage only upon successful completion of an MCU.
|
* storage only upon successful completion of an MCU.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Modifications:
|
||||||
|
* Copyright (C)2007 Sun Microsystems, Inc.
|
||||||
|
* Copyright (C)2009 D. R. Commander
|
||||||
|
*
|
||||||
|
* This library is free software and may be redistributed and/or modified under
|
||||||
|
* the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
||||||
|
* any later version. The full license is in the LICENSE.txt file included
|
||||||
|
* with this distribution.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* wxWindows Library License for more details.
|
||||||
|
*/
|
||||||
|
|
||||||
#define JPEG_INTERNALS
|
#define JPEG_INTERNALS
|
||||||
#include "jinclude.h"
|
#include "jinclude.h"
|
||||||
#include "jpeglib.h"
|
#include "jpeglib.h"
|
||||||
@@ -234,7 +249,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
|
|||||||
* with that code.
|
* with that code.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
|
for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
|
||||||
|
dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
|
||||||
|
|
||||||
p = 0;
|
p = 0;
|
||||||
for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
|
for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
|
||||||
@@ -243,8 +259,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
|
|||||||
/* Generate left-justified code followed by all possible bit sequences */
|
/* Generate left-justified code followed by all possible bit sequences */
|
||||||
lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
|
lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
|
||||||
for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
|
for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
|
||||||
dtbl->look_nbits[lookbits] = l;
|
dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
|
||||||
dtbl->look_sym[lookbits] = htbl->huffval[p];
|
|
||||||
lookbits++;
|
lookbits++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -438,9 +453,10 @@ jpeg_huff_decode (bitread_working_state * state,
|
|||||||
* On some machines, a shift and add will be faster than a table lookup.
|
* On some machines, a shift and add will be faster than a table lookup.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define AVOID_TABLES
|
||||||
#ifdef AVOID_TABLES
|
#ifdef AVOID_TABLES
|
||||||
|
|
||||||
#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
|
#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1)))
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -498,47 +514,19 @@ process_restart (j_decompress_ptr cinfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
LOCAL(boolean)
|
||||||
* Decode and return one MCU's worth of Huffman-compressed coefficients.
|
decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||||
* The coefficients are reordered from zigzag order into natural array order,
|
|
||||||
* but are not dequantized.
|
|
||||||
*
|
|
||||||
* The i'th block of the MCU is stored into the block pointed to by
|
|
||||||
* MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
|
|
||||||
* (Wholesale zeroing is usually a little faster than retail...)
|
|
||||||
*
|
|
||||||
* Returns FALSE if data source requested suspension. In that case no
|
|
||||||
* changes have been made to permanent state. (Exception: some output
|
|
||||||
* coefficients may already have been assigned. This is harmless for
|
|
||||||
* this module, since we'll just re-assign them on the next call.)
|
|
||||||
*/
|
|
||||||
|
|
||||||
METHODDEF(boolean)
|
|
||||||
decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
|
||||||
{
|
{
|
||||||
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||||
int blkn;
|
|
||||||
BITREAD_STATE_VARS;
|
BITREAD_STATE_VARS;
|
||||||
|
int blkn;
|
||||||
savable_state state;
|
savable_state state;
|
||||||
|
/* Outer loop handles each block in the MCU */
|
||||||
/* Process restart marker if needed; may have to suspend */
|
|
||||||
if (cinfo->restart_interval) {
|
|
||||||
if (entropy->restarts_to_go == 0)
|
|
||||||
if (! process_restart(cinfo))
|
|
||||||
return FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If we've run out of data, just leave the MCU set to zeroes.
|
|
||||||
* This way, we return uniform gray for the remainder of the segment.
|
|
||||||
*/
|
|
||||||
if (! entropy->pub.insufficient_data) {
|
|
||||||
|
|
||||||
/* Load up working state */
|
/* Load up working state */
|
||||||
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
||||||
ASSIGN_STATE(state, entropy->saved);
|
ASSIGN_STATE(state, entropy->saved);
|
||||||
|
|
||||||
/* Outer loop handles each block in the MCU */
|
|
||||||
|
|
||||||
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
||||||
JBLOCKROW block = MCU_data[blkn];
|
JBLOCKROW block = MCU_data[blkn];
|
||||||
d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
||||||
@@ -611,13 +599,192 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
|||||||
k += 15;
|
k += 15;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Completed MCU, so update state */
|
/* Completed MCU, so update state */
|
||||||
BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
||||||
ASSIGN_STATE(entropy->saved, state);
|
ASSIGN_STATE(entropy->saved, state);
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************/
|
||||||
|
|
||||||
|
#define ADD_BYTE { \
|
||||||
|
int val0 = *(buffer++); \
|
||||||
|
int val1 = *(buffer); \
|
||||||
|
\
|
||||||
|
bits_left += 8; \
|
||||||
|
get_buffer = (get_buffer << 8) | (val0); \
|
||||||
|
if (val0 == 0xFF) { \
|
||||||
|
buffer++; \
|
||||||
|
if (val1 != 0) { \
|
||||||
|
buffer -= 2; \
|
||||||
|
get_buffer &= ~0xFF; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************/
|
||||||
|
|
||||||
|
#if __WORDSIZE == 64
|
||||||
|
|
||||||
|
#define ENSURE_SHORT \
|
||||||
|
if (bits_left < 16) { \
|
||||||
|
ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE ADD_BYTE \
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define ENSURE_SHORT if (bits_left < 16) { ADD_BYTE ADD_BYTE }
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/***************************************************************/
|
||||||
|
|
||||||
|
#define HUFF_DECODE_FAST(symbol, size, htbl) { \
|
||||||
|
ENSURE_SHORT \
|
||||||
|
symbol = PEEK_BITS(HUFF_LOOKAHEAD); \
|
||||||
|
symbol = htbl->lookup[symbol]; \
|
||||||
|
size = symbol >> 8; \
|
||||||
|
bits_left -= size; \
|
||||||
|
symbol = symbol & ((1 << HUFF_LOOKAHEAD) - 1); \
|
||||||
|
if (size == HUFF_LOOKAHEAD + 1) { \
|
||||||
|
symbol = (get_buffer >> bits_left) & ((1 << (size)) - 1); \
|
||||||
|
while (symbol > htbl->maxcode[size]) { \
|
||||||
|
symbol <<= 1; \
|
||||||
|
symbol |= GET_BITS(1); \
|
||||||
|
size++; \
|
||||||
|
} \
|
||||||
|
symbol = htbl->pub->huffval[ (int) (symbol + htbl->valoffset[size]) ]; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
/***************************************************************/
|
||||||
|
|
||||||
|
LOCAL(boolean)
|
||||||
|
decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||||
|
{
|
||||||
|
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||||
|
BITREAD_STATE_VARS;
|
||||||
|
JOCTET *buffer;
|
||||||
|
int blkn;
|
||||||
|
savable_state state;
|
||||||
|
/* Outer loop handles each block in the MCU */
|
||||||
|
|
||||||
|
/* Load up working state */
|
||||||
|
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
||||||
|
buffer = (JOCTET *) br_state.next_input_byte;
|
||||||
|
ASSIGN_STATE(state, entropy->saved);
|
||||||
|
|
||||||
|
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
||||||
|
JBLOCKROW block = MCU_data[blkn];
|
||||||
|
d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
||||||
|
d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
||||||
|
register int s, k, r, l;
|
||||||
|
|
||||||
|
HUFF_DECODE_FAST(s, l, dctbl);
|
||||||
|
if (s) {
|
||||||
|
ENSURE_SHORT
|
||||||
|
r = GET_BITS(s);
|
||||||
|
s = HUFF_EXTEND(r, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entropy->dc_needed[blkn]) {
|
||||||
|
int ci = cinfo->MCU_membership[blkn];
|
||||||
|
s += state.last_dc_val[ci];
|
||||||
|
state.last_dc_val[ci] = s;
|
||||||
|
(*block)[0] = (JCOEF) s;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entropy->ac_needed[blkn]) {
|
||||||
|
|
||||||
|
for (k = 1; k < DCTSIZE2; k++) {
|
||||||
|
HUFF_DECODE_FAST(s, l, actbl);
|
||||||
|
r = s >> 4;
|
||||||
|
s &= 15;
|
||||||
|
|
||||||
|
if (s) {
|
||||||
|
k += r;
|
||||||
|
ENSURE_SHORT
|
||||||
|
r = GET_BITS(s);
|
||||||
|
s = HUFF_EXTEND(r, s);
|
||||||
|
(*block)[jpeg_natural_order[k]] = (JCOEF) s;
|
||||||
|
} else {
|
||||||
|
if (r != 15) break;
|
||||||
|
k += 15;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
for (k = 1; k < DCTSIZE2; k++) {
|
||||||
|
HUFF_DECODE_FAST(s, l, actbl);
|
||||||
|
r = s >> 4;
|
||||||
|
s &= 15;
|
||||||
|
|
||||||
|
if (s) {
|
||||||
|
k += r;
|
||||||
|
ENSURE_SHORT
|
||||||
|
DROP_BITS(s);
|
||||||
|
} else {
|
||||||
|
if (r != 15) break;
|
||||||
|
k += 15;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
|
||||||
|
br_state.next_input_byte = buffer;
|
||||||
|
BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
||||||
|
ASSIGN_STATE(entropy->saved, state);
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Decode and return one MCU's worth of Huffman-compressed coefficients.
|
||||||
|
* The coefficients are reordered from zigzag order into natural array order,
|
||||||
|
* but are not dequantized.
|
||||||
|
*
|
||||||
|
* The i'th block of the MCU is stored into the block pointed to by
|
||||||
|
* MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
|
||||||
|
* (Wholesale zeroing is usually a little faster than retail...)
|
||||||
|
*
|
||||||
|
* Returns FALSE if data source requested suspension. In that case no
|
||||||
|
* changes have been made to permanent state. (Exception: some output
|
||||||
|
* coefficients may already have been assigned. This is harmless for
|
||||||
|
* this module, since we'll just re-assign them on the next call.)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define BUFSIZE (DCTSIZE2 * 2)
|
||||||
|
|
||||||
|
METHODDEF(boolean)
|
||||||
|
decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||||
|
{
|
||||||
|
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||||
|
|
||||||
|
/* Process restart marker if needed; may have to suspend */
|
||||||
|
if (cinfo->restart_interval) {
|
||||||
|
if (entropy->restarts_to_go == 0)
|
||||||
|
if (! process_restart(cinfo))
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we've run out of data, just leave the MCU set to zeroes.
|
||||||
|
* This way, we return uniform gray for the remainder of the segment.
|
||||||
|
*/
|
||||||
|
if (! entropy->pub.insufficient_data) {
|
||||||
|
|
||||||
|
if (cinfo->src->bytes_in_buffer >= BUFSIZE) {
|
||||||
|
if (!decode_mcu_fast(cinfo, MCU_data)) return FALSE;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Account for restart interval (no-op if not using restarts) */
|
/* Account for restart interval (no-op if not using restarts) */
|
||||||
|
|||||||
21
jdhuff.h
21
jdhuff.h
@@ -36,13 +36,17 @@ typedef struct {
|
|||||||
/* Link to public Huffman table (needed only in jpeg_huff_decode) */
|
/* Link to public Huffman table (needed only in jpeg_huff_decode) */
|
||||||
JHUFF_TBL *pub;
|
JHUFF_TBL *pub;
|
||||||
|
|
||||||
/* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
|
/* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
|
||||||
* the input data stream. If the next Huffman code is no more
|
* the input data stream. If the next Huffman code is no more
|
||||||
* than HUFF_LOOKAHEAD bits long, we can obtain its length and
|
* than HUFF_LOOKAHEAD bits long, we can obtain its length and
|
||||||
* the corresponding symbol directly from these tables.
|
* the corresponding symbol directly from this table.
|
||||||
|
*
|
||||||
|
* The low HUFF_LOOKAHEAD bits of each table entry contain the
|
||||||
|
* symbol. The bits above those contain the number of bits in
|
||||||
|
* the corresponding Huffman code, or HUFF_LOOKAHEAD + 1 if the
|
||||||
|
* code is too long.
|
||||||
*/
|
*/
|
||||||
int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
|
int lookup[1<<HUFF_LOOKAHEAD];
|
||||||
UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
|
|
||||||
} d_derived_tbl;
|
} d_derived_tbl;
|
||||||
|
|
||||||
/* Expand a Huffman table definition into the derived format */
|
/* Expand a Huffman table definition into the derived format */
|
||||||
@@ -69,8 +73,8 @@ EXTERN(void) jpeg_make_d_derived_tbl
|
|||||||
* necessary.
|
* necessary.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef INT32 bit_buf_type; /* type of bit-extraction buffer */
|
typedef long bit_buf_type; /* type of bit-extraction buffer */
|
||||||
#define BIT_BUF_SIZE 32 /* size of buffer in bits */
|
#define BIT_BUF_SIZE __WORDSIZE /* size of buffer in bits */
|
||||||
|
|
||||||
/* If long is > 32 bits on your machine, and shifting/masking longs is
|
/* If long is > 32 bits on your machine, and shifting/masking longs is
|
||||||
* reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
|
* reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
|
||||||
@@ -183,11 +187,10 @@ EXTERN(boolean) jpeg_fill_bit_buffer
|
|||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
look = PEEK_BITS(HUFF_LOOKAHEAD); \
|
look = PEEK_BITS(HUFF_LOOKAHEAD); \
|
||||||
if ((nb = htbl->look_nbits[look]) != 0) { \
|
if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
|
||||||
DROP_BITS(nb); \
|
DROP_BITS(nb); \
|
||||||
result = htbl->look_sym[look]; \
|
result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
|
||||||
} else { \
|
} else { \
|
||||||
nb = HUFF_LOOKAHEAD+1; \
|
|
||||||
slowlabel: \
|
slowlabel: \
|
||||||
if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
|
if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
|
||||||
{ failaction; } \
|
{ failaction; } \
|
||||||
|
|||||||
Reference in New Issue
Block a user