Greatly improve performance of Huffman decoding
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@64 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
241
jdhuff.c
241
jdhuff.c
@@ -14,6 +14,21 @@
|
||||
* storage only upon successful completion of an MCU.
|
||||
*/
|
||||
|
||||
/* Modifications:
|
||||
* Copyright (C)2007 Sun Microsystems, Inc.
|
||||
* Copyright (C)2009 D. R. Commander
|
||||
*
|
||||
* This library is free software and may be redistributed and/or modified under
|
||||
* the terms of the wxWindows Library License, Version 3.1 or (at your option)
|
||||
* any later version. The full license is in the LICENSE.txt file included
|
||||
* with this distribution.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* wxWindows Library License for more details.
|
||||
*/
|
||||
|
||||
#define JPEG_INTERNALS
|
||||
#include "jinclude.h"
|
||||
#include "jpeglib.h"
|
||||
@@ -234,7 +249,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
|
||||
* with that code.
|
||||
*/
|
||||
|
||||
MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
|
||||
for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
|
||||
dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
|
||||
|
||||
p = 0;
|
||||
for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
|
||||
@@ -243,8 +259,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
|
||||
/* Generate left-justified code followed by all possible bit sequences */
|
||||
lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
|
||||
for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
|
||||
dtbl->look_nbits[lookbits] = l;
|
||||
dtbl->look_sym[lookbits] = htbl->huffval[p];
|
||||
dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
|
||||
lookbits++;
|
||||
}
|
||||
}
|
||||
@@ -438,9 +453,10 @@ jpeg_huff_decode (bitread_working_state * state,
|
||||
* On some machines, a shift and add will be faster than a table lookup.
|
||||
*/
|
||||
|
||||
#define AVOID_TABLES
|
||||
#ifdef AVOID_TABLES
|
||||
|
||||
#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
|
||||
#define HUFF_EXTEND(x,s) ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1)))
|
||||
|
||||
#else
|
||||
|
||||
@@ -498,47 +514,19 @@ process_restart (j_decompress_ptr cinfo)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Decode and return one MCU's worth of Huffman-compressed coefficients.
|
||||
* The coefficients are reordered from zigzag order into natural array order,
|
||||
* but are not dequantized.
|
||||
*
|
||||
* The i'th block of the MCU is stored into the block pointed to by
|
||||
* MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
|
||||
* (Wholesale zeroing is usually a little faster than retail...)
|
||||
*
|
||||
* Returns FALSE if data source requested suspension. In that case no
|
||||
* changes have been made to permanent state. (Exception: some output
|
||||
* coefficients may already have been assigned. This is harmless for
|
||||
* this module, since we'll just re-assign them on the next call.)
|
||||
*/
|
||||
|
||||
METHODDEF(boolean)
|
||||
decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
LOCAL(boolean)
|
||||
decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
{
|
||||
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||
int blkn;
|
||||
BITREAD_STATE_VARS;
|
||||
int blkn;
|
||||
savable_state state;
|
||||
|
||||
/* Process restart marker if needed; may have to suspend */
|
||||
if (cinfo->restart_interval) {
|
||||
if (entropy->restarts_to_go == 0)
|
||||
if (! process_restart(cinfo))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* If we've run out of data, just leave the MCU set to zeroes.
|
||||
* This way, we return uniform gray for the remainder of the segment.
|
||||
*/
|
||||
if (! entropy->pub.insufficient_data) {
|
||||
/* Outer loop handles each block in the MCU */
|
||||
|
||||
/* Load up working state */
|
||||
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
||||
ASSIGN_STATE(state, entropy->saved);
|
||||
|
||||
/* Outer loop handles each block in the MCU */
|
||||
|
||||
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
||||
JBLOCKROW block = MCU_data[blkn];
|
||||
d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
||||
@@ -611,13 +599,192 @@ decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
k += 15;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* Completed MCU, so update state */
|
||||
BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
||||
ASSIGN_STATE(entropy->saved, state);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * ADD_BYTE: append the next input byte to the low end of the bit buffer.
 *
 * Operates on the caller's locals `buffer` (read pointer), `get_buffer`
 * (bit accumulator) and `bits_left` (count of valid bits).
 *
 *  - An ordinary byte is shifted in and `buffer` advances by one.
 *  - 0xFF followed by 0x00 is a stuffed data byte: 0xFF is shifted in and
 *    both bytes are consumed.
 *  - 0xFF followed by anything else is treated as a marker: eight zero bits
 *    are shifted in instead and `buffer` is left pointing at the 0xFF, so
 *    every later ADD_BYTE keeps supplying zeros without moving forward.
 *
 * The byte after the one being consumed is examined only when the consumed
 * byte is 0xFF, so an ordinary byte never causes a read beyond itself.
 *
 * The expansion is a bare brace block (no trailing semicolon required)
 * because callers chain several ADD_BYTE invocations back to back.
 */
#define ADD_BYTE { \
  int val0 = *(buffer++); \
  \
  bits_left += 8; \
  get_buffer = (get_buffer << 8) | (val0); \
  if (val0 == 0xFF) { \
    if (*(buffer) == 0) { \
      buffer++;              /* skip the stuffed zero byte */ \
    } else { \
      buffer--;              /* marker: stay parked on the 0xFF */ \
      get_buffer &= ~0xFF;   /* and feed zero bits instead */ \
    } \
  } \
}
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * ENSURE_SHORT: refill the bit buffer whenever fewer than 16 bits remain.
 *
 * When __WORDSIZE is 64, six bytes (48 bits) are appended per refill, so at
 * most 15 + 48 = 63 bits are held.  Otherwise (including when __WORDSIZE is
 * not defined at all) two bytes are appended, for at most 15 + 16 = 31 bits.
 *
 * The expansion is a plain `if` statement with a braced body; it is written
 * on a line of its own, without a trailing semicolon, and must not be
 * followed directly by an `else`.
 */
#if defined(__WORDSIZE) && (__WORDSIZE == 64)

#define ENSURE_SHORT \
  if (bits_left < 16) { \
    ADD_BYTE ADD_BYTE ADD_BYTE \
    ADD_BYTE ADD_BYTE ADD_BYTE \
  }

#else

#define ENSURE_SHORT \
  if (bits_left < 16) { \
    ADD_BYTE ADD_BYTE \
  }

#endif
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
/*
 * HUFF_DECODE_FAST: decode one Huffman symbol from the bit buffer.
 *
 *   symbol  (out) int lvalue receiving the decoded symbol
 *   size    (out) int lvalue receiving the number of code bits consumed
 *   htbl    (in)  pointer to the derived table to decode with
 *
 * Uses the caller's `get_buffer` / `bits_left` (and, through ENSURE_SHORT,
 * `buffer`).
 *
 * The next HUFF_LOOKAHEAD bits index htbl->lookup[].  Each entry packs the
 * code length above bit HUFF_LOOKAHEAD and the symbol in the bits below it;
 * the length is HUFF_LOOKAHEAD + 1 when the code is too long for the table.
 * The length is subtracted from bits_left before it is tested, so the
 * common short-code case needs no further work.
 *
 * For longer codes the code is extended one bit at a time until it no
 * longer exceeds maxcode[size].  JPEG Huffman codes are at most 16 bits
 * long, so the extension stops at 16 bits; if nothing matches by then the
 * data is corrupt and the symbol is forced to 0 rather than indexing
 * valoffset[] / huffval[] out of range.  Stopping at 16 also keeps the bits
 * consumed within the 16 that ENSURE_SHORT guarantees.
 */
#define HUFF_DECODE_FAST(symbol, size, htbl) { \
  ENSURE_SHORT \
  symbol = PEEK_BITS(HUFF_LOOKAHEAD); \
  symbol = (htbl)->lookup[symbol]; \
  size = symbol >> HUFF_LOOKAHEAD; \
  bits_left -= size; \
  symbol = symbol & ((1 << HUFF_LOOKAHEAD) - 1); \
  if (size == HUFF_LOOKAHEAD + 1) { \
    /* not GET_BITS: bits_left has already been reduced by size */ \
    symbol = (get_buffer >> bits_left) & ((1 << (size)) - 1); \
    while (size < 16 && symbol > (htbl)->maxcode[size]) { \
      symbol <<= 1; \
      symbol |= GET_BITS(1); \
      size++; \
    } \
    if (symbol > (htbl)->maxcode[size]) { \
      symbol = 0;   /* no code of any legal length matched: corrupt data */ \
    } else { \
      /* mask keeps the index inside the 256-entry huffval[] table */ \
      symbol = (htbl)->pub->huffval[ (int) (symbol + (htbl)->valoffset[size]) & 0xFF ]; \
    } \
  } \
}
|
||||
|
||||
/***************************************************************/
|
||||
|
||||
LOCAL(boolean)
|
||||
decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
{
|
||||
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||
BITREAD_STATE_VARS;
|
||||
JOCTET *buffer;
|
||||
int blkn;
|
||||
savable_state state;
|
||||
/* Outer loop handles each block in the MCU */
|
||||
|
||||
/* Load up working state */
|
||||
BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
|
||||
buffer = (JOCTET *) br_state.next_input_byte;
|
||||
ASSIGN_STATE(state, entropy->saved);
|
||||
|
||||
for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
|
||||
JBLOCKROW block = MCU_data[blkn];
|
||||
d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
|
||||
d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
|
||||
register int s, k, r, l;
|
||||
|
||||
HUFF_DECODE_FAST(s, l, dctbl);
|
||||
if (s) {
|
||||
ENSURE_SHORT
|
||||
r = GET_BITS(s);
|
||||
s = HUFF_EXTEND(r, s);
|
||||
}
|
||||
|
||||
if (entropy->dc_needed[blkn]) {
|
||||
int ci = cinfo->MCU_membership[blkn];
|
||||
s += state.last_dc_val[ci];
|
||||
state.last_dc_val[ci] = s;
|
||||
(*block)[0] = (JCOEF) s;
|
||||
}
|
||||
|
||||
if (entropy->ac_needed[blkn]) {
|
||||
|
||||
for (k = 1; k < DCTSIZE2; k++) {
|
||||
HUFF_DECODE_FAST(s, l, actbl);
|
||||
r = s >> 4;
|
||||
s &= 15;
|
||||
|
||||
if (s) {
|
||||
k += r;
|
||||
ENSURE_SHORT
|
||||
r = GET_BITS(s);
|
||||
s = HUFF_EXTEND(r, s);
|
||||
(*block)[jpeg_natural_order[k]] = (JCOEF) s;
|
||||
} else {
|
||||
if (r != 15) break;
|
||||
k += 15;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
for (k = 1; k < DCTSIZE2; k++) {
|
||||
HUFF_DECODE_FAST(s, l, actbl);
|
||||
r = s >> 4;
|
||||
s &= 15;
|
||||
|
||||
if (s) {
|
||||
k += r;
|
||||
ENSURE_SHORT
|
||||
DROP_BITS(s);
|
||||
} else {
|
||||
if (r != 15) break;
|
||||
k += 15;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
|
||||
br_state.next_input_byte = buffer;
|
||||
BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
|
||||
ASSIGN_STATE(entropy->saved, state);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Decode and return one MCU's worth of Huffman-compressed coefficients.
|
||||
* The coefficients are reordered from zigzag order into natural array order,
|
||||
* but are not dequantized.
|
||||
*
|
||||
* The i'th block of the MCU is stored into the block pointed to by
|
||||
* MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
|
||||
* (Wholesale zeroing is usually a little faster than retail...)
|
||||
*
|
||||
* Returns FALSE if data source requested suspension. In that case no
|
||||
* changes have been made to permanent state. (Exception: some output
|
||||
* coefficients may already have been assigned. This is harmless for
|
||||
* this module, since we'll just re-assign them on the next call.)
|
||||
*/
|
||||
|
||||
#define BUFSIZE (DCTSIZE2 * 2)
|
||||
|
||||
METHODDEF(boolean)
|
||||
decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
|
||||
{
|
||||
huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
|
||||
|
||||
/* Process restart marker if needed; may have to suspend */
|
||||
if (cinfo->restart_interval) {
|
||||
if (entropy->restarts_to_go == 0)
|
||||
if (! process_restart(cinfo))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* If we've run out of data, just leave the MCU set to zeroes.
|
||||
* This way, we return uniform gray for the remainder of the segment.
|
||||
*/
|
||||
if (! entropy->pub.insufficient_data) {
|
||||
|
||||
if (cinfo->src->bytes_in_buffer >= BUFSIZE) {
|
||||
if (!decode_mcu_fast(cinfo, MCU_data)) return FALSE;
|
||||
}
|
||||
else {
|
||||
if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Account for restart interval (no-op if not using restarts) */
|
||||
|
||||
21
jdhuff.h
21
jdhuff.h
@@ -36,13 +36,17 @@ typedef struct {
|
||||
/* Link to public Huffman table (needed only in jpeg_huff_decode) */
|
||||
JHUFF_TBL *pub;
|
||||
|
||||
/* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
|
||||
/* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
|
||||
* the input data stream. If the next Huffman code is no more
|
||||
* than HUFF_LOOKAHEAD bits long, we can obtain its length and
|
||||
* the corresponding symbol directly from these tables.
|
||||
* the corresponding symbol directly from this table.
|
||||
*
|
||||
* The low HUFF_LOOKAHEAD bits of each table entry contain the
|
||||
* symbol.  The bits above them contain the number of bits in the
|
||||
* corresponding Huffman code, or HUFF_LOOKAHEAD + 1 if the code
|
||||
* is too long for this table.
|
||||
*/
|
||||
int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
|
||||
UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
|
||||
int lookup[1<<HUFF_LOOKAHEAD];
|
||||
} d_derived_tbl;
|
||||
|
||||
/* Expand a Huffman table definition into the derived format */
|
||||
@@ -69,8 +73,8 @@ EXTERN(void) jpeg_make_d_derived_tbl
|
||||
* necessary.
|
||||
*/
|
||||
|
||||
typedef INT32 bit_buf_type; /* type of bit-extraction buffer */
|
||||
#define BIT_BUF_SIZE 32 /* size of buffer in bits */
|
||||
typedef long bit_buf_type; /* type of bit-extraction buffer */
|
||||
#define BIT_BUF_SIZE __WORDSIZE /* size of buffer in bits */
|
||||
|
||||
/* If long is > 32 bits on your machine, and shifting/masking longs is
|
||||
* reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
|
||||
@@ -183,11 +187,10 @@ EXTERN(boolean) jpeg_fill_bit_buffer
|
||||
} \
|
||||
} \
|
||||
look = PEEK_BITS(HUFF_LOOKAHEAD); \
|
||||
if ((nb = htbl->look_nbits[look]) != 0) { \
|
||||
if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
|
||||
DROP_BITS(nb); \
|
||||
result = htbl->look_sym[look]; \
|
||||
result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
|
||||
} else { \
|
||||
nb = HUFF_LOOKAHEAD+1; \
|
||||
slowlabel: \
|
||||
if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
|
||||
{ failaction; } \
|
||||
|
||||
Reference in New Issue
Block a user