36% faster encode_mcu_AC_first() by eliminating an unpredictable branch in the inner loop

This corresponds to 15% faster overall encoding in "progressive scan optimization" mode
(and a negligible speedup in -fastcrush mode).
This commit is contained in:
Loren Merritt
2014-03-07 00:03:44 +00:00
parent f4c556031c
commit 8db334cde2

View File

@@ -468,6 +468,7 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
int Se = cinfo->Se; int Se = cinfo->Se;
int Al = cinfo->Al; int Al = cinfo->Al;
JBLOCKROW block; JBLOCKROW block;
int deadzone = (1 << Al) - 1;
entropy->next_output_byte = cinfo->dest->next_output_byte; entropy->next_output_byte = cinfo->dest->next_output_byte;
entropy->free_in_buffer = cinfo->dest->free_in_buffer; entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -485,29 +486,21 @@ encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
r = 0; /* r = run length of zeros */ r = 0; /* r = run length of zeros */
for (k = cinfo->Ss; k <= Se; k++) { for (k = cinfo->Ss; k <= Se; k++) {
if ((temp = (*block)[jpeg_natural_order[k]]) == 0) { temp = (*block)[jpeg_natural_order[k]];
if ((unsigned)(temp + deadzone) <= 2*deadzone) {
r++; r++;
continue; continue;
} }
/* We must apply the point transform by Al. For AC coefficients this /* We must apply the point transform by Al. For AC coefficients this
* is an integer division with rounding towards 0. To do this portably * is an integer division with rounding towards 0. To do this portably
* in C, we shift after obtaining the absolute value; so the code is * in C, we shift after obtaining the absolute value; so the code is
* interwoven with finding the abs value (temp) and output bits (temp2). * interwoven with finding the abs value (temp) and output bits (temp2).
*/ */
if (temp < 0) { int sign = temp >> 31;
temp = -temp; /* temp is abs value of input */ temp += sign;
temp >>= Al; /* apply the point transform */ temp2 = temp >> Al;
/* For a negative coef, want temp2 = bitwise complement of abs(coef) */ temp = (temp ^ sign) >> Al;
temp2 = ~temp;
} else {
temp >>= Al; /* apply the point transform */
temp2 = temp;
}
/* Watch out for case that nonzero coef is zero after point transform */
if (temp == 0) {
r++;
continue;
}
/* Emit any pending EOBRUN */ /* Emit any pending EOBRUN */
if (entropy->EOBRUN > 0) if (entropy->EOBRUN > 0)