Implement new colorspaces to allow directly compressing from/decompressing to RGB/RGBX/BGR/BGRX/XBGR/XRGB without conversion
This commit is contained in:
52
jccolor.c
52
jccolor.c
@@ -3,6 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (C) 1991-1996, Thomas G. Lane.
|
* Copyright (C) 1991-1996, Thomas G. Lane.
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
* Copyright 2009 D. R. Commander
|
||||||
* This file is part of the Independent JPEG Group's software.
|
* This file is part of the Independent JPEG Group's software.
|
||||||
* For conditions of distribution and use, see the accompanying README file.
|
* For conditions of distribution and use, see the accompanying README file.
|
||||||
*
|
*
|
||||||
@@ -148,10 +149,10 @@ rgb_ycc_convert (j_compress_ptr cinfo,
|
|||||||
outptr2 = output_buf[2][output_row];
|
outptr2 = output_buf[2][output_row];
|
||||||
output_row++;
|
output_row++;
|
||||||
for (col = 0; col < num_cols; col++) {
|
for (col = 0; col < num_cols; col++) {
|
||||||
r = GETJSAMPLE(inptr[RGB_RED]);
|
r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
|
||||||
g = GETJSAMPLE(inptr[RGB_GREEN]);
|
g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
|
||||||
b = GETJSAMPLE(inptr[RGB_BLUE]);
|
b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
|
||||||
inptr += RGB_PIXELSIZE;
|
inptr += rgb_pixelsize[cinfo->in_color_space];
|
||||||
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
|
/* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
|
||||||
* must be too; we do not need an explicit range-limiting operation.
|
* must be too; we do not need an explicit range-limiting operation.
|
||||||
* Hence the value being shifted is never negative, and we don't
|
* Hence the value being shifted is never negative, and we don't
|
||||||
@@ -202,10 +203,10 @@ rgb_gray_convert (j_compress_ptr cinfo,
|
|||||||
outptr = output_buf[0][output_row];
|
outptr = output_buf[0][output_row];
|
||||||
output_row++;
|
output_row++;
|
||||||
for (col = 0; col < num_cols; col++) {
|
for (col = 0; col < num_cols; col++) {
|
||||||
r = GETJSAMPLE(inptr[RGB_RED]);
|
r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
|
||||||
g = GETJSAMPLE(inptr[RGB_GREEN]);
|
g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
|
||||||
b = GETJSAMPLE(inptr[RGB_BLUE]);
|
b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
|
||||||
inptr += RGB_PIXELSIZE;
|
inptr += rgb_pixelsize[cinfo->in_color_space];
|
||||||
/* Y */
|
/* Y */
|
||||||
outptr[col] = (JSAMPLE)
|
outptr[col] = (JSAMPLE)
|
||||||
((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
|
((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
|
||||||
@@ -376,6 +377,16 @@ jinit_color_converter (j_compress_ptr cinfo)
|
|||||||
break;
|
break;
|
||||||
#endif /* else share code with YCbCr */
|
#endif /* else share code with YCbCr */
|
||||||
|
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
|
||||||
|
ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
|
||||||
|
break;
|
||||||
|
|
||||||
case JCS_YCbCr:
|
case JCS_YCbCr:
|
||||||
if (cinfo->input_components != 3)
|
if (cinfo->input_components != 3)
|
||||||
ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
|
ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
|
||||||
@@ -400,7 +411,13 @@ jinit_color_converter (j_compress_ptr cinfo)
|
|||||||
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
||||||
if (cinfo->in_color_space == JCS_GRAYSCALE)
|
if (cinfo->in_color_space == JCS_GRAYSCALE)
|
||||||
cconvert->pub.color_convert = grayscale_convert;
|
cconvert->pub.color_convert = grayscale_convert;
|
||||||
else if (cinfo->in_color_space == JCS_RGB) {
|
else if (cinfo->in_color_space == JCS_RGB ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_RGB ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_RGBX ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_BGR ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_BGRX ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_XBGR ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_XRGB) {
|
||||||
cconvert->pub.start_pass = rgb_ycc_start;
|
cconvert->pub.start_pass = rgb_ycc_start;
|
||||||
cconvert->pub.color_convert = rgb_gray_convert;
|
cconvert->pub.color_convert = rgb_gray_convert;
|
||||||
} else if (cinfo->in_color_space == JCS_YCbCr)
|
} else if (cinfo->in_color_space == JCS_YCbCr)
|
||||||
@@ -410,9 +427,16 @@ jinit_color_converter (j_compress_ptr cinfo)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case JCS_RGB:
|
case JCS_RGB:
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
if (cinfo->num_components != 3)
|
if (cinfo->num_components != 3)
|
||||||
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
||||||
if (cinfo->in_color_space == JCS_RGB && RGB_PIXELSIZE == 3)
|
if (cinfo->in_color_space == cinfo->jpeg_color_space &&
|
||||||
|
rgb_pixelsize[cinfo->in_color_space] == 3)
|
||||||
cconvert->pub.color_convert = null_convert;
|
cconvert->pub.color_convert = null_convert;
|
||||||
else
|
else
|
||||||
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
||||||
@@ -421,7 +445,13 @@ jinit_color_converter (j_compress_ptr cinfo)
|
|||||||
case JCS_YCbCr:
|
case JCS_YCbCr:
|
||||||
if (cinfo->num_components != 3)
|
if (cinfo->num_components != 3)
|
||||||
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
|
||||||
if (cinfo->in_color_space == JCS_RGB) {
|
if (cinfo->in_color_space == JCS_RGB ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_RGB ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_RGBX ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_BGR ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_BGRX ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_XBGR ||
|
||||||
|
cinfo->in_color_space == JCS_EXT_XRGB) {
|
||||||
if (jsimd_can_rgb_ycc())
|
if (jsimd_can_rgb_ycc())
|
||||||
cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
|
cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
|
||||||
else {
|
else {
|
||||||
|
|||||||
@@ -363,6 +363,12 @@ jpeg_default_colorspace (j_compress_ptr cinfo)
|
|||||||
jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
|
jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
|
||||||
break;
|
break;
|
||||||
case JCS_RGB:
|
case JCS_RGB:
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
jpeg_set_colorspace(cinfo, JCS_YCbCr);
|
||||||
break;
|
break;
|
||||||
case JCS_YCbCr:
|
case JCS_YCbCr:
|
||||||
|
|||||||
25
jdcolor.c
25
jdcolor.c
@@ -148,12 +148,12 @@ ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
cb = GETJSAMPLE(inptr1[col]);
|
cb = GETJSAMPLE(inptr1[col]);
|
||||||
cr = GETJSAMPLE(inptr2[col]);
|
cr = GETJSAMPLE(inptr2[col]);
|
||||||
/* Range-limiting is essential due to noise introduced by DCT losses. */
|
/* Range-limiting is essential due to noise introduced by DCT losses. */
|
||||||
outptr[RGB_RED] = range_limit[y + Crrtab[cr]];
|
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + Crrtab[cr]];
|
||||||
outptr[RGB_GREEN] = range_limit[y +
|
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y +
|
||||||
((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
|
||||||
SCALEBITS))];
|
SCALEBITS))];
|
||||||
outptr[RGB_BLUE] = range_limit[y + Cbbtab[cb]];
|
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + Cbbtab[cb]];
|
||||||
outptr += RGB_PIXELSIZE;
|
outptr += rgb_pixelsize[cinfo->out_color_space];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -229,8 +229,10 @@ gray_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
outptr = *output_buf++;
|
outptr = *output_buf++;
|
||||||
for (col = 0; col < num_cols; col++) {
|
for (col = 0; col < num_cols; col++) {
|
||||||
/* We can dispense with GETJSAMPLE() here */
|
/* We can dispense with GETJSAMPLE() here */
|
||||||
outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
|
outptr[rgb_red[cinfo->out_color_space]] =
|
||||||
outptr += RGB_PIXELSIZE;
|
outptr[rgb_green[cinfo->out_color_space]] =
|
||||||
|
outptr[rgb_blue[cinfo->out_color_space]] = inptr[col];
|
||||||
|
outptr += rgb_pixelsize[cinfo->out_color_space];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -358,7 +360,13 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case JCS_RGB:
|
case JCS_RGB:
|
||||||
cinfo->out_color_components = RGB_PIXELSIZE;
|
case JCS_EXT_RGB:
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
|
||||||
if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
if (cinfo->jpeg_color_space == JCS_YCbCr) {
|
||||||
if (jsimd_can_ycc_rgb())
|
if (jsimd_can_ycc_rgb())
|
||||||
cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
|
cconvert->pub.color_convert = jsimd_ycc_rgb_convert;
|
||||||
@@ -368,7 +376,8 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
|
|||||||
}
|
}
|
||||||
} else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
} else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
|
||||||
cconvert->pub.color_convert = gray_rgb_convert;
|
cconvert->pub.color_convert = gray_rgb_convert;
|
||||||
} else if (cinfo->jpeg_color_space == JCS_RGB && RGB_PIXELSIZE == 3) {
|
} else if (cinfo->jpeg_color_space == cinfo->out_color_space &&
|
||||||
|
rgb_pixelsize[cinfo->out_color_space] == 3) {
|
||||||
cconvert->pub.color_convert = null_convert;
|
cconvert->pub.color_convert = null_convert;
|
||||||
} else
|
} else
|
||||||
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
|
||||||
|
|||||||
58
jdmerge.c
58
jdmerge.c
@@ -257,15 +257,15 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
|
|||||||
cblue = Cbbtab[cb];
|
cblue = Cbbtab[cb];
|
||||||
/* Fetch 2 Y values and emit 2 pixels */
|
/* Fetch 2 Y values and emit 2 pixels */
|
||||||
y = GETJSAMPLE(*inptr0++);
|
y = GETJSAMPLE(*inptr0++);
|
||||||
outptr[RGB_RED] = range_limit[y + cred];
|
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr[RGB_GREEN] = range_limit[y + cgreen];
|
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr[RGB_BLUE] = range_limit[y + cblue];
|
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr += RGB_PIXELSIZE;
|
outptr += rgb_pixelsize[cinfo->out_color_space];
|
||||||
y = GETJSAMPLE(*inptr0++);
|
y = GETJSAMPLE(*inptr0++);
|
||||||
outptr[RGB_RED] = range_limit[y + cred];
|
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr[RGB_GREEN] = range_limit[y + cgreen];
|
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr[RGB_BLUE] = range_limit[y + cblue];
|
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr += RGB_PIXELSIZE;
|
outptr += rgb_pixelsize[cinfo->out_color_space];
|
||||||
}
|
}
|
||||||
/* If image width is odd, do the last output column separately */
|
/* If image width is odd, do the last output column separately */
|
||||||
if (cinfo->output_width & 1) {
|
if (cinfo->output_width & 1) {
|
||||||
@@ -275,9 +275,9 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
|
|||||||
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
||||||
cblue = Cbbtab[cb];
|
cblue = Cbbtab[cb];
|
||||||
y = GETJSAMPLE(*inptr0);
|
y = GETJSAMPLE(*inptr0);
|
||||||
outptr[RGB_RED] = range_limit[y + cred];
|
outptr[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr[RGB_GREEN] = range_limit[y + cgreen];
|
outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr[RGB_BLUE] = range_limit[y + cblue];
|
outptr[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -321,24 +321,24 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
|
|||||||
cblue = Cbbtab[cb];
|
cblue = Cbbtab[cb];
|
||||||
/* Fetch 4 Y values and emit 4 pixels */
|
/* Fetch 4 Y values and emit 4 pixels */
|
||||||
y = GETJSAMPLE(*inptr00++);
|
y = GETJSAMPLE(*inptr00++);
|
||||||
outptr0[RGB_RED] = range_limit[y + cred];
|
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr0[RGB_BLUE] = range_limit[y + cblue];
|
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr0 += RGB_PIXELSIZE;
|
outptr0 += rgb_pixelsize[cinfo->out_color_space]; /* stride must follow out_color_space, like the channel offsets above */
|
||||||
y = GETJSAMPLE(*inptr00++);
|
y = GETJSAMPLE(*inptr00++);
|
||||||
outptr0[RGB_RED] = range_limit[y + cred];
|
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr0[RGB_BLUE] = range_limit[y + cblue];
|
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr0 += RGB_PIXELSIZE;
|
outptr0 += rgb_pixelsize[cinfo->out_color_space];
|
||||||
y = GETJSAMPLE(*inptr01++);
|
y = GETJSAMPLE(*inptr01++);
|
||||||
outptr1[RGB_RED] = range_limit[y + cred];
|
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr1[RGB_BLUE] = range_limit[y + cblue];
|
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr1 += RGB_PIXELSIZE;
|
outptr1 += rgb_pixelsize[cinfo->out_color_space]; /* second output row uses the same per-colorspace stride */
|
||||||
y = GETJSAMPLE(*inptr01++);
|
y = GETJSAMPLE(*inptr01++);
|
||||||
outptr1[RGB_RED] = range_limit[y + cred];
|
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr1[RGB_BLUE] = range_limit[y + cblue];
|
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
outptr1 += RGB_PIXELSIZE;
|
outptr1 += rgb_pixelsize[cinfo->out_color_space];
|
||||||
}
|
}
|
||||||
/* If image width is odd, do the last output column separately */
|
/* If image width is odd, do the last output column separately */
|
||||||
@@ -349,13 +349,13 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
|
|||||||
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
|
||||||
cblue = Cbbtab[cb];
|
cblue = Cbbtab[cb];
|
||||||
y = GETJSAMPLE(*inptr00);
|
y = GETJSAMPLE(*inptr00);
|
||||||
outptr0[RGB_RED] = range_limit[y + cred];
|
outptr0[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr0[RGB_GREEN] = range_limit[y + cgreen];
|
outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr0[RGB_BLUE] = range_limit[y + cblue];
|
outptr0[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
y = GETJSAMPLE(*inptr01);
|
y = GETJSAMPLE(*inptr01);
|
||||||
outptr1[RGB_RED] = range_limit[y + cred];
|
outptr1[rgb_red[cinfo->out_color_space]] = range_limit[y + cred];
|
||||||
outptr1[RGB_GREEN] = range_limit[y + cgreen];
|
outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
|
||||||
outptr1[RGB_BLUE] = range_limit[y + cblue];
|
outptr1[rgb_blue[cinfo->out_color_space]] = range_limit[y + cblue];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
18
jmorecfg.h
18
jmorecfg.h
@@ -2,6 +2,7 @@
|
|||||||
* jmorecfg.h
|
* jmorecfg.h
|
||||||
*
|
*
|
||||||
* Copyright (C) 1991-1997, Thomas G. Lane.
|
* Copyright (C) 1991-1997, Thomas G. Lane.
|
||||||
|
* Copyright (C) 2009, D. R. Commander.
|
||||||
* This file is part of the Independent JPEG Group's software.
|
* This file is part of the Independent JPEG Group's software.
|
||||||
* For conditions of distribution and use, see the accompanying README file.
|
* For conditions of distribution and use, see the accompanying README file.
|
||||||
*
|
*
|
||||||
@@ -316,6 +317,23 @@ typedef int boolean;
|
|||||||
#define RGB_BLUE 2 /* Offset of Blue */
|
#define RGB_BLUE 2 /* Offset of Blue */
|
||||||
#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */
|
#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */
|
||||||
|
|
||||||
|
#define JPEG_NUMCS 12
|
||||||
|
|
||||||
|
static const int rgb_red[JPEG_NUMCS] = {
|
||||||
|
-1, -1, RGB_RED, -1, -1, -1, 0, 0, 2, 2, 3, 1
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int rgb_green[JPEG_NUMCS] = {
|
||||||
|
-1, -1, RGB_GREEN, -1, -1, -1, 1, 1, 1, 1, 2, 2
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int rgb_blue[JPEG_NUMCS] = {
|
||||||
|
-1, -1, RGB_BLUE, -1, -1, -1, 2, 2, 0, 0, 1, 3
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int rgb_pixelsize[JPEG_NUMCS] = {
|
||||||
|
-1, -1, RGB_PIXELSIZE, -1, -1, -1, 3, 4, 3, 4, 4, 4
|
||||||
|
};
|
||||||
|
|
||||||
/* Definitions for speed-related optimizations. */
|
/* Definitions for speed-related optimizations. */
|
||||||
|
|
||||||
|
|||||||
13
jpeglib.h
13
jpeglib.h
@@ -203,13 +203,22 @@ struct jpeg_marker_struct {
|
|||||||
|
|
||||||
/* Known color spaces. */
|
/* Known color spaces. */
|
||||||
|
|
||||||
|
#define JCS_EXTENSIONS 1
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
JCS_UNKNOWN, /* error/unspecified */
|
JCS_UNKNOWN, /* error/unspecified */
|
||||||
JCS_GRAYSCALE, /* monochrome */
|
JCS_GRAYSCALE, /* monochrome */
|
||||||
JCS_RGB, /* red/green/blue */
|
JCS_RGB, /* red/green/blue as specified by the RGB_RED, RGB_GREEN,
|
||||||
|
RGB_BLUE, and RGB_PIXELSIZE macros */
|
||||||
JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */
|
JCS_YCbCr, /* Y/Cb/Cr (also known as YUV) */
|
||||||
JCS_CMYK, /* C/M/Y/K */
|
JCS_CMYK, /* C/M/Y/K */
|
||||||
JCS_YCCK /* Y/Cb/Cr/K */
|
JCS_YCCK, /* Y/Cb/Cr/K */
|
||||||
|
JCS_EXT_RGB, /* red/green/blue */
|
||||||
|
JCS_EXT_RGBX, /* red/green/blue/x */
|
||||||
|
JCS_EXT_BGR, /* blue/green/red */
|
||||||
|
JCS_EXT_BGRX, /* blue/green/red/x */
|
||||||
|
JCS_EXT_XBGR, /* x/blue/green/red */
|
||||||
|
JCS_EXT_XRGB /* x/red/green/blue (last enumerator: no trailing comma, C89 rejects it) */
|
||||||
} J_COLOR_SPACE;
|
} J_COLOR_SPACE;
|
||||||
|
|
||||||
/* DCT/IDCT algorithm options. */
|
/* DCT/IDCT algorithm options. */
|
||||||
|
|||||||
@@ -193,7 +193,10 @@ select_ncolors (j_decompress_ptr cinfo, int Ncolors[])
|
|||||||
int total_colors, iroot, i, j;
|
int total_colors, iroot, i, j;
|
||||||
boolean changed;
|
boolean changed;
|
||||||
long temp;
|
long temp;
|
||||||
static const int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
|
int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
|
||||||
|
RGB_order[0] = rgb_green[cinfo->out_color_space];
|
||||||
|
RGB_order[1] = rgb_red[cinfo->out_color_space];
|
||||||
|
RGB_order[2] = rgb_blue[cinfo->out_color_space];
|
||||||
|
|
||||||
/* We can allocate at least the nc'th root of max_colors per component. */
|
/* We can allocate at least the nc'th root of max_colors per component. */
|
||||||
/* Compute floor(nc'th root of max_colors). */
|
/* Compute floor(nc'th root of max_colors). */
|
||||||
|
|||||||
46
jquant2.c
46
jquant2.c
@@ -74,29 +74,10 @@
|
|||||||
#define G_SCALE 3 /* scale G distances by this much */
|
#define G_SCALE 3 /* scale G distances by this much */
|
||||||
#define B_SCALE 1 /* and B by this much */
|
#define B_SCALE 1 /* and B by this much */
|
||||||
|
|
||||||
/* Relabel R/G/B as components 0/1/2, respecting the RGB ordering defined
|
static const int c_scales[3]={R_SCALE, G_SCALE, B_SCALE};
|
||||||
* in jmorecfg.h. As the code stands, it will do the right thing for R,G,B
|
#define C0_SCALE c_scales[rgb_red[cinfo->out_color_space]]
|
||||||
* and B,G,R orders. If you define some other weird order in jmorecfg.h,
|
#define C1_SCALE c_scales[rgb_green[cinfo->out_color_space]]
|
||||||
* you'll get compile errors until you extend this logic. In that case
|
#define C2_SCALE c_scales[rgb_blue[cinfo->out_color_space]]
|
||||||
* you'll probably want to tweak the histogram sizes too.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if RGB_RED == 0
|
|
||||||
#define C0_SCALE R_SCALE
|
|
||||||
#endif
|
|
||||||
#if RGB_BLUE == 0
|
|
||||||
#define C0_SCALE B_SCALE
|
|
||||||
#endif
|
|
||||||
#if RGB_GREEN == 1
|
|
||||||
#define C1_SCALE G_SCALE
|
|
||||||
#endif
|
|
||||||
#if RGB_RED == 2
|
|
||||||
#define C2_SCALE R_SCALE
|
|
||||||
#endif
|
|
||||||
#if RGB_BLUE == 2
|
|
||||||
#define C2_SCALE B_SCALE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First we have the histogram data structure and routines for creating it.
|
* First we have the histogram data structure and routines for creating it.
|
||||||
@@ -454,15 +435,16 @@ median_cut (j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
|
|||||||
/* We want to break any ties in favor of green, then red, blue last.
|
/* We want to break any ties in favor of green, then red, blue last.
|
||||||
* This code does the right thing for R,G,B or B,G,R color orders only.
|
* This code does the right thing for R,G,B or B,G,R color orders only.
|
||||||
*/
|
*/
|
||||||
#if RGB_RED == 0
|
if (rgb_red[cinfo->out_color_space] == 0) {
|
||||||
cmax = c1; n = 1;
|
cmax = c1; n = 1;
|
||||||
if (c0 > cmax) { cmax = c0; n = 0; }
|
if (c0 > cmax) { cmax = c0; n = 0; }
|
||||||
if (c2 > cmax) { n = 2; }
|
if (c2 > cmax) { n = 2; }
|
||||||
#else
|
}
|
||||||
cmax = c1; n = 1;
|
else {
|
||||||
if (c2 > cmax) { cmax = c2; n = 2; }
|
cmax = c1; n = 1;
|
||||||
if (c0 > cmax) { n = 0; }
|
if (c2 > cmax) { cmax = c2; n = 2; }
|
||||||
#endif
|
if (c0 > cmax) { n = 0; }
|
||||||
|
}
|
||||||
/* Choose split point along selected axis, and update box bounds.
|
/* Choose split point along selected axis, and update box bounds.
|
||||||
* Current algorithm: split at halfway point.
|
* Current algorithm: split at halfway point.
|
||||||
* (Since the box has been shrunk to minimum volume,
|
* (Since the box has been shrunk to minimum volume,
|
||||||
|
|||||||
75
jsimd.c
75
jsimd.c
@@ -2,6 +2,7 @@
|
|||||||
* jsimd.c
|
* jsimd.c
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
* Copyright 2009 D. R. Commander
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library,
|
* Based on the x86 SIMD extension for IJG JPEG library,
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -107,12 +108,45 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
|
|||||||
JDIMENSION output_row, int num_rows)
|
JDIMENSION output_row, int num_rows)
|
||||||
{
|
{
|
||||||
#ifdef WITH_SIMD
|
#ifdef WITH_SIMD
|
||||||
|
void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||||
|
void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
|
||||||
|
switch(cinfo->in_color_space)
|
||||||
|
{
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
sse2fct=jsimd_extrgb_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extrgb_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
sse2fct=jsimd_extrgbx_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extrgbx_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
sse2fct=jsimd_extbgr_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extbgr_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
sse2fct=jsimd_extbgrx_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extbgrx_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
sse2fct=jsimd_extxbgr_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extxbgr_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
sse2fct=jsimd_extxrgb_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_extxrgb_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
sse2fct=jsimd_rgb_ycc_convert_sse2;
|
||||||
|
mmxfct=jsimd_rgb_ycc_convert_mmx;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if ((simd_support & JSIMD_SSE2) &&
|
if ((simd_support & JSIMD_SSE2) &&
|
||||||
IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
|
IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
|
||||||
jsimd_rgb_ycc_convert_sse2(cinfo->image_width, input_buf,
|
sse2fct(cinfo->image_width, input_buf,
|
||||||
output_buf, output_row, num_rows);
|
output_buf, output_row, num_rows);
|
||||||
else if (simd_support & JSIMD_MMX)
|
else if (simd_support & JSIMD_MMX)
|
||||||
jsimd_rgb_ycc_convert_mmx(cinfo->image_width, input_buf,
|
mmxfct(cinfo->image_width, input_buf,
|
||||||
output_buf, output_row, num_rows);
|
output_buf, output_row, num_rows);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -123,12 +157,45 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
|
|||||||
JSAMPARRAY output_buf, int num_rows)
|
JSAMPARRAY output_buf, int num_rows)
|
||||||
{
|
{
|
||||||
#ifdef WITH_SIMD
|
#ifdef WITH_SIMD
|
||||||
|
void (*sse2fct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
|
||||||
|
void (*mmxfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
|
||||||
|
switch(cinfo->out_color_space)
|
||||||
|
{
|
||||||
|
case JCS_EXT_RGB:
|
||||||
|
sse2fct=jsimd_ycc_extrgb_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extrgb_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_RGBX:
|
||||||
|
sse2fct=jsimd_ycc_extrgbx_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extrgbx_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGR:
|
||||||
|
sse2fct=jsimd_ycc_extbgr_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extbgr_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_BGRX:
|
||||||
|
sse2fct=jsimd_ycc_extbgrx_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extbgrx_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XBGR:
|
||||||
|
sse2fct=jsimd_ycc_extxbgr_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extxbgr_convert_mmx;
|
||||||
|
break;
|
||||||
|
case JCS_EXT_XRGB:
|
||||||
|
sse2fct=jsimd_ycc_extxrgb_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_extxrgb_convert_mmx;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
sse2fct=jsimd_ycc_rgb_convert_sse2;
|
||||||
|
mmxfct=jsimd_ycc_rgb_convert_mmx;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if ((simd_support & JSIMD_SSE2) &&
|
if ((simd_support & JSIMD_SSE2) &&
|
||||||
IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
|
IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
|
||||||
jsimd_ycc_rgb_convert_sse2(cinfo->output_width, input_buf,
|
sse2fct(cinfo->output_width, input_buf,
|
||||||
input_row, output_buf, num_rows);
|
input_row, output_buf, num_rows);
|
||||||
else if (simd_support & JSIMD_MMX)
|
else if (simd_support & JSIMD_MMX)
|
||||||
jsimd_ycc_rgb_convert_mmx(cinfo->output_width, input_buf,
|
mmxfct(cinfo->output_width, input_buf,
|
||||||
input_row, output_buf, num_rows);
|
input_row, output_buf, num_rows);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,11 @@ libsimd_la_SOURCES = jsimd.h jsimdcfg.inc.h \
|
|||||||
jiss2red.asm jiss2int.asm jiss2fst.asm \
|
jiss2red.asm jiss2int.asm jiss2fst.asm \
|
||||||
jcqnts2f.asm jiss2flt.asm
|
jcqnts2f.asm jiss2flt.asm
|
||||||
|
|
||||||
|
jccolmmx.lo: jcclrmmx.asm
|
||||||
|
jccolss2.lo: jcclrss2.asm
|
||||||
|
jdcolmmx.lo: jdclrmmx.asm
|
||||||
|
jdcolss2.lo: jdclrss2.asm
|
||||||
|
|
||||||
.asm.lo:
|
.asm.lo:
|
||||||
$(LIBTOOL) --mode=compile --tag NASM ./nasm_lt.sh $(NASM) $(NAFLAGS) $< -o $@
|
$(LIBTOOL) --mode=compile --tag NASM ./nasm_lt.sh $(NASM) $(NAFLAGS) $< -o $@
|
||||||
|
|
||||||
|
|||||||
474
simd/jcclrmmx.asm
Normal file
474
simd/jcclrmmx.asm
Normal file
@@ -0,0 +1,474 @@
|
|||||||
|
;
|
||||||
|
; jcclrmmx.asm - colorspace conversion (MMX)
|
||||||
|
;
|
||||||
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
;
|
||||||
|
; Based on
|
||||||
|
; x86 SIMD extension for IJG JPEG library
|
||||||
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
|
;
|
||||||
|
; This file should be assembled with NASM (Netwide Assembler),
|
||||||
|
; can *not* be assembled with Microsoft's MASM or any compatible
|
||||||
|
; assembler (including Borland's Turbo Assembler).
|
||||||
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
|
;
|
||||||
|
; [TAB8]
|
||||||
|
|
||||||
|
; --------------------------------------------------------------------------
|
||||||
|
SECTION SEG_TEXT
|
||||||
|
BITS 32
|
||||||
|
;
|
||||||
|
; Convert some rows of samples to the output colorspace.
|
||||||
|
;
|
||||||
|
; GLOBAL(void)
|
||||||
|
; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width,
|
||||||
|
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
; JDIMENSION output_row, int num_rows);
|
||||||
|
;
|
||||||
|
|
||||||
|
; Byte offsets of the five stack arguments relative to a base register b.
; The function below passes eax for b, which it sets to esp immediately after
; `push ebp`; (b)+0 is therefore the saved ebp and the arguments start at (b)+8
; (presumably (b)+4 is the return address -- confirm the calling convention).
%define img_width(b) (b)+8 ; JDIMENSION img_width
|
||||||
|
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
|
||||||
|
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
|
||||||
|
%define output_row(b) (b)+20 ; JDIMENSION output_row
|
||||||
|
%define num_rows(b) (b)+24 ; int num_rows
|
||||||
|
|
||||||
|
; Locals addressed from the re-aligned ebp set up by the function prologue:
;   original_ebp : slot at [ebp+0]; the prologue stores the pre-alignment
;                  stack pointer there
;   wk(i)        : WK_NUM MMWORD-sized scratch slots directly below ebp
;                  (wk(0) is the lowest address, wk(WK_NUM-1) ends at ebp)
;   gotptr       : one pointer-sized slot immediately below wk(0)
%define original_ebp ebp+0
|
||||||
|
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
|
||||||
|
%define WK_NUM 8
|
||||||
|
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
||||||
|
|
||||||
|
align 16
|
||||||
|
global EXTN(jsimd_rgb_ycc_convert_mmx)
|
||||||
|
|
||||||
|
EXTN(jsimd_rgb_ycc_convert_mmx):
; Converts num_rows rows of interleaved pixels read through input_buf into
; three separate output planes reached through output_buf[0..2] (labelled
; Y, Cb and Cr by the comments below), starting at row index output_row.
; Each pass of the column loop consumes SIZEOF_MMWORD pixels and stores one
; MMWORD into each of the three output rows.
; Returns at once, without touching any MMX register, when img_width is 0 or
; num_rows <= 0.
;
; Prologue: build an MMWORD-aligned frame. The pre-alignment stack pointer is
; kept in eax (used as the base for the argument macros) and stored at [ebp].
|
||||||
|
push ebp
|
||||||
|
mov eax,esp ; eax = original ebp
|
||||||
|
sub esp, byte 4
|
||||||
|
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
||||||
|
mov [esp],eax
|
||||||
|
mov ebp,esp ; ebp = aligned ebp
|
||||||
|
lea esp, [wk(0)]
|
||||||
|
pushpic eax ; make a room for GOT address
|
||||||
|
push ebx
|
||||||
|
; push ecx ; need not be preserved
|
||||||
|
; push edx ; need not be preserved
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
|
||||||
|
get_GOT ebx ; get GOT address
|
||||||
|
movpic POINTER [gotptr], ebx ; save GOT address
|
||||||
|
|
||||||
|
; ecx = img_width; a zero-width image needs no work.
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
|
||||||
|
test ecx,ecx
|
||||||
|
jz near .return
|
||||||
|
|
||||||
|
; ecx is borrowed for output_row below, so img_width is parked on the stack.
push ecx
|
||||||
|
|
||||||
|
; edi/ebx/edx = row-pointer arrays of output planes 0/1/2, each advanced to
; the entry for row output_row.
mov esi, JSAMPIMAGE [output_buf(eax)]
|
||||||
|
mov ecx, JDIMENSION [output_row(eax)]
|
||||||
|
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
|
||||||
|
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
|
||||||
|
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
|
||||||
|
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
|
||||||
|
pop ecx
|
||||||
|
|
||||||
|
; esi = input row-pointer array, eax = num_rows (eax stops being the
; argument base from here on); leave if there are no rows to convert.
mov esi, JSAMPARRAY [input_buf(eax)]
|
||||||
|
mov eax, INT [num_rows(eax)]
|
||||||
|
test eax,eax
|
||||||
|
jle near .return
|
||||||
|
alignx 16,7
|
||||||
|
.rowloop:
|
||||||
|
; Per row: save the row-array cursors and the column count, then replace
; esi/edi/ebx/edx with the sample pointers of the current row.
; NOTE(review): pushpic/movpic/poppic are macros defined outside this file;
; presumably they emit instructions only in PIC builds, where eax is reused
; for the GOT address inside the row -- confirm in the include file.
pushpic eax
|
||||||
|
push edx
|
||||||
|
push ebx
|
||||||
|
push edi
|
||||||
|
push esi
|
||||||
|
push ecx ; col
|
||||||
|
|
||||||
|
mov esi, JSAMPROW [esi] ; inptr
|
||||||
|
mov edi, JSAMPROW [edi] ; outptr0
|
||||||
|
mov ebx, JSAMPROW [ebx] ; outptr1
|
||||||
|
mov edx, JSAMPROW [edx] ; outptr2
|
||||||
|
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
||||||
|
|
||||||
|
; At least SIZEOF_MMWORD columns left: take the full-width load path.
; Otherwise fall through into the partial-load path (.column_ld1).
cmp ecx, byte SIZEOF_MMWORD
|
||||||
|
jae short .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
%if RGB_PIXELSIZE == 3 ; ---------------
|
||||||
|
|
||||||
|
.column_ld1:
|
||||||
|
; Partial group, 3 bytes per pixel: ecx becomes the remaining byte count and
; the bytes are gathered in 1/2/4/8/16-byte pieces, one per set bit of that
; count, from the highest offset downwards, so only bytes belonging to the
; remaining pixels are read. eax/edx are scratch here and are restored at
; .column_ld4. ecx is finally forced to SIZEOF_MMWORD so that the subtraction
; at the bottom of the loop leaves 0 and the row ends.
push eax
|
||||||
|
push edx
|
||||||
|
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
||||||
|
test cl, SIZEOF_BYTE
|
||||||
|
jz short .column_ld2
|
||||||
|
sub ecx, byte SIZEOF_BYTE
|
||||||
|
xor eax,eax
|
||||||
|
mov al, BYTE [esi+ecx]
|
||||||
|
.column_ld2:
|
||||||
|
test cl, SIZEOF_WORD
|
||||||
|
jz short .column_ld4
|
||||||
|
sub ecx, byte SIZEOF_WORD
|
||||||
|
xor edx,edx
|
||||||
|
mov dx, WORD [esi+ecx]
|
||||||
|
shl eax, WORD_BIT
|
||||||
|
or eax,edx
|
||||||
|
.column_ld4:
|
||||||
|
movd mmA,eax
|
||||||
|
pop edx
|
||||||
|
pop eax
|
||||||
|
test cl, SIZEOF_DWORD
|
||||||
|
jz short .column_ld8
|
||||||
|
sub ecx, byte SIZEOF_DWORD
|
||||||
|
movd mmG, DWORD [esi+ecx]
|
||||||
|
psllq mmA, DWORD_BIT
|
||||||
|
por mmA,mmG
|
||||||
|
.column_ld8:
|
||||||
|
test cl, SIZEOF_MMWORD
|
||||||
|
jz short .column_ld16
|
||||||
|
movq mmG,mmA
|
||||||
|
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
||||||
|
mov ecx, SIZEOF_MMWORD
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
.column_ld16:
|
||||||
|
test cl, 2*SIZEOF_MMWORD
|
||||||
|
mov ecx, SIZEOF_MMWORD
|
||||||
|
jz short .rgb_ycc_cnv
|
||||||
|
movq mmF,mmA
|
||||||
|
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
||||||
|
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
.columnloop:
|
||||||
|
; Full group: 8 pixels * 3 bytes = three MMWORD loads.
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
||||||
|
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
||||||
|
movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
|
||||||
|
|
||||||
|
.rgb_ycc_cnv:
|
||||||
|
; In the lane diagrams below each entry is <component><pixel>: the first
; digit is the byte position inside the pixel, the second the pixel index.
; The shuffle sequence de-interleaves the bytes and then zero-extends them
; to words, split into even-pixel and odd-pixel registers per component.
; mmA=(00 10 20 01 11 21 02 12)
|
||||||
|
; mmG=(22 03 13 23 04 14 24 05)
|
||||||
|
; mmF=(15 25 06 16 26 07 17 27)
|
||||||
|
|
||||||
|
movq mmD,mmA
|
||||||
|
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01)
|
||||||
|
psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --)
|
||||||
|
|
||||||
|
punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05)
|
||||||
|
psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23)
|
||||||
|
|
||||||
|
punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16)
|
||||||
|
punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27)
|
||||||
|
|
||||||
|
movq mmE,mmA
|
||||||
|
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14)
|
||||||
|
psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --)
|
||||||
|
|
||||||
|
punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
|
||||||
|
psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25)
|
||||||
|
|
||||||
|
punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07)
|
||||||
|
punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27)
|
||||||
|
|
||||||
|
pxor mmH,mmH
|
||||||
|
|
||||||
|
movq mmC,mmA
|
||||||
|
punpcklbw mmA,mmH ; mmA=(00 02 04 06)
|
||||||
|
punpckhbw mmC,mmH ; mmC=(10 12 14 16)
|
||||||
|
|
||||||
|
movq mmB,mmE
|
||||||
|
punpcklbw mmE,mmH ; mmE=(20 22 24 26)
|
||||||
|
punpckhbw mmB,mmH ; mmB=(01 03 05 07)
|
||||||
|
|
||||||
|
movq mmF,mmD
|
||||||
|
punpcklbw mmD,mmH ; mmD=(11 13 15 17)
|
||||||
|
punpckhbw mmF,mmH ; mmF=(21 23 25 27)
|
||||||
|
|
||||||
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
.column_ld1:
|
||||||
|
; Partial group, 4 bytes per pixel: ecx stays a pixel count and the pixels
; are gathered in groups of 1, 2 and 4 (one per set bit of ecx), again from
; the highest offset downwards. ecx is then forced to SIZEOF_MMWORD so the
; subtraction at the bottom of the loop leaves 0.
test cl, SIZEOF_MMWORD/8
|
||||||
|
jz short .column_ld2
|
||||||
|
sub ecx, byte SIZEOF_MMWORD/8
|
||||||
|
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||||
|
.column_ld2:
|
||||||
|
test cl, SIZEOF_MMWORD/4
|
||||||
|
jz short .column_ld4
|
||||||
|
sub ecx, byte SIZEOF_MMWORD/4
|
||||||
|
movq mmF,mmA
|
||||||
|
movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
|
||||||
|
.column_ld4:
|
||||||
|
test cl, SIZEOF_MMWORD/2
|
||||||
|
mov ecx, SIZEOF_MMWORD
|
||||||
|
jz short .rgb_ycc_cnv
|
||||||
|
movq mmD,mmA
|
||||||
|
movq mmC,mmF
|
||||||
|
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
||||||
|
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
.columnloop:
|
||||||
|
; Full group: 8 pixels * 4 bytes = four MMWORD loads.
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
||||||
|
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
||||||
|
movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
|
||||||
|
movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
|
||||||
|
|
||||||
|
.rgb_ycc_cnv:
|
||||||
|
; mmA=(00 10 20 30 01 11 21 31)
|
||||||
|
; mmF=(02 12 22 32 03 13 23 33)
|
||||||
|
; mmD=(04 14 24 34 05 15 25 35)
|
||||||
|
; mmC=(06 16 26 36 07 17 27 37)
|
||||||
|
|
||||||
|
movq mmB,mmA
|
||||||
|
punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32)
|
||||||
|
punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33)
|
||||||
|
|
||||||
|
movq mmG,mmD
|
||||||
|
punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36)
|
||||||
|
punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37)
|
||||||
|
|
||||||
|
movq mmE,mmA
|
||||||
|
punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
|
||||||
|
punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36)
|
||||||
|
|
||||||
|
movq mmH,mmB
|
||||||
|
punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17)
|
||||||
|
punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37)
|
||||||
|
|
||||||
|
pxor mmF,mmF
|
||||||
|
|
||||||
|
movq mmC,mmA
|
||||||
|
punpcklbw mmA,mmF ; mmA=(00 02 04 06)
|
||||||
|
punpckhbw mmC,mmF ; mmC=(10 12 14 16)
|
||||||
|
|
||||||
|
movq mmD,mmB
|
||||||
|
punpcklbw mmB,mmF ; mmB=(01 03 05 07)
|
||||||
|
punpckhbw mmD,mmF ; mmD=(11 13 15 17)
|
||||||
|
|
||||||
|
movq mmG,mmE
|
||||||
|
punpcklbw mmE,mmF ; mmE=(20 22 24 26)
|
||||||
|
punpckhbw mmG,mmF ; mmG=(30 32 34 36)
|
||||||
|
|
||||||
|
; mmF is zero here, so it cannot serve as both source and destination of a
; zero-extension; instead the bytes of mmH are unpacked into the high byte
; of each word and shifted back down.
punpcklbw mmF,mmH
|
||||||
|
punpckhbw mmH,mmH
|
||||||
|
psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27)
|
||||||
|
psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37)
|
||||||
|
|
||||||
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
; NOTE(review): the mmA..mmH names used above are defined outside this file;
; the next two lines imply they alias mm0..mm7 such that the R/G/B words land
; in the registers listed -- confirm in the include that defines them.
; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
|
||||||
|
; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
|
||||||
|
|
||||||
|
; (Original)
|
||||||
|
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||||
|
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
;
|
||||||
|
; (This implementation)
|
||||||
|
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
||||||
|
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
|
||||||
|
; Spill R and B (even and odd) to the work area: mm0/mm1/mm4/mm5 are reused
; as temporaries below, while mm2 (GE) and mm3 (GO) stay live in registers.
movq MMWORD [wk(0)], mm0 ; wk(0)=RE
|
||||||
|
movq MMWORD [wk(1)], mm1 ; wk(1)=RO
|
||||||
|
movq MMWORD [wk(2)], mm4 ; wk(2)=BE
|
||||||
|
movq MMWORD [wk(3)], mm5 ; wk(3)=BO
|
||||||
|
|
||||||
|
; ---- Odd pixels: R/G partial sums for Y (kept in wk(4..5)) and for Cb ----
; R and G words are interleaved so each pmaddwd yields one dword per pixel.
movq mm6,mm1
|
||||||
|
punpcklwd mm1,mm3
|
||||||
|
punpckhwd mm6,mm3
|
||||||
|
movq mm7,mm1
|
||||||
|
movq mm4,mm6
|
||||||
|
pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
|
||||||
|
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
|
||||||
|
pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
|
||||||
|
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
|
||||||
|
|
||||||
|
movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
|
||||||
|
movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
|
||||||
|
|
||||||
|
; Interleaving a zero low word with B in the high word gives B<<16 in each
; dword; the shift right by 1 turns that into B<<15 without a multiply
; (equal to B*FIX(0.500) provided SCALEBITS is 16 -- defined elsewhere).
pxor mm1,mm1
|
||||||
|
pxor mm6,mm6
|
||||||
|
punpcklwd mm1,mm5 ; mm1=BOL
|
||||||
|
punpckhwd mm6,mm5 ; mm6=BOH
|
||||||
|
psrld mm1,1 ; mm1=BOL*FIX(0.500)
|
||||||
|
psrld mm6,1 ; mm6=BOH*FIX(0.500)
|
||||||
|
|
||||||
|
movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd mm7,mm1
|
||||||
|
paddd mm4,mm6
|
||||||
|
paddd mm7,mm5
|
||||||
|
paddd mm4,mm5
|
||||||
|
psrld mm7,SCALEBITS ; mm7=CbOL
|
||||||
|
psrld mm4,SCALEBITS ; mm4=CbOH
|
||||||
|
packssdw mm7,mm4 ; mm7=CbO
|
||||||
|
|
||||||
|
movq mm1, MMWORD [wk(2)] ; mm1=BE
|
||||||
|
|
||||||
|
; ---- Even pixels: R/G partial sums for Y (kept in wk(6..7)) and for Cb ----
movq mm6,mm0
|
||||||
|
punpcklwd mm0,mm2
|
||||||
|
punpckhwd mm6,mm2
|
||||||
|
movq mm5,mm0
|
||||||
|
movq mm4,mm6
|
||||||
|
pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
|
||||||
|
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
|
||||||
|
pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
|
||||||
|
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
|
||||||
|
|
||||||
|
movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
|
||||||
|
movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
|
||||||
|
|
||||||
|
pxor mm0,mm0
|
||||||
|
pxor mm6,mm6
|
||||||
|
punpcklwd mm0,mm1 ; mm0=BEL
|
||||||
|
punpckhwd mm6,mm1 ; mm6=BEH
|
||||||
|
psrld mm0,1 ; mm0=BEL*FIX(0.500)
|
||||||
|
psrld mm6,1 ; mm6=BEH*FIX(0.500)
|
||||||
|
|
||||||
|
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd mm5,mm0
|
||||||
|
paddd mm4,mm6
|
||||||
|
paddd mm5,mm1
|
||||||
|
paddd mm4,mm1
|
||||||
|
psrld mm5,SCALEBITS ; mm5=CbEL
|
||||||
|
psrld mm4,SCALEBITS ; mm4=CbEH
|
||||||
|
packssdw mm5,mm4 ; mm5=CbE
|
||||||
|
|
||||||
|
; Odd-pixel results are shifted into the high byte of each word and merged
; with the even-pixel results in the low byte, restoring pixel order.
; NOTE(review): this store (and the Y and Cr stores below) always writes a
; full MMWORD, even for a partial final group -- presumably the output rows
; are padded to a multiple of SIZEOF_MMWORD; confirm with the caller.
psllw mm7,BYTE_BIT
|
||||||
|
por mm5,mm7 ; mm5=Cb
|
||||||
|
movq MMWORD [ebx], mm5 ; Save Cb
|
||||||
|
|
||||||
|
movq mm0, MMWORD [wk(3)] ; mm0=BO
|
||||||
|
movq mm6, MMWORD [wk(2)] ; mm6=BE
|
||||||
|
movq mm1, MMWORD [wk(1)] ; mm1=RO
|
||||||
|
|
||||||
|
; ---- Odd pixels: B/G terms; finish Y with wk(4..5), start Cr ----
movq mm4,mm0
|
||||||
|
punpcklwd mm0,mm3
|
||||||
|
punpckhwd mm4,mm3
|
||||||
|
movq mm7,mm0
|
||||||
|
movq mm5,mm4
|
||||||
|
pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
|
||||||
|
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
|
||||||
|
pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
|
||||||
|
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
|
||||||
|
|
||||||
|
movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF]
|
||||||
|
|
||||||
|
paddd mm0, MMWORD [wk(4)]
|
||||||
|
paddd mm4, MMWORD [wk(5)]
|
||||||
|
paddd mm0,mm3
|
||||||
|
paddd mm4,mm3
|
||||||
|
psrld mm0,SCALEBITS ; mm0=YOL
|
||||||
|
psrld mm4,SCALEBITS ; mm4=YOH
|
||||||
|
packssdw mm0,mm4 ; mm0=YO
|
||||||
|
|
||||||
|
pxor mm3,mm3
|
||||||
|
pxor mm4,mm4
|
||||||
|
punpcklwd mm3,mm1 ; mm3=ROL
|
||||||
|
punpckhwd mm4,mm1 ; mm4=ROH
|
||||||
|
psrld mm3,1 ; mm3=ROL*FIX(0.500)
|
||||||
|
psrld mm4,1 ; mm4=ROH*FIX(0.500)
|
||||||
|
|
||||||
|
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd mm7,mm3
|
||||||
|
paddd mm5,mm4
|
||||||
|
paddd mm7,mm1
|
||||||
|
paddd mm5,mm1
|
||||||
|
psrld mm7,SCALEBITS ; mm7=CrOL
|
||||||
|
psrld mm5,SCALEBITS ; mm5=CrOH
|
||||||
|
packssdw mm7,mm5 ; mm7=CrO
|
||||||
|
|
||||||
|
movq mm3, MMWORD [wk(0)] ; mm3=RE
|
||||||
|
|
||||||
|
; ---- Even pixels: B/G terms; finish Y with wk(6..7), start Cr ----
movq mm4,mm6
|
||||||
|
punpcklwd mm6,mm2
|
||||||
|
punpckhwd mm4,mm2
|
||||||
|
movq mm1,mm6
|
||||||
|
movq mm5,mm4
|
||||||
|
pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
|
||||||
|
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
|
||||||
|
pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
|
||||||
|
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
|
||||||
|
|
||||||
|
movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF]
|
||||||
|
|
||||||
|
paddd mm6, MMWORD [wk(6)]
|
||||||
|
paddd mm4, MMWORD [wk(7)]
|
||||||
|
paddd mm6,mm2
|
||||||
|
paddd mm4,mm2
|
||||||
|
psrld mm6,SCALEBITS ; mm6=YEL
|
||||||
|
psrld mm4,SCALEBITS ; mm4=YEH
|
||||||
|
packssdw mm6,mm4 ; mm6=YE
|
||||||
|
|
||||||
|
; Merge odd (high byte) and even (low byte) luma samples and store them.
psllw mm0,BYTE_BIT
|
||||||
|
por mm6,mm0 ; mm6=Y
|
||||||
|
movq MMWORD [edi], mm6 ; Save Y
|
||||||
|
|
||||||
|
pxor mm2,mm2
|
||||||
|
pxor mm4,mm4
|
||||||
|
punpcklwd mm2,mm3 ; mm2=REL
|
||||||
|
punpckhwd mm4,mm3 ; mm4=REH
|
||||||
|
psrld mm2,1 ; mm2=REL*FIX(0.500)
|
||||||
|
psrld mm4,1 ; mm4=REH*FIX(0.500)
|
||||||
|
|
||||||
|
movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd mm1,mm2
|
||||||
|
paddd mm5,mm4
|
||||||
|
paddd mm1,mm0
|
||||||
|
paddd mm5,mm0
|
||||||
|
psrld mm1,SCALEBITS ; mm1=CrEL
|
||||||
|
psrld mm5,SCALEBITS ; mm5=CrEH
|
||||||
|
packssdw mm1,mm5 ; mm1=CrE
|
||||||
|
|
||||||
|
psllw mm7,BYTE_BIT
|
||||||
|
por mm1,mm7 ; mm1=Cr
|
||||||
|
movq MMWORD [edx], mm1 ; Save Cr
|
||||||
|
|
||||||
|
; Advance to the next group of SIZEOF_MMWORD pixels. With a full group left
; loop again; with a non-zero remainder take the partial-load path once
; (which leaves ecx at 0 after the next subtraction); otherwise the row is
; finished.
sub ecx, byte SIZEOF_MMWORD
|
||||||
|
add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr
|
||||||
|
add edi, byte SIZEOF_MMWORD ; outptr0
|
||||||
|
add ebx, byte SIZEOF_MMWORD ; outptr1
|
||||||
|
add edx, byte SIZEOF_MMWORD ; outptr2
|
||||||
|
cmp ecx, byte SIZEOF_MMWORD
|
||||||
|
jae near .columnloop
|
||||||
|
test ecx,ecx
|
||||||
|
jnz near .column_ld1
|
||||||
|
|
||||||
|
; End of row: restore the column count and the row-array cursors, step every
; cursor to the next row pointer, and count the row off.
pop ecx ; col
|
||||||
|
pop esi
|
||||||
|
pop edi
|
||||||
|
pop ebx
|
||||||
|
pop edx
|
||||||
|
poppic eax
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_JSAMPROW ; input_buf
|
||||||
|
add edi, byte SIZEOF_JSAMPROW
|
||||||
|
add ebx, byte SIZEOF_JSAMPROW
|
||||||
|
add edx, byte SIZEOF_JSAMPROW
|
||||||
|
dec eax ; num_rows
|
||||||
|
jg near .rowloop
|
||||||
|
|
||||||
|
emms ; empty MMX state
|
||||||
|
|
||||||
|
.return:
|
||||||
|
; Epilogue. The early exits above jump here before any MMX instruction in
; this function has executed, which is why emms sits before the label.
pop edi
|
||||||
|
pop esi
|
||||||
|
; pop edx ; need not be preserved
|
||||||
|
; pop ecx ; need not be preserved
|
||||||
|
pop ebx
|
||||||
|
mov esp,ebp ; esp <- aligned ebp
|
||||||
|
pop esp ; esp <- original ebp
|
||||||
|
pop ebp
|
||||||
|
ret
|
||||||
|
|
||||||
500
simd/jcclrss2.asm
Normal file
500
simd/jcclrss2.asm
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
;
|
||||||
|
; jcclrss2.asm - colorspace conversion (SSE2)
|
||||||
|
;
|
||||||
|
; x86 SIMD extension for IJG JPEG library
|
||||||
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
|
;
|
||||||
|
; This file should be assembled with NASM (Netwide Assembler),
|
||||||
|
; can *not* be assembled with Microsoft's MASM or any compatible
|
||||||
|
; assembler (including Borland's Turbo Assembler).
|
||||||
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
|
;
|
||||||
|
; [TAB8]
|
||||||
|
|
||||||
|
; --------------------------------------------------------------------------
|
||||||
|
SECTION SEG_TEXT
|
||||||
|
BITS 32
|
||||||
|
;
|
||||||
|
; Convert some rows of samples to the output colorspace.
|
||||||
|
;
|
||||||
|
; GLOBAL(void)
|
||||||
|
; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
|
||||||
|
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
; JDIMENSION output_row, int num_rows);
|
||||||
|
;
|
||||||
|
|
||||||
|
; Byte offsets of the five stack arguments relative to a base register b.
; The function below passes eax for b, which it sets to esp immediately after
; `push ebp`; (b)+0 is therefore the saved ebp and the arguments start at (b)+8
; (presumably (b)+4 is the return address -- confirm the calling convention).
%define img_width(b) (b)+8 ; JDIMENSION img_width
|
||||||
|
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
|
||||||
|
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
|
||||||
|
%define output_row(b) (b)+20 ; JDIMENSION output_row
|
||||||
|
%define num_rows(b) (b)+24 ; int num_rows
|
||||||
|
|
||||||
|
; Locals addressed from the re-aligned ebp set up by the function prologue:
;   original_ebp : slot at [ebp+0]; the prologue stores the pre-alignment
;                  stack pointer there
;   wk(i)        : WK_NUM XMMWORD-sized scratch slots directly below ebp
;                  (wk(0) is the lowest address, wk(WK_NUM-1) ends at ebp)
;   gotptr       : one pointer-sized slot immediately below wk(0)
%define original_ebp ebp+0
|
||||||
|
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
||||||
|
%define WK_NUM 8
|
||||||
|
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
||||||
|
|
||||||
|
align 16
|
||||||
|
|
||||||
|
global EXTN(jsimd_rgb_ycc_convert_sse2)
|
||||||
|
|
||||||
|
EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||||
|
push ebp
|
||||||
|
mov eax,esp ; eax = original ebp
|
||||||
|
sub esp, byte 4
|
||||||
|
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
||||||
|
mov [esp],eax
|
||||||
|
mov ebp,esp ; ebp = aligned ebp
|
||||||
|
lea esp, [wk(0)]
|
||||||
|
pushpic eax ; make a room for GOT address
|
||||||
|
push ebx
|
||||||
|
; push ecx ; need not be preserved
|
||||||
|
; push edx ; need not be preserved
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
|
||||||
|
get_GOT ebx ; get GOT address
|
||||||
|
movpic POINTER [gotptr], ebx ; save GOT address
|
||||||
|
|
||||||
|
mov ecx, JDIMENSION [img_width(eax)]
|
||||||
|
test ecx,ecx
|
||||||
|
jz near .return
|
||||||
|
|
||||||
|
push ecx
|
||||||
|
|
||||||
|
mov esi, JSAMPIMAGE [output_buf(eax)]
|
||||||
|
mov ecx, JDIMENSION [output_row(eax)]
|
||||||
|
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
|
||||||
|
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
|
||||||
|
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
|
||||||
|
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
|
||||||
|
pop ecx
|
||||||
|
|
||||||
|
mov esi, JSAMPARRAY [input_buf(eax)]
|
||||||
|
mov eax, INT [num_rows(eax)]
|
||||||
|
test eax,eax
|
||||||
|
jle near .return
|
||||||
|
alignx 16,7
|
||||||
|
.rowloop:
|
||||||
|
pushpic eax
|
||||||
|
push edx
|
||||||
|
push ebx
|
||||||
|
push edi
|
||||||
|
push esi
|
||||||
|
push ecx ; col
|
||||||
|
|
||||||
|
mov esi, JSAMPROW [esi] ; inptr
|
||||||
|
mov edi, JSAMPROW [edi] ; outptr0
|
||||||
|
mov ebx, JSAMPROW [ebx] ; outptr1
|
||||||
|
mov edx, JSAMPROW [edx] ; outptr2
|
||||||
|
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
||||||
|
|
||||||
|
cmp ecx, byte SIZEOF_XMMWORD
|
||||||
|
jae near .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
%if RGB_PIXELSIZE == 3 ; ---------------
|
||||||
|
|
||||||
|
.column_ld1:
|
||||||
|
push eax
|
||||||
|
push edx
|
||||||
|
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
||||||
|
test cl, SIZEOF_BYTE
|
||||||
|
jz short .column_ld2
|
||||||
|
sub ecx, byte SIZEOF_BYTE
|
||||||
|
movzx eax, BYTE [esi+ecx]
|
||||||
|
.column_ld2:
|
||||||
|
test cl, SIZEOF_WORD
|
||||||
|
jz short .column_ld4
|
||||||
|
sub ecx, byte SIZEOF_WORD
|
||||||
|
movzx edx, WORD [esi+ecx]
|
||||||
|
shl eax, WORD_BIT
|
||||||
|
or eax,edx
|
||||||
|
.column_ld4:
|
||||||
|
movd xmmA,eax
|
||||||
|
pop edx
|
||||||
|
pop eax
|
||||||
|
test cl, SIZEOF_DWORD
|
||||||
|
jz short .column_ld8
|
||||||
|
sub ecx, byte SIZEOF_DWORD
|
||||||
|
movd xmmF, XMM_DWORD [esi+ecx]
|
||||||
|
pslldq xmmA, SIZEOF_DWORD
|
||||||
|
por xmmA,xmmF
|
||||||
|
.column_ld8:
|
||||||
|
test cl, SIZEOF_MMWORD
|
||||||
|
jz short .column_ld16
|
||||||
|
sub ecx, byte SIZEOF_MMWORD
|
||||||
|
movq xmmB, XMM_MMWORD [esi+ecx]
|
||||||
|
pslldq xmmA, SIZEOF_MMWORD
|
||||||
|
por xmmA,xmmB
|
||||||
|
.column_ld16:
|
||||||
|
test cl, SIZEOF_XMMWORD
|
||||||
|
jz short .column_ld32
|
||||||
|
movdqa xmmF,xmmA
|
||||||
|
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
||||||
|
mov ecx, SIZEOF_XMMWORD
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
.column_ld32:
|
||||||
|
test cl, 2*SIZEOF_XMMWORD
|
||||||
|
mov ecx, SIZEOF_XMMWORD
|
||||||
|
jz short .rgb_ycc_cnv
|
||||||
|
movdqa xmmB,xmmA
|
||||||
|
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
.columnloop:
|
||||||
|
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
||||||
|
|
||||||
|
.rgb_ycc_cnv:
|
||||||
|
; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
||||||
|
; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
|
||||||
|
; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
|
||||||
|
|
||||||
|
movdqa xmmG,xmmA
|
||||||
|
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
|
||||||
|
psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
|
||||||
|
|
||||||
|
punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
|
||||||
|
pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
|
||||||
|
|
||||||
|
punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
|
||||||
|
punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
|
||||||
|
|
||||||
|
movdqa xmmD,xmmA
|
||||||
|
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
|
||||||
|
psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
|
||||||
|
|
||||||
|
punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
|
||||||
|
pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
|
||||||
|
|
||||||
|
punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
|
||||||
|
punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
|
||||||
|
|
||||||
|
movdqa xmmE,xmmA
|
||||||
|
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
|
||||||
|
psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
|
||||||
|
|
||||||
|
punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
|
||||||
|
pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
|
||||||
|
|
||||||
|
punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
|
||||||
|
punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
|
||||||
|
|
||||||
|
pxor xmmH,xmmH
|
||||||
|
|
||||||
|
movdqa xmmC,xmmA
|
||||||
|
punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
|
||||||
|
punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
|
||||||
|
|
||||||
|
movdqa xmmB,xmmE
|
||||||
|
punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
|
||||||
|
punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
|
||||||
|
|
||||||
|
movdqa xmmF,xmmD
|
||||||
|
punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
|
||||||
|
punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
|
||||||
|
|
||||||
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
.column_ld1:
|
||||||
|
test cl, SIZEOF_XMMWORD/16
|
||||||
|
jz short .column_ld2
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD/16
|
||||||
|
movd xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
|
||||||
|
.column_ld2:
|
||||||
|
test cl, SIZEOF_XMMWORD/8
|
||||||
|
jz short .column_ld4
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD/8
|
||||||
|
movq xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
|
||||||
|
pslldq xmmA, SIZEOF_MMWORD
|
||||||
|
por xmmA,xmmE
|
||||||
|
.column_ld4:
|
||||||
|
test cl, SIZEOF_XMMWORD/4
|
||||||
|
jz short .column_ld8
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD/4
|
||||||
|
movdqa xmmE,xmmA
|
||||||
|
movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
|
||||||
|
.column_ld8:
|
||||||
|
test cl, SIZEOF_XMMWORD/2
|
||||||
|
mov ecx, SIZEOF_XMMWORD
|
||||||
|
jz short .rgb_ycc_cnv
|
||||||
|
movdqa xmmF,xmmA
|
||||||
|
movdqa xmmH,xmmE
|
||||||
|
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
||||||
|
jmp short .rgb_ycc_cnv
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
.columnloop:
|
||||||
|
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
||||||
|
movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
|
||||||
|
|
||||||
|
.rgb_ycc_cnv:
|
||||||
|
; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
|
||||||
|
; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
|
||||||
|
; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
|
||||||
|
; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
|
||||||
|
|
||||||
|
movdqa xmmD,xmmA
|
||||||
|
punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
|
||||||
|
punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
|
||||||
|
|
||||||
|
movdqa xmmC,xmmF
|
||||||
|
punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
|
||||||
|
punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
|
||||||
|
|
||||||
|
movdqa xmmB,xmmA
|
||||||
|
punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
|
||||||
|
punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
|
||||||
|
|
||||||
|
movdqa xmmG,xmmD
|
||||||
|
punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
|
||||||
|
punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
|
||||||
|
|
||||||
|
movdqa xmmE,xmmA
|
||||||
|
punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
|
||||||
|
punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
|
||||||
|
|
||||||
|
movdqa xmmH,xmmB
|
||||||
|
punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
|
||||||
|
punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
|
||||||
|
|
||||||
|
pxor xmmF,xmmF
|
||||||
|
|
||||||
|
movdqa xmmC,xmmA
|
||||||
|
punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
|
||||||
|
punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
|
||||||
|
|
||||||
|
movdqa xmmD,xmmB
|
||||||
|
punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
|
||||||
|
punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
|
||||||
|
|
||||||
|
movdqa xmmG,xmmE
|
||||||
|
punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
|
||||||
|
punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
|
||||||
|
|
||||||
|
punpcklbw xmmF,xmmH
|
||||||
|
punpckhbw xmmH,xmmH
|
||||||
|
psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
|
||||||
|
psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
|
||||||
|
|
||||||
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
|
||||||
|
; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
|
||||||
|
|
||||||
|
; (Original)
|
||||||
|
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
||||||
|
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
;
|
||||||
|
; (This implementation)
|
||||||
|
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
||||||
|
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
||||||
|
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
||||||
|
|
||||||
|
movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE
|
||||||
|
movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO
|
||||||
|
movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE
|
||||||
|
movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO
|
||||||
|
|
||||||
|
movdqa xmm6,xmm1
|
||||||
|
punpcklwd xmm1,xmm3
|
||||||
|
punpckhwd xmm6,xmm3
|
||||||
|
movdqa xmm7,xmm1
|
||||||
|
movdqa xmm4,xmm6
|
||||||
|
pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
|
||||||
|
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
|
||||||
|
pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
|
||||||
|
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
|
||||||
|
|
||||||
|
movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
|
||||||
|
movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
|
||||||
|
|
||||||
|
pxor xmm1,xmm1
|
||||||
|
pxor xmm6,xmm6
|
||||||
|
punpcklwd xmm1,xmm5 ; xmm1=BOL
|
||||||
|
punpckhwd xmm6,xmm5 ; xmm6=BOH
|
||||||
|
psrld xmm1,1 ; xmm1=BOL*FIX(0.500)
|
||||||
|
psrld xmm6,1 ; xmm6=BOH*FIX(0.500)
|
||||||
|
|
||||||
|
movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd xmm7,xmm1
|
||||||
|
paddd xmm4,xmm6
|
||||||
|
paddd xmm7,xmm5
|
||||||
|
paddd xmm4,xmm5
|
||||||
|
psrld xmm7,SCALEBITS ; xmm7=CbOL
|
||||||
|
psrld xmm4,SCALEBITS ; xmm4=CbOH
|
||||||
|
packssdw xmm7,xmm4 ; xmm7=CbO
|
||||||
|
|
||||||
|
movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE
|
||||||
|
|
||||||
|
movdqa xmm6,xmm0
|
||||||
|
punpcklwd xmm0,xmm2
|
||||||
|
punpckhwd xmm6,xmm2
|
||||||
|
movdqa xmm5,xmm0
|
||||||
|
movdqa xmm4,xmm6
|
||||||
|
pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
|
||||||
|
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
|
||||||
|
pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
|
||||||
|
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
|
||||||
|
|
||||||
|
movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
|
||||||
|
movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
|
||||||
|
|
||||||
|
pxor xmm0,xmm0
|
||||||
|
pxor xmm6,xmm6
|
||||||
|
punpcklwd xmm0,xmm1 ; xmm0=BEL
|
||||||
|
punpckhwd xmm6,xmm1 ; xmm6=BEH
|
||||||
|
psrld xmm0,1 ; xmm0=BEL*FIX(0.500)
|
||||||
|
psrld xmm6,1 ; xmm6=BEH*FIX(0.500)
|
||||||
|
|
||||||
|
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd xmm5,xmm0
|
||||||
|
paddd xmm4,xmm6
|
||||||
|
paddd xmm5,xmm1
|
||||||
|
paddd xmm4,xmm1
|
||||||
|
psrld xmm5,SCALEBITS ; xmm5=CbEL
|
||||||
|
psrld xmm4,SCALEBITS ; xmm4=CbEH
|
||||||
|
packssdw xmm5,xmm4 ; xmm5=CbE
|
||||||
|
|
||||||
|
psllw xmm7,BYTE_BIT
|
||||||
|
por xmm5,xmm7 ; xmm5=Cb
|
||||||
|
movdqa XMMWORD [ebx], xmm5 ; Save Cb
|
||||||
|
|
||||||
|
movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO
|
||||||
|
movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE
|
||||||
|
movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO
|
||||||
|
|
||||||
|
movdqa xmm4,xmm0
|
||||||
|
punpcklwd xmm0,xmm3
|
||||||
|
punpckhwd xmm4,xmm3
|
||||||
|
movdqa xmm7,xmm0
|
||||||
|
movdqa xmm5,xmm4
|
||||||
|
pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
|
||||||
|
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
|
||||||
|
pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
|
||||||
|
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
|
||||||
|
|
||||||
|
movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
|
||||||
|
|
||||||
|
paddd xmm0, XMMWORD [wk(4)]
|
||||||
|
paddd xmm4, XMMWORD [wk(5)]
|
||||||
|
paddd xmm0,xmm3
|
||||||
|
paddd xmm4,xmm3
|
||||||
|
psrld xmm0,SCALEBITS ; xmm0=YOL
|
||||||
|
psrld xmm4,SCALEBITS ; xmm4=YOH
|
||||||
|
packssdw xmm0,xmm4 ; xmm0=YO
|
||||||
|
|
||||||
|
pxor xmm3,xmm3
|
||||||
|
pxor xmm4,xmm4
|
||||||
|
punpcklwd xmm3,xmm1 ; xmm3=ROL
|
||||||
|
punpckhwd xmm4,xmm1 ; xmm4=ROH
|
||||||
|
psrld xmm3,1 ; xmm3=ROL*FIX(0.500)
|
||||||
|
psrld xmm4,1 ; xmm4=ROH*FIX(0.500)
|
||||||
|
|
||||||
|
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd xmm7,xmm3
|
||||||
|
paddd xmm5,xmm4
|
||||||
|
paddd xmm7,xmm1
|
||||||
|
paddd xmm5,xmm1
|
||||||
|
psrld xmm7,SCALEBITS ; xmm7=CrOL
|
||||||
|
psrld xmm5,SCALEBITS ; xmm5=CrOH
|
||||||
|
packssdw xmm7,xmm5 ; xmm7=CrO
|
||||||
|
|
||||||
|
movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE
|
||||||
|
|
||||||
|
movdqa xmm4,xmm6
|
||||||
|
punpcklwd xmm6,xmm2
|
||||||
|
punpckhwd xmm4,xmm2
|
||||||
|
movdqa xmm1,xmm6
|
||||||
|
movdqa xmm5,xmm4
|
||||||
|
pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
|
||||||
|
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
|
||||||
|
pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
|
||||||
|
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
|
||||||
|
|
||||||
|
movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
|
||||||
|
|
||||||
|
paddd xmm6, XMMWORD [wk(6)]
|
||||||
|
paddd xmm4, XMMWORD [wk(7)]
|
||||||
|
paddd xmm6,xmm2
|
||||||
|
paddd xmm4,xmm2
|
||||||
|
psrld xmm6,SCALEBITS ; xmm6=YEL
|
||||||
|
psrld xmm4,SCALEBITS ; xmm4=YEH
|
||||||
|
packssdw xmm6,xmm4 ; xmm6=YE
|
||||||
|
|
||||||
|
psllw xmm0,BYTE_BIT
|
||||||
|
por xmm6,xmm0 ; xmm6=Y
|
||||||
|
movdqa XMMWORD [edi], xmm6 ; Save Y
|
||||||
|
|
||||||
|
pxor xmm2,xmm2
|
||||||
|
pxor xmm4,xmm4
|
||||||
|
punpcklwd xmm2,xmm3 ; xmm2=REL
|
||||||
|
punpckhwd xmm4,xmm3 ; xmm4=REH
|
||||||
|
psrld xmm2,1 ; xmm2=REL*FIX(0.500)
|
||||||
|
psrld xmm4,1 ; xmm4=REH*FIX(0.500)
|
||||||
|
|
||||||
|
movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
|
||||||
|
|
||||||
|
paddd xmm1,xmm2
|
||||||
|
paddd xmm5,xmm4
|
||||||
|
paddd xmm1,xmm0
|
||||||
|
paddd xmm5,xmm0
|
||||||
|
psrld xmm1,SCALEBITS ; xmm1=CrEL
|
||||||
|
psrld xmm5,SCALEBITS ; xmm5=CrEH
|
||||||
|
packssdw xmm1,xmm5 ; xmm1=CrE
|
||||||
|
|
||||||
|
psllw xmm7,BYTE_BIT
|
||||||
|
por xmm1,xmm7 ; xmm1=Cr
|
||||||
|
movdqa XMMWORD [edx], xmm1 ; Save Cr
|
||||||
|
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD
|
||||||
|
add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr0
|
||||||
|
add ebx, byte SIZEOF_XMMWORD ; outptr1
|
||||||
|
add edx, byte SIZEOF_XMMWORD ; outptr2
|
||||||
|
cmp ecx, byte SIZEOF_XMMWORD
|
||||||
|
jae near .columnloop
|
||||||
|
test ecx,ecx
|
||||||
|
jnz near .column_ld1
|
||||||
|
|
||||||
|
pop ecx ; col
|
||||||
|
pop esi
|
||||||
|
pop edi
|
||||||
|
pop ebx
|
||||||
|
pop edx
|
||||||
|
poppic eax
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_JSAMPROW ; input_buf
|
||||||
|
add edi, byte SIZEOF_JSAMPROW
|
||||||
|
add ebx, byte SIZEOF_JSAMPROW
|
||||||
|
add edx, byte SIZEOF_JSAMPROW
|
||||||
|
dec eax ; num_rows
|
||||||
|
jg near .rowloop
|
||||||
|
|
||||||
|
.return:
|
||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
; pop edx ; need not be preserved
|
||||||
|
; pop ecx ; need not be preserved
|
||||||
|
pop ebx
|
||||||
|
mov esp,ebp ; esp <- aligned ebp
|
||||||
|
pop esp ; esp <- original ebp
|
||||||
|
pop ebp
|
||||||
|
ret
|
||||||
|
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
; jccolmmx.asm - colorspace conversion (MMX)
|
; jccolmmx.asm - colorspace conversion (MMX)
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
; Copyright 2009 D. R. Commander
|
||||||
;
|
;
|
||||||
; Based on
|
; Based on
|
||||||
; x86 SIMD extension for IJG JPEG library
|
; x86 SIMD extension for IJG JPEG library
|
||||||
@@ -51,458 +52,70 @@ PD_ONEHALF times 2 dd (1 << (SCALEBITS-1))
|
|||||||
alignz 16
|
alignz 16
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
SECTION SEG_TEXT
|
%include "jcclrmmx.asm"
|
||||||
BITS 32
|
|
||||||
;
|
%undef RGB_RED
|
||||||
; Convert some rows of samples to the output colorspace.
|
%undef RGB_GREEN
|
||||||
;
|
%undef RGB_BLUE
|
||||||
; GLOBAL(void)
|
%undef RGB_PIXELSIZE
|
||||||
; jsimd_rgb_ycc_convert_mmx (JDIMENSION img_width,
|
%define RGB_RED 0
|
||||||
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
%define RGB_GREEN 1
|
||||||
; JDIMENSION output_row, int num_rows);
|
%define RGB_BLUE 2
|
||||||
;
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx
|
||||||
%define img_width(b) (b)+8 ; JDIMENSION img_width
|
%include "jcclrmmx.asm"
|
||||||
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
|
|
||||||
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
|
%undef RGB_RED
|
||||||
%define output_row(b) (b)+20 ; JDIMENSION output_row
|
%undef RGB_GREEN
|
||||||
%define num_rows(b) (b)+24 ; int num_rows
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
%define original_ebp ebp+0
|
%define RGB_RED 0
|
||||||
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
|
%define RGB_GREEN 1
|
||||||
%define WK_NUM 8
|
%define RGB_BLUE 2
|
||||||
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx
|
||||||
align 16
|
%include "jcclrmmx.asm"
|
||||||
global EXTN(jsimd_rgb_ycc_convert_mmx)
|
|
||||||
|
%undef RGB_RED
|
||||||
EXTN(jsimd_rgb_ycc_convert_mmx):
|
%undef RGB_GREEN
|
||||||
push ebp
|
%undef RGB_BLUE
|
||||||
mov eax,esp ; eax = original ebp
|
%undef RGB_PIXELSIZE
|
||||||
sub esp, byte 4
|
%define RGB_RED 2
|
||||||
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
%define RGB_GREEN 1
|
||||||
mov [esp],eax
|
%define RGB_BLUE 0
|
||||||
mov ebp,esp ; ebp = aligned ebp
|
%define RGB_PIXELSIZE 3
|
||||||
lea esp, [wk(0)]
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx
|
||||||
pushpic eax ; make a room for GOT address
|
%include "jcclrmmx.asm"
|
||||||
push ebx
|
|
||||||
; push ecx ; need not be preserved
|
%undef RGB_RED
|
||||||
; push edx ; need not be preserved
|
%undef RGB_GREEN
|
||||||
push esi
|
%undef RGB_BLUE
|
||||||
push edi
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 2
|
||||||
get_GOT ebx ; get GOT address
|
%define RGB_GREEN 1
|
||||||
movpic POINTER [gotptr], ebx ; save GOT address
|
%define RGB_BLUE 0
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx
|
||||||
test ecx,ecx
|
%include "jcclrmmx.asm"
|
||||||
jz near .return
|
|
||||||
|
%undef RGB_RED
|
||||||
push ecx
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov esi, JSAMPIMAGE [output_buf(eax)]
|
%undef RGB_PIXELSIZE
|
||||||
mov ecx, JDIMENSION [output_row(eax)]
|
%define RGB_RED 3
|
||||||
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
|
%define RGB_GREEN 2
|
||||||
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
|
%define RGB_BLUE 1
|
||||||
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
|
%define RGB_PIXELSIZE 4
|
||||||
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx
|
||||||
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
%include "jcclrmmx.asm"
|
||||||
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
|
%undef RGB_RED
|
||||||
pop ecx
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov esi, JSAMPARRAY [input_buf(eax)]
|
%undef RGB_PIXELSIZE
|
||||||
mov eax, INT [num_rows(eax)]
|
%define RGB_RED 1
|
||||||
test eax,eax
|
%define RGB_GREEN 2
|
||||||
jle near .return
|
%define RGB_BLUE 3
|
||||||
alignx 16,7
|
%define RGB_PIXELSIZE 4
|
||||||
.rowloop:
|
%define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx
|
||||||
pushpic eax
|
%include "jcclrmmx.asm"
|
||||||
push edx
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
push ecx ; col
|
|
||||||
|
|
||||||
mov esi, JSAMPROW [esi] ; inptr
|
|
||||||
mov edi, JSAMPROW [edi] ; outptr0
|
|
||||||
mov ebx, JSAMPROW [ebx] ; outptr1
|
|
||||||
mov edx, JSAMPROW [edx] ; outptr2
|
|
||||||
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD
|
|
||||||
jae short .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
%if RGB_PIXELSIZE == 3 ; ---------------
|
|
||||||
|
|
||||||
.column_ld1:
|
|
||||||
push eax
|
|
||||||
push edx
|
|
||||||
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
||||||
test cl, SIZEOF_BYTE
|
|
||||||
jz short .column_ld2
|
|
||||||
sub ecx, byte SIZEOF_BYTE
|
|
||||||
xor eax,eax
|
|
||||||
mov al, BYTE [esi+ecx]
|
|
||||||
.column_ld2:
|
|
||||||
test cl, SIZEOF_WORD
|
|
||||||
jz short .column_ld4
|
|
||||||
sub ecx, byte SIZEOF_WORD
|
|
||||||
xor edx,edx
|
|
||||||
mov dx, WORD [esi+ecx]
|
|
||||||
shl eax, WORD_BIT
|
|
||||||
or eax,edx
|
|
||||||
.column_ld4:
|
|
||||||
movd mmA,eax
|
|
||||||
pop edx
|
|
||||||
pop eax
|
|
||||||
test cl, SIZEOF_DWORD
|
|
||||||
jz short .column_ld8
|
|
||||||
sub ecx, byte SIZEOF_DWORD
|
|
||||||
movd mmG, DWORD [esi+ecx]
|
|
||||||
psllq mmA, DWORD_BIT
|
|
||||||
por mmA,mmG
|
|
||||||
.column_ld8:
|
|
||||||
test cl, SIZEOF_MMWORD
|
|
||||||
jz short .column_ld16
|
|
||||||
movq mmG,mmA
|
|
||||||
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
||||||
mov ecx, SIZEOF_MMWORD
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
.column_ld16:
|
|
||||||
test cl, 2*SIZEOF_MMWORD
|
|
||||||
mov ecx, SIZEOF_MMWORD
|
|
||||||
jz short .rgb_ycc_cnv
|
|
||||||
movq mmF,mmA
|
|
||||||
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
||||||
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.columnloop:
|
|
||||||
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
||||||
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
||||||
movq mmF, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
||||||
|
|
||||||
.rgb_ycc_cnv:
|
|
||||||
; mmA=(00 10 20 01 11 21 02 12)
|
|
||||||
; mmG=(22 03 13 23 04 14 24 05)
|
|
||||||
; mmF=(15 25 06 16 26 07 17 27)
|
|
||||||
|
|
||||||
movq mmD,mmA
|
|
||||||
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 10 20 01)
|
|
||||||
psrlq mmD,4*BYTE_BIT ; mmD=(11 21 02 12 -- -- -- --)
|
|
||||||
|
|
||||||
punpckhbw mmA,mmG ; mmA=(00 04 10 14 20 24 01 05)
|
|
||||||
psllq mmG,4*BYTE_BIT ; mmG=(-- -- -- -- 22 03 13 23)
|
|
||||||
|
|
||||||
punpcklbw mmD,mmF ; mmD=(11 15 21 25 02 06 12 16)
|
|
||||||
punpckhbw mmG,mmF ; mmG=(22 26 03 07 13 17 23 27)
|
|
||||||
|
|
||||||
movq mmE,mmA
|
|
||||||
psllq mmA,4*BYTE_BIT ; mmA=(-- -- -- -- 00 04 10 14)
|
|
||||||
psrlq mmE,4*BYTE_BIT ; mmE=(20 24 01 05 -- -- -- --)
|
|
||||||
|
|
||||||
punpckhbw mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
|
|
||||||
psllq mmD,4*BYTE_BIT ; mmD=(-- -- -- -- 11 15 21 25)
|
|
||||||
|
|
||||||
punpcklbw mmE,mmG ; mmE=(20 22 24 26 01 03 05 07)
|
|
||||||
punpckhbw mmD,mmG ; mmD=(11 13 15 17 21 23 25 27)
|
|
||||||
|
|
||||||
pxor mmH,mmH
|
|
||||||
|
|
||||||
movq mmC,mmA
|
|
||||||
punpcklbw mmA,mmH ; mmA=(00 02 04 06)
|
|
||||||
punpckhbw mmC,mmH ; mmC=(10 12 14 16)
|
|
||||||
|
|
||||||
movq mmB,mmE
|
|
||||||
punpcklbw mmE,mmH ; mmE=(20 22 24 26)
|
|
||||||
punpckhbw mmB,mmH ; mmB=(01 03 05 07)
|
|
||||||
|
|
||||||
movq mmF,mmD
|
|
||||||
punpcklbw mmD,mmH ; mmD=(11 13 15 17)
|
|
||||||
punpckhbw mmF,mmH ; mmF=(21 23 25 27)
|
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
|
||||||
|
|
||||||
.column_ld1:
|
|
||||||
test cl, SIZEOF_MMWORD/8
|
|
||||||
jz short .column_ld2
|
|
||||||
sub ecx, byte SIZEOF_MMWORD/8
|
|
||||||
movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
|
||||||
.column_ld2:
|
|
||||||
test cl, SIZEOF_MMWORD/4
|
|
||||||
jz short .column_ld4
|
|
||||||
sub ecx, byte SIZEOF_MMWORD/4
|
|
||||||
movq mmF,mmA
|
|
||||||
movq mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
|
|
||||||
.column_ld4:
|
|
||||||
test cl, SIZEOF_MMWORD/2
|
|
||||||
mov ecx, SIZEOF_MMWORD
|
|
||||||
jz short .rgb_ycc_cnv
|
|
||||||
movq mmD,mmA
|
|
||||||
movq mmC,mmF
|
|
||||||
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
||||||
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.columnloop:
|
|
||||||
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
|
|
||||||
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
|
|
||||||
movq mmD, MMWORD [esi+2*SIZEOF_MMWORD]
|
|
||||||
movq mmC, MMWORD [esi+3*SIZEOF_MMWORD]
|
|
||||||
|
|
||||||
.rgb_ycc_cnv:
|
|
||||||
; mmA=(00 10 20 30 01 11 21 31)
|
|
||||||
; mmF=(02 12 22 32 03 13 23 33)
|
|
||||||
; mmD=(04 14 24 34 05 15 25 35)
|
|
||||||
; mmC=(06 16 26 36 07 17 27 37)
|
|
||||||
|
|
||||||
movq mmB,mmA
|
|
||||||
punpcklbw mmA,mmF ; mmA=(00 02 10 12 20 22 30 32)
|
|
||||||
punpckhbw mmB,mmF ; mmB=(01 03 11 13 21 23 31 33)
|
|
||||||
|
|
||||||
movq mmG,mmD
|
|
||||||
punpcklbw mmD,mmC ; mmD=(04 06 14 16 24 26 34 36)
|
|
||||||
punpckhbw mmG,mmC ; mmG=(05 07 15 17 25 27 35 37)
|
|
||||||
|
|
||||||
movq mmE,mmA
|
|
||||||
punpcklwd mmA,mmD ; mmA=(00 02 04 06 10 12 14 16)
|
|
||||||
punpckhwd mmE,mmD ; mmE=(20 22 24 26 30 32 34 36)
|
|
||||||
|
|
||||||
movq mmH,mmB
|
|
||||||
punpcklwd mmB,mmG ; mmB=(01 03 05 07 11 13 15 17)
|
|
||||||
punpckhwd mmH,mmG ; mmH=(21 23 25 27 31 33 35 37)
|
|
||||||
|
|
||||||
pxor mmF,mmF
|
|
||||||
|
|
||||||
movq mmC,mmA
|
|
||||||
punpcklbw mmA,mmF ; mmA=(00 02 04 06)
|
|
||||||
punpckhbw mmC,mmF ; mmC=(10 12 14 16)
|
|
||||||
|
|
||||||
movq mmD,mmB
|
|
||||||
punpcklbw mmB,mmF ; mmB=(01 03 05 07)
|
|
||||||
punpckhbw mmD,mmF ; mmD=(11 13 15 17)
|
|
||||||
|
|
||||||
movq mmG,mmE
|
|
||||||
punpcklbw mmE,mmF ; mmE=(20 22 24 26)
|
|
||||||
punpckhbw mmG,mmF ; mmG=(30 32 34 36)
|
|
||||||
|
|
||||||
punpcklbw mmF,mmH
|
|
||||||
punpckhbw mmH,mmH
|
|
||||||
psrlw mmF,BYTE_BIT ; mmF=(21 23 25 27)
|
|
||||||
psrlw mmH,BYTE_BIT ; mmH=(31 33 35 37)
|
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
|
||||||
|
|
||||||
; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
|
|
||||||
; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
|
|
||||||
|
|
||||||
; (Original)
|
|
||||||
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
|
||||||
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
|
||||||
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
|
||||||
;
|
|
||||||
; (This implementation)
|
|
||||||
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
|
||||||
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
|
||||||
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
|
||||||
|
|
||||||
movq MMWORD [wk(0)], mm0 ; wk(0)=RE
|
|
||||||
movq MMWORD [wk(1)], mm1 ; wk(1)=RO
|
|
||||||
movq MMWORD [wk(2)], mm4 ; wk(2)=BE
|
|
||||||
movq MMWORD [wk(3)], mm5 ; wk(3)=BO
|
|
||||||
|
|
||||||
movq mm6,mm1
|
|
||||||
punpcklwd mm1,mm3
|
|
||||||
punpckhwd mm6,mm3
|
|
||||||
movq mm7,mm1
|
|
||||||
movq mm4,mm6
|
|
||||||
pmaddwd mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
|
|
||||||
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
|
|
||||||
pmaddwd mm7,[GOTOFF(eax,PW_MF016_MF033)] ; mm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
|
|
||||||
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
|
|
||||||
|
|
||||||
movq MMWORD [wk(4)], mm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
|
|
||||||
movq MMWORD [wk(5)], mm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
|
|
||||||
|
|
||||||
pxor mm1,mm1
|
|
||||||
pxor mm6,mm6
|
|
||||||
punpcklwd mm1,mm5 ; mm1=BOL
|
|
||||||
punpckhwd mm6,mm5 ; mm6=BOH
|
|
||||||
psrld mm1,1 ; mm1=BOL*FIX(0.500)
|
|
||||||
psrld mm6,1 ; mm6=BOH*FIX(0.500)
|
|
||||||
|
|
||||||
movq mm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm5=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd mm7,mm1
|
|
||||||
paddd mm4,mm6
|
|
||||||
paddd mm7,mm5
|
|
||||||
paddd mm4,mm5
|
|
||||||
psrld mm7,SCALEBITS ; mm7=CbOL
|
|
||||||
psrld mm4,SCALEBITS ; mm4=CbOH
|
|
||||||
packssdw mm7,mm4 ; mm7=CbO
|
|
||||||
|
|
||||||
movq mm1, MMWORD [wk(2)] ; mm1=BE
|
|
||||||
|
|
||||||
movq mm6,mm0
|
|
||||||
punpcklwd mm0,mm2
|
|
||||||
punpckhwd mm6,mm2
|
|
||||||
movq mm5,mm0
|
|
||||||
movq mm4,mm6
|
|
||||||
pmaddwd mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
|
|
||||||
pmaddwd mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
|
|
||||||
pmaddwd mm5,[GOTOFF(eax,PW_MF016_MF033)] ; mm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
|
|
||||||
pmaddwd mm4,[GOTOFF(eax,PW_MF016_MF033)] ; mm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
|
|
||||||
|
|
||||||
movq MMWORD [wk(6)], mm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
|
|
||||||
movq MMWORD [wk(7)], mm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
|
|
||||||
|
|
||||||
pxor mm0,mm0
|
|
||||||
pxor mm6,mm6
|
|
||||||
punpcklwd mm0,mm1 ; mm0=BEL
|
|
||||||
punpckhwd mm6,mm1 ; mm6=BEH
|
|
||||||
psrld mm0,1 ; mm0=BEL*FIX(0.500)
|
|
||||||
psrld mm6,1 ; mm6=BEH*FIX(0.500)
|
|
||||||
|
|
||||||
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd mm5,mm0
|
|
||||||
paddd mm4,mm6
|
|
||||||
paddd mm5,mm1
|
|
||||||
paddd mm4,mm1
|
|
||||||
psrld mm5,SCALEBITS ; mm5=CbEL
|
|
||||||
psrld mm4,SCALEBITS ; mm4=CbEH
|
|
||||||
packssdw mm5,mm4 ; mm5=CbE
|
|
||||||
|
|
||||||
psllw mm7,BYTE_BIT
|
|
||||||
por mm5,mm7 ; mm5=Cb
|
|
||||||
movq MMWORD [ebx], mm5 ; Save Cb
|
|
||||||
|
|
||||||
movq mm0, MMWORD [wk(3)] ; mm0=BO
|
|
||||||
movq mm6, MMWORD [wk(2)] ; mm6=BE
|
|
||||||
movq mm1, MMWORD [wk(1)] ; mm1=RO
|
|
||||||
|
|
||||||
movq mm4,mm0
|
|
||||||
punpcklwd mm0,mm3
|
|
||||||
punpckhwd mm4,mm3
|
|
||||||
movq mm7,mm0
|
|
||||||
movq mm5,mm4
|
|
||||||
pmaddwd mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
|
|
||||||
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
|
|
||||||
pmaddwd mm7,[GOTOFF(eax,PW_MF008_MF041)] ; mm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
|
|
||||||
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
|
|
||||||
|
|
||||||
movq mm3,[GOTOFF(eax,PD_ONEHALF)] ; mm3=[PD_ONEHALF]
|
|
||||||
|
|
||||||
paddd mm0, MMWORD [wk(4)]
|
|
||||||
paddd mm4, MMWORD [wk(5)]
|
|
||||||
paddd mm0,mm3
|
|
||||||
paddd mm4,mm3
|
|
||||||
psrld mm0,SCALEBITS ; mm0=YOL
|
|
||||||
psrld mm4,SCALEBITS ; mm4=YOH
|
|
||||||
packssdw mm0,mm4 ; mm0=YO
|
|
||||||
|
|
||||||
pxor mm3,mm3
|
|
||||||
pxor mm4,mm4
|
|
||||||
punpcklwd mm3,mm1 ; mm3=ROL
|
|
||||||
punpckhwd mm4,mm1 ; mm4=ROH
|
|
||||||
psrld mm3,1 ; mm3=ROL*FIX(0.500)
|
|
||||||
psrld mm4,1 ; mm4=ROH*FIX(0.500)
|
|
||||||
|
|
||||||
movq mm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm1=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd mm7,mm3
|
|
||||||
paddd mm5,mm4
|
|
||||||
paddd mm7,mm1
|
|
||||||
paddd mm5,mm1
|
|
||||||
psrld mm7,SCALEBITS ; mm7=CrOL
|
|
||||||
psrld mm5,SCALEBITS ; mm5=CrOH
|
|
||||||
packssdw mm7,mm5 ; mm7=CrO
|
|
||||||
|
|
||||||
movq mm3, MMWORD [wk(0)] ; mm3=RE
|
|
||||||
|
|
||||||
movq mm4,mm6
|
|
||||||
punpcklwd mm6,mm2
|
|
||||||
punpckhwd mm4,mm2
|
|
||||||
movq mm1,mm6
|
|
||||||
movq mm5,mm4
|
|
||||||
pmaddwd mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
|
|
||||||
pmaddwd mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
|
|
||||||
pmaddwd mm1,[GOTOFF(eax,PW_MF008_MF041)] ; mm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
|
|
||||||
pmaddwd mm5,[GOTOFF(eax,PW_MF008_MF041)] ; mm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
|
|
||||||
|
|
||||||
movq mm2,[GOTOFF(eax,PD_ONEHALF)] ; mm2=[PD_ONEHALF]
|
|
||||||
|
|
||||||
paddd mm6, MMWORD [wk(6)]
|
|
||||||
paddd mm4, MMWORD [wk(7)]
|
|
||||||
paddd mm6,mm2
|
|
||||||
paddd mm4,mm2
|
|
||||||
psrld mm6,SCALEBITS ; mm6=YEL
|
|
||||||
psrld mm4,SCALEBITS ; mm4=YEH
|
|
||||||
packssdw mm6,mm4 ; mm6=YE
|
|
||||||
|
|
||||||
psllw mm0,BYTE_BIT
|
|
||||||
por mm6,mm0 ; mm6=Y
|
|
||||||
movq MMWORD [edi], mm6 ; Save Y
|
|
||||||
|
|
||||||
pxor mm2,mm2
|
|
||||||
pxor mm4,mm4
|
|
||||||
punpcklwd mm2,mm3 ; mm2=REL
|
|
||||||
punpckhwd mm4,mm3 ; mm4=REH
|
|
||||||
psrld mm2,1 ; mm2=REL*FIX(0.500)
|
|
||||||
psrld mm4,1 ; mm4=REH*FIX(0.500)
|
|
||||||
|
|
||||||
movq mm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; mm0=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd mm1,mm2
|
|
||||||
paddd mm5,mm4
|
|
||||||
paddd mm1,mm0
|
|
||||||
paddd mm5,mm0
|
|
||||||
psrld mm1,SCALEBITS ; mm1=CrEL
|
|
||||||
psrld mm5,SCALEBITS ; mm5=CrEH
|
|
||||||
packssdw mm1,mm5 ; mm1=CrE
|
|
||||||
|
|
||||||
psllw mm7,BYTE_BIT
|
|
||||||
por mm1,mm7 ; mm1=Cr
|
|
||||||
movq MMWORD [edx], mm1 ; Save Cr
|
|
||||||
|
|
||||||
sub ecx, byte SIZEOF_MMWORD
|
|
||||||
add esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; inptr
|
|
||||||
add edi, byte SIZEOF_MMWORD ; outptr0
|
|
||||||
add ebx, byte SIZEOF_MMWORD ; outptr1
|
|
||||||
add edx, byte SIZEOF_MMWORD ; outptr2
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD
|
|
||||||
jae near .columnloop
|
|
||||||
test ecx,ecx
|
|
||||||
jnz near .column_ld1
|
|
||||||
|
|
||||||
pop ecx ; col
|
|
||||||
pop esi
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
pop edx
|
|
||||||
poppic eax
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
||||||
add edi, byte SIZEOF_JSAMPROW
|
|
||||||
add ebx, byte SIZEOF_JSAMPROW
|
|
||||||
add edx, byte SIZEOF_JSAMPROW
|
|
||||||
dec eax ; num_rows
|
|
||||||
jg near .rowloop
|
|
||||||
|
|
||||||
emms ; empty MMX state
|
|
||||||
|
|
||||||
.return:
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
; pop edx ; need not be preserved
|
|
||||||
; pop ecx ; need not be preserved
|
|
||||||
pop ebx
|
|
||||||
mov esp,ebp ; esp <- aligned ebp
|
|
||||||
pop esp ; esp <- original ebp
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
;
|
;
|
||||||
; x86 SIMD extension for IJG JPEG library
|
; x86 SIMD extension for IJG JPEG library
|
||||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
|
; Copyright (C) 2009, D. R. Commander.
|
||||||
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
;
|
;
|
||||||
; This file should be assembled with NASM (Netwide Assembler),
|
; This file should be assembled with NASM (Netwide Assembler),
|
||||||
@@ -48,486 +49,70 @@ PD_ONEHALF times 4 dd (1 << (SCALEBITS-1))
|
|||||||
alignz 16
|
alignz 16
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
SECTION SEG_TEXT
|
%include "jcclrss2.asm"
|
||||||
BITS 32
|
|
||||||
;
|
%undef RGB_RED
|
||||||
; Convert some rows of samples to the output colorspace.
|
%undef RGB_GREEN
|
||||||
;
|
%undef RGB_BLUE
|
||||||
; GLOBAL(void)
|
%undef RGB_PIXELSIZE
|
||||||
; jsimd_rgb_ycc_convert_sse2 (JDIMENSION img_width,
|
%define RGB_RED 0
|
||||||
; JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
%define RGB_GREEN 1
|
||||||
; JDIMENSION output_row, int num_rows);
|
%define RGB_BLUE 2
|
||||||
;
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
|
||||||
%define img_width(b) (b)+8 ; JDIMENSION img_width
|
%include "jcclrss2.asm"
|
||||||
%define input_buf(b) (b)+12 ; JSAMPARRAY input_buf
|
|
||||||
%define output_buf(b) (b)+16 ; JSAMPIMAGE output_buf
|
%undef RGB_RED
|
||||||
%define output_row(b) (b)+20 ; JDIMENSION output_row
|
%undef RGB_GREEN
|
||||||
%define num_rows(b) (b)+24 ; int num_rows
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
%define original_ebp ebp+0
|
%define RGB_RED 0
|
||||||
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
%define RGB_GREEN 1
|
||||||
%define WK_NUM 8
|
%define RGB_BLUE 2
|
||||||
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
|
||||||
align 16
|
%include "jcclrss2.asm"
|
||||||
global EXTN(jsimd_rgb_ycc_convert_sse2)
|
|
||||||
|
%undef RGB_RED
|
||||||
EXTN(jsimd_rgb_ycc_convert_sse2):
|
%undef RGB_GREEN
|
||||||
push ebp
|
%undef RGB_BLUE
|
||||||
mov eax,esp ; eax = original ebp
|
%undef RGB_PIXELSIZE
|
||||||
sub esp, byte 4
|
%define RGB_RED 2
|
||||||
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
%define RGB_GREEN 1
|
||||||
mov [esp],eax
|
%define RGB_BLUE 0
|
||||||
mov ebp,esp ; ebp = aligned ebp
|
%define RGB_PIXELSIZE 3
|
||||||
lea esp, [wk(0)]
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
|
||||||
pushpic eax ; make a room for GOT address
|
%include "jcclrss2.asm"
|
||||||
push ebx
|
|
||||||
; push ecx ; need not be preserved
|
%undef RGB_RED
|
||||||
; push edx ; need not be preserved
|
%undef RGB_GREEN
|
||||||
push esi
|
%undef RGB_BLUE
|
||||||
push edi
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 2
|
||||||
get_GOT ebx ; get GOT address
|
%define RGB_GREEN 1
|
||||||
movpic POINTER [gotptr], ebx ; save GOT address
|
%define RGB_BLUE 0
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
mov ecx, JDIMENSION [img_width(eax)]
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
|
||||||
test ecx,ecx
|
%include "jcclrss2.asm"
|
||||||
jz near .return
|
|
||||||
|
%undef RGB_RED
|
||||||
push ecx
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov esi, JSAMPIMAGE [output_buf(eax)]
|
%undef RGB_PIXELSIZE
|
||||||
mov ecx, JDIMENSION [output_row(eax)]
|
%define RGB_RED 3
|
||||||
mov edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
|
%define RGB_GREEN 2
|
||||||
mov ebx, JSAMPARRAY [esi+1*SIZEOF_JSAMPARRAY]
|
%define RGB_BLUE 1
|
||||||
mov edx, JSAMPARRAY [esi+2*SIZEOF_JSAMPARRAY]
|
%define RGB_PIXELSIZE 4
|
||||||
lea edi, [edi+ecx*SIZEOF_JSAMPROW]
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
|
||||||
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
%include "jcclrss2.asm"
|
||||||
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
|
%undef RGB_RED
|
||||||
pop ecx
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov esi, JSAMPARRAY [input_buf(eax)]
|
%undef RGB_PIXELSIZE
|
||||||
mov eax, INT [num_rows(eax)]
|
%define RGB_RED 1
|
||||||
test eax,eax
|
%define RGB_GREEN 2
|
||||||
jle near .return
|
%define RGB_BLUE 3
|
||||||
alignx 16,7
|
%define RGB_PIXELSIZE 4
|
||||||
.rowloop:
|
%define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
|
||||||
pushpic eax
|
%include "jcclrss2.asm"
|
||||||
push edx
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
push ecx ; col
|
|
||||||
|
|
||||||
mov esi, JSAMPROW [esi] ; inptr
|
|
||||||
mov edi, JSAMPROW [edi] ; outptr0
|
|
||||||
mov ebx, JSAMPROW [ebx] ; outptr1
|
|
||||||
mov edx, JSAMPROW [edx] ; outptr2
|
|
||||||
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD
|
|
||||||
jae near .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
%if RGB_PIXELSIZE == 3 ; ---------------
|
|
||||||
|
|
||||||
.column_ld1:
|
|
||||||
push eax
|
|
||||||
push edx
|
|
||||||
lea ecx,[ecx+ecx*2] ; imul ecx,RGB_PIXELSIZE
|
|
||||||
test cl, SIZEOF_BYTE
|
|
||||||
jz short .column_ld2
|
|
||||||
sub ecx, byte SIZEOF_BYTE
|
|
||||||
movzx eax, BYTE [esi+ecx]
|
|
||||||
.column_ld2:
|
|
||||||
test cl, SIZEOF_WORD
|
|
||||||
jz short .column_ld4
|
|
||||||
sub ecx, byte SIZEOF_WORD
|
|
||||||
movzx edx, WORD [esi+ecx]
|
|
||||||
shl eax, WORD_BIT
|
|
||||||
or eax,edx
|
|
||||||
.column_ld4:
|
|
||||||
movd xmmA,eax
|
|
||||||
pop edx
|
|
||||||
pop eax
|
|
||||||
test cl, SIZEOF_DWORD
|
|
||||||
jz short .column_ld8
|
|
||||||
sub ecx, byte SIZEOF_DWORD
|
|
||||||
movd xmmF, XMM_DWORD [esi+ecx]
|
|
||||||
pslldq xmmA, SIZEOF_DWORD
|
|
||||||
por xmmA,xmmF
|
|
||||||
.column_ld8:
|
|
||||||
test cl, SIZEOF_MMWORD
|
|
||||||
jz short .column_ld16
|
|
||||||
sub ecx, byte SIZEOF_MMWORD
|
|
||||||
movq xmmB, XMM_MMWORD [esi+ecx]
|
|
||||||
pslldq xmmA, SIZEOF_MMWORD
|
|
||||||
por xmmA,xmmB
|
|
||||||
.column_ld16:
|
|
||||||
test cl, SIZEOF_XMMWORD
|
|
||||||
jz short .column_ld32
|
|
||||||
movdqa xmmF,xmmA
|
|
||||||
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
||||||
mov ecx, SIZEOF_XMMWORD
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
.column_ld32:
|
|
||||||
test cl, 2*SIZEOF_XMMWORD
|
|
||||||
mov ecx, SIZEOF_XMMWORD
|
|
||||||
jz short .rgb_ycc_cnv
|
|
||||||
movdqa xmmB,xmmA
|
|
||||||
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.columnloop:
|
|
||||||
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
||||||
|
|
||||||
.rgb_ycc_cnv:
|
|
||||||
; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
|
||||||
; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
|
|
||||||
; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
|
|
||||||
|
|
||||||
movdqa xmmG,xmmA
|
|
||||||
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
|
|
||||||
psrldq xmmG,8 ; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
|
|
||||||
|
|
||||||
punpckhbw xmmA,xmmF ; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
|
|
||||||
pslldq xmmF,8 ; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
|
|
||||||
|
|
||||||
punpcklbw xmmG,xmmB ; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
|
|
||||||
punpckhbw xmmF,xmmB ; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
|
|
||||||
|
|
||||||
movdqa xmmD,xmmA
|
|
||||||
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
|
|
||||||
psrldq xmmD,8 ; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
|
|
||||||
|
|
||||||
punpckhbw xmmA,xmmG ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
|
|
||||||
pslldq xmmG,8 ; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
|
|
||||||
|
|
||||||
punpcklbw xmmD,xmmF ; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
|
|
||||||
punpckhbw xmmG,xmmF ; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
|
|
||||||
|
|
||||||
movdqa xmmE,xmmA
|
|
||||||
pslldq xmmA,8 ; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
|
|
||||||
psrldq xmmE,8 ; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
|
|
||||||
|
|
||||||
punpckhbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
|
|
||||||
pslldq xmmD,8 ; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
|
|
||||||
|
|
||||||
punpcklbw xmmE,xmmG ; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
|
|
||||||
punpckhbw xmmD,xmmG ; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
|
|
||||||
|
|
||||||
pxor xmmH,xmmH
|
|
||||||
|
|
||||||
movdqa xmmC,xmmA
|
|
||||||
punpcklbw xmmA,xmmH ; xmmA=(00 02 04 06 08 0A 0C 0E)
|
|
||||||
punpckhbw xmmC,xmmH ; xmmC=(10 12 14 16 18 1A 1C 1E)
|
|
||||||
|
|
||||||
movdqa xmmB,xmmE
|
|
||||||
punpcklbw xmmE,xmmH ; xmmE=(20 22 24 26 28 2A 2C 2E)
|
|
||||||
punpckhbw xmmB,xmmH ; xmmB=(01 03 05 07 09 0B 0D 0F)
|
|
||||||
|
|
||||||
movdqa xmmF,xmmD
|
|
||||||
punpcklbw xmmD,xmmH ; xmmD=(11 13 15 17 19 1B 1D 1F)
|
|
||||||
punpckhbw xmmF,xmmH ; xmmF=(21 23 25 27 29 2B 2D 2F)
|
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
|
||||||
|
|
||||||
.column_ld1:
|
|
||||||
test cl, SIZEOF_XMMWORD/16
|
|
||||||
jz short .column_ld2
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD/16
|
|
||||||
movd xmmA, DWORD [esi+ecx*RGB_PIXELSIZE]
|
|
||||||
.column_ld2:
|
|
||||||
test cl, SIZEOF_XMMWORD/8
|
|
||||||
jz short .column_ld4
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD/8
|
|
||||||
movq xmmE, MMWORD [esi+ecx*RGB_PIXELSIZE]
|
|
||||||
pslldq xmmA, SIZEOF_MMWORD
|
|
||||||
por xmmA,xmmE
|
|
||||||
.column_ld4:
|
|
||||||
test cl, SIZEOF_XMMWORD/4
|
|
||||||
jz short .column_ld8
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD/4
|
|
||||||
movdqa xmmE,xmmA
|
|
||||||
movdqu xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
|
|
||||||
.column_ld8:
|
|
||||||
test cl, SIZEOF_XMMWORD/2
|
|
||||||
mov ecx, SIZEOF_XMMWORD
|
|
||||||
jz short .rgb_ycc_cnv
|
|
||||||
movdqa xmmF,xmmA
|
|
||||||
movdqa xmmH,xmmE
|
|
||||||
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
||||||
jmp short .rgb_ycc_cnv
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.columnloop:
|
|
||||||
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
|
|
||||||
movdqu xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
|
|
||||||
|
|
||||||
.rgb_ycc_cnv:
|
|
||||||
; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
|
|
||||||
; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
|
|
||||||
; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
|
|
||||||
; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
|
|
||||||
|
|
||||||
movdqa xmmD,xmmA
|
|
||||||
punpcklbw xmmA,xmmE ; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
|
|
||||||
punpckhbw xmmD,xmmE ; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
|
|
||||||
|
|
||||||
movdqa xmmC,xmmF
|
|
||||||
punpcklbw xmmF,xmmH ; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
|
|
||||||
punpckhbw xmmC,xmmH ; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
|
|
||||||
|
|
||||||
movdqa xmmB,xmmA
|
|
||||||
punpcklwd xmmA,xmmF ; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
|
|
||||||
punpckhwd xmmB,xmmF ; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
|
|
||||||
|
|
||||||
movdqa xmmG,xmmD
|
|
||||||
punpcklwd xmmD,xmmC ; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
|
|
||||||
punpckhwd xmmG,xmmC ; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
|
|
||||||
|
|
||||||
movdqa xmmE,xmmA
|
|
||||||
punpcklbw xmmA,xmmD ; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
|
|
||||||
punpckhbw xmmE,xmmD ; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
|
|
||||||
|
|
||||||
movdqa xmmH,xmmB
|
|
||||||
punpcklbw xmmB,xmmG ; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
|
|
||||||
punpckhbw xmmH,xmmG ; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
|
|
||||||
|
|
||||||
pxor xmmF,xmmF
|
|
||||||
|
|
||||||
movdqa xmmC,xmmA
|
|
||||||
punpcklbw xmmA,xmmF ; xmmA=(00 02 04 06 08 0A 0C 0E)
|
|
||||||
punpckhbw xmmC,xmmF ; xmmC=(10 12 14 16 18 1A 1C 1E)
|
|
||||||
|
|
||||||
movdqa xmmD,xmmB
|
|
||||||
punpcklbw xmmB,xmmF ; xmmB=(01 03 05 07 09 0B 0D 0F)
|
|
||||||
punpckhbw xmmD,xmmF ; xmmD=(11 13 15 17 19 1B 1D 1F)
|
|
||||||
|
|
||||||
movdqa xmmG,xmmE
|
|
||||||
punpcklbw xmmE,xmmF ; xmmE=(20 22 24 26 28 2A 2C 2E)
|
|
||||||
punpckhbw xmmG,xmmF ; xmmG=(30 32 34 36 38 3A 3C 3E)
|
|
||||||
|
|
||||||
punpcklbw xmmF,xmmH
|
|
||||||
punpckhbw xmmH,xmmH
|
|
||||||
psrlw xmmF,BYTE_BIT ; xmmF=(21 23 25 27 29 2B 2D 2F)
|
|
||||||
psrlw xmmH,BYTE_BIT ; xmmH=(31 33 35 37 39 3B 3D 3F)
|
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
|
||||||
|
|
||||||
; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
|
|
||||||
; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
|
|
||||||
|
|
||||||
; (Original)
|
|
||||||
; Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
|
|
||||||
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
|
||||||
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
|
||||||
;
|
|
||||||
; (This implementation)
|
|
||||||
; Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
|
|
||||||
; Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
|
|
||||||
; Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
|
|
||||||
|
|
||||||
movdqa XMMWORD [wk(0)], xmm0 ; wk(0)=RE
|
|
||||||
movdqa XMMWORD [wk(1)], xmm1 ; wk(1)=RO
|
|
||||||
movdqa XMMWORD [wk(2)], xmm4 ; wk(2)=BE
|
|
||||||
movdqa XMMWORD [wk(3)], xmm5 ; wk(3)=BO
|
|
||||||
|
|
||||||
movdqa xmm6,xmm1
|
|
||||||
punpcklwd xmm1,xmm3
|
|
||||||
punpckhwd xmm6,xmm3
|
|
||||||
movdqa xmm7,xmm1
|
|
||||||
movdqa xmm4,xmm6
|
|
||||||
pmaddwd xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
|
|
||||||
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
|
|
||||||
pmaddwd xmm7,[GOTOFF(eax,PW_MF016_MF033)] ; xmm7=ROL*-FIX(0.168)+GOL*-FIX(0.331)
|
|
||||||
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=ROH*-FIX(0.168)+GOH*-FIX(0.331)
|
|
||||||
|
|
||||||
movdqa XMMWORD [wk(4)], xmm1 ; wk(4)=ROL*FIX(0.299)+GOL*FIX(0.337)
|
|
||||||
movdqa XMMWORD [wk(5)], xmm6 ; wk(5)=ROH*FIX(0.299)+GOH*FIX(0.337)
|
|
||||||
|
|
||||||
pxor xmm1,xmm1
|
|
||||||
pxor xmm6,xmm6
|
|
||||||
punpcklwd xmm1,xmm5 ; xmm1=BOL
|
|
||||||
punpckhwd xmm6,xmm5 ; xmm6=BOH
|
|
||||||
psrld xmm1,1 ; xmm1=BOL*FIX(0.500)
|
|
||||||
psrld xmm6,1 ; xmm6=BOH*FIX(0.500)
|
|
||||||
|
|
||||||
movdqa xmm5,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm5=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd xmm7,xmm1
|
|
||||||
paddd xmm4,xmm6
|
|
||||||
paddd xmm7,xmm5
|
|
||||||
paddd xmm4,xmm5
|
|
||||||
psrld xmm7,SCALEBITS ; xmm7=CbOL
|
|
||||||
psrld xmm4,SCALEBITS ; xmm4=CbOH
|
|
||||||
packssdw xmm7,xmm4 ; xmm7=CbO
|
|
||||||
|
|
||||||
movdqa xmm1, XMMWORD [wk(2)] ; xmm1=BE
|
|
||||||
|
|
||||||
movdqa xmm6,xmm0
|
|
||||||
punpcklwd xmm0,xmm2
|
|
||||||
punpckhwd xmm6,xmm2
|
|
||||||
movdqa xmm5,xmm0
|
|
||||||
movdqa xmm4,xmm6
|
|
||||||
pmaddwd xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
|
|
||||||
pmaddwd xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
|
|
||||||
pmaddwd xmm5,[GOTOFF(eax,PW_MF016_MF033)] ; xmm5=REL*-FIX(0.168)+GEL*-FIX(0.331)
|
|
||||||
pmaddwd xmm4,[GOTOFF(eax,PW_MF016_MF033)] ; xmm4=REH*-FIX(0.168)+GEH*-FIX(0.331)
|
|
||||||
|
|
||||||
movdqa XMMWORD [wk(6)], xmm0 ; wk(6)=REL*FIX(0.299)+GEL*FIX(0.337)
|
|
||||||
movdqa XMMWORD [wk(7)], xmm6 ; wk(7)=REH*FIX(0.299)+GEH*FIX(0.337)
|
|
||||||
|
|
||||||
pxor xmm0,xmm0
|
|
||||||
pxor xmm6,xmm6
|
|
||||||
punpcklwd xmm0,xmm1 ; xmm0=BEL
|
|
||||||
punpckhwd xmm6,xmm1 ; xmm6=BEH
|
|
||||||
psrld xmm0,1 ; xmm0=BEL*FIX(0.500)
|
|
||||||
psrld xmm6,1 ; xmm6=BEH*FIX(0.500)
|
|
||||||
|
|
||||||
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd xmm5,xmm0
|
|
||||||
paddd xmm4,xmm6
|
|
||||||
paddd xmm5,xmm1
|
|
||||||
paddd xmm4,xmm1
|
|
||||||
psrld xmm5,SCALEBITS ; xmm5=CbEL
|
|
||||||
psrld xmm4,SCALEBITS ; xmm4=CbEH
|
|
||||||
packssdw xmm5,xmm4 ; xmm5=CbE
|
|
||||||
|
|
||||||
psllw xmm7,BYTE_BIT
|
|
||||||
por xmm5,xmm7 ; xmm5=Cb
|
|
||||||
movdqa XMMWORD [ebx], xmm5 ; Save Cb
|
|
||||||
|
|
||||||
movdqa xmm0, XMMWORD [wk(3)] ; xmm0=BO
|
|
||||||
movdqa xmm6, XMMWORD [wk(2)] ; xmm6=BE
|
|
||||||
movdqa xmm1, XMMWORD [wk(1)] ; xmm1=RO
|
|
||||||
|
|
||||||
movdqa xmm4,xmm0
|
|
||||||
punpcklwd xmm0,xmm3
|
|
||||||
punpckhwd xmm4,xmm3
|
|
||||||
movdqa xmm7,xmm0
|
|
||||||
movdqa xmm5,xmm4
|
|
||||||
pmaddwd xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
|
|
||||||
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
|
|
||||||
pmaddwd xmm7,[GOTOFF(eax,PW_MF008_MF041)] ; xmm7=BOL*-FIX(0.081)+GOL*-FIX(0.418)
|
|
||||||
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BOH*-FIX(0.081)+GOH*-FIX(0.418)
|
|
||||||
|
|
||||||
movdqa xmm3,[GOTOFF(eax,PD_ONEHALF)] ; xmm3=[PD_ONEHALF]
|
|
||||||
|
|
||||||
paddd xmm0, XMMWORD [wk(4)]
|
|
||||||
paddd xmm4, XMMWORD [wk(5)]
|
|
||||||
paddd xmm0,xmm3
|
|
||||||
paddd xmm4,xmm3
|
|
||||||
psrld xmm0,SCALEBITS ; xmm0=YOL
|
|
||||||
psrld xmm4,SCALEBITS ; xmm4=YOH
|
|
||||||
packssdw xmm0,xmm4 ; xmm0=YO
|
|
||||||
|
|
||||||
pxor xmm3,xmm3
|
|
||||||
pxor xmm4,xmm4
|
|
||||||
punpcklwd xmm3,xmm1 ; xmm3=ROL
|
|
||||||
punpckhwd xmm4,xmm1 ; xmm4=ROH
|
|
||||||
psrld xmm3,1 ; xmm3=ROL*FIX(0.500)
|
|
||||||
psrld xmm4,1 ; xmm4=ROH*FIX(0.500)
|
|
||||||
|
|
||||||
movdqa xmm1,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm1=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd xmm7,xmm3
|
|
||||||
paddd xmm5,xmm4
|
|
||||||
paddd xmm7,xmm1
|
|
||||||
paddd xmm5,xmm1
|
|
||||||
psrld xmm7,SCALEBITS ; xmm7=CrOL
|
|
||||||
psrld xmm5,SCALEBITS ; xmm5=CrOH
|
|
||||||
packssdw xmm7,xmm5 ; xmm7=CrO
|
|
||||||
|
|
||||||
movdqa xmm3, XMMWORD [wk(0)] ; xmm3=RE
|
|
||||||
|
|
||||||
movdqa xmm4,xmm6
|
|
||||||
punpcklwd xmm6,xmm2
|
|
||||||
punpckhwd xmm4,xmm2
|
|
||||||
movdqa xmm1,xmm6
|
|
||||||
movdqa xmm5,xmm4
|
|
||||||
pmaddwd xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
|
|
||||||
pmaddwd xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
|
|
||||||
pmaddwd xmm1,[GOTOFF(eax,PW_MF008_MF041)] ; xmm1=BEL*-FIX(0.081)+GEL*-FIX(0.418)
|
|
||||||
pmaddwd xmm5,[GOTOFF(eax,PW_MF008_MF041)] ; xmm5=BEH*-FIX(0.081)+GEH*-FIX(0.418)
|
|
||||||
|
|
||||||
movdqa xmm2,[GOTOFF(eax,PD_ONEHALF)] ; xmm2=[PD_ONEHALF]
|
|
||||||
|
|
||||||
paddd xmm6, XMMWORD [wk(6)]
|
|
||||||
paddd xmm4, XMMWORD [wk(7)]
|
|
||||||
paddd xmm6,xmm2
|
|
||||||
paddd xmm4,xmm2
|
|
||||||
psrld xmm6,SCALEBITS ; xmm6=YEL
|
|
||||||
psrld xmm4,SCALEBITS ; xmm4=YEH
|
|
||||||
packssdw xmm6,xmm4 ; xmm6=YE
|
|
||||||
|
|
||||||
psllw xmm0,BYTE_BIT
|
|
||||||
por xmm6,xmm0 ; xmm6=Y
|
|
||||||
movdqa XMMWORD [edi], xmm6 ; Save Y
|
|
||||||
|
|
||||||
pxor xmm2,xmm2
|
|
||||||
pxor xmm4,xmm4
|
|
||||||
punpcklwd xmm2,xmm3 ; xmm2=REL
|
|
||||||
punpckhwd xmm4,xmm3 ; xmm4=REH
|
|
||||||
psrld xmm2,1 ; xmm2=REL*FIX(0.500)
|
|
||||||
psrld xmm4,1 ; xmm4=REH*FIX(0.500)
|
|
||||||
|
|
||||||
movdqa xmm0,[GOTOFF(eax,PD_ONEHALFM1_CJ)] ; xmm0=[PD_ONEHALFM1_CJ]
|
|
||||||
|
|
||||||
paddd xmm1,xmm2
|
|
||||||
paddd xmm5,xmm4
|
|
||||||
paddd xmm1,xmm0
|
|
||||||
paddd xmm5,xmm0
|
|
||||||
psrld xmm1,SCALEBITS ; xmm1=CrEL
|
|
||||||
psrld xmm5,SCALEBITS ; xmm5=CrEH
|
|
||||||
packssdw xmm1,xmm5 ; xmm1=CrE
|
|
||||||
|
|
||||||
psllw xmm7,BYTE_BIT
|
|
||||||
por xmm1,xmm7 ; xmm1=Cr
|
|
||||||
movdqa XMMWORD [edx], xmm1 ; Save Cr
|
|
||||||
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD
|
|
||||||
add esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; inptr
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr0
|
|
||||||
add ebx, byte SIZEOF_XMMWORD ; outptr1
|
|
||||||
add edx, byte SIZEOF_XMMWORD ; outptr2
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD
|
|
||||||
jae near .columnloop
|
|
||||||
test ecx,ecx
|
|
||||||
jnz near .column_ld1
|
|
||||||
|
|
||||||
pop ecx ; col
|
|
||||||
pop esi
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
pop edx
|
|
||||||
poppic eax
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_JSAMPROW ; input_buf
|
|
||||||
add edi, byte SIZEOF_JSAMPROW
|
|
||||||
add ebx, byte SIZEOF_JSAMPROW
|
|
||||||
add edx, byte SIZEOF_JSAMPROW
|
|
||||||
dec eax ; num_rows
|
|
||||||
jg near .rowloop
|
|
||||||
|
|
||||||
.return:
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
; pop edx ; need not be preserved
|
|
||||||
; pop ecx ; need not be preserved
|
|
||||||
pop ebx
|
|
||||||
mov esp,ebp ; esp <- aligned ebp
|
|
||||||
pop esp ; esp <- original ebp
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|||||||
402
simd/jdclrmmx.asm
Normal file
402
simd/jdclrmmx.asm
Normal file
@@ -0,0 +1,402 @@
|
|||||||
|
;
|
||||||
|
; jdclrmmx.asm - colorspace conversion (MMX)
|
||||||
|
;
|
||||||
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
;
|
||||||
|
; Based on
|
||||||
|
; x86 SIMD extension for IJG JPEG library
|
||||||
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
|
;
|
||||||
|
; This file should be assembled with NASM (Netwide Assembler),
|
||||||
|
; can *not* be assembled with Microsoft's MASM or any compatible
|
||||||
|
; assembler (including Borland's Turbo Assembler).
|
||||||
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
|
;
|
||||||
|
; [TAB8]
|
||||||
|
|
||||||
|
; --------------------------------------------------------------------------
|
||||||
|
SECTION SEG_TEXT
|
||||||
|
BITS 32
|
||||||
|
;
|
||||||
|
; Convert some rows of samples to the output colorspace.
|
||||||
|
;
|
||||||
|
; GLOBAL(void)
|
||||||
|
; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width,
|
||||||
|
; JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
; JSAMPARRAY output_buf, int num_rows)
|
||||||
|
;
|
||||||
|
|
||||||
|
; Argument and local-variable addressing for jsimd_ycc_rgb_convert_mmx.
; Each argument macro adds a fixed offset to a base register (b).  The
; function passes the stack pointer it captured right after "push ebp",
; which is why the first argument is found at (b)+8.
%define out_width(b) (b)+8 ; JDIMENSION out_width
|
||||||
|
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
|
||||||
|
%define input_row(b) (b)+16 ; JDIMENSION input_row
|
||||||
|
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
||||||
|
%define num_rows(b) (b)+24 ; int num_rows
|
||||||
|
|
||||||
|
; Locals are addressed from the realigned ebp: original_ebp is the slot at
; the frame base, wk[] is WK_NUM mmwords of scratch space just below it,
; and gotptr is the pointer-sized slot immediately below wk(0).
%define original_ebp ebp+0
|
||||||
|
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
|
||||||
|
%define WK_NUM 2
|
||||||
|
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
||||||
|
|
||||||
|
; Entry point.  Builds a SIZEOF_MMWORD-aligned frame, loads the arguments
; into registers and falls through into .rowloop.  Jumps straight to
; .return when out_width is zero or num_rows is not positive.
align 16
|
||||||
|
global EXTN(jsimd_ycc_rgb_convert_mmx)
|
||||||
|
|
||||||
|
EXTN(jsimd_ycc_rgb_convert_mmx):
|
||||||
|
push ebp
|
||||||
|
; NOTE: eax receives esp as it stands after the push above, i.e. the
; address of the saved ebp; the argument macros are applied to this value.
mov eax,esp ; eax = original ebp
|
||||||
|
sub esp, byte 4
|
||||||
|
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
||||||
|
; Keep the pre-alignment stack pointer at the frame base; the "pop esp"
; at .return reloads it.
mov [esp],eax
|
||||||
|
mov ebp,esp ; ebp = aligned ebp
|
||||||
|
; Move esp below the wk[] scratch area so later pushes cannot overlap it.
lea esp, [wk(0)]
|
||||||
|
pushpic eax ; make a room for GOT address
|
||||||
|
push ebx
|
||||||
|
; push ecx ; need not be preserved
|
||||||
|
; push edx ; need not be preserved
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
|
||||||
|
get_GOT ebx ; get GOT address
|
||||||
|
movpic POINTER [gotptr], ebx ; save GOT address
|
||||||
|
|
||||||
|
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
||||||
|
test ecx,ecx
|
||||||
|
jz near .return
|
||||||
|
|
||||||
|
; ecx is borrowed as the input_row index below, so num_cols is parked on
; the stack for the moment.
push ecx
|
||||||
|
|
||||||
|
mov edi, JSAMPIMAGE [input_buf(eax)]
|
||||||
|
mov ecx, JDIMENSION [input_row(eax)]
|
||||||
|
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
|
||||||
|
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
|
||||||
|
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
|
||||||
|
; Advance each component's row-pointer array to entry input_row.
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
|
||||||
|
pop ecx
|
||||||
|
|
||||||
|
mov edi, JSAMPARRAY [output_buf(eax)]
|
||||||
|
; Last use of eax as the argument base: it becomes the row counter here.
mov eax, INT [num_rows(eax)]
|
||||||
|
test eax,eax
|
||||||
|
jle near .return
|
||||||
|
alignx 16,7
|
||||||
|
; Per-row setup.  Saves the row-level registers (popped again in reverse
; order at .nextrow), then replaces the four row-pointer-array cursors
; with the sample pointers they point at.
.rowloop:
|
||||||
|
push eax
|
||||||
|
push edi
|
||||||
|
push edx
|
||||||
|
push ebx
|
||||||
|
push esi
|
||||||
|
push ecx ; col
|
||||||
|
|
||||||
|
mov esi, JSAMPROW [esi] ; inptr0
|
||||||
|
mov ebx, JSAMPROW [ebx] ; inptr1
|
||||||
|
mov edx, JSAMPROW [edx] ; inptr2
|
||||||
|
mov edi, JSAMPROW [edi] ; outptr
|
||||||
|
; The row counter in eax was saved above; inside the column loop eax is
; the base register handed to GOTOFF() for constant-table accesses.
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
||||||
|
alignx 16,7
|
||||||
|
; Convert one group of 8 pixels.  Each 8-byte sample vector is split into
; its even- and odd-indexed samples (suffix E / O) so every sample gets a
; 16-bit lane.  The chroma-derived offsets (B-Y), (R-Y) and (G-Y) are
; computed in fixed point, Y is added, and the sums are packed back to
; bytes with unsigned saturation.
.columnloop:
|
||||||
|
|
||||||
|
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
|
||||||
|
movq mm1, MMWORD [edx] ; mm1=Cr(01234567)
|
||||||
|
|
||||||
|
; Build both constants in registers instead of loading them: all-ones
; shifted right gives the low-byte mask, all-ones shifted left by 7 gives
; 0xFF80 (-128 as a signed word).
pcmpeqw mm4,mm4
|
||||||
|
pcmpeqw mm7,mm7
|
||||||
|
psrlw mm4,BYTE_BIT
|
||||||
|
psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
|
||||||
|
movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
|
||||||
|
|
||||||
|
pand mm4,mm5 ; mm4=Cb(0246)=CbE
|
||||||
|
psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO
|
||||||
|
pand mm0,mm1 ; mm0=Cr(0246)=CrE
|
||||||
|
psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO
|
||||||
|
|
||||||
|
; Adding 0xFF80 subtracts 128 from every chroma sample, making it signed.
paddw mm4,mm7
|
||||||
|
paddw mm5,mm7
|
||||||
|
paddw mm0,mm7
|
||||||
|
paddw mm1,mm7
|
||||||
|
|
||||||
|
; (Original)
|
||||||
|
; R = Y + 1.40200 * Cr
|
||||||
|
; G = Y - 0.34414 * Cb - 0.71414 * Cr
|
||||||
|
; B = Y + 1.77200 * Cb
|
||||||
|
;
|
||||||
|
; (This implementation)
|
||||||
|
; R = Y + 0.40200 * Cr + Cr
|
||||||
|
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
|
||||||
|
; B = Y - 0.22800 * Cb + Cb + Cb
|
||||||
|
|
||||||
|
; Keep unscaled copies of the chroma values (mm2/mm3/mm6/mm7) and double
; the working copies before the high-half multiply below.
movq mm2,mm4 ; mm2=CbE
|
||||||
|
movq mm3,mm5 ; mm3=CbO
|
||||||
|
paddw mm4,mm4 ; mm4=2*CbE
|
||||||
|
paddw mm5,mm5 ; mm5=2*CbO
|
||||||
|
movq mm6,mm0 ; mm6=CrE
|
||||||
|
movq mm7,mm1 ; mm7=CrO
|
||||||
|
paddw mm0,mm0 ; mm0=2*CrE
|
||||||
|
paddw mm1,mm1 ; mm1=2*CrO
|
||||||
|
|
||||||
|
; pmulhw keeps only the high 16 bits of each signed product.
pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800))
|
||||||
|
pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800))
|
||||||
|
pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200))
|
||||||
|
pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200))
|
||||||
|
|
||||||
|
; Add PW_ONE, then shift right by one: this undoes the doubling above
; (presumably PW_ONE is a vector of word 1s, giving round-to-nearest --
; its definition is outside this chunk).
paddw mm4,[GOTOFF(eax,PW_ONE)]
|
||||||
|
paddw mm5,[GOTOFF(eax,PW_ONE)]
|
||||||
|
psraw mm4,1 ; mm4=(CbE * -FIX(0.22800))
|
||||||
|
psraw mm5,1 ; mm5=(CbO * -FIX(0.22800))
|
||||||
|
paddw mm0,[GOTOFF(eax,PW_ONE)]
|
||||||
|
paddw mm1,[GOTOFF(eax,PW_ONE)]
|
||||||
|
psraw mm0,1 ; mm0=(CrE * FIX(0.40200))
|
||||||
|
psraw mm1,1 ; mm1=(CrO * FIX(0.40200))
|
||||||
|
|
||||||
|
; Add back the whole-number part of each coefficient: Cb twice, Cr once.
paddw mm4,mm2
|
||||||
|
paddw mm5,mm3
|
||||||
|
paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
|
||||||
|
paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
|
||||||
|
paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
|
||||||
|
paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
|
||||||
|
|
||||||
|
; Spill (B-Y): mm4/mm5 are needed as temporaries for the (G-Y) terms.
movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E
|
||||||
|
movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O
|
||||||
|
|
||||||
|
; (G-Y): interleave Cb with Cr word-wise so that one pmaddwd yields a
; 32-bit two-term sum of products per pixel, then add PD_ONEHALF and
; shift right by SCALEBITS to return to sample scale.
movq mm4,mm2
|
||||||
|
movq mm5,mm3
|
||||||
|
punpcklwd mm2,mm6
|
||||||
|
punpckhwd mm4,mm6
|
||||||
|
pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
punpcklwd mm3,mm7
|
||||||
|
punpckhwd mm5,mm7
|
||||||
|
pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
|
||||||
|
paddd mm2,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
paddd mm4,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
psrad mm2,SCALEBITS
|
||||||
|
psrad mm4,SCALEBITS
|
||||||
|
paddd mm3,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
paddd mm5,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
psrad mm3,SCALEBITS
|
||||||
|
psrad mm5,SCALEBITS
|
||||||
|
|
||||||
|
packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
|
||||||
|
packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
|
||||||
|
psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
|
||||||
|
psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
|
||||||
|
|
||||||
|
movq mm5, MMWORD [esi] ; mm5=Y(01234567)
|
||||||
|
|
||||||
|
; The low-byte mask is rebuilt here because mm4 was reused above.
pcmpeqw mm4,mm4
|
||||||
|
psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..}
|
||||||
|
pand mm4,mm5 ; mm4=Y(0246)=YE
|
||||||
|
psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO
|
||||||
|
|
||||||
|
; packuswb saturates each signed word to 0..255, so out-of-range results
; are clamped as part of the pack.
paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
|
||||||
|
paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
|
||||||
|
packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **)
|
||||||
|
packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **)
|
||||||
|
|
||||||
|
paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
|
||||||
|
paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
|
||||||
|
packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **)
|
||||||
|
packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **)
|
||||||
|
|
||||||
|
paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
|
||||||
|
paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
|
||||||
|
packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **)
|
||||||
|
packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **)
|
||||||
|
|
||||||
|
%if RGB_PIXELSIZE == 3 ; ---------------
|
||||||
|
|
||||||
|
; 3-byte pixel output: the component vectors are interleaved into 24
; packed bytes held in mmA, mmE and mmC, then stored.  In the diagrams the
; first digit is the byte position inside a pixel and the second digit is
; the pixel index.
; NOTE(review): mmA..mmH are register aliases defined outside this chunk;
; which colour component each alias carries depends on that mapping.
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
|
||||||
|
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
|
||||||
|
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
|
||||||
|
; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
|
||||||
|
|
||||||
|
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
|
||||||
|
punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07)
|
||||||
|
punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27)
|
||||||
|
|
||||||
|
movq mmG,mmA
|
||||||
|
movq mmH,mmA
|
||||||
|
punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03)
|
||||||
|
punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07)
|
||||||
|
|
||||||
|
psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --)
|
||||||
|
psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --)
|
||||||
|
|
||||||
|
movq mmC,mmD
|
||||||
|
movq mmB,mmD
|
||||||
|
punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14)
|
||||||
|
punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --)
|
||||||
|
|
||||||
|
psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --)
|
||||||
|
|
||||||
|
movq mmF,mmE
|
||||||
|
punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25)
|
||||||
|
punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --)
|
||||||
|
|
||||||
|
punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12)
|
||||||
|
punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05)
|
||||||
|
punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27)
|
||||||
|
|
||||||
|
; ecx holds the columns still to be written in this row; a full group
; needs at least SIZEOF_MMWORD of them.
cmp ecx, byte SIZEOF_MMWORD
|
||||||
|
jb short .column_st16
|
||||||
|
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
||||||
|
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
|
||||||
|
|
||||||
|
sub ecx, byte SIZEOF_MMWORD
|
||||||
|
jz short .nextrow
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_MMWORD ; inptr0
|
||||||
|
add ebx, byte SIZEOF_MMWORD ; inptr1
|
||||||
|
add edx, byte SIZEOF_MMWORD ; inptr2
|
||||||
|
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
||||||
|
jmp near .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; Partial final group: ecx is converted from pixels to bytes, and the tail
; is written in progressively smaller pieces (two mmwords, one mmword,
; dword, word, byte) so nothing is stored past the end of the row.
.column_st16:
|
||||||
|
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
||||||
|
cmp ecx, byte 2*SIZEOF_MMWORD
|
||||||
|
jb short .column_st8
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
||||||
|
movq mmA,mmC
|
||||||
|
sub ecx, byte 2*SIZEOF_MMWORD
|
||||||
|
add edi, byte 2*SIZEOF_MMWORD
|
||||||
|
; Fewer than SIZEOF_MMWORD pixels were left on entry, so less than one
; mmword of bytes can remain here: the one-mmword step is skipped.
jmp short .column_st4
|
||||||
|
.column_st8:
|
||||||
|
cmp ecx, byte SIZEOF_MMWORD
|
||||||
|
jb short .column_st4
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq mmA,mmE
|
||||||
|
sub ecx, byte SIZEOF_MMWORD
|
||||||
|
add edi, byte SIZEOF_MMWORD
|
||||||
|
.column_st4:
|
||||||
|
; eax is reused as scratch from here on; .rowloop reloads the GOT base
; into eax before the next row's column loop.
movd eax,mmA
|
||||||
|
cmp ecx, byte SIZEOF_DWORD
|
||||||
|
jb short .column_st2
|
||||||
|
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
||||||
|
psrlq mmA,DWORD_BIT
|
||||||
|
movd eax,mmA
|
||||||
|
sub ecx, byte SIZEOF_DWORD
|
||||||
|
add edi, byte SIZEOF_DWORD
|
||||||
|
.column_st2:
|
||||||
|
cmp ecx, byte SIZEOF_WORD
|
||||||
|
jb short .column_st1
|
||||||
|
mov WORD [edi+0*SIZEOF_WORD], ax
|
||||||
|
shr eax,WORD_BIT
|
||||||
|
sub ecx, byte SIZEOF_WORD
|
||||||
|
add edi, byte SIZEOF_WORD
|
||||||
|
.column_st1:
|
||||||
|
cmp ecx, byte SIZEOF_BYTE
|
||||||
|
jb short .nextrow
|
||||||
|
mov BYTE [edi+0*SIZEOF_BYTE], al
|
||||||
|
|
||||||
|
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
; 4-byte pixel output: a fourth (filler) component is synthesised in
; mm6/mm7 -- all ones when RGBX_FILLER_0XFF is defined, zero otherwise --
; and the four component vectors are transposed into 32 packed bytes held
; in mmA, mmD, mmC and mmH.
; NOTE(review): mmA..mmH are register aliases defined outside this chunk;
; the diagrams below suggest mm6/mm7 are the aliases at byte position 3,
; but confirm against the alias definitions.
%ifdef RGBX_FILLER_0XFF
|
||||||
|
pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
|
||||||
|
pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
|
||||||
|
%else
|
||||||
|
pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
|
||||||
|
pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
|
||||||
|
%endif
|
||||||
|
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
|
||||||
|
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
|
||||||
|
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
|
||||||
|
; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
|
||||||
|
|
||||||
|
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
|
||||||
|
punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36)
|
||||||
|
punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17)
|
||||||
|
punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37)
|
||||||
|
|
||||||
|
movq mmC,mmA
|
||||||
|
punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32)
|
||||||
|
punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36)
|
||||||
|
movq mmG,mmB
|
||||||
|
punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33)
|
||||||
|
punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37)
|
||||||
|
|
||||||
|
movq mmD,mmA
|
||||||
|
punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31)
|
||||||
|
punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33)
|
||||||
|
movq mmH,mmC
|
||||||
|
punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35)
|
||||||
|
punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37)
|
||||||
|
|
||||||
|
; ecx holds the columns still to be written in this row; a full group
; needs at least SIZEOF_MMWORD of them.
cmp ecx, byte SIZEOF_MMWORD
|
||||||
|
jb short .column_st16
|
||||||
|
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
||||||
|
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
|
||||||
|
movq MMWORD [edi+3*SIZEOF_MMWORD], mmH
|
||||||
|
|
||||||
|
sub ecx, byte SIZEOF_MMWORD
|
||||||
|
jz short .nextrow
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_MMWORD ; inptr0
|
||||||
|
add ebx, byte SIZEOF_MMWORD ; inptr1
|
||||||
|
add edx, byte SIZEOF_MMWORD ; inptr2
|
||||||
|
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
||||||
|
jmp near .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; Partial final group: ecx stays in pixels here.  Each mmword holds two
; 4-byte pixels, so the thresholds SIZEOF_MMWORD/2, /4 and /8 correspond
; to two mmwords, one mmword and one dword of output respectively.
.column_st16:
|
||||||
|
cmp ecx, byte SIZEOF_MMWORD/2
|
||||||
|
jb short .column_st8
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
||||||
|
; Slide the not-yet-written pixels down into mmA/mmD for the next steps.
movq mmA,mmC
|
||||||
|
movq mmD,mmH
|
||||||
|
sub ecx, byte SIZEOF_MMWORD/2
|
||||||
|
add edi, byte 2*SIZEOF_MMWORD
|
||||||
|
.column_st8:
|
||||||
|
cmp ecx, byte SIZEOF_MMWORD/4
|
||||||
|
jb short .column_st4
|
||||||
|
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
||||||
|
movq mmA,mmD
|
||||||
|
sub ecx, byte SIZEOF_MMWORD/4
|
||||||
|
add edi, byte 1*SIZEOF_MMWORD
|
||||||
|
.column_st4:
|
||||||
|
cmp ecx, byte SIZEOF_MMWORD/8
|
||||||
|
jb short .nextrow
|
||||||
|
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
||||||
|
|
||||||
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; Row epilogue.  Restores the registers saved at .rowloop (in reverse push
; order), steps all four row-pointer cursors to the next row, and loops
; while the row counter stays positive.
.nextrow:
|
||||||
|
pop ecx
|
||||||
|
pop esi
|
||||||
|
pop ebx
|
||||||
|
pop edx
|
||||||
|
pop edi
|
||||||
|
pop eax
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_JSAMPROW
|
||||||
|
add ebx, byte SIZEOF_JSAMPROW
|
||||||
|
add edx, byte SIZEOF_JSAMPROW
|
||||||
|
add edi, byte SIZEOF_JSAMPROW ; output_buf
|
||||||
|
dec eax ; num_rows
|
||||||
|
jg near .rowloop
|
||||||
|
|
||||||
|
; Reached only after at least one row was converted; the early exits jump
; to .return before any MMX instruction has run.
emms ; empty MMX state
|
||||||
|
|
||||||
|
; Function epilogue: undo the prologue's pushes, then unwind the aligned
; frame.  "mov esp,ebp" drops everything below the frame base and
; "pop esp" reloads the stack pointer the prologue stored there.
.return:
|
||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
; pop edx ; need not be preserved
|
||||||
|
; pop ecx ; need not be preserved
|
||||||
|
pop ebx
|
||||||
|
mov esp,ebp ; esp <- aligned ebp
|
||||||
|
pop esp ; esp <- original ebp
|
||||||
|
pop ebp
|
||||||
|
ret
|
||||||
|
|
||||||
500
simd/jdclrss2.asm
Normal file
500
simd/jdclrss2.asm
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
;
|
||||||
|
; jdclrss2.asm - colorspace conversion (SSE2)
|
||||||
|
;
|
||||||
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
;
|
||||||
|
; Based on
|
||||||
|
; x86 SIMD extension for IJG JPEG library
|
||||||
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
|
; For conditions of distribution and use, see copyright notice in jsimdext.inc
|
||||||
|
;
|
||||||
|
; This file should be assembled with NASM (Netwide Assembler),
|
||||||
|
; can *not* be assembled with Microsoft's MASM or any compatible
|
||||||
|
; assembler (including Borland's Turbo Assembler).
|
||||||
|
; NASM is available from http://nasm.sourceforge.net/ or
|
||||||
|
; http://sourceforge.net/project/showfiles.php?group_id=6208
|
||||||
|
;
|
||||||
|
; [TAB8]
|
||||||
|
|
||||||
|
; --------------------------------------------------------------------------
|
||||||
|
SECTION SEG_TEXT
|
||||||
|
BITS 32
|
||||||
|
;
|
||||||
|
; Convert some rows of samples to the output colorspace.
|
||||||
|
;
|
||||||
|
; GLOBAL(void)
|
||||||
|
; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
|
||||||
|
; JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
; JSAMPARRAY output_buf, int num_rows)
|
||||||
|
;
|
||||||
|
|
||||||
|
; Argument offsets relative to a base register b that points at the saved
; ebp slot (b+0 = saved ebp, b+4 = return address, first argument at b+8).
%define out_width(b) (b)+8 ; JDIMENSION out_width
|
||||||
|
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
|
||||||
|
%define input_row(b) (b)+16 ; JDIMENSION input_row
|
||||||
|
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
||||||
|
%define num_rows(b) (b)+24 ; int num_rows
|
||||||
|
|
||||||
|
; Locals addressed from the realigned ebp: [ebp+0] holds the pointer to the
; caller-side frame, wk(0)..wk(WK_NUM-1) are xmmword scratch slots directly
; below ebp, and gotptr is one pointer-sized slot directly below wk(0).
%define original_ebp ebp+0
|
||||||
|
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
||||||
|
%define WK_NUM 2
|
||||||
|
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
||||||
|
|
||||||
|
; Entry point is 16-byte aligned and exported under the EXTN() name.
align 16
|
||||||
|
global EXTN(jsimd_ycc_rgb_convert_sse2)
|
||||||
|
|
||||||
|
; Prologue: build a 16-byte-aligned frame, save callee-saved registers, and
; load the arguments.  On leaving this section:
;   esi/ebx/edx = &input_buf[0..2][input_row]  (row-pointer arrays)
;   edi         = output_buf                    (row-pointer array)
;   ecx         = out_width (column count),  eax = num_rows
; Returns immediately when out_width == 0 or num_rows <= 0.
EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||||
|
push ebp
|
||||||
|
; eax now addresses the saved-ebp slot; the arguments are read through it.
mov eax,esp ; eax = original ebp
|
||||||
|
; Reserve one slot, round esp down to a 16-byte boundary, and park eax there
; so the epilogue can recover the unaligned stack pointer with "pop esp".
sub esp, byte 4
|
||||||
|
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
||||||
|
mov [esp],eax
|
||||||
|
mov ebp,esp ; ebp = aligned ebp
|
||||||
|
; Drop esp below the wk[] scratch area so later pushes do not overlap it.
lea esp, [wk(0)]
|
||||||
|
pushpic eax ; make room for the GOT address
|
||||||
|
push ebx
|
||||||
|
; push ecx ; need not be preserved
|
||||||
|
; push edx ; need not be preserved
|
||||||
|
push esi
|
||||||
|
push edi
|
||||||
|
|
||||||
|
get_GOT ebx ; get GOT address
|
||||||
|
movpic POINTER [gotptr], ebx ; save GOT address
|
||||||
|
|
||||||
|
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
||||||
|
test ecx,ecx
|
||||||
|
jz near .return
|
||||||
|
|
||||||
|
; ecx is needed as a temporary for input_row; keep num_cols on the stack.
push ecx
|
||||||
|
|
||||||
|
mov edi, JSAMPIMAGE [input_buf(eax)]
|
||||||
|
mov ecx, JDIMENSION [input_row(eax)]
|
||||||
|
; Fetch the three component planes, then step each to row input_row.
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
|
||||||
|
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
|
||||||
|
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
|
||||||
|
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
||||||
|
|
||||||
|
pop ecx
|
||||||
|
|
||||||
|
; eax is the argument base, so num_rows must be the last value read via it.
mov edi, JSAMPARRAY [output_buf(eax)]
|
||||||
|
mov eax, INT [num_rows(eax)]
|
||||||
|
test eax,eax
|
||||||
|
jle near .return
|
||||||
|
alignx 16,7
|
||||||
|
; Top of the per-row loop.  Saves the row counter, the four row-pointer-array
; cursors and the column count (six pushes, undone in reverse at .nextrow),
; then dereferences the cursors to obtain this row's sample pointers.
.rowloop:
|
||||||
|
push eax
|
||||||
|
push edi
|
||||||
|
push edx
|
||||||
|
push ebx
|
||||||
|
push esi
|
||||||
|
push ecx ; col
|
||||||
|
|
||||||
|
mov esi, JSAMPROW [esi] ; inptr0
|
||||||
|
mov ebx, JSAMPROW [ebx] ; inptr1
|
||||||
|
mov edx, JSAMPROW [edx] ; inptr2
|
||||||
|
mov edi, JSAMPROW [edi] ; outptr
|
||||||
|
; eax is free now that num_rows is on the stack; it serves as the base
; register for the GOTOFF() constant references in the column loop.
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
||||||
|
alignx 16,7
|
||||||
|
; Per-iteration arithmetic for 16 columns.  Cb, Cr and Y are fetched with
; aligned 16-byte loads, split into even and odd columns widened to words,
; and combined into R, G and B for the even and odd columns separately.
; Each result is packed back to unsigned bytes with saturation, leaving
;   xmm0/xmm1 = R even/odd, xmm2/xmm3 = G even/odd, xmm4/xmm5 = B even/odd.
; wk(0)/wk(1) hold (B-Y) while xmm4/xmm5 are reused for the G computation.
; NOTE(review): the PW_*/PD_* constants and SCALEBITS are defined outside
; this view; the per-line comments describe their intended values.
.columnloop:
|
||||||
|
|
||||||
|
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
|
||||||
|
movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF)
|
||||||
|
|
||||||
|
; Build two constants without touching memory: a low-byte mask per word
; (all ones >> 8) and the word 0xFF80 (all ones << 7), i.e. -128.
pcmpeqw xmm4,xmm4
|
||||||
|
pcmpeqw xmm7,xmm7
|
||||||
|
psrlw xmm4,BYTE_BIT
|
||||||
|
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
||||||
|
movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
|
||||||
|
|
||||||
|
pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE
|
||||||
|
psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO
|
||||||
|
pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE
|
||||||
|
psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO
|
||||||
|
|
||||||
|
; Re-centre the chroma samples: adding 0xFF80 subtracts 128 from each word.
paddw xmm4,xmm7
|
||||||
|
paddw xmm5,xmm7
|
||||||
|
paddw xmm0,xmm7
|
||||||
|
paddw xmm1,xmm7
|
||||||
|
|
||||||
|
; (Original)
|
||||||
|
; R = Y + 1.40200 * Cr
|
||||||
|
; G = Y - 0.34414 * Cb - 0.71414 * Cr
|
||||||
|
; B = Y + 1.77200 * Cb
|
||||||
|
;
|
||||||
|
; (This implementation)
|
||||||
|
; R = Y + 0.40200 * Cr + Cr
|
||||||
|
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
|
||||||
|
; B = Y - 0.22800 * Cb + Cb + Cb
|
||||||
|
|
||||||
|
; Keep unscaled copies (xmm2/3 = Cb, xmm6/7 = Cr) and double the working
; copies: pmulhw keeps only the high word of each product, so the operand
; is doubled first and the result is halved with rounding afterwards.
movdqa xmm2,xmm4 ; xmm2=CbE
|
||||||
|
movdqa xmm3,xmm5 ; xmm3=CbO
|
||||||
|
paddw xmm4,xmm4 ; xmm4=2*CbE
|
||||||
|
paddw xmm5,xmm5 ; xmm5=2*CbO
|
||||||
|
movdqa xmm6,xmm0 ; xmm6=CrE
|
||||||
|
movdqa xmm7,xmm1 ; xmm7=CrO
|
||||||
|
paddw xmm0,xmm0 ; xmm0=2*CrE
|
||||||
|
paddw xmm1,xmm1 ; xmm1=2*CrO
|
||||||
|
|
||||||
|
pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800))
|
||||||
|
pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800))
|
||||||
|
pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200))
|
||||||
|
pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200))
|
||||||
|
|
||||||
|
; Rounded halving: add PW_ONE, then arithmetic shift right by one.
paddw xmm4,[GOTOFF(eax,PW_ONE)]
|
||||||
|
paddw xmm5,[GOTOFF(eax,PW_ONE)]
|
||||||
|
psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800))
|
||||||
|
psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800))
|
||||||
|
paddw xmm0,[GOTOFF(eax,PW_ONE)]
|
||||||
|
paddw xmm1,[GOTOFF(eax,PW_ONE)]
|
||||||
|
psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200))
|
||||||
|
psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200))
|
||||||
|
|
||||||
|
; Add back the integer parts: Cb twice for B-Y, Cr once for R-Y.
paddw xmm4,xmm2
|
||||||
|
paddw xmm5,xmm3
|
||||||
|
paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
|
||||||
|
paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
|
||||||
|
paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
|
||||||
|
paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
|
||||||
|
|
||||||
|
; Spill (B-Y) so xmm4/xmm5 can be reused below.
movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E
|
||||||
|
movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O
|
||||||
|
|
||||||
|
; G-Y: interleave Cb and Cr words into (Cb,Cr) pairs so one pmaddwd forms
; Cb*c0 + Cr*c1 as a 32-bit sum per column, then round, scale down by
; SCALEBITS and narrow back to words.
movdqa xmm4,xmm2
|
||||||
|
movdqa xmm5,xmm3
|
||||||
|
punpcklwd xmm2,xmm6
|
||||||
|
punpckhwd xmm4,xmm6
|
||||||
|
pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
punpcklwd xmm3,xmm7
|
||||||
|
punpckhwd xmm5,xmm7
|
||||||
|
pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
|
||||||
|
|
||||||
|
paddd xmm2,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
paddd xmm4,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
psrad xmm2,SCALEBITS
|
||||||
|
psrad xmm4,SCALEBITS
|
||||||
|
paddd xmm3,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
paddd xmm5,[GOTOFF(eax,PD_ONEHALF)]
|
||||||
|
psrad xmm3,SCALEBITS
|
||||||
|
psrad xmm5,SCALEBITS
|
||||||
|
|
||||||
|
packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
|
||||||
|
packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
|
||||||
|
psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
|
||||||
|
psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
|
||||||
|
|
||||||
|
movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF)
|
||||||
|
|
||||||
|
; Rebuild the low-byte mask (the earlier copy was consumed) and split Y.
pcmpeqw xmm4,xmm4
|
||||||
|
psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..}
|
||||||
|
pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE
|
||||||
|
psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO
|
||||||
|
|
||||||
|
; Add luma to each difference and clamp to 0..255 via packuswb.
paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
|
||||||
|
paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
|
||||||
|
packuswb xmm0,xmm0 ; xmm0=R(02468ACE********)
|
||||||
|
packuswb xmm1,xmm1 ; xmm1=R(13579BDF********)
|
||||||
|
|
||||||
|
paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
|
||||||
|
paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
|
||||||
|
packuswb xmm2,xmm2 ; xmm2=G(02468ACE********)
|
||||||
|
packuswb xmm3,xmm3 ; xmm3=G(13579BDF********)
|
||||||
|
|
||||||
|
; B is formed last, in the Y registers themselves, since Y is no longer
; needed afterwards.
paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
|
||||||
|
paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
|
||||||
|
packuswb xmm4,xmm4 ; xmm4=B(02468ACE********)
|
||||||
|
packuswb xmm5,xmm5 ; xmm5=B(13579BDF********)
|
||||||
|
|
||||||
|
; 3-byte pixels: interleave the six planar byte vectors (three components,
; even and odd columns) into 48 bytes of packed pixels, delivered in
; xmmA, xmmD, xmmF in memory order.  In the lane diagrams the first hex
; digit is the byte position inside the pixel and the second is the column.
; NOTE(review): xmmA..xmmH are register aliases defined outside this view,
; presumably selected from RGB_RED/RGB_GREEN/RGB_BLUE so one shuffle serves
; every component order -- confirm in the include that defines them.
%if RGB_PIXELSIZE == 3 ; ---------------
|
||||||
|
|
||||||
|
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
|
||||||
|
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
|
||||||
|
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
|
||||||
|
; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
|
||||||
|
|
||||||
|
; Stage 1: pair bytes that are adjacent in the output stream.
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
|
||||||
|
punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
|
||||||
|
punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
|
||||||
|
|
||||||
|
; Stage 2: merge byte pairs into 4-byte groups; the 2-byte right shifts
; line up the pairs that belong to the following column.
movdqa xmmG,xmmA
|
||||||
|
movdqa xmmH,xmmA
|
||||||
|
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
|
||||||
|
punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
|
||||||
|
|
||||||
|
psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
|
||||||
|
psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
|
||||||
|
|
||||||
|
movdqa xmmC,xmmD
|
||||||
|
movdqa xmmB,xmmD
|
||||||
|
punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
|
||||||
|
punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
|
||||||
|
|
||||||
|
psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
|
||||||
|
|
||||||
|
movdqa xmmF,xmmE
|
||||||
|
punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
|
||||||
|
punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
|
||||||
|
|
||||||
|
; Stage 3: merge 4-byte groups into 8-byte runs; pshufd 0x4E swaps the two
; quadwords so the upper half can be reached by a low-half unpack.
pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
|
||||||
|
movdqa xmmB,xmmE
|
||||||
|
punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
|
||||||
|
punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
|
||||||
|
punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
|
||||||
|
|
||||||
|
pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
|
||||||
|
movdqa xmmB,xmmF
|
||||||
|
punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
|
||||||
|
punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
|
||||||
|
punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
|
||||||
|
|
||||||
|
; Stage 4: join the low quadwords into the three final 16-byte vectors.
punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
||||||
|
punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
|
||||||
|
punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
|
||||||
|
|
||||||
|
; 3-byte pixels, full group: with at least 16 columns left, write all
; 48 bytes from xmmA, xmmD, xmmF.  Fewer than 16 goes to .column_st32.
cmp ecx, byte SIZEOF_XMMWORD
|
||||||
|
jb short .column_st32
|
||||||
|
|
||||||
|
; Pick the store flavour from the alignment of outptr.
test edi, SIZEOF_XMMWORD-1
|
||||||
|
jnz short .out1
|
||||||
|
; --(aligned)-------------------
|
||||||
|
; Non-temporal aligned stores; edi is advanced once for all three.
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
||||||
|
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
||||||
|
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
||||||
|
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
||||||
|
jmp short .out0
|
||||||
|
.out1: ; --(unaligned)-----------------
|
||||||
|
; maskmovdqu writes to [edi] implicitly and has no displacement form, so
; edi is stepped after each store.  An all-ones mask selects every byte.
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
||||||
|
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
.out0:
|
||||||
|
; 16 columns consumed; stop when the row is exhausted, otherwise advance
; the three input pointers and convert the next group.
sub ecx, byte SIZEOF_XMMWORD
|
||||||
|
jz near .nextrow
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_XMMWORD ; inptr0
|
||||||
|
add ebx, byte SIZEOF_XMMWORD ; inptr1
|
||||||
|
add edx, byte SIZEOF_XMMWORD ; inptr2
|
||||||
|
jmp near .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; 3-byte pixels, final partial group (fewer than 16 columns left).
; ecx is converted from columns to bytes, whole 16-byte vectors are flushed
; with an all-ones maskmovdqu, and the next pending vector is moved down
; into xmmA each time.  The last 0..15 bytes go out through a computed mask.
.column_st32:
|
||||||
|
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
||||||
|
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
||||||
|
; 32 or more bytes left: emit xmmA and xmmD, keep xmmF as the remainder.
cmp ecx, byte 2*SIZEOF_XMMWORD
|
||||||
|
jb short .column_st16
|
||||||
|
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
movdqa xmmA,xmmF
|
||||||
|
sub ecx, byte 2*SIZEOF_XMMWORD
|
||||||
|
jmp short .column_st15
|
||||||
|
.column_st16:
|
||||||
|
; 16..31 bytes left: emit xmmA, keep xmmD as the remainder.
cmp ecx, byte SIZEOF_XMMWORD
|
||||||
|
jb short .column_st15
|
||||||
|
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
movdqa xmmA,xmmD
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD
|
||||||
|
.column_st15:
|
||||||
|
; n = ecx = bytes still to write (0..15); a copy is kept in eax, which
; overwrites the GOT base (not needed again before the next row).
; Build in xmmE a mask whose low n bytes have the top bit set, which is the
; only bit maskmovdqu inspects per byte:
;   xmmE = ones >> 4*(15-n), xmmH = ones >> 4*(16-n)   (per quadword)
; and interleaving their low bytes yields exactly n selected bytes.
mov eax,ecx
|
||||||
|
xor ecx, byte 0x0F
|
||||||
|
shl ecx, 2
|
||||||
|
movd xmmB,ecx
|
||||||
|
psrlq xmmH,4
|
||||||
|
pcmpeqb xmmE,xmmE
|
||||||
|
psrlq xmmH,xmmB
|
||||||
|
psrlq xmmE,xmmB
|
||||||
|
punpcklbw xmmE,xmmH
|
||||||
|
; ----------------
|
||||||
|
; If outptr is misaligned by ecx bytes and the n bytes end at or before the
; next 16-byte boundary, round edi down and shift data and mask left by
; ecx bytes so the masked store is issued at the aligned address.
; Otherwise (.adj0) store at edi as it stands.
mov ecx,edi
|
||||||
|
and ecx, byte SIZEOF_XMMWORD-1
|
||||||
|
jz short .adj0
|
||||||
|
add eax,ecx
|
||||||
|
cmp eax, byte SIZEOF_XMMWORD
|
||||||
|
ja short .adj0
|
||||||
|
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
||||||
|
; pslldq accepts only an immediate count, so the variable 128-bit left
; shift is assembled from 64-bit shifts.  ecx becomes the shift in bits.
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
||||||
|
movdqa xmmG,xmmA
|
||||||
|
movdqa xmmC,xmmE
|
||||||
|
pslldq xmmA, SIZEOF_XMMWORD/2
|
||||||
|
pslldq xmmE, SIZEOF_XMMWORD/2
|
||||||
|
movd xmmD,ecx
|
||||||
|
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
||||||
|
jb short .adj1
|
||||||
|
; Shift of 64 bits or more: the low quadword, already moved to the high
; half, only needs the remaining (shift-64) bits.
movd xmmF,ecx
|
||||||
|
psllq xmmA,xmmF
|
||||||
|
psllq xmmE,xmmF
|
||||||
|
jmp short .adj0
|
||||||
|
; Shift below 64 bits: combine the per-quadword left shift with the bits
; carried from the low quadword into the high one (low >> (64-shift)).
.adj1: neg ecx
|
||||||
|
movd xmmF,ecx
|
||||||
|
psrlq xmmA,xmmF
|
||||||
|
psrlq xmmE,xmmF
|
||||||
|
psllq xmmG,xmmD
|
||||||
|
psllq xmmC,xmmD
|
||||||
|
por xmmA,xmmG
|
||||||
|
por xmmE,xmmC
|
||||||
|
.adj0: ; ----------------
|
||||||
|
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
|
||||||
|
; 4-byte pixels: xmm6/xmm7 are loaded with the filler byte (0xFF when
; RGBX_FILLER_0XFF is defined, else 0x00), then the eight planar vectors
; are interleaved into 64 bytes of packed pixels, delivered in
; xmmA, xmmD, xmmC, xmmH in memory order.  In the lane diagrams the first
; hex digit is the byte position inside the pixel, the second the column.
; NOTE(review): xmmA..xmmH are register aliases defined outside this view;
; which alias maps to xmm6/xmm7 presumably follows RGB_RED/GREEN/BLUE --
; confirm in the include that defines them.
%else ; RGB_PIXELSIZE == 4 ; -----------
|
||||||
|
|
||||||
|
%ifdef RGBX_FILLER_0XFF
|
||||||
|
pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
|
||||||
|
pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
|
||||||
|
%else
|
||||||
|
pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
|
||||||
|
pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
|
||||||
|
%endif
|
||||||
|
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
|
||||||
|
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
|
||||||
|
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
|
||||||
|
; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
|
||||||
|
|
||||||
|
; Stage 1: pair bytes 0/1 and 2/3 of each pixel, even and odd columns.
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
|
||||||
|
punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
|
||||||
|
punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
|
||||||
|
punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
|
||||||
|
|
||||||
|
; Stage 2: join the pairs into complete 4-byte pixels.
movdqa xmmC,xmmA
|
||||||
|
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
|
||||||
|
punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
|
||||||
|
movdqa xmmG,xmmB
|
||||||
|
punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
|
||||||
|
punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
|
||||||
|
|
||||||
|
; Stage 3: alternate even and odd pixels to restore column order.
movdqa xmmD,xmmA
|
||||||
|
punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
|
||||||
|
punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
|
||||||
|
movdqa xmmH,xmmC
|
||||||
|
punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
|
||||||
|
punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
|
||||||
|
|
||||||
|
; 4-byte pixels, full group: with at least 16 columns left, write all
; 64 bytes from xmmA, xmmD, xmmC, xmmH.  Fewer than 16 goes to .column_st32.
cmp ecx, byte SIZEOF_XMMWORD
|
||||||
|
jb short .column_st32
|
||||||
|
|
||||||
|
; Pick the store flavour from the alignment of outptr.
test edi, SIZEOF_XMMWORD-1
|
||||||
|
jnz short .out1
|
||||||
|
; --(aligned)-------------------
|
||||||
|
; Non-temporal aligned stores; edi is advanced once for all four.
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
||||||
|
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
||||||
|
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
||||||
|
movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
||||||
|
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
||||||
|
jmp short .out0
|
||||||
|
.out1: ; --(unaligned)-----------------
|
||||||
|
; maskmovdqu writes to [edi] implicitly and has no displacement form, so
; edi is stepped after each store.  An all-ones mask selects every byte.
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
||||||
|
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
.out0:
|
||||||
|
; 16 columns consumed; stop when the row is exhausted, otherwise advance
; the three input pointers and convert the next group.
sub ecx, byte SIZEOF_XMMWORD
|
||||||
|
jz near .nextrow
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_XMMWORD ; inptr0
|
||||||
|
add ebx, byte SIZEOF_XMMWORD ; inptr1
|
||||||
|
add edx, byte SIZEOF_XMMWORD ; inptr2
|
||||||
|
jmp near .columnloop
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; 4-byte pixels, final partial group (fewer than 16 columns left).
; ecx stays in columns here: each 16-byte vector holds 4 pixels.  Groups of
; 8 and then 4 columns are flushed with an all-ones maskmovdqu, and the
; last 1..3 pixels go out through a computed mask.
.column_st32:
|
||||||
|
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
||||||
|
; 8 or more columns left: emit xmmA and xmmD, then move xmmC/xmmH down.
cmp ecx, byte SIZEOF_XMMWORD/2
|
||||||
|
jb short .column_st16
|
||||||
|
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
movdqa xmmA,xmmC
|
||||||
|
movdqa xmmD,xmmH
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD/2
|
||||||
|
.column_st16:
|
||||||
|
; 4 or more columns left: emit xmmA, keep xmmD as the remainder.
cmp ecx, byte SIZEOF_XMMWORD/4
|
||||||
|
jb short .column_st15
|
||||||
|
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
add edi, byte SIZEOF_XMMWORD ; outptr
|
||||||
|
movdqa xmmA,xmmD
|
||||||
|
sub ecx, byte SIZEOF_XMMWORD/4
|
||||||
|
.column_st15:
|
||||||
|
; Nothing left to write when no column remains.
cmp ecx, byte SIZEOF_XMMWORD/16
|
||||||
|
jb short .nextrow
|
||||||
|
; n = ecx = columns still to write (1..3); a copy is kept in eax, which
; overwrites the GOT base (not needed again before the next row).
; Mask: ones >> 16*(4-n) per quadword leaves n low words set; duplicating
; each byte of the low quadword then selects the low 4*n bytes.
mov eax,ecx
|
||||||
|
xor ecx, byte 0x03
|
||||||
|
inc ecx
|
||||||
|
shl ecx, 4
|
||||||
|
movd xmmF,ecx
|
||||||
|
psrlq xmmE,xmmF
|
||||||
|
punpcklbw xmmE,xmmE
|
||||||
|
; ----------------
|
||||||
|
; If outptr is misaligned by ecx bytes and the 4*n bytes end at or before
; the next 16-byte boundary, round edi down and shift data and mask left by
; ecx bytes so the masked store is issued at the aligned address.
; Otherwise (.adj0) store at edi as it stands.
mov ecx,edi
|
||||||
|
and ecx, byte SIZEOF_XMMWORD-1
|
||||||
|
jz short .adj0
|
||||||
|
lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
|
||||||
|
cmp eax, byte SIZEOF_XMMWORD
|
||||||
|
ja short .adj0
|
||||||
|
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
||||||
|
; pslldq accepts only an immediate count, so the variable 128-bit left
; shift is assembled from 64-bit shifts.  ecx becomes the shift in bits.
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
||||||
|
movdqa xmmB,xmmA
|
||||||
|
movdqa xmmG,xmmE
|
||||||
|
pslldq xmmA, SIZEOF_XMMWORD/2
|
||||||
|
pslldq xmmE, SIZEOF_XMMWORD/2
|
||||||
|
movd xmmC,ecx
|
||||||
|
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
||||||
|
jb short .adj1
|
||||||
|
; Shift of 64 bits or more: the low quadword, already moved to the high
; half, only needs the remaining (shift-64) bits.
movd xmmH,ecx
|
||||||
|
psllq xmmA,xmmH
|
||||||
|
psllq xmmE,xmmH
|
||||||
|
jmp short .adj0
|
||||||
|
; Shift below 64 bits: combine the per-quadword left shift with the bits
; carried from the low quadword into the high one (low >> (64-shift)).
.adj1: neg ecx
|
||||||
|
movd xmmH,ecx
|
||||||
|
psrlq xmmA,xmmH
|
||||||
|
psrlq xmmE,xmmH
|
||||||
|
psllq xmmB,xmmC
|
||||||
|
psllq xmmG,xmmC
|
||||||
|
por xmmA,xmmB
|
||||||
|
por xmmE,xmmG
|
||||||
|
.adj0: ; ----------------
|
||||||
|
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
||||||
|
|
||||||
|
%endif ; RGB_PIXELSIZE ; ---------------
|
||||||
|
|
||||||
|
alignx 16,7
|
||||||
|
|
||||||
|
; End of one output row.  Restores the six values pushed at the top of the
; row loop (in reverse order), steps the four row-pointer-array cursors to
; the next row, and loops while rows remain.
.nextrow:
|
||||||
|
pop ecx
|
||||||
|
pop esi
|
||||||
|
pop ebx
|
||||||
|
pop edx
|
||||||
|
pop edi
|
||||||
|
pop eax
|
||||||
|
|
||||||
|
add esi, byte SIZEOF_JSAMPROW
|
||||||
|
add ebx, byte SIZEOF_JSAMPROW
|
||||||
|
add edx, byte SIZEOF_JSAMPROW
|
||||||
|
add edi, byte SIZEOF_JSAMPROW ; output_buf
|
||||||
|
dec eax ; num_rows
|
||||||
|
jg near .rowloop
|
||||||
|
|
||||||
|
; Every pixel store in this routine is non-temporal (movntdq or
; maskmovdqu); fence once after the last row, before returning.
sfence ; flush the write buffer
|
||||||
|
|
||||||
|
; Epilogue, also reached directly by the early exits for zero width or a
; non-positive row count.  Restores the callee-saved registers pushed in the
; prologue and unwinds the realigned frame.
.return:
|
||||||
|
pop edi
|
||||||
|
pop esi
|
||||||
|
; pop edx ; need not be preserved
|
||||||
|
; pop ecx ; need not be preserved
|
||||||
|
pop ebx
|
||||||
|
; Resetting esp to ebp discards the GOT slot and the wk[] scratch area;
; [ebp] holds the pre-alignment stack pointer saved by the prologue, so
; "pop esp" returns to the unaligned stack where the caller's ebp sits.
mov esp,ebp ; esp <- aligned ebp
|
||||||
|
pop esp ; esp <- original ebp
|
||||||
|
pop ebp
|
||||||
|
ret
|
||||||
|
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
; jdcolmmx.asm - colorspace conversion (MMX)
|
; jdcolmmx.asm - colorspace conversion (MMX)
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
; Copyright 2009 D. R. Commander
|
||||||
;
|
;
|
||||||
; Based on
|
; Based on
|
||||||
; x86 SIMD extension for IJG JPEG library
|
; x86 SIMD extension for IJG JPEG library
|
||||||
@@ -48,386 +49,70 @@ PD_ONEHALF times 2 dd 1 << (SCALEBITS-1)
|
|||||||
alignz 16
|
alignz 16
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
SECTION SEG_TEXT
|
%include "jdclrmmx.asm"
|
||||||
BITS 32
|
|
||||||
;
|
|
||||||
; Convert some rows of samples to the output colorspace.
|
|
||||||
;
|
|
||||||
; GLOBAL(void)
|
|
||||||
; jsimd_ycc_rgb_convert_mmx (JDIMENSION out_width,
|
|
||||||
; JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
||||||
; JSAMPARRAY output_buf, int num_rows)
|
|
||||||
;
|
|
||||||
|
|
||||||
%define out_width(b) (b)+8 ; JDIMENSION out_width
|
%undef RGB_RED
|
||||||
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
|
%undef RGB_GREEN
|
||||||
%define input_row(b) (b)+16 ; JDIMENSION input_row
|
%undef RGB_BLUE
|
||||||
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
%undef RGB_PIXELSIZE
|
||||||
%define num_rows(b) (b)+24 ; int num_rows
|
%define RGB_RED 0
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 2
|
||||||
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx
|
||||||
|
%include "jdclrmmx.asm"
|
||||||
|
|
||||||
%define original_ebp ebp+0
|
%undef RGB_RED
|
||||||
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_MMWORD ; mmword wk[WK_NUM]
|
%undef RGB_GREEN
|
||||||
%define WK_NUM 2
|
%undef RGB_BLUE
|
||||||
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 0
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 2
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx
|
||||||
|
%include "jdclrmmx.asm"
|
||||||
|
|
||||||
align 16
|
%undef RGB_RED
|
||||||
global EXTN(jsimd_ycc_rgb_convert_mmx)
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 2
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 0
|
||||||
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx
|
||||||
|
%include "jdclrmmx.asm"
|
||||||
|
|
||||||
EXTN(jsimd_ycc_rgb_convert_mmx):
|
%undef RGB_RED
|
||||||
push ebp
|
%undef RGB_GREEN
|
||||||
mov eax,esp ; eax = original ebp
|
%undef RGB_BLUE
|
||||||
sub esp, byte 4
|
%undef RGB_PIXELSIZE
|
||||||
and esp, byte (-SIZEOF_MMWORD) ; align to 64 bits
|
%define RGB_RED 2
|
||||||
mov [esp],eax
|
%define RGB_GREEN 1
|
||||||
mov ebp,esp ; ebp = aligned ebp
|
%define RGB_BLUE 0
|
||||||
lea esp, [wk(0)]
|
%define RGB_PIXELSIZE 4
|
||||||
pushpic eax ; make a room for GOT address
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx
|
||||||
push ebx
|
%include "jdclrmmx.asm"
|
||||||
; push ecx ; need not be preserved
|
|
||||||
; push edx ; need not be preserved
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
get_GOT ebx ; get GOT address
|
%undef RGB_RED
|
||||||
movpic POINTER [gotptr], ebx ; save GOT address
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
%undef RGB_PIXELSIZE
|
||||||
test ecx,ecx
|
%define RGB_RED 3
|
||||||
jz near .return
|
%define RGB_GREEN 2
|
||||||
|
%define RGB_BLUE 1
|
||||||
push ecx
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx
|
||||||
mov edi, JSAMPIMAGE [input_buf(eax)]
|
%include "jdclrmmx.asm"
|
||||||
mov ecx, JDIMENSION [input_row(eax)]
|
|
||||||
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
|
|
||||||
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
|
|
||||||
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
|
|
||||||
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
|
|
||||||
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
|
|
||||||
pop ecx
|
|
||||||
|
|
||||||
mov edi, JSAMPARRAY [output_buf(eax)]
|
|
||||||
mov eax, INT [num_rows(eax)]
|
|
||||||
test eax,eax
|
|
||||||
jle near .return
|
|
||||||
alignx 16,7
|
|
||||||
.rowloop:
|
|
||||||
push eax
|
|
||||||
push edi
|
|
||||||
push edx
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push ecx ; col
|
|
||||||
|
|
||||||
mov esi, JSAMPROW [esi] ; inptr0
|
|
||||||
mov ebx, JSAMPROW [ebx] ; inptr1
|
|
||||||
mov edx, JSAMPROW [edx] ; inptr2
|
|
||||||
mov edi, JSAMPROW [edi] ; outptr
|
|
||||||
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
||||||
alignx 16,7
|
|
||||||
.columnloop:
|
|
||||||
|
|
||||||
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
|
|
||||||
movq mm1, MMWORD [edx] ; mm1=Cr(01234567)
|
|
||||||
|
|
||||||
pcmpeqw mm4,mm4
|
|
||||||
pcmpeqw mm7,mm7
|
|
||||||
psrlw mm4,BYTE_BIT
|
|
||||||
psllw mm7,7 ; mm7={0xFF80 0xFF80 0xFF80 0xFF80}
|
|
||||||
movq mm0,mm4 ; mm0=mm4={0xFF 0x00 0xFF 0x00 ..}
|
|
||||||
|
|
||||||
pand mm4,mm5 ; mm4=Cb(0246)=CbE
|
|
||||||
psrlw mm5,BYTE_BIT ; mm5=Cb(1357)=CbO
|
|
||||||
pand mm0,mm1 ; mm0=Cr(0246)=CrE
|
|
||||||
psrlw mm1,BYTE_BIT ; mm1=Cr(1357)=CrO
|
|
||||||
|
|
||||||
paddw mm4,mm7
|
|
||||||
paddw mm5,mm7
|
|
||||||
paddw mm0,mm7
|
|
||||||
paddw mm1,mm7
|
|
||||||
|
|
||||||
; (Original)
|
|
||||||
; R = Y + 1.40200 * Cr
|
|
||||||
; G = Y - 0.34414 * Cb - 0.71414 * Cr
|
|
||||||
; B = Y + 1.77200 * Cb
|
|
||||||
;
|
|
||||||
; (This implementation)
|
|
||||||
; R = Y + 0.40200 * Cr + Cr
|
|
||||||
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
|
|
||||||
; B = Y - 0.22800 * Cb + Cb + Cb
|
|
||||||
|
|
||||||
movq mm2,mm4 ; mm2=CbE
|
|
||||||
movq mm3,mm5 ; mm3=CbO
|
|
||||||
paddw mm4,mm4 ; mm4=2*CbE
|
|
||||||
paddw mm5,mm5 ; mm5=2*CbO
|
|
||||||
movq mm6,mm0 ; mm6=CrE
|
|
||||||
movq mm7,mm1 ; mm7=CrO
|
|
||||||
paddw mm0,mm0 ; mm0=2*CrE
|
|
||||||
paddw mm1,mm1 ; mm1=2*CrO
|
|
||||||
|
|
||||||
pmulhw mm4,[GOTOFF(eax,PW_MF0228)] ; mm4=(2*CbE * -FIX(0.22800))
|
|
||||||
pmulhw mm5,[GOTOFF(eax,PW_MF0228)] ; mm5=(2*CbO * -FIX(0.22800))
|
|
||||||
pmulhw mm0,[GOTOFF(eax,PW_F0402)] ; mm0=(2*CrE * FIX(0.40200))
|
|
||||||
pmulhw mm1,[GOTOFF(eax,PW_F0402)] ; mm1=(2*CrO * FIX(0.40200))
|
|
||||||
|
|
||||||
paddw mm4,[GOTOFF(eax,PW_ONE)]
|
|
||||||
paddw mm5,[GOTOFF(eax,PW_ONE)]
|
|
||||||
psraw mm4,1 ; mm4=(CbE * -FIX(0.22800))
|
|
||||||
psraw mm5,1 ; mm5=(CbO * -FIX(0.22800))
|
|
||||||
paddw mm0,[GOTOFF(eax,PW_ONE)]
|
|
||||||
paddw mm1,[GOTOFF(eax,PW_ONE)]
|
|
||||||
psraw mm0,1 ; mm0=(CrE * FIX(0.40200))
|
|
||||||
psraw mm1,1 ; mm1=(CrO * FIX(0.40200))
|
|
||||||
|
|
||||||
paddw mm4,mm2
|
|
||||||
paddw mm5,mm3
|
|
||||||
paddw mm4,mm2 ; mm4=(CbE * FIX(1.77200))=(B-Y)E
|
|
||||||
paddw mm5,mm3 ; mm5=(CbO * FIX(1.77200))=(B-Y)O
|
|
||||||
paddw mm0,mm6 ; mm0=(CrE * FIX(1.40200))=(R-Y)E
|
|
||||||
paddw mm1,mm7 ; mm1=(CrO * FIX(1.40200))=(R-Y)O
|
|
||||||
|
|
||||||
movq MMWORD [wk(0)], mm4 ; wk(0)=(B-Y)E
|
|
||||||
movq MMWORD [wk(1)], mm5 ; wk(1)=(B-Y)O
|
|
||||||
|
|
||||||
movq mm4,mm2
|
|
||||||
movq mm5,mm3
|
|
||||||
punpcklwd mm2,mm6
|
|
||||||
punpckhwd mm4,mm6
|
|
||||||
pmaddwd mm2,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
pmaddwd mm4,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
punpcklwd mm3,mm7
|
|
||||||
punpckhwd mm5,mm7
|
|
||||||
pmaddwd mm3,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
pmaddwd mm5,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
|
|
||||||
paddd mm2,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
paddd mm4,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
psrad mm2,SCALEBITS
|
|
||||||
psrad mm4,SCALEBITS
|
|
||||||
paddd mm3,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
paddd mm5,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
psrad mm3,SCALEBITS
|
|
||||||
psrad mm5,SCALEBITS
|
|
||||||
|
|
||||||
packssdw mm2,mm4 ; mm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
|
|
||||||
packssdw mm3,mm5 ; mm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
|
|
||||||
psubw mm2,mm6 ; mm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
|
|
||||||
psubw mm3,mm7 ; mm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
|
|
||||||
|
|
||||||
movq mm5, MMWORD [esi] ; mm5=Y(01234567)
|
|
||||||
|
|
||||||
pcmpeqw mm4,mm4
|
|
||||||
psrlw mm4,BYTE_BIT ; mm4={0xFF 0x00 0xFF 0x00 ..}
|
|
||||||
pand mm4,mm5 ; mm4=Y(0246)=YE
|
|
||||||
psrlw mm5,BYTE_BIT ; mm5=Y(1357)=YO
|
|
||||||
|
|
||||||
paddw mm0,mm4 ; mm0=((R-Y)E+YE)=RE=(R0 R2 R4 R6)
|
|
||||||
paddw mm1,mm5 ; mm1=((R-Y)O+YO)=RO=(R1 R3 R5 R7)
|
|
||||||
packuswb mm0,mm0 ; mm0=(R0 R2 R4 R6 ** ** ** **)
|
|
||||||
packuswb mm1,mm1 ; mm1=(R1 R3 R5 R7 ** ** ** **)
|
|
||||||
|
|
||||||
paddw mm2,mm4 ; mm2=((G-Y)E+YE)=GE=(G0 G2 G4 G6)
|
|
||||||
paddw mm3,mm5 ; mm3=((G-Y)O+YO)=GO=(G1 G3 G5 G7)
|
|
||||||
packuswb mm2,mm2 ; mm2=(G0 G2 G4 G6 ** ** ** **)
|
|
||||||
packuswb mm3,mm3 ; mm3=(G1 G3 G5 G7 ** ** ** **)
|
|
||||||
|
|
||||||
paddw mm4, MMWORD [wk(0)] ; mm4=(YE+(B-Y)E)=BE=(B0 B2 B4 B6)
|
|
||||||
paddw mm5, MMWORD [wk(1)] ; mm5=(YO+(B-Y)O)=BO=(B1 B3 B5 B7)
|
|
||||||
packuswb mm4,mm4 ; mm4=(B0 B2 B4 B6 ** ** ** **)
|
|
||||||
packuswb mm5,mm5 ; mm5=(B1 B3 B5 B7 ** ** ** **)
|
|
||||||
|
|
||||||
%if RGB_PIXELSIZE == 3 ; ---------------
|
|
||||||
|
|
||||||
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
|
|
||||||
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
|
|
||||||
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
|
|
||||||
; mmG=(** ** ** ** ** ** ** **), mmH=(** ** ** ** ** ** ** **)
|
|
||||||
|
|
||||||
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
|
|
||||||
punpcklbw mmE,mmB ; mmE=(20 01 22 03 24 05 26 07)
|
|
||||||
punpcklbw mmD,mmF ; mmD=(11 21 13 23 15 25 17 27)
|
|
||||||
|
|
||||||
movq mmG,mmA
|
|
||||||
movq mmH,mmA
|
|
||||||
punpcklwd mmA,mmE ; mmA=(00 10 20 01 02 12 22 03)
|
|
||||||
punpckhwd mmG,mmE ; mmG=(04 14 24 05 06 16 26 07)
|
|
||||||
|
|
||||||
psrlq mmH,2*BYTE_BIT ; mmH=(02 12 04 14 06 16 -- --)
|
|
||||||
psrlq mmE,2*BYTE_BIT ; mmE=(22 03 24 05 26 07 -- --)
|
|
||||||
|
|
||||||
movq mmC,mmD
|
|
||||||
movq mmB,mmD
|
|
||||||
punpcklwd mmD,mmH ; mmD=(11 21 02 12 13 23 04 14)
|
|
||||||
punpckhwd mmC,mmH ; mmC=(15 25 06 16 17 27 -- --)
|
|
||||||
|
|
||||||
psrlq mmB,2*BYTE_BIT ; mmB=(13 23 15 25 17 27 -- --)
|
|
||||||
|
|
||||||
movq mmF,mmE
|
|
||||||
punpcklwd mmE,mmB ; mmE=(22 03 13 23 24 05 15 25)
|
|
||||||
punpckhwd mmF,mmB ; mmF=(26 07 17 27 -- -- -- --)
|
|
||||||
|
|
||||||
punpckldq mmA,mmD ; mmA=(00 10 20 01 11 21 02 12)
|
|
||||||
punpckldq mmE,mmG ; mmE=(22 03 13 23 04 14 24 05)
|
|
||||||
punpckldq mmC,mmF ; mmC=(15 25 06 16 26 07 17 27)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD
|
|
||||||
jb short .column_st16
|
|
||||||
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
|
||||||
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
|
|
||||||
|
|
||||||
sub ecx, byte SIZEOF_MMWORD
|
|
||||||
jz short .nextrow
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_MMWORD ; inptr0
|
|
||||||
add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
||||||
add edx, byte SIZEOF_MMWORD ; inptr2
|
|
||||||
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
||||||
jmp near .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.column_st16:
|
|
||||||
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
||||||
cmp ecx, byte 2*SIZEOF_MMWORD
|
|
||||||
jb short .column_st8
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq MMWORD [edi+1*SIZEOF_MMWORD], mmE
|
|
||||||
movq mmA,mmC
|
|
||||||
sub ecx, byte 2*SIZEOF_MMWORD
|
|
||||||
add edi, byte 2*SIZEOF_MMWORD
|
|
||||||
jmp short .column_st4
|
|
||||||
.column_st8:
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD
|
|
||||||
jb short .column_st4
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq mmA,mmE
|
|
||||||
sub ecx, byte SIZEOF_MMWORD
|
|
||||||
add edi, byte SIZEOF_MMWORD
|
|
||||||
.column_st4:
|
|
||||||
movd eax,mmA
|
|
||||||
cmp ecx, byte SIZEOF_DWORD
|
|
||||||
jb short .column_st2
|
|
||||||
mov DWORD [edi+0*SIZEOF_DWORD], eax
|
|
||||||
psrlq mmA,DWORD_BIT
|
|
||||||
movd eax,mmA
|
|
||||||
sub ecx, byte SIZEOF_DWORD
|
|
||||||
add edi, byte SIZEOF_DWORD
|
|
||||||
.column_st2:
|
|
||||||
cmp ecx, byte SIZEOF_WORD
|
|
||||||
jb short .column_st1
|
|
||||||
mov WORD [edi+0*SIZEOF_WORD], ax
|
|
||||||
shr eax,WORD_BIT
|
|
||||||
sub ecx, byte SIZEOF_WORD
|
|
||||||
add edi, byte SIZEOF_WORD
|
|
||||||
.column_st1:
|
|
||||||
cmp ecx, byte SIZEOF_BYTE
|
|
||||||
jb short .nextrow
|
|
||||||
mov BYTE [edi+0*SIZEOF_BYTE], al
|
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
|
||||||
|
|
||||||
%ifdef RGBX_FILLER_0XFF
|
|
||||||
pcmpeqb mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
|
|
||||||
pcmpeqb mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
|
|
||||||
%else
|
|
||||||
pxor mm6,mm6 ; mm6=(X0 X2 X4 X6 ** ** ** **)
|
|
||||||
pxor mm7,mm7 ; mm7=(X1 X3 X5 X7 ** ** ** **)
|
|
||||||
%endif
|
|
||||||
; mmA=(00 02 04 06 ** ** ** **), mmB=(01 03 05 07 ** ** ** **)
|
|
||||||
; mmC=(10 12 14 16 ** ** ** **), mmD=(11 13 15 17 ** ** ** **)
|
|
||||||
; mmE=(20 22 24 26 ** ** ** **), mmF=(21 23 25 27 ** ** ** **)
|
|
||||||
; mmG=(30 32 34 36 ** ** ** **), mmH=(31 33 35 37 ** ** ** **)
|
|
||||||
|
|
||||||
punpcklbw mmA,mmC ; mmA=(00 10 02 12 04 14 06 16)
|
|
||||||
punpcklbw mmE,mmG ; mmE=(20 30 22 32 24 34 26 36)
|
|
||||||
punpcklbw mmB,mmD ; mmB=(01 11 03 13 05 15 07 17)
|
|
||||||
punpcklbw mmF,mmH ; mmF=(21 31 23 33 25 35 27 37)
|
|
||||||
|
|
||||||
movq mmC,mmA
|
|
||||||
punpcklwd mmA,mmE ; mmA=(00 10 20 30 02 12 22 32)
|
|
||||||
punpckhwd mmC,mmE ; mmC=(04 14 24 34 06 16 26 36)
|
|
||||||
movq mmG,mmB
|
|
||||||
punpcklwd mmB,mmF ; mmB=(01 11 21 31 03 13 23 33)
|
|
||||||
punpckhwd mmG,mmF ; mmG=(05 15 25 35 07 17 27 37)
|
|
||||||
|
|
||||||
movq mmD,mmA
|
|
||||||
punpckldq mmA,mmB ; mmA=(00 10 20 30 01 11 21 31)
|
|
||||||
punpckhdq mmD,mmB ; mmD=(02 12 22 32 03 13 23 33)
|
|
||||||
movq mmH,mmC
|
|
||||||
punpckldq mmC,mmG ; mmC=(04 14 24 34 05 15 25 35)
|
|
||||||
punpckhdq mmH,mmG ; mmH=(06 16 26 36 07 17 27 37)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD
|
|
||||||
jb short .column_st16
|
|
||||||
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
|
||||||
movq MMWORD [edi+2*SIZEOF_MMWORD], mmC
|
|
||||||
movq MMWORD [edi+3*SIZEOF_MMWORD], mmH
|
|
||||||
|
|
||||||
sub ecx, byte SIZEOF_MMWORD
|
|
||||||
jz short .nextrow
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_MMWORD ; inptr0
|
|
||||||
add ebx, byte SIZEOF_MMWORD ; inptr1
|
|
||||||
add edx, byte SIZEOF_MMWORD ; inptr2
|
|
||||||
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
|
|
||||||
jmp near .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.column_st16:
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD/2
|
|
||||||
jb short .column_st8
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq MMWORD [edi+1*SIZEOF_MMWORD], mmD
|
|
||||||
movq mmA,mmC
|
|
||||||
movq mmD,mmH
|
|
||||||
sub ecx, byte SIZEOF_MMWORD/2
|
|
||||||
add edi, byte 2*SIZEOF_MMWORD
|
|
||||||
.column_st8:
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD/4
|
|
||||||
jb short .column_st4
|
|
||||||
movq MMWORD [edi+0*SIZEOF_MMWORD], mmA
|
|
||||||
movq mmA,mmD
|
|
||||||
sub ecx, byte SIZEOF_MMWORD/4
|
|
||||||
add edi, byte 1*SIZEOF_MMWORD
|
|
||||||
.column_st4:
|
|
||||||
cmp ecx, byte SIZEOF_MMWORD/8
|
|
||||||
jb short .nextrow
|
|
||||||
movd DWORD [edi+0*SIZEOF_DWORD], mmA
|
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
|
||||||
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.nextrow:
|
|
||||||
pop ecx
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop edx
|
|
||||||
pop edi
|
|
||||||
pop eax
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_JSAMPROW
|
|
||||||
add ebx, byte SIZEOF_JSAMPROW
|
|
||||||
add edx, byte SIZEOF_JSAMPROW
|
|
||||||
add edi, byte SIZEOF_JSAMPROW ; output_buf
|
|
||||||
dec eax ; num_rows
|
|
||||||
jg near .rowloop
|
|
||||||
|
|
||||||
emms ; empty MMX state
|
|
||||||
|
|
||||||
.return:
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
; pop edx ; need not be preserved
|
|
||||||
; pop ecx ; need not be preserved
|
|
||||||
pop ebx
|
|
||||||
mov esp,ebp ; esp <- aligned ebp
|
|
||||||
pop esp ; esp <- original ebp
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
; XRGB variant of the YCbCr->RGB MMX converter.
; The previous component-layout macros are dropped and redefined so that
; red, green and blue occupy positions 1, 2 and 3 of a 4-byte pixel
; (position 0 is presumably the filler byte -- confirm against jdclrmmx.asm).
; The routine name jsimd_ycc_rgb_convert_mmx is then remapped to
; jsimd_ycc_extxrgb_convert_mmx before jdclrmmx.asm is included, so the
; included code is assembled under the XRGB-specific symbol.
%undef RGB_RED
|
||||||
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 1
|
||||||
|
%define RGB_GREEN 2
|
||||||
|
%define RGB_BLUE 3
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx
|
||||||
|
%include "jdclrmmx.asm"
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
; jdcolss2.asm - colorspace conversion (SSE2)
|
; jdcolss2.asm - colorspace conversion (SSE2)
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
; Copyright 2009 D. R. Commander
|
||||||
;
|
;
|
||||||
; Based on
|
; Based on
|
||||||
; x86 SIMD extension for IJG JPEG library
|
; x86 SIMD extension for IJG JPEG library
|
||||||
@@ -48,484 +49,70 @@ PD_ONEHALF times 4 dd 1 << (SCALEBITS-1)
|
|||||||
alignz 16
|
alignz 16
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
SECTION SEG_TEXT
|
%include "jdclrss2.asm"
|
||||||
BITS 32
|
|
||||||
;
|
|
||||||
; Convert some rows of samples to the output colorspace.
|
|
||||||
;
|
|
||||||
; GLOBAL(void)
|
|
||||||
; jsimd_ycc_rgb_convert_sse2 (JDIMENSION out_width,
|
|
||||||
; JSAMPIMAGE input_buf, JDIMENSION input_row,
|
|
||||||
; JSAMPARRAY output_buf, int num_rows)
|
|
||||||
;
|
|
||||||
|
|
||||||
%define out_width(b) (b)+8 ; JDIMENSION out_width
|
%undef RGB_RED
|
||||||
%define input_buf(b) (b)+12 ; JSAMPIMAGE input_buf
|
%undef RGB_GREEN
|
||||||
%define input_row(b) (b)+16 ; JDIMENSION input_row
|
%undef RGB_BLUE
|
||||||
%define output_buf(b) (b)+20 ; JSAMPARRAY output_buf
|
%undef RGB_PIXELSIZE
|
||||||
%define num_rows(b) (b)+24 ; int num_rows
|
%define RGB_RED 0
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 2
|
||||||
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
|
||||||
|
%include "jdclrss2.asm"
|
||||||
|
|
||||||
%define original_ebp ebp+0
|
%undef RGB_RED
|
||||||
%define wk(i) ebp-(WK_NUM-(i))*SIZEOF_XMMWORD ; xmmword wk[WK_NUM]
|
%undef RGB_GREEN
|
||||||
%define WK_NUM 2
|
%undef RGB_BLUE
|
||||||
%define gotptr wk(0)-SIZEOF_POINTER ; void * gotptr
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 0
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 2
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
|
||||||
|
%include "jdclrss2.asm"
|
||||||
|
|
||||||
align 16
|
%undef RGB_RED
|
||||||
global EXTN(jsimd_ycc_rgb_convert_sse2)
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 2
|
||||||
|
%define RGB_GREEN 1
|
||||||
|
%define RGB_BLUE 0
|
||||||
|
%define RGB_PIXELSIZE 3
|
||||||
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
|
||||||
|
%include "jdclrss2.asm"
|
||||||
|
|
||||||
EXTN(jsimd_ycc_rgb_convert_sse2):
|
%undef RGB_RED
|
||||||
push ebp
|
%undef RGB_GREEN
|
||||||
mov eax,esp ; eax = original ebp
|
%undef RGB_BLUE
|
||||||
sub esp, byte 4
|
%undef RGB_PIXELSIZE
|
||||||
and esp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
%define RGB_RED 2
|
||||||
mov [esp],eax
|
%define RGB_GREEN 1
|
||||||
mov ebp,esp ; ebp = aligned ebp
|
%define RGB_BLUE 0
|
||||||
lea esp, [wk(0)]
|
%define RGB_PIXELSIZE 4
|
||||||
pushpic eax ; make a room for GOT address
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
|
||||||
push ebx
|
%include "jdclrss2.asm"
|
||||||
; push ecx ; need not be preserved
|
|
||||||
; push edx ; need not be preserved
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
get_GOT ebx ; get GOT address
|
%undef RGB_RED
|
||||||
movpic POINTER [gotptr], ebx ; save GOT address
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
|
%undef RGB_PIXELSIZE
|
||||||
test ecx,ecx
|
%define RGB_RED 3
|
||||||
jz near .return
|
%define RGB_GREEN 2
|
||||||
|
%define RGB_BLUE 1
|
||||||
push ecx
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
|
||||||
mov edi, JSAMPIMAGE [input_buf(eax)]
|
%include "jdclrss2.asm"
|
||||||
mov ecx, JDIMENSION [input_row(eax)]
|
|
||||||
mov esi, JSAMPARRAY [edi+0*SIZEOF_JSAMPARRAY]
|
|
||||||
mov ebx, JSAMPARRAY [edi+1*SIZEOF_JSAMPARRAY]
|
|
||||||
mov edx, JSAMPARRAY [edi+2*SIZEOF_JSAMPARRAY]
|
|
||||||
lea esi, [esi+ecx*SIZEOF_JSAMPROW]
|
|
||||||
lea ebx, [ebx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
lea edx, [edx+ecx*SIZEOF_JSAMPROW]
|
|
||||||
|
|
||||||
pop ecx
|
|
||||||
|
|
||||||
mov edi, JSAMPARRAY [output_buf(eax)]
|
|
||||||
mov eax, INT [num_rows(eax)]
|
|
||||||
test eax,eax
|
|
||||||
jle near .return
|
|
||||||
alignx 16,7
|
|
||||||
.rowloop:
|
|
||||||
push eax
|
|
||||||
push edi
|
|
||||||
push edx
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push ecx ; col
|
|
||||||
|
|
||||||
mov esi, JSAMPROW [esi] ; inptr0
|
|
||||||
mov ebx, JSAMPROW [ebx] ; inptr1
|
|
||||||
mov edx, JSAMPROW [edx] ; inptr2
|
|
||||||
mov edi, JSAMPROW [edi] ; outptr
|
|
||||||
movpic eax, POINTER [gotptr] ; load GOT address (eax)
|
|
||||||
alignx 16,7
|
|
||||||
.columnloop:
|
|
||||||
|
|
||||||
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
|
|
||||||
movdqa xmm1, XMMWORD [edx] ; xmm1=Cr(0123456789ABCDEF)
|
|
||||||
|
|
||||||
pcmpeqw xmm4,xmm4
|
|
||||||
pcmpeqw xmm7,xmm7
|
|
||||||
psrlw xmm4,BYTE_BIT
|
|
||||||
psllw xmm7,7 ; xmm7={0xFF80 0xFF80 0xFF80 0xFF80 ..}
|
|
||||||
movdqa xmm0,xmm4 ; xmm0=xmm4={0xFF 0x00 0xFF 0x00 ..}
|
|
||||||
|
|
||||||
pand xmm4,xmm5 ; xmm4=Cb(02468ACE)=CbE
|
|
||||||
psrlw xmm5,BYTE_BIT ; xmm5=Cb(13579BDF)=CbO
|
|
||||||
pand xmm0,xmm1 ; xmm0=Cr(02468ACE)=CrE
|
|
||||||
psrlw xmm1,BYTE_BIT ; xmm1=Cr(13579BDF)=CrO
|
|
||||||
|
|
||||||
paddw xmm4,xmm7
|
|
||||||
paddw xmm5,xmm7
|
|
||||||
paddw xmm0,xmm7
|
|
||||||
paddw xmm1,xmm7
|
|
||||||
|
|
||||||
; (Original)
|
|
||||||
; R = Y + 1.40200 * Cr
|
|
||||||
; G = Y - 0.34414 * Cb - 0.71414 * Cr
|
|
||||||
; B = Y + 1.77200 * Cb
|
|
||||||
;
|
|
||||||
; (This implementation)
|
|
||||||
; R = Y + 0.40200 * Cr + Cr
|
|
||||||
; G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
|
|
||||||
; B = Y - 0.22800 * Cb + Cb + Cb
|
|
||||||
|
|
||||||
movdqa xmm2,xmm4 ; xmm2=CbE
|
|
||||||
movdqa xmm3,xmm5 ; xmm3=CbO
|
|
||||||
paddw xmm4,xmm4 ; xmm4=2*CbE
|
|
||||||
paddw xmm5,xmm5 ; xmm5=2*CbO
|
|
||||||
movdqa xmm6,xmm0 ; xmm6=CrE
|
|
||||||
movdqa xmm7,xmm1 ; xmm7=CrO
|
|
||||||
paddw xmm0,xmm0 ; xmm0=2*CrE
|
|
||||||
paddw xmm1,xmm1 ; xmm1=2*CrO
|
|
||||||
|
|
||||||
pmulhw xmm4,[GOTOFF(eax,PW_MF0228)] ; xmm4=(2*CbE * -FIX(0.22800))
|
|
||||||
pmulhw xmm5,[GOTOFF(eax,PW_MF0228)] ; xmm5=(2*CbO * -FIX(0.22800))
|
|
||||||
pmulhw xmm0,[GOTOFF(eax,PW_F0402)] ; xmm0=(2*CrE * FIX(0.40200))
|
|
||||||
pmulhw xmm1,[GOTOFF(eax,PW_F0402)] ; xmm1=(2*CrO * FIX(0.40200))
|
|
||||||
|
|
||||||
paddw xmm4,[GOTOFF(eax,PW_ONE)]
|
|
||||||
paddw xmm5,[GOTOFF(eax,PW_ONE)]
|
|
||||||
psraw xmm4,1 ; xmm4=(CbE * -FIX(0.22800))
|
|
||||||
psraw xmm5,1 ; xmm5=(CbO * -FIX(0.22800))
|
|
||||||
paddw xmm0,[GOTOFF(eax,PW_ONE)]
|
|
||||||
paddw xmm1,[GOTOFF(eax,PW_ONE)]
|
|
||||||
psraw xmm0,1 ; xmm0=(CrE * FIX(0.40200))
|
|
||||||
psraw xmm1,1 ; xmm1=(CrO * FIX(0.40200))
|
|
||||||
|
|
||||||
paddw xmm4,xmm2
|
|
||||||
paddw xmm5,xmm3
|
|
||||||
paddw xmm4,xmm2 ; xmm4=(CbE * FIX(1.77200))=(B-Y)E
|
|
||||||
paddw xmm5,xmm3 ; xmm5=(CbO * FIX(1.77200))=(B-Y)O
|
|
||||||
paddw xmm0,xmm6 ; xmm0=(CrE * FIX(1.40200))=(R-Y)E
|
|
||||||
paddw xmm1,xmm7 ; xmm1=(CrO * FIX(1.40200))=(R-Y)O
|
|
||||||
|
|
||||||
movdqa XMMWORD [wk(0)], xmm4 ; wk(0)=(B-Y)E
|
|
||||||
movdqa XMMWORD [wk(1)], xmm5 ; wk(1)=(B-Y)O
|
|
||||||
|
|
||||||
movdqa xmm4,xmm2
|
|
||||||
movdqa xmm5,xmm3
|
|
||||||
punpcklwd xmm2,xmm6
|
|
||||||
punpckhwd xmm4,xmm6
|
|
||||||
pmaddwd xmm2,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
pmaddwd xmm4,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
punpcklwd xmm3,xmm7
|
|
||||||
punpckhwd xmm5,xmm7
|
|
||||||
pmaddwd xmm3,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
pmaddwd xmm5,[GOTOFF(eax,PW_MF0344_F0285)]
|
|
||||||
|
|
||||||
paddd xmm2,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
paddd xmm4,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
psrad xmm2,SCALEBITS
|
|
||||||
psrad xmm4,SCALEBITS
|
|
||||||
paddd xmm3,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
paddd xmm5,[GOTOFF(eax,PD_ONEHALF)]
|
|
||||||
psrad xmm3,SCALEBITS
|
|
||||||
psrad xmm5,SCALEBITS
|
|
||||||
|
|
||||||
packssdw xmm2,xmm4 ; xmm2=CbE*-FIX(0.344)+CrE*FIX(0.285)
|
|
||||||
packssdw xmm3,xmm5 ; xmm3=CbO*-FIX(0.344)+CrO*FIX(0.285)
|
|
||||||
psubw xmm2,xmm6 ; xmm2=CbE*-FIX(0.344)+CrE*-FIX(0.714)=(G-Y)E
|
|
||||||
psubw xmm3,xmm7 ; xmm3=CbO*-FIX(0.344)+CrO*-FIX(0.714)=(G-Y)O
|
|
||||||
|
|
||||||
movdqa xmm5, XMMWORD [esi] ; xmm5=Y(0123456789ABCDEF)
|
|
||||||
|
|
||||||
pcmpeqw xmm4,xmm4
|
|
||||||
psrlw xmm4,BYTE_BIT ; xmm4={0xFF 0x00 0xFF 0x00 ..}
|
|
||||||
pand xmm4,xmm5 ; xmm4=Y(02468ACE)=YE
|
|
||||||
psrlw xmm5,BYTE_BIT ; xmm5=Y(13579BDF)=YO
|
|
||||||
|
|
||||||
paddw xmm0,xmm4 ; xmm0=((R-Y)E+YE)=RE=R(02468ACE)
|
|
||||||
paddw xmm1,xmm5 ; xmm1=((R-Y)O+YO)=RO=R(13579BDF)
|
|
||||||
packuswb xmm0,xmm0 ; xmm0=R(02468ACE********)
|
|
||||||
packuswb xmm1,xmm1 ; xmm1=R(13579BDF********)
|
|
||||||
|
|
||||||
paddw xmm2,xmm4 ; xmm2=((G-Y)E+YE)=GE=G(02468ACE)
|
|
||||||
paddw xmm3,xmm5 ; xmm3=((G-Y)O+YO)=GO=G(13579BDF)
|
|
||||||
packuswb xmm2,xmm2 ; xmm2=G(02468ACE********)
|
|
||||||
packuswb xmm3,xmm3 ; xmm3=G(13579BDF********)
|
|
||||||
|
|
||||||
paddw xmm4, XMMWORD [wk(0)] ; xmm4=(YE+(B-Y)E)=BE=B(02468ACE)
|
|
||||||
paddw xmm5, XMMWORD [wk(1)] ; xmm5=(YO+(B-Y)O)=BO=B(13579BDF)
|
|
||||||
packuswb xmm4,xmm4 ; xmm4=B(02468ACE********)
|
|
||||||
packuswb xmm5,xmm5 ; xmm5=B(13579BDF********)
|
|
||||||
|
|
||||||
%if RGB_PIXELSIZE == 3 ; ---------------
|
|
||||||
|
|
||||||
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
|
|
||||||
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
|
|
||||||
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
|
|
||||||
; xmmG=(** ** ** ** ** ** ** ** **), xmmH=(** ** ** ** ** ** ** ** **)
|
|
||||||
|
|
||||||
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
|
|
||||||
punpcklbw xmmE,xmmB ; xmmE=(20 01 22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F)
|
|
||||||
punpcklbw xmmD,xmmF ; xmmD=(11 21 13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F)
|
|
||||||
|
|
||||||
movdqa xmmG,xmmA
|
|
||||||
movdqa xmmH,xmmA
|
|
||||||
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 01 02 12 22 03 04 14 24 05 06 16 26 07)
|
|
||||||
punpckhwd xmmG,xmmE ; xmmG=(08 18 28 09 0A 1A 2A 0B 0C 1C 2C 0D 0E 1E 2E 0F)
|
|
||||||
|
|
||||||
psrldq xmmH,2 ; xmmH=(02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E -- --)
|
|
||||||
psrldq xmmE,2 ; xmmE=(22 03 24 05 26 07 28 09 2A 0B 2C 0D 2E 0F -- --)
|
|
||||||
|
|
||||||
movdqa xmmC,xmmD
|
|
||||||
movdqa xmmB,xmmD
|
|
||||||
punpcklwd xmmD,xmmH ; xmmD=(11 21 02 12 13 23 04 14 15 25 06 16 17 27 08 18)
|
|
||||||
punpckhwd xmmC,xmmH ; xmmC=(19 29 0A 1A 1B 2B 0C 1C 1D 2D 0E 1E 1F 2F -- --)
|
|
||||||
|
|
||||||
psrldq xmmB,2 ; xmmB=(13 23 15 25 17 27 19 29 1B 2B 1D 2D 1F 2F -- --)
|
|
||||||
|
|
||||||
movdqa xmmF,xmmE
|
|
||||||
punpcklwd xmmE,xmmB ; xmmE=(22 03 13 23 24 05 15 25 26 07 17 27 28 09 19 29)
|
|
||||||
punpckhwd xmmF,xmmB ; xmmF=(2A 0B 1B 2B 2C 0D 1D 2D 2E 0F 1F 2F -- -- -- --)
|
|
||||||
|
|
||||||
pshufd xmmH,xmmA,0x4E; xmmH=(04 14 24 05 06 16 26 07 00 10 20 01 02 12 22 03)
|
|
||||||
movdqa xmmB,xmmE
|
|
||||||
punpckldq xmmA,xmmD ; xmmA=(00 10 20 01 11 21 02 12 02 12 22 03 13 23 04 14)
|
|
||||||
punpckldq xmmE,xmmH ; xmmE=(22 03 13 23 04 14 24 05 24 05 15 25 06 16 26 07)
|
|
||||||
punpckhdq xmmD,xmmB ; xmmD=(15 25 06 16 26 07 17 27 17 27 08 18 28 09 19 29)
|
|
||||||
|
|
||||||
pshufd xmmH,xmmG,0x4E; xmmH=(0C 1C 2C 0D 0E 1E 2E 0F 08 18 28 09 0A 1A 2A 0B)
|
|
||||||
movdqa xmmB,xmmF
|
|
||||||
punpckldq xmmG,xmmC ; xmmG=(08 18 28 09 19 29 0A 1A 0A 1A 2A 0B 1B 2B 0C 1C)
|
|
||||||
punpckldq xmmF,xmmH ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 2C 0D 1D 2D 0E 1E 2E 0F)
|
|
||||||
punpckhdq xmmC,xmmB ; xmmC=(1D 2D 0E 1E 2E 0F 1F 2F 1F 2F -- -- -- -- -- --)
|
|
||||||
|
|
||||||
punpcklqdq xmmA,xmmE ; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
|
|
||||||
punpcklqdq xmmD,xmmG ; xmmD=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
|
|
||||||
punpcklqdq xmmF,xmmC ; xmmF=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD
|
|
||||||
jb short .column_st32
|
|
||||||
|
|
||||||
test edi, SIZEOF_XMMWORD-1
|
|
||||||
jnz short .out1
|
|
||||||
; --(aligned)-------------------
|
|
||||||
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
||||||
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
||||||
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmF
|
|
||||||
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
|
||||||
jmp short .out0
|
|
||||||
.out1: ; --(unaligned)-----------------
|
|
||||||
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
|
||||||
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmF,xmmH ; movntdqu XMMWORD [edi], xmmF
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
.out0:
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD
|
|
||||||
jz near .nextrow
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
||||||
add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
||||||
add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
||||||
jmp near .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.column_st32:
|
|
||||||
pcmpeqb xmmH,xmmH ; xmmH=(all 1's)
|
|
||||||
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
|
|
||||||
cmp ecx, byte 2*SIZEOF_XMMWORD
|
|
||||||
jb short .column_st16
|
|
||||||
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmD,xmmH ; movntdqu XMMWORD [edi], xmmD
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
movdqa xmmA,xmmF
|
|
||||||
sub ecx, byte 2*SIZEOF_XMMWORD
|
|
||||||
jmp short .column_st15
|
|
||||||
.column_st16:
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD
|
|
||||||
jb short .column_st15
|
|
||||||
maskmovdqu xmmA,xmmH ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
movdqa xmmA,xmmD
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD
|
|
||||||
.column_st15:
|
|
||||||
mov eax,ecx
|
|
||||||
xor ecx, byte 0x0F
|
|
||||||
shl ecx, 2
|
|
||||||
movd xmmB,ecx
|
|
||||||
psrlq xmmH,4
|
|
||||||
pcmpeqb xmmE,xmmE
|
|
||||||
psrlq xmmH,xmmB
|
|
||||||
psrlq xmmE,xmmB
|
|
||||||
punpcklbw xmmE,xmmH
|
|
||||||
; ----------------
|
|
||||||
mov ecx,edi
|
|
||||||
and ecx, byte SIZEOF_XMMWORD-1
|
|
||||||
jz short .adj0
|
|
||||||
add eax,ecx
|
|
||||||
cmp eax, byte SIZEOF_XMMWORD
|
|
||||||
ja short .adj0
|
|
||||||
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
|
||||||
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
|
||||||
movdqa xmmG,xmmA
|
|
||||||
movdqa xmmC,xmmE
|
|
||||||
pslldq xmmA, SIZEOF_XMMWORD/2
|
|
||||||
pslldq xmmE, SIZEOF_XMMWORD/2
|
|
||||||
movd xmmD,ecx
|
|
||||||
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
|
||||||
jb short .adj1
|
|
||||||
movd xmmF,ecx
|
|
||||||
psllq xmmA,xmmF
|
|
||||||
psllq xmmE,xmmF
|
|
||||||
jmp short .adj0
|
|
||||||
.adj1: neg ecx
|
|
||||||
movd xmmF,ecx
|
|
||||||
psrlq xmmA,xmmF
|
|
||||||
psrlq xmmE,xmmF
|
|
||||||
psllq xmmG,xmmD
|
|
||||||
psllq xmmC,xmmD
|
|
||||||
por xmmA,xmmG
|
|
||||||
por xmmE,xmmC
|
|
||||||
.adj0: ; ----------------
|
|
||||||
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
|
|
||||||
%else ; RGB_PIXELSIZE == 4 ; -----------
|
|
||||||
|
|
||||||
%ifdef RGBX_FILLER_0XFF
|
|
||||||
pcmpeqb xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
|
|
||||||
pcmpeqb xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
|
|
||||||
%else
|
|
||||||
pxor xmm6,xmm6 ; xmm6=XE=X(02468ACE********)
|
|
||||||
pxor xmm7,xmm7 ; xmm7=XO=X(13579BDF********)
|
|
||||||
%endif
|
|
||||||
; xmmA=(00 02 04 06 08 0A 0C 0E **), xmmB=(01 03 05 07 09 0B 0D 0F **)
|
|
||||||
; xmmC=(10 12 14 16 18 1A 1C 1E **), xmmD=(11 13 15 17 19 1B 1D 1F **)
|
|
||||||
; xmmE=(20 22 24 26 28 2A 2C 2E **), xmmF=(21 23 25 27 29 2B 2D 2F **)
|
|
||||||
; xmmG=(30 32 34 36 38 3A 3C 3E **), xmmH=(31 33 35 37 39 3B 3D 3F **)
|
|
||||||
|
|
||||||
punpcklbw xmmA,xmmC ; xmmA=(00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E)
|
|
||||||
punpcklbw xmmE,xmmG ; xmmE=(20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E)
|
|
||||||
punpcklbw xmmB,xmmD ; xmmB=(01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F)
|
|
||||||
punpcklbw xmmF,xmmH ; xmmF=(21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F)
|
|
||||||
|
|
||||||
movdqa xmmC,xmmA
|
|
||||||
punpcklwd xmmA,xmmE ; xmmA=(00 10 20 30 02 12 22 32 04 14 24 34 06 16 26 36)
|
|
||||||
punpckhwd xmmC,xmmE ; xmmC=(08 18 28 38 0A 1A 2A 3A 0C 1C 2C 3C 0E 1E 2E 3E)
|
|
||||||
movdqa xmmG,xmmB
|
|
||||||
punpcklwd xmmB,xmmF ; xmmB=(01 11 21 31 03 13 23 33 05 15 25 35 07 17 27 37)
|
|
||||||
punpckhwd xmmG,xmmF ; xmmG=(09 19 29 39 0B 1B 2B 3B 0D 1D 2D 3D 0F 1F 2F 3F)
|
|
||||||
|
|
||||||
movdqa xmmD,xmmA
|
|
||||||
punpckldq xmmA,xmmB ; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
|
|
||||||
punpckhdq xmmD,xmmB ; xmmD=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
|
|
||||||
movdqa xmmH,xmmC
|
|
||||||
punpckldq xmmC,xmmG ; xmmC=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
|
|
||||||
punpckhdq xmmH,xmmG ; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
|
|
||||||
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD
|
|
||||||
jb short .column_st32
|
|
||||||
|
|
||||||
test edi, SIZEOF_XMMWORD-1
|
|
||||||
jnz short .out1
|
|
||||||
; --(aligned)-------------------
|
|
||||||
movntdq XMMWORD [edi+0*SIZEOF_XMMWORD], xmmA
|
|
||||||
movntdq XMMWORD [edi+1*SIZEOF_XMMWORD], xmmD
|
|
||||||
movntdq XMMWORD [edi+2*SIZEOF_XMMWORD], xmmC
|
|
||||||
movntdq XMMWORD [edi+3*SIZEOF_XMMWORD], xmmH
|
|
||||||
add edi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD ; outptr
|
|
||||||
jmp short .out0
|
|
||||||
.out1: ; --(unaligned)-----------------
|
|
||||||
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
|
||||||
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmC,xmmE ; movntdqu XMMWORD [edi], xmmC
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmH,xmmE ; movntdqu XMMWORD [edi], xmmH
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
.out0:
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD
|
|
||||||
jz near .nextrow
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_XMMWORD ; inptr0
|
|
||||||
add ebx, byte SIZEOF_XMMWORD ; inptr1
|
|
||||||
add edx, byte SIZEOF_XMMWORD ; inptr2
|
|
||||||
jmp near .columnloop
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.column_st32:
|
|
||||||
pcmpeqb xmmE,xmmE ; xmmE=(all 1's)
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD/2
|
|
||||||
jb short .column_st16
|
|
||||||
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
maskmovdqu xmmD,xmmE ; movntdqu XMMWORD [edi], xmmD
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
movdqa xmmA,xmmC
|
|
||||||
movdqa xmmD,xmmH
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD/2
|
|
||||||
.column_st16:
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD/4
|
|
||||||
jb short .column_st15
|
|
||||||
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
add edi, byte SIZEOF_XMMWORD ; outptr
|
|
||||||
movdqa xmmA,xmmD
|
|
||||||
sub ecx, byte SIZEOF_XMMWORD/4
|
|
||||||
.column_st15:
|
|
||||||
cmp ecx, byte SIZEOF_XMMWORD/16
|
|
||||||
jb short .nextrow
|
|
||||||
mov eax,ecx
|
|
||||||
xor ecx, byte 0x03
|
|
||||||
inc ecx
|
|
||||||
shl ecx, 4
|
|
||||||
movd xmmF,ecx
|
|
||||||
psrlq xmmE,xmmF
|
|
||||||
punpcklbw xmmE,xmmE
|
|
||||||
; ----------------
|
|
||||||
mov ecx,edi
|
|
||||||
and ecx, byte SIZEOF_XMMWORD-1
|
|
||||||
jz short .adj0
|
|
||||||
lea eax, [ecx+eax*4] ; RGB_PIXELSIZE
|
|
||||||
cmp eax, byte SIZEOF_XMMWORD
|
|
||||||
ja short .adj0
|
|
||||||
and edi, byte (-SIZEOF_XMMWORD) ; align to 16-byte boundary
|
|
||||||
shl ecx, 3 ; pslldq xmmA,ecx & pslldq xmmE,ecx
|
|
||||||
movdqa xmmB,xmmA
|
|
||||||
movdqa xmmG,xmmE
|
|
||||||
pslldq xmmA, SIZEOF_XMMWORD/2
|
|
||||||
pslldq xmmE, SIZEOF_XMMWORD/2
|
|
||||||
movd xmmC,ecx
|
|
||||||
sub ecx, byte (SIZEOF_XMMWORD/2)*BYTE_BIT
|
|
||||||
jb short .adj1
|
|
||||||
movd xmmH,ecx
|
|
||||||
psllq xmmA,xmmH
|
|
||||||
psllq xmmE,xmmH
|
|
||||||
jmp short .adj0
|
|
||||||
.adj1: neg ecx
|
|
||||||
movd xmmH,ecx
|
|
||||||
psrlq xmmA,xmmH
|
|
||||||
psrlq xmmE,xmmH
|
|
||||||
psllq xmmB,xmmC
|
|
||||||
psllq xmmG,xmmC
|
|
||||||
por xmmA,xmmB
|
|
||||||
por xmmE,xmmG
|
|
||||||
.adj0: ; ----------------
|
|
||||||
maskmovdqu xmmA,xmmE ; movntdqu XMMWORD [edi], xmmA
|
|
||||||
|
|
||||||
%endif ; RGB_PIXELSIZE ; ---------------
|
|
||||||
|
|
||||||
alignx 16,7
|
|
||||||
|
|
||||||
.nextrow:
|
|
||||||
pop ecx
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop edx
|
|
||||||
pop edi
|
|
||||||
pop eax
|
|
||||||
|
|
||||||
add esi, byte SIZEOF_JSAMPROW
|
|
||||||
add ebx, byte SIZEOF_JSAMPROW
|
|
||||||
add edx, byte SIZEOF_JSAMPROW
|
|
||||||
add edi, byte SIZEOF_JSAMPROW ; output_buf
|
|
||||||
dec eax ; num_rows
|
|
||||||
jg near .rowloop
|
|
||||||
|
|
||||||
sfence ; flush the write buffer
|
|
||||||
|
|
||||||
.return:
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
; pop edx ; need not be preserved
|
|
||||||
; pop ecx ; need not be preserved
|
|
||||||
pop ebx
|
|
||||||
mov esp,ebp ; esp <- aligned ebp
|
|
||||||
pop esp ; esp <- original ebp
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
; XRGB variant of the YCbCr->RGB SSE2 converter.
; Same scheme as the other instantiations in this file: the component-layout
; macros are reset, then redefined so that red, green and blue occupy
; positions 1, 2 and 3 of a 4-byte pixel (position 0 is presumably the filler
; byte -- confirm against jdclrss2.asm). The routine name
; jsimd_ycc_rgb_convert_sse2 is remapped to jsimd_ycc_extxrgb_convert_sse2
; before jdclrss2.asm is included once more.
%undef RGB_RED
|
||||||
|
%undef RGB_GREEN
|
||||||
|
%undef RGB_BLUE
|
||||||
|
%undef RGB_PIXELSIZE
|
||||||
|
%define RGB_RED 1
|
||||||
|
%define RGB_GREEN 2
|
||||||
|
%define RGB_BLUE 3
|
||||||
|
%define RGB_PIXELSIZE 4
|
||||||
|
%define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
|
||||||
|
%include "jdclrss2.asm"
|
||||||
|
|||||||
122
simd/jsimd.h
122
simd/jsimd.h
@@ -21,11 +21,35 @@
|
|||||||
#ifdef NEED_SHORT_EXTERNAL_NAMES
|
#ifdef NEED_SHORT_EXTERNAL_NAMES
|
||||||
#define jpeg_simd_cpu_support jSiCpuSupport
|
#define jpeg_simd_cpu_support jSiCpuSupport
|
||||||
#define jsimd_rgb_ycc_convert_mmx jSRGBYCCM
|
#define jsimd_rgb_ycc_convert_mmx jSRGBYCCM
|
||||||
|
#define jsimd_extrgb_ycc_convert_mmx jSEXTRGBYCCM
|
||||||
|
#define jsimd_extrgbx_ycc_convert_mmx jSEXTRGBXYCCM
|
||||||
|
#define jsimd_extbgr_ycc_convert_mmx jSEXTBGRYCCM
|
||||||
|
#define jsimd_extbgrx_ycc_convert_mmx jSEXTBGRXYCCM
|
||||||
|
#define jsimd_extxbgr_ycc_convert_mmx jSEXTXBGRYCCM
|
||||||
|
#define jsimd_extxrgb_ycc_convert_mmx jSEXTXRGBYCCM
|
||||||
#define jsimd_ycc_rgb_convert_mmx jSYCCRGBM
|
#define jsimd_ycc_rgb_convert_mmx jSYCCRGBM
|
||||||
|
#define jsimd_ycc_extrgb_convert_mmx jSYCCEXTRGBM
|
||||||
|
#define jsimd_ycc_extrgbx_convert_mmx jSYCCEXTRGBXM
|
||||||
|
#define jsimd_ycc_extbgr_convert_mmx jSYCCEXTBGRM
|
||||||
|
#define jsimd_ycc_extbgrx_convert_mmx jSYCCEXTBGRXM
|
||||||
|
#define jsimd_ycc_extxbgr_convert_mmx jSYCCEXTXBGRM
|
||||||
|
#define jsimd_ycc_extxrgb_convert_mmx jSYCCEXTXRGBM
|
||||||
#define jconst_rgb_ycc_convert_sse2 jSCRGBYCCS2
|
#define jconst_rgb_ycc_convert_sse2 jSCRGBYCCS2
|
||||||
#define jsimd_rgb_ycc_convert_sse2 jSRGBYCCS2
|
#define jsimd_rgb_ycc_convert_sse2 jSRGBYCCS2
|
||||||
|
#define jsimd_extrgb_ycc_convert_sse2 jSEXTRGBYCCS2
|
||||||
|
#define jsimd_extrgbx_ycc_convert_sse2 jSEXTRGBXYCCS2
|
||||||
|
#define jsimd_extbgr_ycc_convert_sse2 jSEXTBGRYCCS2
|
||||||
|
#define jsimd_extbgrx_ycc_convert_sse2 jSEXTBGRXYCCS2
|
||||||
|
#define jsimd_extxbgr_ycc_convert_sse2 jSEXTXBGRYCCS2
|
||||||
|
#define jsimd_extxrgb_ycc_convert_sse2 jSEXTXRGBYCCS2
|
||||||
#define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2
|
#define jconst_ycc_rgb_convert_sse2 jSCYCCRGBS2
|
||||||
#define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2
|
#define jsimd_ycc_rgb_convert_sse2 jSYCCRGBS2
|
||||||
|
#define jsimd_ycc_extrgb_convert_sse2 jSYCCEXTRGBS2
|
||||||
|
#define jsimd_ycc_extrgbx_convert_sse2 jSYCCEXTRGBXS2
|
||||||
|
#define jsimd_ycc_extbgr_convert_sse2 jSYCCEXTBGRS2
|
||||||
|
#define jsimd_ycc_extbgrx_convert_sse2 jSYCCEXTBGRXS2
|
||||||
|
#define jsimd_ycc_extxbgr_convert_sse2 jSYCCEXTXBGRS2
|
||||||
|
#define jsimd_ycc_extxrgb_convert_sse2 jSYCCEXTXRGBS2
|
||||||
#define jsimd_h2v2_downsample_mmx jSDnH2V2M
|
#define jsimd_h2v2_downsample_mmx jSDnH2V2M
|
||||||
#define jsimd_h2v1_downsample_mmx jSDnH2V1M
|
#define jsimd_h2v1_downsample_mmx jSDnH2V1M
|
||||||
#define jsimd_h2v2_downsample_sse2 jSDnH2V2S2
|
#define jsimd_h2v2_downsample_sse2 jSDnH2V2S2
|
||||||
@@ -89,21 +113,119 @@ EXTERN(void) jsimd_rgb_ycc_convert_mmx
|
|||||||
JPP((JDIMENSION img_width,
|
JPP((JDIMENSION img_width,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows));
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgb_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgbx_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgr_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgrx_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxbgr_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxrgb_ycc_convert_mmx
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
|
||||||
EXTERN(void) jsimd_ycc_rgb_convert_mmx
|
EXTERN(void) jsimd_ycc_rgb_convert_mmx
|
||||||
JPP((JDIMENSION out_width,
|
JPP((JDIMENSION out_width,
|
||||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
JSAMPARRAY output_buf, int num_rows));
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extrgb_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extrgbx_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extbgr_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extbgrx_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extxbgr_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extxrgb_convert_mmx
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
|
||||||
extern const int jconst_rgb_ycc_convert_sse2[];
|
extern const int jconst_rgb_ycc_convert_sse2[];
|
||||||
EXTERN(void) jsimd_rgb_ycc_convert_sse2
|
EXTERN(void) jsimd_rgb_ycc_convert_sse2
|
||||||
JPP((JDIMENSION img_width,
|
JPP((JDIMENSION img_width,
|
||||||
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
JDIMENSION output_row, int num_rows));
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgb_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extrgbx_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgr_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extbgrx_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxbgr_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
EXTERN(void) jsimd_extxrgb_ycc_convert_sse2
|
||||||
|
JPP((JDIMENSION img_width,
|
||||||
|
JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
|
||||||
|
JDIMENSION output_row, int num_rows));
|
||||||
|
|
||||||
extern const int jconst_ycc_rgb_convert_sse2[];
|
extern const int jconst_ycc_rgb_convert_sse2[];
|
||||||
EXTERN(void) jsimd_ycc_rgb_convert_sse2
|
EXTERN(void) jsimd_ycc_rgb_convert_sse2
|
||||||
JPP((JDIMENSION out_width,
|
JPP((JDIMENSION out_width,
|
||||||
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
JSAMPARRAY output_buf, int num_rows));
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extrgb_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extrgbx_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extbgr_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extbgrx_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extxbgr_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
EXTERN(void) jsimd_ycc_extxrgb_convert_sse2
|
||||||
|
JPP((JDIMENSION out_width,
|
||||||
|
JSAMPIMAGE input_buf, JDIMENSION input_row,
|
||||||
|
JSAMPARRAY output_buf, int num_rows));
|
||||||
|
|
||||||
/* SIMD Downsample */
|
/* SIMD Downsample */
|
||||||
EXTERN(void) jsimd_h2v2_downsample_mmx
|
EXTERN(void) jsimd_h2v2_downsample_mmx
|
||||||
|
|||||||
Reference in New Issue
Block a user