MMI: Fix compression performance issue with unaligned image rows
Using ldc1 with a non-64-bit-aligned memory location causes as much as a 10x slow-down in overall compression performance.
This commit is contained in:
@@ -34,6 +34,9 @@ incorrect PPM images when used with the `-colors` option.
|
|||||||
7. Fixed an issue whereby a static build of libjpeg-turbo (a build in which
|
7. Fixed an issue whereby a static build of libjpeg-turbo (a build in which
|
||||||
`ENABLE_SHARED` is `0`) could not be installed using the Visual Studio IDE.
|
`ENABLE_SHARED` is `0`) could not be installed using the Visual Studio IDE.
|
||||||
|
|
||||||
|
8. Fixed a severe performance issue in the Loongson MMI SIMD extensions that
|
||||||
|
occurred when compressing RGB images whose image rows were not 64-bit-aligned.
|
||||||
|
|
||||||
|
|
||||||
2.0.1
|
2.0.1
|
||||||
=====
|
=====
|
||||||
|
|||||||
@@ -2,12 +2,13 @@
|
|||||||
* Loongson MMI optimizations for libjpeg-turbo
|
* Loongson MMI optimizations for libjpeg-turbo
|
||||||
*
|
*
|
||||||
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
* Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
|
* Copyright (C) 2014-2015, 2019, D. R. Commander. All Rights Reserved.
|
||||||
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
|
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||||
* All Rights Reserved.
|
* All Rights Reserved.
|
||||||
* Authors: ZhuChen <zhuchen@loongson.cn>
|
* Authors: ZhuChen <zhuchen@loongson.cn>
|
||||||
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
|
* SunZhangzhi <sunzhangzhi-cq@loongson.cn>
|
||||||
* CaiWanwei <caiwanwei@loongson.cn>
|
* CaiWanwei <caiwanwei@loongson.cn>
|
||||||
|
* ZhangLixia <zhanglixia-hf@loongson.cn>
|
||||||
*
|
*
|
||||||
* Based on the x86 SIMD extension for IJG JPEG library
|
* Based on the x86 SIMD extension for IJG JPEG library
|
||||||
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
* Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -184,9 +185,15 @@ void jsimd_rgb_ycc_convert_mmi(JDIMENSION image_width, JSAMPARRAY input_buf,
|
|||||||
"$14", "memory"
|
"$14", "memory"
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
|
if (!(((long)inptr) & 7)) {
|
||||||
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
mmA = _mm_load_si64((__m64 *)&inptr[0]);
|
||||||
mmG = _mm_load_si64((__m64 *)&inptr[8]);
|
mmG = _mm_load_si64((__m64 *)&inptr[8]);
|
||||||
mmF = _mm_load_si64((__m64 *)&inptr[16]);
|
mmF = _mm_load_si64((__m64 *)&inptr[16]);
|
||||||
|
} else {
|
||||||
|
mmA = _mm_loadu_si64((__m64 *)&inptr[0]);
|
||||||
|
mmG = _mm_loadu_si64((__m64 *)&inptr[8]);
|
||||||
|
mmF = _mm_loadu_si64((__m64 *)&inptr[16]);
|
||||||
|
}
|
||||||
inptr += RGB_PIXELSIZE * 8;
|
inptr += RGB_PIXELSIZE * 8;
|
||||||
}
|
}
|
||||||
mmD = mmA;
|
mmD = mmA;
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
/*
|
/*
|
||||||
* Loongson MMI optimizations for libjpeg-turbo
|
* Loongson MMI optimizations for libjpeg-turbo
|
||||||
*
|
*
|
||||||
* Copyright (C) 2016-2017, Loongson Technology Corporation Limited, BeiJing.
|
* Copyright (C) 2016-2018, Loongson Technology Corporation Limited, BeiJing.
|
||||||
* All Rights Reserved.
|
* All Rights Reserved.
|
||||||
|
* Copyright (C) 2019, D. R. Commander. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* This software is provided 'as-is', without any express or implied
|
* This software is provided 'as-is', without any express or implied
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
@@ -41,7 +42,7 @@ typedef float __m32;
|
|||||||
|
|
||||||
/********** Set Operations **********/
|
/********** Set Operations **********/
|
||||||
|
|
||||||
extern __inline __m64
|
extern __inline __m64 FUNCTION_ATTRIBS
|
||||||
_mm_setzero_si64(void)
|
_mm_setzero_si64(void)
|
||||||
{
|
{
|
||||||
return 0.0;
|
return 0.0;
|
||||||
@@ -1245,6 +1246,22 @@ _mm_load_si64(const __m64 *src)
|
|||||||
asm("ldc1 %0, %1\n\t"
|
asm("ldc1 %0, %1\n\t"
|
||||||
: "=f" (ret)
|
: "=f" (ret)
|
||||||
: "m" (*src)
|
: "m" (*src)
|
||||||
|
: "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
extern __inline __m64 FUNCTION_ATTRIBS
|
||||||
|
_mm_loadu_si64(const __m64 *src)
|
||||||
|
{
|
||||||
|
__m64 ret;
|
||||||
|
|
||||||
|
asm("gsldlc1 %0, 7(%1)\n\t"
|
||||||
|
"gsldrc1 %0, 0(%1)\n\t"
|
||||||
|
: "=f" (ret)
|
||||||
|
: "r" (src)
|
||||||
|
: "memory"
|
||||||
);
|
);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|||||||
Reference in New Issue
Block a user