Use intrinsics for loading aligned data in the IDCT functions. This has no effect on performance, but it makes it more obvious what the code is doing.
git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1491 632fc199-4ca6-4c93-a231-07263d6284db
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* AltiVec optimizations for libjpeg-turbo
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
*
|
*
|
||||||
* Copyright (C) 2014, D. R. Commander.
|
* Copyright (C) 2014-2015, D. R. Commander.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* This software is provided 'as-is', without any express or implied
|
* This software is provided 'as-is', without any express or implied
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
@@ -153,7 +153,7 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
|||||||
tmp3 = vec_or(tmp3, col7);
|
tmp3 = vec_or(tmp3, col7);
|
||||||
tmp1 = vec_or(tmp1, tmp3);
|
tmp1 = vec_or(tmp1, tmp3);
|
||||||
|
|
||||||
quant0 = *(__vector short *)&dct_table[0];
|
quant0 = vec_ld(0, dct_table);
|
||||||
col0 = vec_mladd(col0, quant0, zero);
|
col0 = vec_mladd(col0, quant0, zero);
|
||||||
|
|
||||||
if (vec_all_eq(tmp1, zero)) {
|
if (vec_all_eq(tmp1, zero)) {
|
||||||
@@ -170,13 +170,13 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
quant1 = *(__vector short *)&dct_table[8];
|
quant1 = vec_ld(16, dct_table);
|
||||||
quant2 = *(__vector short *)&dct_table[16];
|
quant2 = vec_ld(32, dct_table);
|
||||||
quant3 = *(__vector short *)&dct_table[24];
|
quant3 = vec_ld(48, dct_table);
|
||||||
quant4 = *(__vector short *)&dct_table[32];
|
quant4 = vec_ld(64, dct_table);
|
||||||
quant5 = *(__vector short *)&dct_table[40];
|
quant5 = vec_ld(80, dct_table);
|
||||||
quant6 = *(__vector short *)&dct_table[48];
|
quant6 = vec_ld(96, dct_table);
|
||||||
quant7 = *(__vector short *)&dct_table[56];
|
quant7 = vec_ld(112, dct_table);
|
||||||
|
|
||||||
col1 = vec_mladd(col1, quant1, zero);
|
col1 = vec_mladd(col1, quant1, zero);
|
||||||
col2 = vec_mladd(col2, quant2, zero);
|
col2 = vec_mladd(col2, quant2, zero);
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* AltiVec optimizations for libjpeg-turbo
|
* AltiVec optimizations for libjpeg-turbo
|
||||||
*
|
*
|
||||||
* Copyright (C) 2014, D. R. Commander.
|
* Copyright (C) 2014-2015, D. R. Commander.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* This software is provided 'as-is', without any express or implied
|
* This software is provided 'as-is', without any express or implied
|
||||||
* warranty. In no event will the authors be held liable for any damages
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
@@ -246,14 +246,14 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
|||||||
|
|
||||||
/* Pass 1: process columns */
|
/* Pass 1: process columns */
|
||||||
|
|
||||||
col0 = *(__vector short *)&coef_block[0];
|
col0 = vec_ld(0, coef_block);
|
||||||
col1 = *(__vector short *)&coef_block[8];
|
col1 = vec_ld(16, coef_block);
|
||||||
col2 = *(__vector short *)&coef_block[16];
|
col2 = vec_ld(32, coef_block);
|
||||||
col3 = *(__vector short *)&coef_block[24];
|
col3 = vec_ld(48, coef_block);
|
||||||
col4 = *(__vector short *)&coef_block[32];
|
col4 = vec_ld(64, coef_block);
|
||||||
col5 = *(__vector short *)&coef_block[40];
|
col5 = vec_ld(80, coef_block);
|
||||||
col6 = *(__vector short *)&coef_block[48];
|
col6 = vec_ld(96, coef_block);
|
||||||
col7 = *(__vector short *)&coef_block[56];
|
col7 = vec_ld(112, coef_block);
|
||||||
|
|
||||||
tmp1 = vec_or(col1, col2);
|
tmp1 = vec_or(col1, col2);
|
||||||
tmp2 = vec_or(col3, col4);
|
tmp2 = vec_or(col3, col4);
|
||||||
@@ -262,7 +262,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
|||||||
tmp3 = vec_or(tmp3, col7);
|
tmp3 = vec_or(tmp3, col7);
|
||||||
tmp1 = vec_or(tmp1, tmp3);
|
tmp1 = vec_or(tmp1, tmp3);
|
||||||
|
|
||||||
quant0 = *(__vector short *)&dct_table[0];
|
quant0 = vec_ld(0, dct_table);
|
||||||
col0 = vec_mladd(col0, quant0, zero16);
|
col0 = vec_mladd(col0, quant0, zero16);
|
||||||
|
|
||||||
if (vec_all_eq(tmp1, zero16)) {
|
if (vec_all_eq(tmp1, zero16)) {
|
||||||
@@ -281,13 +281,13 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
quant1 = *(__vector short *)&dct_table[8];
|
quant1 = vec_ld(16, dct_table);
|
||||||
quant2 = *(__vector short *)&dct_table[16];
|
quant2 = vec_ld(32, dct_table);
|
||||||
quant3 = *(__vector short *)&dct_table[24];
|
quant3 = vec_ld(48, dct_table);
|
||||||
quant4 = *(__vector short *)&dct_table[32];
|
quant4 = vec_ld(64, dct_table);
|
||||||
quant5 = *(__vector short *)&dct_table[40];
|
quant5 = vec_ld(80, dct_table);
|
||||||
quant6 = *(__vector short *)&dct_table[48];
|
quant6 = vec_ld(96, dct_table);
|
||||||
quant7 = *(__vector short *)&dct_table[56];
|
quant7 = vec_ld(112, dct_table);
|
||||||
|
|
||||||
col1 = vec_mladd(col1, quant1, zero16);
|
col1 = vec_mladd(col1, quant1, zero16);
|
||||||
col2 = vec_mladd(col2, quant2, zero16);
|
col2 = vec_mladd(col2, quant2, zero16);
|
||||||
|
|||||||
Reference in New Issue
Block a user