Use intrinsics for loading aligned data in the IDCT functions. This has no effect on performance, but it makes it more obvious what that code is doing.

git-svn-id: svn+ssh://svn.code.sf.net/p/libjpeg-turbo/code/trunk@1491 632fc199-4ca6-4c93-a231-07263d6284db
Author: DRC
Date:   2015-01-11 06:34:47 +00:00
parent f0abd46c3b
commit d71a6e0c25

2 changed files with 26 additions and 26 deletions
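
For readers unfamiliar with the idiom, here is a minimal sketch of the pattern this commit replaces (the function and variable names are hypothetical, not from the libjpeg-turbo sources). Both forms perform the same 16-byte-aligned vector load; vec_ld simply states that intent explicitly and takes its offset in bytes rather than array elements.

    #include <altivec.h>

    /* Load row i of an 8x8 block of 16-bit coefficients (eight shorts =
     * one 16-byte vector per row).  "block" must be 16-byte aligned. */

    /* Old style: cast and dereference.  The aligned-load semantics are
     * implicit in the vector pointer type. */
    static __vector short
    load_row_cast (short *block, int i)
    {
      return *(__vector short *)&block[i * 8];
    }

    /* New style: the vec_ld intrinsic.  The offset is in bytes, so row i
     * starts at i * 8 * sizeof(short) = i * 16. */
    static __vector short
    load_row_intrinsic (short *block, int i)
    {
      return vec_ld(i * 16, block);
    }

On a compiler with AltiVec support, both functions should reduce to the same aligned vector load (lvx), which is consistent with the commit message's claim that the change has no performance effect.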


@@ -1,7 +1,7 @@
 /*
  * AltiVec optimizations for libjpeg-turbo
  *
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014-2015, D. R. Commander.
  * All rights reserved.
  * This software is provided 'as-is', without any express or implied
  * warranty. In no event will the authors be held liable for any damages
@@ -153,7 +153,7 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
   tmp3 = vec_or(tmp3, col7);
   tmp1 = vec_or(tmp1, tmp3);
 
-  quant0 = *(__vector short *)&dct_table[0];
+  quant0 = vec_ld(0, dct_table);
   col0 = vec_mladd(col0, quant0, zero);
 
   if (vec_all_eq(tmp1, zero)) {
@@ -170,13 +170,13 @@ jsimd_idct_ifast_altivec (void * dct_table_, JCOEFPTR coef_block,
   } else {
-    quant1 = *(__vector short *)&dct_table[8];
-    quant2 = *(__vector short *)&dct_table[16];
-    quant3 = *(__vector short *)&dct_table[24];
-    quant4 = *(__vector short *)&dct_table[32];
-    quant5 = *(__vector short *)&dct_table[40];
-    quant6 = *(__vector short *)&dct_table[48];
-    quant7 = *(__vector short *)&dct_table[56];
+    quant1 = vec_ld(16, dct_table);
+    quant2 = vec_ld(32, dct_table);
+    quant3 = vec_ld(48, dct_table);
+    quant4 = vec_ld(64, dct_table);
+    quant5 = vec_ld(80, dct_table);
+    quant6 = vec_ld(96, dct_table);
+    quant7 = vec_ld(112, dct_table);
 
     col1 = vec_mladd(col1, quant1, zero);
     col2 = vec_mladd(col2, quant2, zero);
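
Note how the offsets translate between the two forms: the old code indexed dct_table as an array of 16-bit elements (index 8 = the start of the second 8-element row), while vec_ld takes a byte offset, so index 8 becomes 8 * sizeof(short) = 16 and index 56 becomes 112. The same mapping appears throughout the second file below.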


@@ -1,7 +1,7 @@
 /*
  * AltiVec optimizations for libjpeg-turbo
  *
- * Copyright (C) 2014, D. R. Commander.
+ * Copyright (C) 2014-2015, D. R. Commander.
  * All rights reserved.
  * This software is provided 'as-is', without any express or implied
  * warranty. In no event will the authors be held liable for any damages
@@ -246,14 +246,14 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
   /* Pass 1: process columns */
 
-  col0 = *(__vector short *)&coef_block[0];
-  col1 = *(__vector short *)&coef_block[8];
-  col2 = *(__vector short *)&coef_block[16];
-  col3 = *(__vector short *)&coef_block[24];
-  col4 = *(__vector short *)&coef_block[32];
-  col5 = *(__vector short *)&coef_block[40];
-  col6 = *(__vector short *)&coef_block[48];
-  col7 = *(__vector short *)&coef_block[56];
+  col0 = vec_ld(0, coef_block);
+  col1 = vec_ld(16, coef_block);
+  col2 = vec_ld(32, coef_block);
+  col3 = vec_ld(48, coef_block);
+  col4 = vec_ld(64, coef_block);
+  col5 = vec_ld(80, coef_block);
+  col6 = vec_ld(96, coef_block);
+  col7 = vec_ld(112, coef_block);
 
   tmp1 = vec_or(col1, col2);
   tmp2 = vec_or(col3, col4);
@@ -262,7 +262,7 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
   tmp3 = vec_or(tmp3, col7);
   tmp1 = vec_or(tmp1, tmp3);
 
-  quant0 = *(__vector short *)&dct_table[0];
+  quant0 = vec_ld(0, dct_table);
   col0 = vec_mladd(col0, quant0, zero16);
 
   if (vec_all_eq(tmp1, zero16)) {
@@ -281,13 +281,13 @@ jsimd_idct_islow_altivec (void * dct_table_, JCOEFPTR coef_block,
   } else {
-    quant1 = *(__vector short *)&dct_table[8];
-    quant2 = *(__vector short *)&dct_table[16];
-    quant3 = *(__vector short *)&dct_table[24];
-    quant4 = *(__vector short *)&dct_table[32];
-    quant5 = *(__vector short *)&dct_table[40];
-    quant6 = *(__vector short *)&dct_table[48];
-    quant7 = *(__vector short *)&dct_table[56];
+    quant1 = vec_ld(16, dct_table);
+    quant2 = vec_ld(32, dct_table);
+    quant3 = vec_ld(48, dct_table);
+    quant4 = vec_ld(64, dct_table);
+    quant5 = vec_ld(80, dct_table);
+    quant6 = vec_ld(96, dct_table);
+    quant7 = vec_ld(112, dct_table);
 
     col1 = vec_mladd(col1, quant1, zero16);
     col2 = vec_mladd(col2, quant2, zero16);