AVX2: Introduce YMMBLOCK macro for readability
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
; jquanti.asm - sample quantization (AVX2)
|
; jquanti.asm - sample quantization (AVX2)
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
; Copyright (C) 2016, D. R. Commander.
|
; Copyright (C) 2016, 2018, D. R. Commander.
|
||||||
; Copyright (C) 2016, Matthieu Darbois.
|
; Copyright (C) 2016, Matthieu Darbois.
|
||||||
;
|
;
|
||||||
; Based on the x86 SIMD extension for IJG JPEG library
|
; Based on the x86 SIMD extension for IJG JPEG library
|
||||||
@@ -37,9 +37,9 @@
|
|||||||
; DCTELEM *workspace);
|
; DCTELEM *workspace);
|
||||||
;
|
;
|
||||||
|
|
||||||
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
%define RECIPROCAL(m,n,b) YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
%define CORRECTION(m,n,b) YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
%define SCALE(m,n,b) YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
|
|
||||||
%define coef_block ebp+8 ; JCOEFPTR coef_block
|
%define coef_block ebp+8 ; JCOEFPTR coef_block
|
||||||
%define divisors ebp+12 ; DCTELEM *divisors
|
%define divisors ebp+12 ; DCTELEM *divisors
|
||||||
@@ -61,10 +61,10 @@ EXTN(jsimd_quantize_avx2):
|
|||||||
mov edx, POINTER [divisors]
|
mov edx, POINTER [divisors]
|
||||||
mov edi, JCOEFPTR [coef_block]
|
mov edi, JCOEFPTR [coef_block]
|
||||||
|
|
||||||
vmovdqu ymm4, [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
vmovdqu ymm4, [YMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm5, [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
|
vmovdqu ymm5, [YMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm6, [XMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
|
vmovdqu ymm6, [YMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm7, [XMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
|
vmovdqu ymm7, [YMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
|
||||||
vpabsw ymm0, ymm4
|
vpabsw ymm0, ymm4
|
||||||
vpabsw ymm1, ymm5
|
vpabsw ymm1, ymm5
|
||||||
vpabsw ymm2, ymm6
|
vpabsw ymm2, ymm6
|
||||||
@@ -88,10 +88,10 @@ EXTN(jsimd_quantize_avx2):
|
|||||||
vpsignw ymm2, ymm2, ymm6
|
vpsignw ymm2, ymm2, ymm6
|
||||||
vpsignw ymm3, ymm3, ymm7
|
vpsignw ymm3, ymm3, ymm7
|
||||||
|
|
||||||
vmovdqu [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
|
vmovdqu [YMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
|
||||||
vmovdqu [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
|
vmovdqu [YMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
|
||||||
vmovdqu [XMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
|
vmovdqu [YMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
|
||||||
vmovdqu [XMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
|
vmovdqu [YMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
|
||||||
|
|
||||||
vzeroupper
|
vzeroupper
|
||||||
pop edi
|
pop edi
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
; jdct.inc - private declarations for forward & reverse DCT subsystems
|
; jdct.inc - private declarations for forward & reverse DCT subsystems
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
|
; Copyright (C) 2018, D. R. Commander.
|
||||||
;
|
;
|
||||||
; Based on the x86 SIMD extension for IJG JPEG library
|
; Based on the x86 SIMD extension for IJG JPEG library
|
||||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||||
@@ -23,5 +24,6 @@
|
|||||||
%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
|
%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
|
||||||
%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
|
%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
|
||||||
%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
|
%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
|
||||||
|
%define YMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_YMMWORD)
|
||||||
|
|
||||||
; --------------------------------------------------------------------------
|
; --------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
|
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
|
||||||
;
|
;
|
||||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
|
||||||
; Copyright (C) 2016, Matthieu Darbois.
|
; Copyright (C) 2016, Matthieu Darbois.
|
||||||
;
|
;
|
||||||
; Based on the x86 SIMD extension for IJG JPEG library
|
; Based on the x86 SIMD extension for IJG JPEG library
|
||||||
@@ -37,9 +37,9 @@
|
|||||||
; DCTELEM *workspace);
|
; DCTELEM *workspace);
|
||||||
;
|
;
|
||||||
|
|
||||||
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
%define RECIPROCAL(m,n,b) YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
%define CORRECTION(m,n,b) YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
%define SCALE(m,n,b) YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||||
|
|
||||||
; r10 = JCOEFPTR coef_block
|
; r10 = JCOEFPTR coef_block
|
||||||
; r11 = DCTELEM *divisors
|
; r11 = DCTELEM *divisors
|
||||||
@@ -54,10 +54,10 @@ EXTN(jsimd_quantize_avx2):
|
|||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
collect_args 3
|
collect_args 3
|
||||||
|
|
||||||
vmovdqu ymm4, [XMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
|
vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm5, [XMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
|
vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm6, [XMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
|
vmovdqu ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
|
||||||
vmovdqu ymm7, [XMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
|
vmovdqu ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
|
||||||
vpabsw ymm0, ymm4
|
vpabsw ymm0, ymm4
|
||||||
vpabsw ymm1, ymm5
|
vpabsw ymm1, ymm5
|
||||||
vpabsw ymm2, ymm6
|
vpabsw ymm2, ymm6
|
||||||
@@ -81,10 +81,10 @@ EXTN(jsimd_quantize_avx2):
|
|||||||
vpsignw ymm2, ymm2, ymm6
|
vpsignw ymm2, ymm2, ymm6
|
||||||
vpsignw ymm3, ymm3, ymm7
|
vpsignw ymm3, ymm3, ymm7
|
||||||
|
|
||||||
vmovdqu [XMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
|
vmovdqu [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
|
||||||
vmovdqu [XMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
|
vmovdqu [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
|
||||||
vmovdqu [XMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
|
vmovdqu [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
|
||||||
vmovdqu [XMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
|
vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
|
||||||
|
|
||||||
vzeroupper
|
vzeroupper
|
||||||
uncollect_args 3
|
uncollect_args 3
|
||||||
|
|||||||
Reference in New Issue
Block a user