AVX2: Introduce YMMBLOCK macro for readability
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
; jquanti.asm - sample quantization (AVX2)
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2016, D. R. Commander.
|
||||
; Copyright (C) 2016, 2018, D. R. Commander.
|
||||
; Copyright (C) 2016, Matthieu Darbois.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
@@ -37,9 +37,9 @@
|
||||
; DCTELEM *workspace);
|
||||
;
|
||||
|
||||
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
; Row-offset wrappers around YMMBLOCK, all using SIZEOF_DCTELEM as the
; element size.  Each one forwards (m, n, b) after adding a fixed multiple
; of DCTSIZE to the row index m:
;   RECIPROCAL -> DCTSIZE*0 + m
;   CORRECTION -> DCTSIZE*1 + m
;   SCALE      -> DCTSIZE*2 + m
; NOTE(review): b is presumably the divisors pointer -- the use sites are
; not visible here, confirm before relying on it.
%define RECIPROCAL(m,n,b) YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define CORRECTION(m,n,b) YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define SCALE(m,n,b) YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
|
||||
%define coef_block ebp+8 ; JCOEFPTR coef_block
|
||||
%define divisors ebp+12 ; DCTELEM *divisors
|
||||
@@ -61,10 +61,10 @@ EXTN(jsimd_quantize_avx2):
|
||||
mov edx, POINTER [divisors]
|
||||
mov edi, JCOEFPTR [coef_block]
|
||||
|
||||
vmovdqu ymm4, [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm5, [XMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm6, [XMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm7, [XMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm4, [YMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm5, [YMMBLOCK(2,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm6, [YMMBLOCK(4,0,esi,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm7, [YMMBLOCK(6,0,esi,SIZEOF_DCTELEM)]
|
||||
vpabsw ymm0, ymm4
|
||||
vpabsw ymm1, ymm5
|
||||
vpabsw ymm2, ymm6
|
||||
@@ -88,10 +88,10 @@ EXTN(jsimd_quantize_avx2):
|
||||
vpsignw ymm2, ymm2, ymm6
|
||||
vpsignw ymm3, ymm3, ymm7
|
||||
|
||||
vmovdqu [XMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
|
||||
vmovdqu [XMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
|
||||
vmovdqu [XMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
|
||||
vmovdqu [XMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
|
||||
vmovdqu [YMMBLOCK(0,0,edi,SIZEOF_DCTELEM)], ymm0
|
||||
vmovdqu [YMMBLOCK(2,0,edi,SIZEOF_DCTELEM)], ymm1
|
||||
vmovdqu [YMMBLOCK(4,0,edi,SIZEOF_DCTELEM)], ymm2
|
||||
vmovdqu [YMMBLOCK(6,0,edi,SIZEOF_DCTELEM)], ymm3
|
||||
|
||||
vzeroupper
|
||||
pop edi
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
; jdct.inc - private declarations for forward & reverse DCT subsystems
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2018, D. R. Commander.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
; Copyright (C) 1999-2006, MIYASAKA Masaru.
|
||||
@@ -23,5 +24,6 @@
|
||||
; Block address helpers.  Each macro expands to the address expression
;   (b) + (m)*DCTSIZE*(s) + (n)*<unit size>
; where
;   b = base address (register or expression),
;   m = row index, stepped in units of DCTSIZE*(s) bytes,
;   n = index within the row, stepped in units of the macro's own word size
;       (SIZEOF_DWORD, SIZEOF_MMWORD, SIZEOF_XMMWORD or SIZEOF_YMMWORD),
;   s = size in bytes of one element.
; The macros differ only in the unit that n is scaled by, so with n = 0
; they all expand to the same address.
%define DWBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_DWORD)
|
||||
%define MMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_MMWORD)
|
||||
%define XMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_XMMWORD)
|
||||
%define YMMBLOCK(m,n,b,s) ((b)+(m)*DCTSIZE*(s)+(n)*SIZEOF_YMMWORD)
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
|
||||
;
|
||||
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
|
||||
; Copyright (C) 2009, 2016, D. R. Commander.
|
||||
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
|
||||
; Copyright (C) 2016, Matthieu Darbois.
|
||||
;
|
||||
; Based on the x86 SIMD extension for IJG JPEG library
|
||||
@@ -37,9 +37,9 @@
|
||||
; DCTELEM *workspace);
|
||||
;
|
||||
|
||||
%define RECIPROCAL(m,n,b) XMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define CORRECTION(m,n,b) XMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define SCALE(m,n,b) XMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
; Row-offset wrappers around YMMBLOCK, all using SIZEOF_DCTELEM as the
; element size.  Each one forwards (m, n, b) after adding a fixed multiple
; of DCTSIZE to the row index m:
;   RECIPROCAL -> DCTSIZE*0 + m
;   CORRECTION -> DCTSIZE*1 + m
;   SCALE      -> DCTSIZE*2 + m
; NOTE(review): b is presumably the divisors pointer (r11 per the register
; notes that follow) -- the use sites are not visible here, confirm.
%define RECIPROCAL(m,n,b) YMMBLOCK(DCTSIZE*0+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define CORRECTION(m,n,b) YMMBLOCK(DCTSIZE*1+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
%define SCALE(m,n,b) YMMBLOCK(DCTSIZE*2+(m),(n),(b),SIZEOF_DCTELEM)
|
||||
|
||||
; r10 = JCOEFPTR coef_block
|
||||
; r11 = DCTELEM *divisors
|
||||
@@ -54,10 +54,10 @@ EXTN(jsimd_quantize_avx2):
|
||||
mov rbp, rsp
|
||||
collect_args 3
|
||||
|
||||
vmovdqu ymm4, [XMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm5, [XMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm6, [XMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm7, [XMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm6, [YMMBLOCK(4,0,r12,SIZEOF_DCTELEM)]
|
||||
vmovdqu ymm7, [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)]
|
||||
vpabsw ymm0, ymm4
|
||||
vpabsw ymm1, ymm5
|
||||
vpabsw ymm2, ymm6
|
||||
@@ -81,10 +81,10 @@ EXTN(jsimd_quantize_avx2):
|
||||
vpsignw ymm2, ymm2, ymm6
|
||||
vpsignw ymm3, ymm3, ymm7
|
||||
|
||||
vmovdqu [XMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
|
||||
vmovdqu [XMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
|
||||
vmovdqu [XMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
|
||||
vmovdqu [XMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
|
||||
vmovdqu [YMMBLOCK(0,0,r10,SIZEOF_DCTELEM)], ymm0
|
||||
vmovdqu [YMMBLOCK(2,0,r10,SIZEOF_DCTELEM)], ymm1
|
||||
vmovdqu [YMMBLOCK(4,0,r10,SIZEOF_DCTELEM)], ymm2
|
||||
vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
|
||||
|
||||
vzeroupper
|
||||
uncollect_args 3
|
||||
|
||||
Reference in New Issue
Block a user