x86 SIMD: Capitalize all instruction-like macros

(to improve code readability)
This commit is contained in:
DRC
2024-02-29 12:18:49 -05:00
parent 26fc07c8d1
commit 1335547558
88 changed files with 662 additions and 661 deletions

View File

@@ -2,7 +2,7 @@
; jccolext.asm - colorspace conversion (AVX2)
;
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -154,7 +154,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -278,7 +278,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -552,7 +552,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jccolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@@ -80,9 +80,9 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@@ -93,11 +93,11 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -143,7 +143,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -211,7 +211,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -449,7 +449,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (SSE2)
;
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -48,15 +48,15 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -79,9 +79,9 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edx
push ebx
push edi
@@ -92,11 +92,11 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
mov edi, JSAMPROW [edi] ; outptr0
mov ebx, JSAMPROW [ebx] ; outptr1
mov edx, JSAMPROW [edx] ; outptr2
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -147,7 +147,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -232,7 +232,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_ycc_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -478,7 +478,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
pop edi
pop ebx
pop edx
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jccolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_mmx)
EXTN(jconst_rgb_ycc_convert_mmx):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 2 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jcgray.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_mmx)
EXTN(jconst_rgb_gray_convert_mmx):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 2 dw F_0_299, F_0_337
PW_F0114_F0250 times 2 dw F_0_114, F_0_250
PD_ONEHALF times 2 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_YMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -146,7 +146,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -270,7 +270,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
vmovdqu ymmF, YMMWORD [esi+1*SIZEOF_YMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymmA, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -433,7 +433,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jcgryext.asm - grayscale colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)] ; num_cols
test ecx, ecx
@@ -76,20 +76,20 @@ EXTN(jsimd_rgb_gray_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_MMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -135,7 +135,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmG, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -203,7 +203,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
movq mmF, MMWORD [esi+1*SIZEOF_MMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mmA, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -330,7 +330,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -48,15 +48,15 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [img_width(eax)]
test ecx, ecx
@@ -75,20 +75,20 @@ EXTN(jsimd_rgb_gray_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
pushpic eax
PUSHPIC eax
push edi
push esi
push ecx ; col
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr0
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
cmp ecx, byte SIZEOF_XMMWORD
jae near .columnloop
alignx 16, 7
ALIGNX 16, 7
%if RGB_PIXELSIZE == 3 ; ---------------
@@ -139,7 +139,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -224,7 +224,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
movdqu xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
jmp short .rgb_gray_cnv
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqu xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -359,7 +359,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
pop ecx ; col
pop esi
pop edi
poppic eax
POPPIC eax
add esi, byte SIZEOF_JSAMPROW ; input_buf
add edi, byte SIZEOF_JSAMPROW

View File

@@ -42,7 +42,7 @@ endstruc
EXTN(jconst_huff_encode_one_block):
alignz 32
ALIGNZ 32
jpeg_mask_bits dq 0x0000, 0x0001, 0x0003, 0x0007
dq 0x000f, 0x001f, 0x003f, 0x007f
@@ -84,7 +84,7 @@ times 1 << 12 db 13
times 1 << 13 db 14
times 1 << 14 db 15
alignz 32
ALIGNZ 32
%ifdef PIC
%define NBITS(x) nbits_base + x
@@ -236,7 +236,7 @@ times 1 << 14 db 15
; If PIC is defined, load the address of a symbol defined in this file into a
; register. Equivalent to
; get_GOT %1
; GET_GOT %1
; lea %1, [GOTOFF(%1, %2)]
; without using the GOT.
;

View File

@@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -70,7 +70,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -106,7 +106,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -117,7 +117,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r24:
; ecx can possibly be 8, 16, 24
@@ -141,7 +141,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
vpxor ymm1, ymm1, ymm1
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [esi+0*SIZEOF_YMMWORD]
@@ -243,7 +243,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -279,7 +279,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -291,7 +291,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
cmp ecx, byte SIZEOF_YMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r24:
cmp ecx, 24
@@ -320,7 +320,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
vpxor ymm3, ymm3, ymm3
mov ecx, SIZEOF_YMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm0, YMMWORD [edx+0*SIZEOF_YMMWORD]

View File

@@ -2,7 +2,7 @@
; jcsample.asm - downsampling (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_downsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -212,7 +212,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -247,7 +247,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_downsample_mmx):
mov edx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; inptr0
mov esi, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; inptr1
mov edi, JSAMPROW [edi] ; outptr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [edx+0*SIZEOF_MMWORD]

View File

@@ -2,7 +2,7 @@
; jcsample.asm - downsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -115,14 +115,14 @@ EXTN(jsimd_h2v1_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
pxor xmm1, xmm1
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -225,7 +225,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cld
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
alignx 16, 7
ALIGNX 16, 7
.expandloop:
push eax
push ecx
@@ -260,7 +260,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, JSAMPARRAY [output_data(ebp)] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push ecx
push edi
@@ -272,7 +272,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
cmp ecx, byte SIZEOF_XMMWORD
jae short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_r8:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]
@@ -281,7 +281,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
pxor xmm3, xmm3
mov ecx, SIZEOF_XMMWORD
jmp short .downsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [edx+0*SIZEOF_XMMWORD]

View File

@@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -50,15 +50,15 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -81,7 +81,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -94,8 +94,8 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
vmovdqu ymm5, YMMWORD [ebx] ; ymm5=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -295,7 +295,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -436,7 +436,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2
@@ -479,7 +479,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
movq mm5, MMWORD [ebx] ; mm5=Cb(01234567)
@@ -255,7 +255,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -344,7 +344,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
add edx, byte SIZEOF_MMWORD ; inptr2
add edi, byte RGB_PIXELSIZE*SIZEOF_MMWORD ; outptr
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2
@@ -369,7 +369,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [out_width(eax)] ; num_cols
test ecx, ecx
@@ -80,7 +80,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov eax, INT [num_rows(eax)]
test eax, eax
jle near .return
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax
push edi
@@ -93,8 +93,8 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
mov ebx, JSAMPROW [ebx] ; inptr1
mov edx, JSAMPROW [edx] ; inptr2
mov edi, JSAMPROW [edi] ; outptr
movpic eax, POINTER [gotptr] ; load GOT address (eax)
alignx 16, 7
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
ALIGNX 16, 7
.columnloop:
movdqa xmm5, XMMWORD [ebx] ; xmm5=Cb(0123456789ABCDEF)
@@ -275,7 +275,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -387,7 +387,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2
@@ -423,7 +423,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
%endif ; RGB_PIXELSIZE ; ---------------
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop ecx

View File

@@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_mmx)
EXTN(jconst_ycc_rgb_convert_mmx):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_mmx)
EXTN(jconst_merged_upsample_mmx):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 2 dw -F_0_344, F_0_285
PW_ONE times 4 dw 1
PD_ONEHALF times 2 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -50,15 +50,15 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -79,9 +79,9 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
vmovdqu ymm6, YMMWORD [ebx] ; ymm6=Cb(0123456789ABCDEFGHIJKLMNOPQRSTUV)
vmovdqu ymm7, YMMWORD [edx] ; ymm7=Cr(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -168,13 +168,13 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
vmovdqa ymm0, YMMWORD [wk(1)] ; ymm0=(R-Y)H
vmovdqa ymm2, YMMWORD [wk(2)] ; ymm2=(G-Y)H
vmovdqa ymm4, YMMWORD [wk(0)] ; ymm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
vmovdqu ymm7, YMMWORD [esi] ; ymm7=Y(0123456789ABCDEFGHIJKLMNOPQRSTUV)
@@ -301,7 +301,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -445,7 +445,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
add ebx, byte SIZEOF_YMMWORD ; inptr1
add edx, byte SIZEOF_YMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st64:
cmp ecx, byte SIZEOF_YMMWORD/2

View File

@@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -47,15 +47,15 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -76,9 +76,9 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movq mm6, MMWORD [ebx] ; mm6=Cb(01234567)
movq mm7, MMWORD [edx] ; mm7=Cr(01234567)
@@ -171,13 +171,13 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
movq mm0, MMWORD [wk(1)] ; mm0=(R-Y)H
movq mm2, MMWORD [wk(2)] ; mm2=(G-Y)H
movq mm4, MMWORD [wk(0)] ; mm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
movq mm7, MMWORD [esi] ; mm7=Y(01234567)
@@ -258,7 +258,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -350,7 +350,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
add ebx, byte SIZEOF_MMWORD ; inptr1
add edx, byte SIZEOF_MMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st16:
cmp ecx, byte SIZEOF_MMWORD/2

View File

@@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2012, 2016, D. R. Commander.
; Copyright (C) 2012, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,15 +49,15 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov ecx, JDIMENSION [output_width(eax)] ; col
test ecx, ecx
@@ -78,9 +78,9 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
pop ecx ; col
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movpic eax, POINTER [gotptr] ; load GOT address (eax)
MOVPIC eax, POINTER [gotptr] ; load GOT address (eax)
movdqa xmm6, XMMWORD [ebx] ; xmm6=Cb(0123456789ABCDEF)
movdqa xmm7, XMMWORD [edx] ; xmm7=Cr(0123456789ABCDEF)
@@ -173,13 +173,13 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
mov al, 2 ; Yctr
jmp short .Yloop_1st
alignx 16, 7
ALIGNX 16, 7
.Yloop_2nd:
movdqa xmm0, XMMWORD [wk(1)] ; xmm0=(R-Y)H
movdqa xmm2, XMMWORD [wk(2)] ; xmm2=(G-Y)H
movdqa xmm4, XMMWORD [wk(0)] ; xmm4=(B-Y)H
alignx 16, 7
ALIGNX 16, 7
.Yloop_1st:
movdqa xmm7, XMMWORD [esi] ; xmm7=Y(0123456789ABCDEF)
@@ -280,7 +280,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
lea ecx, [ecx+ecx*2] ; imul ecx, RGB_PIXELSIZE
@@ -395,7 +395,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
add ebx, byte SIZEOF_XMMWORD ; inptr1
add edx, byte SIZEOF_XMMWORD ; inptr2
jmp near .columnloop
alignx 16, 7
ALIGNX 16, 7
.column_st32:
cmp ecx, byte SIZEOF_XMMWORD/2

View File

@@ -3,7 +3,7 @@
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -20,7 +20,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@@ -31,7 +31,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -62,13 +62,13 @@ PW_EIGHT times 16 dw 8
EXTN(jsimd_h2v1_fancy_upsample_avx2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -81,7 +81,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -104,7 +104,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
vpcmpeqb xmm6, xmm6, xmm6
@@ -112,7 +112,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
vperm2i128 ymm6, ymm6, ymm6, 1 ; (---- ---- ... ---- ---- ff) MSB is ff
vpand ymm6, ymm6, YMMWORD [esi+0*SIZEOF_YMMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
vmovdqu ymm6, YMMWORD [esi+1*SIZEOF_YMMWORD]
@@ -196,7 +196,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@@ -234,15 +234,15 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -256,7 +256,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -286,8 +286,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+0*SIZEOF_YMMWORD] ; ymm1=row[-1][0]
vmovdqu ymm2, YMMWORD [esi+0*SIZEOF_YMMWORD] ; ymm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@@ -328,19 +328,19 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(0)], ymm1
vmovdqa YMMWORD [wk(1)], ymm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_YMMWORD-1
and eax, byte -SIZEOF_YMMWORD
cmp eax, byte SIZEOF_YMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpcmpeqb xmm1, xmm1, xmm1
vpslldq xmm1, xmm1, (SIZEOF_XMMWORD-2)
@@ -353,7 +353,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqa YMMWORD [wk(3)], ymm2 ; ymm2=(-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 31)
jmp near .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -362,8 +362,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu ymm1, YMMWORD [ecx+1*SIZEOF_YMMWORD] ; ymm1=row[-1][1]
vmovdqu ymm2, YMMWORD [esi+1*SIZEOF_YMMWORD] ; ymm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
vpxor ymm3, ymm3, ymm3 ; ymm3=(all 0's)
@@ -516,7 +516,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
vmovdqu YMMWORD [edi+0*SIZEOF_YMMWORD], ymm1
vmovdqu YMMWORD [edi+1*SIZEOF_YMMWORD], ymm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_YMMWORD
add ecx, byte 1*SIZEOF_YMMWORD ; inptr1(above)
@@ -590,7 +590,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -598,7 +598,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@@ -629,7 +629,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
add esi, byte SIZEOF_YMMWORD ; inptr
add edi, byte 2*SIZEOF_YMMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@@ -689,7 +689,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -698,7 +698,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
cmp eax, byte SIZEOF_YMMWORD
@@ -734,7 +734,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
add ebx, 2*SIZEOF_YMMWORD ; outptr0
add edi, 2*SIZEOF_YMMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@@ -2,7 +2,7 @@
; jdsample.asm - upsampling (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -19,7 +19,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_mmx)
EXTN(jconst_fancy_upsample_mmx):
@@ -30,7 +30,7 @@ PW_THREE times 4 dw 3
PW_SEVEN times 4 dw 7
PW_EIGHT times 4 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -61,13 +61,13 @@ PW_EIGHT times 4 dw 8
EXTN(jsimd_h2v1_fancy_upsample_mmx):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
pcmpeqb mm6, mm6
psllq mm6, (SIZEOF_MMWORD-1)*BYTE_BIT
pand mm6, MMWORD [esi+0*SIZEOF_MMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm6, MMWORD [esi+1*SIZEOF_MMWORD]
@@ -187,7 +187,7 @@ EXTN(jsimd_h2v1_fancy_upsample_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@@ -224,15 +224,15 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -246,7 +246,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -276,8 +276,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+0*SIZEOF_MMWORD] ; mm1=row[-1][0]
movq mm2, MMWORD [esi+0*SIZEOF_MMWORD] ; mm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@@ -312,19 +312,19 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(0)], mm1
movq MMWORD [wk(1)], mm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_MMWORD-1
and eax, byte -SIZEOF_MMWORD
cmp eax, byte SIZEOF_MMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb mm1, mm1
psllq mm1, (SIZEOF_MMWORD-2)*BYTE_BIT
@@ -337,7 +337,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [wk(3)], mm2
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -346,8 +346,8 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq mm1, MMWORD [ecx+1*SIZEOF_MMWORD] ; mm1=row[-1][1]
movq mm2, MMWORD [esi+1*SIZEOF_MMWORD] ; mm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor mm3, mm3 ; mm3=(all 0's)
movq mm4, mm0
@@ -486,7 +486,7 @@ EXTN(jsimd_h2v2_fancy_upsample_mmx):
movq MMWORD [edi+0*SIZEOF_MMWORD], mm1
movq MMWORD [edi+1*SIZEOF_MMWORD], mm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_MMWORD
add ecx, byte 1*SIZEOF_MMWORD ; inptr1(above)
@@ -561,7 +561,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -569,7 +569,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -599,7 +599,7 @@ EXTN(jsimd_h2v1_upsample_mmx):
add esi, byte 2*SIZEOF_MMWORD ; inptr
add edi, byte 4*SIZEOF_MMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@@ -660,7 +660,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -669,7 +669,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [esi+0*SIZEOF_MMWORD]
@@ -704,7 +704,7 @@ EXTN(jsimd_h2v2_upsample_mmx):
add ebx, byte 4*SIZEOF_MMWORD ; outptr0
add edi, byte 4*SIZEOF_MMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@@ -2,7 +2,7 @@
; jdsample.asm - upsampling (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -19,7 +19,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@@ -30,7 +30,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -61,13 +61,13 @@ PW_EIGHT times 8 dw 8
EXTN(jsimd_h2v1_fancy_upsample_sse2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
mov eax, JDIMENSION [downsamp_width(ebp)] ; colctr
test eax, eax
@@ -80,7 +80,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push edi
@@ -103,14 +103,14 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
pcmpeqb xmm6, xmm6
pslldq xmm6, (SIZEOF_XMMWORD-1)
pand xmm6, XMMWORD [esi+0*SIZEOF_XMMWORD]
jmp short .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm6, XMMWORD [esi+1*SIZEOF_XMMWORD]
@@ -185,7 +185,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
pop ebp
ret
@@ -223,15 +223,15 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic eax ; make a room for GOT address
PUSHPIC eax ; make a room for GOT address
push ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
movpic POINTER [gotptr], ebx ; save GOT address
GET_GOT ebx ; get GOT address
MOVPIC POINTER [gotptr], ebx ; save GOT address
mov edx, eax ; edx = original ebp
mov eax, JDIMENSION [downsamp_width(edx)] ; colctr
@@ -245,7 +245,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
mov esi, JSAMPARRAY [input_data(edx)] ; input_data
mov edi, POINTER [output_data_ptr(edx)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push eax ; colctr
push ecx
@@ -275,8 +275,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+0*SIZEOF_XMMWORD] ; xmm1=row[-1][0]
movdqa xmm2, XMMWORD [esi+0*SIZEOF_XMMWORD] ; xmm2=row[+1][0]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@@ -311,19 +311,19 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(0)], xmm1
movdqa XMMWORD [wk(1)], xmm2
poppic ebx
POPPIC ebx
add eax, byte SIZEOF_XMMWORD-1
and eax, byte -SIZEOF_XMMWORD
cmp eax, byte SIZEOF_XMMWORD
ja short .columnloop
alignx 16, 7
ALIGNX 16, 7
.columnloop_last:
; -- process the last column block
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pcmpeqb xmm1, xmm1
pslldq xmm1, (SIZEOF_XMMWORD-2)
@@ -336,7 +336,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [wk(3)], xmm2 ; xmm2=(-- -- -- -- -- -- -- 15)
jmp near .upsample
alignx 16, 7
ALIGNX 16, 7
.columnloop:
; -- process the next column block
@@ -345,8 +345,8 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa xmm1, XMMWORD [ecx+1*SIZEOF_XMMWORD] ; xmm1=row[-1][1]
movdqa xmm2, XMMWORD [esi+1*SIZEOF_XMMWORD] ; xmm2=row[+1][1]
pushpic ebx
movpic ebx, POINTER [gotptr] ; load GOT address
PUSHPIC ebx
MOVPIC ebx, POINTER [gotptr] ; load GOT address
pxor xmm3, xmm3 ; xmm3=(all 0's)
movdqa xmm4, xmm0
@@ -485,7 +485,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
movdqa XMMWORD [edi+0*SIZEOF_XMMWORD], xmm1
movdqa XMMWORD [edi+1*SIZEOF_XMMWORD], xmm0
poppic ebx
POPPIC ebx
sub eax, byte SIZEOF_XMMWORD
add ecx, byte 1*SIZEOF_XMMWORD ; inptr1(above)
@@ -558,7 +558,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -566,7 +566,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
mov esi, JSAMPROW [esi] ; inptr
mov edi, JSAMPROW [edi] ; outptr
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -596,7 +596,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
add esi, byte 2*SIZEOF_XMMWORD ; inptr
add edi, byte 4*SIZEOF_XMMWORD ; outptr
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi
@@ -655,7 +655,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov esi, JSAMPARRAY [input_data(ebp)] ; input_data
mov edi, POINTER [output_data_ptr(ebp)]
mov edi, JSAMPARRAY [edi] ; output_data
alignx 16, 7
ALIGNX 16, 7
.rowloop:
push edi
push esi
@@ -664,7 +664,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
mov ebx, JSAMPROW [edi+0*SIZEOF_JSAMPROW] ; outptr0
mov edi, JSAMPROW [edi+1*SIZEOF_JSAMPROW] ; outptr1
mov eax, edx ; colctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movdqa xmm0, XMMWORD [esi+0*SIZEOF_XMMWORD]
@@ -699,7 +699,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
add ebx, byte 4*SIZEOF_XMMWORD ; outptr0
add edi, byte 4*SIZEOF_XMMWORD ; outptr1
jmp short .columnloop
alignx 16, 7
ALIGNX 16, 7
.nextrow:
pop esi

View File

@@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (3DNow!)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -24,7 +24,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_3dnow)
EXTN(jconst_fdct_float_3dnow):
@@ -34,7 +34,7 @@ PD_0_707 times 2 dd 0.707106781186547524400844
PD_0_541 times 2 dd 0.541196100146196984399723
PD_1_306 times 2 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -63,19 +63,19 @@ EXTN(jsimd_fdct_float_3dnow):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -190,7 +190,7 @@ EXTN(jsimd_fdct_float_3dnow):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(0,0,edx,SIZEOF_FAST_FLOAT)]
@@ -307,7 +307,7 @@ EXTN(jsimd_fdct_float_3dnow):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (SSE)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -34,7 +34,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@@ -44,7 +44,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -74,19 +74,19 @@ EXTN(jsimd_fdct_float_sse):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -222,7 +222,7 @@ EXTN(jsimd_fdct_float_sse):
mov edx, POINTER [data(eax)] ; (FAST_FLOAT *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movaps xmm0, XMMWORD [XMMBLOCK(2,0,edx,SIZEOF_FAST_FLOAT)]
@@ -358,7 +358,7 @@ EXTN(jsimd_fdct_float_sse):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_mmx)
EXTN(jconst_fdct_ifast_mmx):
@@ -59,7 +59,7 @@ PW_F0382 times 4 dw F_0_382 << CONST_SHIFT
PW_F0541 times 4 dw F_0_541 << CONST_SHIFT
PW_F1306 times 4 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,19 +88,19 @@ EXTN(jsimd_fdct_ifast_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -241,7 +241,7 @@ EXTN(jsimd_fdct_ifast_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -384,7 +384,7 @@ EXTN(jsimd_fdct_ifast_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -49,7 +49,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@@ -59,7 +59,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -89,13 +89,13 @@ EXTN(jsimd_fdct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -392,7 +392,7 @@ EXTN(jsimd_fdct_ifast_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
%macro dodct 9
%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +242,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -262,13 +262,13 @@ PW_1_NEG1 times 8 dw 1
EXTN(jsimd_fdct_islow_avx2):
push ebp
mov ebp, esp
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -292,9 +292,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@@ -302,9 +302,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@@ -322,7 +322,7 @@ EXTN(jsimd_fdct_islow_avx2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
pop ebp
ret

View File

@@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_mmx)
EXTN(jconst_fdct_islow_mmx):
@@ -80,7 +80,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 4 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -109,19 +109,19 @@ EXTN(jsimd_fdct_islow_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.rowloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -363,7 +363,7 @@ EXTN(jsimd_fdct_islow_mmx):
mov edx, POINTER [data(eax)] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.columnloop:
movq mm0, MMWORD [MMBLOCK(2,0,edx,SIZEOF_DCTELEM)]
@@ -609,7 +609,7 @@ EXTN(jsimd_fdct_islow_mmx):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@@ -80,7 +80,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -110,13 +110,13 @@ EXTN(jsimd_fdct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
; push esi ; unused
; push edi ; unused
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process rows.
@@ -622,7 +622,7 @@ EXTN(jsimd_fdct_islow_sse2):
; pop esi ; unused
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -24,7 +24,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_3dnow)
EXTN(jconst_idct_float_3dnow):
@@ -36,7 +36,7 @@ PD_2_613 times 2 dd 2.613125929752753055713286
PD_RNDINT_MAGIC times 2 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -78,7 +78,7 @@ EXTN(jsimd_idct_float_3dnow):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -87,21 +87,21 @@ EXTN(jsimd_idct_float_3dnow):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/2 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
or eax, ebx
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
jnz short .columnDCT
; -- AC terms all zero
@@ -127,7 +127,7 @@ EXTN(jsimd_idct_float_3dnow):
movq MMWORD [MMBLOCK(1,2,edi,SIZEOF_FAST_FLOAT)], mm1
movq MMWORD [MMBLOCK(1,3,edi,SIZEOF_FAST_FLOAT)], mm1
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -293,7 +293,7 @@ EXTN(jsimd_idct_float_3dnow):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/2 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -420,14 +420,14 @@ EXTN(jsimd_idct_float_3dnow):
punpckldq mm6, mm4 ; mm6=(00 01 02 03 04 05 06 07)
punpckhdq mm7, mm4 ; mm7=(10 11 12 13 14 15 16 17)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm6
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm7
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 2*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 2*SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,18 +23,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse)
EXTN(jconst_idct_float_sse):
@@ -46,7 +46,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_0_125 times 4 dd 0.125 ; 1/8
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -149,7 +149,7 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -325,11 +325,11 @@ EXTN(jsimd_idct_float_sse):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -340,11 +340,11 @@ EXTN(jsimd_idct_float_sse):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -372,7 +372,7 @@ EXTN(jsimd_idct_float_sse):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -536,7 +536,7 @@ EXTN(jsimd_idct_float_sse):
punpckldq mm5, mm6 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm6 ; mm4=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -547,7 +547,7 @@ EXTN(jsimd_idct_float_sse):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -23,18 +23,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@@ -46,7 +46,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,7 +88,7 @@ EXTN(jsimd_idct_float_sse2):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -97,7 +97,7 @@ EXTN(jsimd_idct_float_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; FAST_FLOAT *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -150,7 +150,7 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm3
movaps XMMWORD [XMMBLOCK(3,1,edi,SIZEOF_FAST_FLOAT)], xmm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -287,11 +287,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -302,11 +302,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,edi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,edi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,edi,SIZEOF_FAST_FLOAT)], xmm6
@@ -334,7 +334,7 @@ EXTN(jsimd_idct_float_sse2):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -464,7 +464,7 @@ EXTN(jsimd_idct_float_sse2):
pshufd xmm5, xmm6, 0x4E ; xmm5=(10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07)
pshufd xmm3, xmm7, 0x4E ; xmm3=(30 31 32 33 34 35 36 37 20 21 22 23 24 25 26 27)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
@@ -475,7 +475,7 @@ EXTN(jsimd_idct_float_sse2):
movq XMM_MMWORD [edx+eax*SIZEOF_JSAMPLE], xmm5
movq XMM_MMWORD [ebx+eax*SIZEOF_JSAMPLE], xmm3
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_FAST_FLOAT ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_mmx)
EXTN(jconst_idct_ifast_mmx):
@@ -67,7 +67,7 @@ PW_MF1613 times 4 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 4 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -109,7 +109,7 @@ EXTN(jsimd_idct_ifast_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -118,7 +118,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -163,7 +163,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -326,7 +326,7 @@ EXTN(jsimd_idct_ifast_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -464,7 +464,7 @@ EXTN(jsimd_idct_ifast_mmx):
punpckldq mm5, mm4 ; mm5=(20 21 22 23 24 25 26 27)
punpckhdq mm1, mm4 ; mm1=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -475,7 +475,7 @@ EXTN(jsimd_idct_ifast_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm5
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm1
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -56,7 +56,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@@ -67,7 +67,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -101,13 +101,13 @@ EXTN(jsimd_idct_ifast_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -155,7 +155,7 @@ EXTN(jsimd_idct_ifast_sse2):
movdqa XMMWORD [wk(0)], xmm2 ; wk(0)=col1
movdqa XMMWORD [wk(1)], xmm0 ; wk(1)=col3
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -490,7 +490,7 @@ EXTN(jsimd_idct_ifast_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@@ -118,7 +118,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %9: Pass (1 or 2)
%macro dodct 13
%macro DODCT 13
; -- Even part
; (Original)
@@ -250,7 +250,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@@ -269,7 +269,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -303,13 +303,13 @@ EXTN(jsimd_idct_islow_avx2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns.
@@ -353,7 +353,7 @@ EXTN(jsimd_idct_islow_avx2):
vpshufd ymm3, ymm4, 0xFF ; ymm3=col3_7=(03 03 03 03 03 03 03 03 07 07 07 07 07 07 07 07)
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -371,10 +371,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@@ -395,10 +395,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm3=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, XMMWORD [wk(0)], XMMWORD [wk(1)], XMMWORD [wk(2)], XMMWORD [wk(3)], 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@@ -442,7 +442,7 @@ EXTN(jsimd_idct_islow_avx2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_mmx)
EXTN(jconst_idct_islow_mmx):
@@ -80,7 +80,7 @@ PD_DESCALE_P1 times 2 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 2 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -122,7 +122,7 @@ EXTN(jsimd_idct_islow_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -131,7 +131,7 @@ EXTN(jsimd_idct_islow_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -178,7 +178,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
movq MMWORD [MMBLOCK(3,1,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -513,7 +513,7 @@ EXTN(jsimd_idct_islow_mmx):
mov edi, JSAMPARRAY [output_buf(eax)] ; (JSAMPROW *)
mov eax, JDIMENSION [output_col(eax)]
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.rowloop:
; -- Even part
@@ -816,7 +816,7 @@ EXTN(jsimd_idct_islow_mmx):
punpckldq mm7, mm5 ; mm7=(20 21 22 23 24 25 26 27)
punpckhdq mm4, mm5 ; mm4=(30 31 32 33 34 35 36 37)
pushpic ebx ; save GOT address
PUSHPIC ebx ; save GOT address
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov ebx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
@@ -827,7 +827,7 @@ EXTN(jsimd_idct_islow_mmx):
movq MMWORD [edx+eax*SIZEOF_JSAMPLE], mm7
movq MMWORD [ebx+eax*SIZEOF_JSAMPLE], mm4
poppic ebx ; restore GOT address
POPPIC ebx ; restore GOT address
add esi, byte 4*SIZEOF_JCOEF ; wsptr
add edi, byte 4*SIZEOF_JSAMPROW

View File

@@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, 2020, D. R. Commander.
; Copyright (C) 2016, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -63,7 +63,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@@ -80,7 +80,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -114,13 +114,13 @@ EXTN(jsimd_idct_islow_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -172,7 +172,7 @@ EXTN(jsimd_idct_islow_sse2):
movdqa XMMWORD [wk(10)], xmm3 ; wk(10)=col5
movdqa XMMWORD [wk(11)], xmm4 ; wk(11)=col7
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -847,7 +847,7 @@ EXTN(jsimd_idct_islow_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp

View File

@@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_mmx)
EXTN(jconst_idct_red_mmx):
@@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 2 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 2 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 8 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -124,13 +124,13 @@ EXTN(jsimd_idct_4x4_mmx):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [workspace]
pushpic ebx
PUSHPIC ebx
; push ecx ; need not be preserved
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input, store into work array.
@@ -139,7 +139,7 @@ EXTN(jsimd_idct_4x4_mmx):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
lea edi, [workspace] ; JCOEF *wsptr
mov ecx, DCTSIZE/4 ; ctr
alignx 16, 7
ALIGNX 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -181,7 +181,7 @@ EXTN(jsimd_idct_4x4_mmx):
movq MMWORD [MMBLOCK(2,0,edi,SIZEOF_JCOEF)], mm2
movq MMWORD [MMBLOCK(3,0,edi,SIZEOF_JCOEF)], mm3
jmp near .nextcolumn
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -479,7 +479,7 @@ EXTN(jsimd_idct_4x4_mmx):
pop esi
; pop edx ; need not be preserved
; pop ecx ; need not be preserved
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@@ -512,7 +512,7 @@ EXTN(jsimd_idct_2x2_mmx):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.

View File

@@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -69,7 +69,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@@ -87,7 +87,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -122,13 +122,13 @@ EXTN(jsimd_idct_4x4_sse2):
mov [esp], eax
mov ebp, esp ; ebp = aligned ebp
lea esp, [wk(0)]
pushpic ebx
PUSHPIC ebx
; push ecx ; unused
; push edx ; need not be preserved
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.
@@ -171,7 +171,7 @@ EXTN(jsimd_idct_4x4_sse2):
pshufd xmm3, xmm3, 0xFA ; xmm3=[col6 col7]=(06 06 06 06 07 07 07 07)
jmp near .column_end
alignx 16, 7
ALIGNX 16, 7
%endif
.columnDCT:
@@ -400,7 +400,7 @@ EXTN(jsimd_idct_4x4_sse2):
pop esi
; pop edx ; need not be preserved
; pop ecx ; unused
poppic ebx
POPPIC ebx
mov esp, ebp ; esp <- aligned ebp
pop esp ; esp <- original ebp
pop ebp
@@ -433,7 +433,7 @@ EXTN(jsimd_idct_2x2_sse2):
push esi
push edi
get_GOT ebx ; get GOT address
GET_GOT ebx ; get GOT address
; ---- Pass 1: process columns from input.

View File

@@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (3DNow! & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_3dnow):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -154,7 +154,7 @@ EXTN(jsimd_quantize_float_3dnow):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movq mm0, MMWORD [MMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movq mm1, MMWORD [MMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_mmx):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -157,10 +157,10 @@ EXTN(jsimd_quantize_mmx):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov ah, 2
alignx 16, 7
ALIGNX 16, 7
.quantloop1:
mov al, DCTSIZE2/8/2
alignx 16, 7
ALIGNX 16, 7
.quantloop2:
movq mm2, MMWORD [MMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movq mm3, MMWORD [MMBLOCK(0,1,esi,SIZEOF_DCTELEM)]

View File

@@ -2,7 +2,7 @@
; jquant.asm - sample data conversion and quantization (SSE & MMX)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -150,7 +150,7 @@ EXTN(jsimd_quantize_float_sse):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@@ -2,7 +2,7 @@
; jquantf.asm - sample data conversion and quantization (SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_float_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/2
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -127,7 +127,7 @@ EXTN(jsimd_quantize_float_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/16
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movaps xmm0, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_FAST_FLOAT)]
movaps xmm1, XMMWORD [XMMBLOCK(0,1,esi,SIZEOF_FAST_FLOAT)]

View File

@@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2016, D. R. Commander.
; Copyright (C) 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -52,7 +52,7 @@ EXTN(jsimd_convsamp_sse2):
mov eax, JDIMENSION [start_col]
mov edi, POINTER [workspace] ; (DCTELEM *)
mov ecx, DCTSIZE/4
alignx 16, 7
ALIGNX 16, 7
.convloop:
mov ebx, JSAMPROW [esi+0*SIZEOF_JSAMPROW] ; (JSAMPLE *)
mov edx, JSAMPROW [esi+1*SIZEOF_JSAMPROW] ; (JSAMPLE *)
@@ -133,7 +133,7 @@ EXTN(jsimd_quantize_sse2):
mov edx, POINTER [divisors]
mov edi, JCOEFPTR [coef_block]
mov eax, DCTSIZE2/32
alignx 16, 7
ALIGNX 16, 7
.quantloop:
movdqa xmm4, XMMWORD [XMMBLOCK(0,0,esi,SIZEOF_DCTELEM)]
movdqa xmm5, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_DCTELEM)]

View File

@@ -2,7 +2,7 @@
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, 2016, 2018-2019, D. R. Commander.
; Copyright (C) 2010, 2016, 2018-2019, 2024, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -272,7 +272,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) - const_base
%imacro get_GOT 1
%imacro GET_GOT 1
; NOTE: this macro destroys ecx resister.
call %%geteip
add ecx, byte (%%ref - $)
@@ -304,7 +304,7 @@ const_base:
%define GOTOFF(got, sym) (got) + (sym) wrt ..gotoff
%imacro get_GOT 1
%imacro GET_GOT 1
extern GOT_SYMBOL
call %%geteip
add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
@@ -317,13 +317,13 @@ const_base:
%endif ; GOT_SYMBOL == _MACHO_PIC_ ----------------
%imacro pushpic 1.nolist
%imacro PUSHPIC 1.nolist
push %1
%endmacro
%imacro poppic 1.nolist
%imacro POPPIC 1.nolist
pop %1
%endmacro
%imacro movpic 2.nolist
%imacro MOVPIC 2.nolist
mov %1, %2
%endmacro
@@ -331,13 +331,13 @@ const_base:
%define GOTOFF(got, sym) (sym)
%imacro get_GOT 1.nolist
%imacro GET_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%imacro PUSHPIC 1.nolist
%endmacro
%imacro poppic 1.nolist
%imacro POPPIC 1.nolist
%endmacro
%imacro movpic 2.nolist
%imacro MOVPIC 2.nolist
%endmacro
%endif ; PIC -----------------------------------------
@@ -349,7 +349,7 @@ const_base:
%define MSKLE(x, y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b, n) (($$-(b)) & ((n)-1))
%imacro alignx 1-2.nolist 0xFFFF
%imacro ALIGNX 1-2.nolist 0xFFFF
%%bs: \
times MSKLE(FILLB(%%bs, %1), %2) & MSKLE(16, FILLB($, %1)) & FILLB($, %1) \
db 0x90 ; nop
@@ -371,7 +371,7 @@ const_base:
; Align the next data on {2,4,8,16,..}-byte boundary.
;
%imacro alignz 1.nolist
%imacro ALIGNZ 1.nolist
align %1, db 0 ; filling zeros
%endmacro
@@ -379,7 +379,7 @@ const_base:
%ifdef WIN64
%imacro collect_args 1
%imacro COLLECT_ARGS 1
sub rsp, SIZEOF_XMMWORD
movaps XMMWORD [rsp], xmm6
sub rsp, SIZEOF_XMMWORD
@@ -408,7 +408,7 @@ const_base:
push rdi
%endmacro
%imacro uncollect_args 1
%imacro UNCOLLECT_ARGS 1
pop rdi
pop rsi
%if %1 > 5
@@ -429,7 +429,7 @@ const_base:
add rsp, SIZEOF_XMMWORD
%endmacro
%imacro push_xmm 1
%imacro PUSH_XMM 1
sub rsp, %1 * SIZEOF_XMMWORD
movaps XMMWORD [rsp+0*SIZEOF_XMMWORD], xmm8
%if %1 > 1
@@ -443,7 +443,7 @@ const_base:
%endif
%endmacro
%imacro pop_xmm 1
%imacro POP_XMM 1
movaps xmm8, XMMWORD [rsp+0*SIZEOF_XMMWORD]
%if %1 > 1
movaps xmm9, XMMWORD [rsp+1*SIZEOF_XMMWORD]
@@ -459,7 +459,7 @@ const_base:
%else
%imacro collect_args 1
%imacro COLLECT_ARGS 1
push r10
mov r10, rdi
%if %1 > 1
@@ -484,7 +484,7 @@ const_base:
%endif
%endmacro
%imacro uncollect_args 1
%imacro UNCOLLECT_ARGS 1
%if %1 > 5
pop r15
%endif
@@ -503,10 +503,10 @@ const_base:
pop r10
%endmacro
%imacro push_xmm 1
%imacro PUSH_XMM 1
%endmacro
%imacro pop_xmm 1
%imacro POP_XMM 1
%endmacro
%endif

View File

@@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -48,7 +48,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_YMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -549,7 +549,7 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -1,7 +1,7 @@
;
; jccolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -47,7 +47,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -474,7 +474,7 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -33,7 +33,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_avx2)
EXTN(jconst_rgb_ycc_convert_avx2):
@@ -46,7 +46,7 @@ PD_ONEHALFM1_CJ times 8 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jccolor.asm - colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -32,7 +32,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_ycc_convert_sse2)
EXTN(jconst_rgb_ycc_convert_sse2):
@@ -45,7 +45,7 @@ PD_ONEHALFM1_CJ times 4 dd (1 << (SCALEBITS - 1)) - 1 + \
(CENTERJSAMPLE << SCALEBITS)
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -29,7 +29,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_avx2)
EXTN(jconst_rgb_gray_convert_avx2):
@@ -38,7 +38,7 @@ PW_F0299_F0337 times 8 dw F_0_299, F_0_337
PW_F0114_F0250 times 8 dw F_0_114, F_0_250
PD_ONEHALF times 8 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgray.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -28,7 +28,7 @@ F_0_337 equ (F_0_587 - F_0_250) ; FIX(0.58700) - FIX(0.25000)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_rgb_gray_convert_sse2)
EXTN(jconst_rgb_gray_convert_sse2):
@@ -37,7 +37,7 @@ PW_F0299_F0337 times 4 dw F_0_299, F_0_337
PW_F0114_F0250 times 4 dw F_0_114, F_0_250
PD_ONEHALF times 4 dd (1 << (SCALEBITS - 1))
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit AVX2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -48,7 +48,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_YMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -428,7 +428,7 @@ EXTN(jsimd_rgb_gray_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -1,7 +1,7 @@
;
; jcgryext.asm - grayscale colorspace conversion (64-bit SSE2)
;
; Copyright (C) 2011, 2016, D. R. Commander.
; Copyright (C) 2011, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -47,7 +47,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d
@@ -353,7 +353,7 @@ EXTN(jsimd_rgb_gray_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -39,7 +39,7 @@ endstruc
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_huff_encode_one_block)
EXTN(jconst_huff_encode_one_block):
@@ -49,7 +49,7 @@ jpeg_mask_bits dd 0x0000, 0x0001, 0x0003, 0x0007
dd 0x00ff, 0x01ff, 0x03ff, 0x07ff
dd 0x0fff, 0x1fff, 0x3fff, 0x7fff
alignz 32
ALIGNZ 32
times 1 << 14 db 15
times 1 << 13 db 14
@@ -87,7 +87,7 @@ times 1 << 13 db 14
times 1 << 14 db 15
times 1 << 15 db 16
alignz 32
ALIGNZ 32
%define NBITS(x) nbits_base + x
%define MASK_BITS(x) NBITS((x) * 4) + (jpeg_mask_bits - EXTN(jpeg_nbits_table))

View File

@@ -4,6 +4,7 @@
;
; Copyright (C) 2016, 2018, Matthieu Darbois
; Copyright (C) 2023, Aliaksiej Kandracienka.
; Copyright (C) 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -287,7 +288,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
sub rsp, SIZEOF_XMMWORD
movdqa XMMWORD [rsp], ZERO
collect_args 6
COLLECT_ARGS 6
movd AL, r13d
pxor ZERO, ZERO
@@ -381,7 +382,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
REDUCE0
uncollect_args 6
UNCOLLECT_ARGS 6
movdqa ZERO, XMMWORD [rsp]
mov rsp, rbp
pop rbp
@@ -450,7 +451,7 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
sub rsp, SIZEOF_XMMWORD
movdqa XMMWORD [rsp], ZERO
collect_args 6
COLLECT_ARGS 6
xor SIGN, SIGN
xor EOB, EOB
@@ -598,7 +599,7 @@ EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
REDUCE0
mov eax, EOB
uncollect_args 6
UNCOLLECT_ARGS 6
movdqa ZERO, XMMWORD [rsp]
mov rsp, rbp
pop rbp

View File

@@ -2,7 +2,7 @@
; jcsample.asm - downsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
;
@@ -46,7 +46,7 @@
EXTN(jsimd_h2v1_downsample_avx2):
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -177,7 +177,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
.return:
vzeroupper
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret
@@ -207,7 +207,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
EXTN(jsimd_h2v2_downsample_avx2):
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -356,7 +356,7 @@ EXTN(jsimd_h2v2_downsample_avx2):
.return:
vzeroupper
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jcsample.asm - downsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -45,7 +45,7 @@
EXTN(jsimd_h2v1_downsample_sse2):
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -159,7 +159,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
jg near .rowloop
.return:
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret
@@ -189,7 +189,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
EXTN(jsimd_h2v2_downsample_sse2):
push rbp
mov rbp, rsp
collect_args 6
COLLECT_ARGS 6
mov ecx, r13d
shl rcx, 3 ; imul rcx,DCTSIZE (rcx = output_cols)
@@ -319,7 +319,7 @@ EXTN(jsimd_h2v2_downsample_sse2):
jg near .rowloop
.return:
uncollect_args 6
UNCOLLECT_ARGS 6
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -49,7 +49,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (WK_NUM * SIZEOF_YMMWORD)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@@ -486,7 +486,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jdcolext.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -48,7 +48,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 5
COLLECT_ARGS 5
push rbx
mov ecx, r10d ; num_cols
@@ -429,7 +429,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
.return:
pop rbx
uncollect_args 5
UNCOLLECT_ARGS 5
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_avx2)
EXTN(jconst_ycc_rgb_convert_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdcolor.asm - colorspace conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_ycc_rgb_convert_sse2)
EXTN(jconst_ycc_rgb_convert_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -32,7 +32,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_avx2)
EXTN(jconst_merged_upsample_avx2):
@@ -43,7 +43,7 @@ PW_MF0344_F0285 times 8 dw -F_0_344, F_0_285
PW_ONE times 16 dw 1
PD_ONEHALF times 8 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmerge.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -31,7 +31,7 @@ F_0_228 equ (131072 - F_1_772) ; FIX(2) - FIX(1.77200)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_merged_upsample_sse2)
EXTN(jconst_merged_upsample_sse2):
@@ -42,7 +42,7 @@ PW_MF0344_F0285 times 4 dw -F_0_344, F_0_285
PW_ONE times 8 dw 1
PD_ONEHALF times 4 dd 1 << (SCALEBITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT

View File

@@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit AVX2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -49,7 +49,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, SIZEOF_YMMWORD * WK_NUM
collect_args 4
COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@@ -480,7 +480,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@@ -508,7 +508,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
EXTN(jsimd_h2v2_merged_upsample_avx2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r10d
@@ -587,7 +587,7 @@ EXTN(jsimd_h2v2_merged_upsample_avx2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jdmrgext.asm - merged upsampling/color conversion (64-bit SSE2)
;
; Copyright 2009, 2012 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2012, 2016, D. R. Commander.
; Copyright (C) 2009, 2012, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -48,7 +48,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
push rbx
mov ecx, r10d ; col
@@ -422,7 +422,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@@ -450,7 +450,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
EXTN(jsimd_h2v2_merged_upsample_sse2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r10d
@@ -529,7 +529,7 @@ EXTN(jsimd_h2v2_merged_upsample_sse2):
add rsp, SIZEOF_JSAMPARRAY*4
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jdsample.asm - upsampling (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2015, Intel Corporation.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
@@ -22,7 +22,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_avx2)
EXTN(jconst_fancy_upsample_avx2):
@@ -33,7 +33,7 @@ PW_THREE times 16 dw 3
PW_SEVEN times 16 dw 7
PW_EIGHT times 16 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -64,8 +64,8 @@ PW_EIGHT times 16 dw 8
EXTN(jsimd_h2v1_fancy_upsample_avx2):
push rbp
mov rbp, rsp
push_xmm 3
collect_args 4
PUSH_XMM 3
COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@@ -186,8 +186,8 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
.return:
vzeroupper
uncollect_args 4
pop_xmm 3
UNCOLLECT_ARGS 4
POP_XMM 3
pop rbp
ret
@@ -222,8 +222,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_YMMWORD * WK_NUM)
push_xmm 3
collect_args 4
PUSH_XMM 3
COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@@ -498,8 +498,8 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
pop_xmm 3
UNCOLLECT_ARGS 4
POP_XMM 3
lea rsp, [rbp-8]
pop r15
pop rbp
@@ -526,7 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
EXTN(jsimd_h2v1_upsample_avx2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (SIZEOF_YMMWORD-1)
@@ -589,7 +589,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
.return:
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@@ -614,7 +614,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
EXTN(jsimd_h2v2_upsample_avx2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov edx, r11d
@@ -685,7 +685,7 @@ EXTN(jsimd_h2v2_upsample_avx2):
.return:
pop rbx
vzeroupper
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jdsample.asm - upsampling (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -21,7 +21,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fancy_upsample_sse2)
EXTN(jconst_fancy_upsample_sse2):
@@ -32,7 +32,7 @@ PW_THREE times 8 dw 3
PW_SEVEN times 8 dw 7
PW_EIGHT times 8 dw 8
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -63,7 +63,7 @@ PW_EIGHT times 8 dw 8
EXTN(jsimd_h2v1_fancy_upsample_sse2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov eax, r11d ; colctr
test rax, rax
@@ -174,7 +174,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
jg near .rowloop
.return:
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@@ -209,7 +209,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
push rbx
mov eax, r11d ; colctr
@@ -472,7 +472,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@@ -499,7 +499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
EXTN(jsimd_h2v1_upsample_sse2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
mov edx, r11d
add rdx, byte (2*SIZEOF_XMMWORD)-1
@@ -560,7 +560,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
jg short .rowloop
.return:
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret
@@ -585,7 +585,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
EXTN(jsimd_h2v2_upsample_sse2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
mov edx, r11d
@@ -654,7 +654,7 @@ EXTN(jsimd_h2v2_upsample_sse2):
.return:
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jfdctflt.asm - floating-point FDCT (64-bit SSE)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -35,7 +35,7 @@
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_float_sse)
EXTN(jconst_fdct_float_sse):
@@ -45,7 +45,7 @@ PD_0_707 times 4 dd 0.707106781186547524400844
PD_0_541 times 4 dd 0.541196100146196984399723
PD_1_306 times 4 dd 1.306562964876376527856643
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -73,7 +73,7 @@ EXTN(jsimd_fdct_float_sse):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -345,7 +345,7 @@ EXTN(jsimd_fdct_float_sse):
dec rcx
jnz near .columnloop
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jfdctfst.asm - fast integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -50,7 +50,7 @@ F_1_306 equ DESCALE(1402911301, 30 - CONST_BITS) ; FIX(1.306562965)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_ifast_sse2)
EXTN(jconst_fdct_ifast_sse2):
@@ -60,7 +60,7 @@ PW_F0382 times 8 dw F_0_382 << CONST_SHIFT
PW_F0541 times 8 dw F_0_541 << CONST_SHIFT
PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -88,7 +88,7 @@ EXTN(jsimd_fdct_ifast_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -379,7 +379,7 @@ EXTN(jsimd_fdct_ifast_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm6
movdqa XMMWORD [XMMBLOCK(1,0,rdx,SIZEOF_DCTELEM)], xmm2
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %1=(00 01 02 03 04 05 06 07 40 41 42 43 44 45 46 47)
; %2=(10 11 12 13 14 15 16 17 50 51 52 53 54 55 56 57)
; %3=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
@@ -108,7 +108,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%8: Temp registers
; %9: Pass (1 or 2)
%macro dodct 9
%macro DODCT 9
vpsubw %5, %1, %4 ; %5=data1_0-data6_7=tmp6_7
vpaddw %6, %1, %4 ; %6=data1_0+data6_7=tmp1_0
vpaddw %7, %2, %3 ; %7=data3_2+data4_5=tmp3_2
@@ -223,7 +223,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_avx2)
EXTN(jconst_fdct_islow_avx2):
@@ -242,7 +242,7 @@ PW_DESCALE_P2X times 16 dw 1 << (PASS1_BITS - 1)
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -262,7 +262,7 @@ PW_1_NEG1 times 8 dw 1
EXTN(jsimd_fdct_islow_avx2):
push rbp
mov rbp, rsp
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -284,9 +284,9 @@ EXTN(jsimd_fdct_islow_avx2):
; ymm2=(20 21 22 23 24 25 26 27 60 61 62 63 64 65 66 67)
; ymm3=(30 31 32 33 34 35 36 37 70 71 72 73 74 75 76 77)
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, 1
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm3=data7_5
; ---- Pass 2: process columns.
@@ -294,9 +294,9 @@ EXTN(jsimd_fdct_islow_avx2):
vperm2i128 ymm4, ymm1, ymm3, 0x20 ; ymm4=data3_7
vperm2i128 ymm1, ymm1, ymm3, 0x31 ; ymm1=data1_5
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, 2
; ymm0=data0_4, ymm1=data3_1, ymm2=data2_6, ymm4=data7_5
vperm2i128 ymm3, ymm0, ymm1, 0x30 ; ymm3=data0_1
@@ -310,7 +310,7 @@ EXTN(jsimd_fdct_islow_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm7
vzeroupper
uncollect_args 1
UNCOLLECT_ARGS 1
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jfdctint.asm - accurate integer FDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -64,7 +64,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_fdct_islow_sse2)
EXTN(jconst_fdct_islow_sse2):
@@ -81,7 +81,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -109,7 +109,7 @@ EXTN(jsimd_fdct_islow_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 1
COLLECT_ARGS 1
; ---- Pass 1: process rows.
@@ -609,7 +609,7 @@ EXTN(jsimd_fdct_islow_sse2):
movdqa XMMWORD [XMMBLOCK(5,0,rdx,SIZEOF_DCTELEM)], xmm1
movdqa XMMWORD [XMMBLOCK(3,0,rdx,SIZEOF_DCTELEM)], xmm3
uncollect_args 1
UNCOLLECT_ARGS 1
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jidctflt.asm - floating-point IDCT (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -25,18 +25,18 @@
; --------------------------------------------------------------------------
%macro unpcklps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
%macro UNPCKLPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(0 1 4 5)
shufps %1, %2, 0x44
%endmacro
%macro unpckhps2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
%macro UNPCKHPS2 2 ; %1=(0 1 2 3) / %2=(4 5 6 7) => %1=(2 3 6 7)
shufps %1, %2, 0xEE
%endmacro
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_float_sse2)
EXTN(jconst_idct_float_sse2):
@@ -48,7 +48,7 @@ PD_M2_613 times 4 dd -2.613125929752753055713286
PD_RNDINT_MAGIC times 4 dd 100663296.0 ; (float)(0x00C00000 << 3)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -83,7 +83,7 @@ EXTN(jsimd_idct_float_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
lea rsp, [workspace]
collect_args 4
COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input, store into work array.
@@ -280,11 +280,11 @@ EXTN(jsimd_idct_float_sse2):
unpckhps xmm4, xmm0 ; xmm4=(42 52 43 53)
movaps xmm3, xmm6 ; transpose coefficients(phase 2)
unpcklps2 xmm6, xmm7 ; xmm6=(00 10 20 30)
unpckhps2 xmm3, xmm7 ; xmm3=(01 11 21 31)
UNPCKLPS2 xmm6, xmm7 ; xmm6=(00 10 20 30)
UNPCKHPS2 xmm3, xmm7 ; xmm3=(01 11 21 31)
movaps xmm0, xmm1 ; transpose coefficients(phase 2)
unpcklps2 xmm1, xmm2 ; xmm1=(02 12 22 32)
unpckhps2 xmm0, xmm2 ; xmm0=(03 13 23 33)
UNPCKLPS2 xmm1, xmm2 ; xmm1=(02 12 22 32)
UNPCKHPS2 xmm0, xmm2 ; xmm0=(03 13 23 33)
movaps xmm7, XMMWORD [wk(0)] ; xmm7=(60 70 61 71)
movaps xmm2, XMMWORD [wk(1)] ; xmm2=(62 72 63 73)
@@ -295,11 +295,11 @@ EXTN(jsimd_idct_float_sse2):
movaps XMMWORD [XMMBLOCK(3,0,rdi,SIZEOF_FAST_FLOAT)], xmm0
movaps xmm6, xmm5 ; transpose coefficients(phase 2)
unpcklps2 xmm5, xmm7 ; xmm5=(40 50 60 70)
unpckhps2 xmm6, xmm7 ; xmm6=(41 51 61 71)
UNPCKLPS2 xmm5, xmm7 ; xmm5=(40 50 60 70)
UNPCKHPS2 xmm6, xmm7 ; xmm6=(41 51 61 71)
movaps xmm3, xmm4 ; transpose coefficients(phase 2)
unpcklps2 xmm4, xmm2 ; xmm4=(42 52 62 72)
unpckhps2 xmm3, xmm2 ; xmm3=(43 53 63 73)
UNPCKLPS2 xmm4, xmm2 ; xmm4=(42 52 62 72)
UNPCKHPS2 xmm3, xmm2 ; xmm3=(43 53 63 73)
movaps XMMWORD [XMMBLOCK(0,1,rdi,SIZEOF_FAST_FLOAT)], xmm5
movaps XMMWORD [XMMBLOCK(1,1,rdi,SIZEOF_FAST_FLOAT)], xmm6
@@ -470,7 +470,7 @@ EXTN(jsimd_idct_float_sse2):
jnz near .rowloop
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jidctfst.asm - fast integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -58,7 +58,7 @@ F_1_613 equ (F_2_613 - (1 << CONST_BITS)) ; FIX(2.613125930) - FIX(1)
%define PRE_MULTIPLY_SCALE_BITS 2
%define CONST_SHIFT (16 - PRE_MULTIPLY_SCALE_BITS - CONST_BITS)
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_ifast_sse2)
EXTN(jconst_idct_ifast_sse2):
@@ -69,7 +69,7 @@ PW_MF1613 times 8 dw -F_1_613 << CONST_SHIFT
PW_F1082 times 8 dw F_1_082 << CONST_SHIFT
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -102,7 +102,7 @@ EXTN(jsimd_idct_ifast_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -478,7 +478,7 @@ EXTN(jsimd_idct_ifast_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm2
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -66,7 +66,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %1-%4: Input/output registers
; %5-%8: Temp registers
%macro dotranspose 8
%macro DOTRANSPOSE 8
; %5=(00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71)
; %6=(03 13 23 33 43 53 63 73 02 12 22 32 42 52 62 72)
; %7=(04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75)
@@ -119,7 +119,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; %5-%12: Temp registers
; %9: Pass (1 or 2)
%macro dodct 13
%macro DODCT 13
; -- Even part
; (Original)
@@ -241,7 +241,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_avx2)
EXTN(jconst_idct_islow_avx2):
@@ -260,7 +260,7 @@ PB_CENTERJSAMP times 32 db CENTERJSAMPLE
PW_1_NEG1 times 8 dw 1
times 8 dw -1
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -284,8 +284,8 @@ PW_1_NEG1 times 8 dw 1
EXTN(jsimd_idct_islow_avx2):
push rbp
mov rbp, rsp ; rbp = aligned rbp
push_xmm 4
collect_args 4
PUSH_XMM 4
COLLECT_ARGS 4
; ---- Pass 1: process columns.
@@ -342,10 +342,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm2, ymm5, ymm7, 0x20 ; ymm2=in2_6
vperm2i128 ymm3, ymm7, ymm6, 0x31 ; ymm3=in7_5
dodct ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
DODCT ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 1
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm3=data7_6
dotranspose ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm3=data3_7
.column_end:
@@ -362,10 +362,10 @@ EXTN(jsimd_idct_islow_avx2):
vperm2i128 ymm4, ymm3, ymm1, 0x31 ; ymm3=in7_5
vperm2i128 ymm1, ymm3, ymm1, 0x20 ; ymm1=in3_1
dodct ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
DODCT ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, 2
; ymm0=data0_1, ymm1=data3_2, ymm2=data4_5, ymm4=data7_6
dotranspose ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
DOTRANSPOSE ymm0, ymm1, ymm2, ymm4, ymm3, ymm5, ymm6, ymm7
; ymm0=data0_4, ymm1=data1_5, ymm2=data2_6, ymm4=data3_7
vpacksswb ymm0, ymm0, ymm1 ; ymm0=data01_45
@@ -407,8 +407,8 @@ EXTN(jsimd_idct_islow_avx2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm6
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm7
uncollect_args 4
pop_xmm 4
UNCOLLECT_ARGS 4
POP_XMM 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jidctint.asm - accurate integer IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2020, D. R. Commander.
; Copyright (C) 2009, 2016, 2020, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -65,7 +65,7 @@ F_3_072 equ DESCALE(3299298341, 30 - CONST_BITS) ; FIX(3.072711026)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_islow_sse2)
EXTN(jconst_idct_islow_sse2):
@@ -82,7 +82,7 @@ PD_DESCALE_P1 times 4 dd 1 << (DESCALE_P1 - 1)
PD_DESCALE_P2 times 4 dd 1 << (DESCALE_P2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -115,7 +115,7 @@ EXTN(jsimd_idct_islow_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -835,7 +835,7 @@ EXTN(jsimd_idct_islow_sse2):
movq XMM_MMWORD [rdx+rax*SIZEOF_JSAMPLE], xmm2
movq XMM_MMWORD [rsi+rax*SIZEOF_JSAMPLE], xmm5
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp

View File

@@ -2,7 +2,7 @@
; jidctred.asm - reduced-size IDCT (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
; Copyright (C) 2023, Aliaksiej Kandracienka.
;
@@ -71,7 +71,7 @@ F_3_624 equ DESCALE(3891787747, 30 - CONST_BITS) ; FIX(3.624509785)
; --------------------------------------------------------------------------
SECTION SEG_CONST
alignz 32
ALIGNZ 32
GLOBAL_DATA(jconst_idct_red_sse2)
EXTN(jconst_idct_red_sse2):
@@ -89,7 +89,7 @@ PD_DESCALE_P1_2 times 4 dd 1 << (DESCALE_P1_2 - 1)
PD_DESCALE_P2_2 times 4 dd 1 << (DESCALE_P2_2 - 1)
PB_CENTERJSAMP times 16 db CENTERJSAMPLE
alignz 32
ALIGNZ 32
; --------------------------------------------------------------------------
SECTION SEG_TEXT
@@ -123,7 +123,7 @@ EXTN(jsimd_idct_4x4_sse2):
; Allocate stack space for wk array. r15 is used to access it.
mov r15, rsp
sub rsp, byte (SIZEOF_XMMWORD * WK_NUM)
collect_args 4
COLLECT_ARGS 4
; ---- Pass 1: process columns from input.
@@ -388,7 +388,7 @@ EXTN(jsimd_idct_4x4_sse2):
movd XMM_DWORD [rdx+rax*SIZEOF_JSAMPLE], xmm1
movd XMM_DWORD [rsi+rax*SIZEOF_JSAMPLE], xmm3
uncollect_args 4
UNCOLLECT_ARGS 4
lea rsp, [rbp-8]
pop r15
pop rbp
@@ -415,7 +415,7 @@ EXTN(jsimd_idct_4x4_sse2):
EXTN(jsimd_idct_2x2_sse2):
push rbp
mov rbp, rsp
collect_args 4
COLLECT_ARGS 4
push rbx
; ---- Pass 1: process columns from input.
@@ -563,7 +563,7 @@ EXTN(jsimd_idct_2x2_sse2):
mov word [rsi+rax*SIZEOF_JSAMPLE], cx
pop rbx
uncollect_args 4
UNCOLLECT_ARGS 4
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jquantf.asm - sample data conversion and quantization (64-bit SSE & SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -39,7 +39,7 @@
EXTN(jsimd_convsamp_float_sse2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
push rbx
pcmpeqw xmm7, xmm7
@@ -88,7 +88,7 @@ EXTN(jsimd_convsamp_float_sse2):
jnz short .convloop
pop rbx
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@@ -111,7 +111,7 @@ EXTN(jsimd_convsamp_float_sse2):
EXTN(jsimd_quantize_float_sse2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@@ -144,7 +144,7 @@ EXTN(jsimd_quantize_float_sse2):
dec rax
jnz short .quantloop
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (64-bit AVX2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, 2018, D. R. Commander.
; Copyright (C) 2009, 2016, 2018, 2024, D. R. Commander.
; Copyright (C) 2016, Matthieu Darbois.
; Copyright (C) 2018, Matthias Räncker.
;
@@ -40,7 +40,7 @@
EXTN(jsimd_convsamp_avx2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov eax, r11d
@@ -83,7 +83,7 @@ EXTN(jsimd_convsamp_avx2):
vmovdqu YMMWORD [YMMBLOCK(6,0,r12,SIZEOF_DCTELEM)], ymm3
vzeroupper
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@@ -117,7 +117,7 @@ EXTN(jsimd_convsamp_avx2):
EXTN(jsimd_quantize_avx2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
vmovdqu ymm4, [YMMBLOCK(0,0,r12,SIZEOF_DCTELEM)]
vmovdqu ymm5, [YMMBLOCK(2,0,r12,SIZEOF_DCTELEM)]
@@ -152,7 +152,7 @@ EXTN(jsimd_quantize_avx2):
vmovdqu [YMMBLOCK(6,0,r10,SIZEOF_DCTELEM)], ymm3
vzeroupper
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret

View File

@@ -2,7 +2,7 @@
; jquanti.asm - sample data conversion and quantization (64-bit SSE2)
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2009, 2016, D. R. Commander.
; Copyright (C) 2009, 2016, 2024, D. R. Commander.
; Copyright (C) 2018, Matthias Räncker.
;
; Based on the x86 SIMD extension for IJG JPEG library
@@ -39,7 +39,7 @@
EXTN(jsimd_convsamp_sse2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
push rbx
pxor xmm6, xmm6 ; xmm6=(all 0's)
@@ -83,7 +83,7 @@ EXTN(jsimd_convsamp_sse2):
jnz short .convloop
pop rbx
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret
@@ -117,7 +117,7 @@ EXTN(jsimd_convsamp_sse2):
EXTN(jsimd_quantize_sse2):
push rbp
mov rbp, rsp
collect_args 3
COLLECT_ARGS 3
mov rsi, r12
mov rdx, r11
@@ -177,7 +177,7 @@ EXTN(jsimd_quantize_sse2):
dec rax
jnz near .quantloop
uncollect_args 3
UNCOLLECT_ARGS 3
pop rbp
ret