x86 SIMD: Check for CPUID leaf 07H before using it
According to Intel's manual [1], "If a value entered for CPUID.EAX is higher than the maximum input value for basic or extended function for that processor then the data for the highest basic information leaf is returned." Right now, libjpeg-turbo doesn't first check that leaf 07H is supported before attempting to use it, so the ostensible AVX2 bit (Bit 05) of the CPUID result might actually be Bit 05 from a lower leaf. That bit might be set, even if the CPU doesn't support AVX2.

This commit modifies the x86 and x86-64 SIMD feature detection code so that it first checks whether CPUID leaf 07H is supported before attempting to use it to check for AVX2 instruction support.

DRC: This commit should fix https://bugzilla.mozilla.org/show_bug.cgi?id=1520760. However, I have not personally been able to reproduce that issue, despite using a Nehalem (pre-AVX2) CPU on which the maximum CPUID leaf has been limited via a BIOS setting.

Closes #348

[1] "Intel® 64 and IA-32 Architectures Software Developer's Manual, Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z", https://software.intel.com/sites/default/files/managed/a4/60/325383-sdm-vol-2abcd.pdf, page 3-192.
This commit is contained in:
@@ -7,6 +7,12 @@
|
||||
platforms when passing invalid arguments to certain methods in the TurboJPEG
|
||||
Java API.
|
||||
|
||||
2. Fixed a regression in the SIMD feature detection code, introduced by
|
||||
the AVX2 SIMD extensions (2.0 beta1[1]), that was known to cause an illegal
|
||||
instruction exception, in rare cases, on CPUs that lack support for CPUID leaf
|
||||
07H (or on which the maximum CPUID leaf has been limited by way of a BIOS
|
||||
setting).
|
||||
|
||||
|
||||
2.0.2
|
||||
=====
|
||||
|
||||
@@ -51,29 +51,14 @@ EXTN(jpeg_simd_cpu_support):
|
||||
xor eax, edx
|
||||
jz near .return ; CPUID is not supported
|
||||
|
||||
; Check for MMX instruction support
|
||||
; Check whether CPUID leaf 07H is supported
|
||||
; (leaf 07H is used to check for AVX2 instruction support)
|
||||
xor eax, eax
|
||||
cpuid
|
||||
test eax, eax
|
||||
jz near .return
|
||||
|
||||
xor eax, eax
|
||||
inc eax
|
||||
cpuid
|
||||
mov eax, edx ; eax = Standard feature flags
|
||||
|
||||
test eax, 1<<23 ; bit23:MMX
|
||||
jz short .no_mmx
|
||||
or edi, byte JSIMD_MMX
|
||||
.no_mmx:
|
||||
test eax, 1<<25 ; bit25:SSE
|
||||
jz short .no_sse
|
||||
or edi, byte JSIMD_SSE
|
||||
.no_sse:
|
||||
test eax, 1<<26 ; bit26:SSE2
|
||||
jz short .no_sse2
|
||||
or edi, byte JSIMD_SSE2
|
||||
.no_sse2:
|
||||
cmp eax, 7
|
||||
jl short .no_avx2 ; Maximum leaf < 07H
|
||||
|
||||
; Check for AVX2 instruction support
|
||||
mov eax, 7
|
||||
@@ -102,6 +87,26 @@ EXTN(jpeg_simd_cpu_support):
|
||||
or edi, JSIMD_AVX2
|
||||
.no_avx2:
|
||||
|
||||
; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
|
||||
xor eax, eax
|
||||
inc eax
|
||||
cpuid
|
||||
mov eax, edx ; eax = Standard feature flags
|
||||
|
||||
; Check for MMX instruction support
|
||||
test eax, 1<<23 ; bit23:MMX
|
||||
jz short .no_mmx
|
||||
or edi, byte JSIMD_MMX
|
||||
.no_mmx:
|
||||
test eax, 1<<25 ; bit25:SSE
|
||||
jz short .no_sse
|
||||
or edi, byte JSIMD_SSE
|
||||
.no_sse:
|
||||
test eax, 1<<26 ; bit26:SSE2
|
||||
jz short .no_sse2
|
||||
or edi, byte JSIMD_SSE2
|
||||
.no_sse2:
|
||||
|
||||
; Check for 3DNow! instruction support
|
||||
mov eax, 0x80000000
|
||||
cpuid
|
||||
|
||||
@@ -38,14 +38,23 @@ EXTN(jpeg_simd_cpu_support):
|
||||
|
||||
xor rdi, rdi ; simd support flag
|
||||
|
||||
; Assume that all x86-64 processors support SSE & SSE2 instructions
|
||||
or rdi, JSIMD_SSE2
|
||||
or rdi, JSIMD_SSE
|
||||
|
||||
; Check whether CPUID leaf 07H is supported
|
||||
; (leaf 07H is used to check for AVX2 instruction support)
|
||||
mov rax, 0
|
||||
cpuid
|
||||
cmp rax, 7
|
||||
jl short .return ; Maximum leaf < 07H
|
||||
|
||||
; Check for AVX2 instruction support
|
||||
mov rax, 7
|
||||
xor rcx, rcx
|
||||
cpuid
|
||||
mov rax, rbx ; rax = Extended feature flags
|
||||
|
||||
or rdi, JSIMD_SSE2
|
||||
or rdi, JSIMD_SSE
|
||||
test rax, 1<<5 ; bit5:AVX2
|
||||
jz short .return
|
||||
|
||||
|
||||
Reference in New Issue
Block a user