x86-64 SIMD: Support CET if C compiler enables it
- Detect at configure time, via the __CET__ C preprocessor macro, whether the C compiler will include either indirect branch tracking (IBT) or shadow stack support, and define a NASM macro (__CET__) if so.
- Modify the x86-64 SIMD code so that it includes appropriate endbr64 instructions (to support IBT) and an appropriate .note.gnu.property section (to support both IBT and shadow stack) when __CET__ is defined.

Closes #350
This commit is contained in:
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -149,7 +149,7 @@ jobs:
|
||||
mkdir build
|
||||
pushd build
|
||||
cmake -G"Unix Makefiles" -DWITH_JPEG8=1 \
|
||||
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings' \
|
||||
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings -fcf-protection' \
|
||||
..
|
||||
export NUMCPUS=`grep -c '^processor' /proc/cpuinfo`
|
||||
make -j$NUMCPUS --load-average=$NUMCPUS
|
||||
|
||||
@@ -14,6 +14,8 @@ Build Requirements
|
||||
(if building x86 or x86-64 SIMD extensions)
|
||||
* If using NASM, 2.13 or later is required.
|
||||
* If using Yasm, 1.2.0 or later is required.
|
||||
* NASM 2.15 or later is required if building libjpeg-turbo with Intel
|
||||
Control-flow Enforcement Technology (CET) support.
|
||||
* If building on macOS, NASM or Yasm can be obtained from
|
||||
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
|
||||
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in
|
||||
|
||||
@@ -8,6 +8,10 @@ libjpeg-turbo components to depend on the Visual C++ run-time DLL when built
|
||||
with Visual C++ and CMake 3.15 or later, regardless of value of the
|
||||
`WITH_CRT_DLL` CMake variable.
|
||||
|
||||
2. The x86-64 SIMD extensions now include support for Intel Control-flow
|
||||
Enforcement Technology (CET), which is enabled automatically if CET is enabled
|
||||
in the C compiler.
|
||||
|
||||
|
||||
3.0.2
|
||||
=====
|
||||
|
||||
@@ -96,6 +96,18 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
|
||||
endif()
|
||||
|
||||
# Propagate the C compiler's Control-flow Enforcement Technology (CET) setting
# to the assembler.  This only applies to x86-64 builds that produce ELF
# objects; every other CPU type / object format skips the probe entirely.
if(CPU_TYPE STREQUAL "x86_64" AND CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf")
|
||||
# Configure-time probe: the test program compiles only if at least one of the
# two low bits of the compiler's __CET__ macro is set (indirect branch
# tracking and/or shadow stack); otherwise the #error fires.  The outcome is
# stored in HAVE_CET.
check_c_source_compiles("
|
||||
#if (__CET__ & 3) == 0
|
||||
#error \"CET not enabled\"
|
||||
#endif
|
||||
int main(void) { return 0; }" HAVE_CET)
|
||||
|
||||
# Define __CET__ for the assembly sources so that they can conditionally emit
# their CET-specific code and notes.
if(HAVE_CET)
|
||||
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
|
||||
set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
|
||||
message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}")
|
||||
|
||||
@@ -76,6 +76,14 @@
|
||||
; mark stack as non-executable
|
||||
section .note.GNU-stack noalloc noexec nowrite progbits
|
||||
|
||||
; When CET is requested (__CET__ defined) on x86-64, emit a GNU property
; note declaring that this object is compatible with both indirect branch
; tracking (IBT) and shadow stack.
%ifdef __CET__
|
||||
%ifdef __x86_64__
|
||||
section .note.gnu.property note alloc noexec align=8
|
||||
; Note header: name size = 4, descriptor size = 16,
; type = 5 (NT_GNU_PROPERTY_TYPE_0), name = "GNU\0"
dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
|
||||
; Descriptor: property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND),
; data size = 4, data = 3 (IBT bit | SHSTK bit), then 4 bytes of padding
; to keep the entry 8-byte aligned
dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
|
||||
%endif
|
||||
%endif
|
||||
|
||||
; -- segment definition --
|
||||
;
|
||||
%ifdef __x86_64__
|
||||
@@ -513,6 +521,19 @@ const_base:
|
||||
|
||||
%endif
|
||||
|
||||
; ENDBR64 -- zero-argument, case-insensitive macro placed at the entry point
; of each global SIMD function.  With __CET__ defined it emits an endbr64
; instruction; without it the macro expands to nothing, so call sites can use
; it unconditionally.
%ifdef __CET__
|
||||
|
||||
%imacro ENDBR64 0
|
||||
; Raw encoding of endbr64 (bytes F3 0F 1E FA once the dword is stored
; little-endian).  NOTE(review): presumably emitted as data so that the
; assembler does not need to recognize the mnemonic -- confirm.
dd 0xfa1e0ff3
|
||||
%endmacro
|
||||
|
||||
%else
|
||||
|
||||
; CET disabled: ENDBR64 is a no-op.
%imacro ENDBR64 0
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
; --------------------------------------------------------------------------
|
||||
; Defines picked up from the C headers
|
||||
;
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
|
||||
|
||||
EXTN(jsimd_rgb_ycc_convert_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
|
||||
|
||||
EXTN(jsimd_rgb_ycc_convert_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
|
||||
|
||||
EXTN(jsimd_rgb_gray_convert_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
|
||||
|
||||
EXTN(jsimd_rgb_gray_convert_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -261,6 +261,7 @@ times 1 << 15 db 16
|
||||
GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
|
||||
|
||||
EXTN(jsimd_huff_encode_one_block_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
|
||||
@@ -283,6 +283,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
|
||||
|
||||
EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
||||
@@ -446,6 +447,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
|
||||
|
||||
EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v1_downsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 6
|
||||
@@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v2_downsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 6
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v1_downsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 6
|
||||
@@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v2_downsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 6
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
|
||||
|
||||
EXTN(jsimd_ycc_rgb_convert_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
|
||||
|
||||
EXTN(jsimd_ycc_rgb_convert_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
@@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v2_merged_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
@@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v2_merged_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
|
||||
@@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
PUSH_XMM 3
|
||||
@@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
@@ -524,6 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v1_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
@@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
|
||||
|
||||
EXTN(jsimd_h2v2_upsample_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
|
||||
@@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
@@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
@@ -497,6 +499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v1_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
@@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
|
||||
|
||||
EXTN(jsimd_h2v2_upsample_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
|
||||
@@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
|
||||
GLOBAL_FUNCTION(jsimd_fdct_float_sse)
|
||||
|
||||
EXTN(jsimd_fdct_float_sse):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
|
||||
GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
|
||||
|
||||
EXTN(jsimd_fdct_ifast_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1
|
||||
GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
|
||||
|
||||
EXTN(jsimd_fdct_islow_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 1
|
||||
|
||||
@@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
|
||||
GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
|
||||
|
||||
EXTN(jsimd_fdct_islow_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -76,6 +76,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
||||
GLOBAL_FUNCTION(jsimd_idct_float_sse2)
|
||||
|
||||
EXTN(jsimd_idct_float_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
||||
GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
|
||||
|
||||
EXTN(jsimd_idct_ifast_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -282,6 +282,7 @@ PW_1_NEG1 times 8 dw 1
|
||||
GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
|
||||
|
||||
EXTN(jsimd_idct_islow_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp ; rbp = aligned rbp
|
||||
PUSH_XMM 4
|
||||
|
||||
@@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
||||
GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
|
||||
|
||||
EXTN(jsimd_idct_islow_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
|
||||
@@ -116,6 +116,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
|
||||
GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
|
||||
|
||||
EXTN(jsimd_idct_4x4_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push r15
|
||||
@@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
|
||||
|
||||
EXTN(jsimd_idct_2x2_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 4
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
|
||||
|
||||
EXTN(jsimd_convsamp_float_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
@@ -109,6 +110,7 @@ EXTN(jsimd_convsamp_float_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
|
||||
|
||||
EXTN(jsimd_quantize_float_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_convsamp_avx2)
|
||||
|
||||
EXTN(jsimd_convsamp_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2):
|
||||
GLOBAL_FUNCTION(jsimd_quantize_avx2)
|
||||
|
||||
EXTN(jsimd_quantize_avx2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
GLOBAL_FUNCTION(jsimd_convsamp_sse2)
|
||||
|
||||
EXTN(jsimd_convsamp_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_sse2):
|
||||
GLOBAL_FUNCTION(jsimd_quantize_sse2)
|
||||
|
||||
EXTN(jsimd_quantize_sse2):
|
||||
ENDBR64
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
COLLECT_ARGS 3
|
||||
|
||||
Reference in New Issue
Block a user