x86-64 SIMD: Support CET if C compiler enables it

- Detect at configure time, via the __CET__ C preprocessor macro,
  whether the C compiler will include either indirect branch tracking
  (IBT) or shadow stack support, and define a NASM macro (__CET__) if
  so.

- Modify the x86-64 SIMD code so that it includes appropriate endbr64
  instructions (to support IBT) and an appropriate .note.gnu.property
  section (to support both IBT and shadow stack) when __CET__ is
  defined.

Closes #350
This commit is contained in:
DRC
2024-02-29 16:10:20 -05:00
parent 1335547558
commit 3202feb08a
31 changed files with 81 additions and 1 deletions

View File

@@ -149,7 +149,7 @@ jobs:
mkdir build
pushd build
cmake -G"Unix Makefiles" -DWITH_JPEG8=1 \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings' \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings -fcf-protection' \
..
export NUMCPUS=`grep -c '^processor' /proc/cpuinfo`
make -j$NUMCPUS --load-average=$NUMCPUS

View File

@@ -14,6 +14,8 @@ Build Requirements
(if building x86 or x86-64 SIMD extensions)
* If using NASM, 2.13 or later is required.
* If using Yasm, 1.2.0 or later is required.
* NASM 2.15 or later is required if building libjpeg-turbo with Intel
Control-flow Enforcement Technology (CET) support.
* If building on macOS, NASM or Yasm can be obtained from
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in

View File

@@ -8,6 +8,10 @@ libjpeg-turbo components to depend on the Visual C++ run-time DLL when built
with Visual C++ and CMake 3.15 or later, regardless of the value of the
`WITH_CRT_DLL` CMake variable.
2. The x86-64 SIMD extensions now include support for Intel Control-flow
Enforcement Technology (CET), which is enabled automatically if CET is enabled
in the C compiler.
3.0.2
=====

View File

@@ -96,6 +96,18 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
endif()
# Intel CET support for the x86-64 SIMD extensions (ELF object formats only).
#
# Ask the C compiler, through its __CET__ preprocessor macro, whether it is
# generating code with indirect branch tracking and/or shadow stack support
# (the probe fails to compile when the low two bits of __CET__ are both clear
# or the macro is undefined.)  When the probe succeeds, pass the same macro
# name to NASM so that the assembly sources can react to it.
if(CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf" AND
  CPU_TYPE STREQUAL "x86_64")

  check_c_source_compiles("
#if (__CET__ & 3) == 0
#error \"CET not enabled\"
#endif
int main(void) { return 0; }" HAVE_CET)

  if(HAVE_CET)
    # Mirror the C compiler's CET setting in the assembler command line.
    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__")
  endif()

endif()
# Report the NASM flags that apply to the current build type: the common
# CMAKE_ASM_NASM_FLAGS followed by the matching per-configuration
# CMAKE_ASM_NASM_FLAGS_<CONFIG> value.
#
# CMAKE_BUILD_TYPE is expanded inside quotes because it can be empty (for
# example with multi-config generators).  An unquoted empty expansion removes
# the argument entirely, which makes string(TOUPPER) fail with an argument
# count error instead of producing an empty result.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UC)
set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}")

View File

@@ -76,6 +76,14 @@
; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits
%ifdef __CET__
%ifdef __x86_64__
; Advertise CET support (indirect branch tracking and shadow stack) for this
; object file through a .note.gnu.property ELF note.  Eight little-endian
; dwords are emitted in total: a 16-byte note header followed by a 16-byte
; descriptor holding one property.
section .note.gnu.property note alloc noexec align=8
    ; --- note header ---
    dd      0x00000004              ; n_namesz: length of "GNU\0"
    dd      0x00000010              ; n_descsz: 16 bytes of descriptor follow
    dd      0x00000005              ; n_type:   NT_GNU_PROPERTY_TYPE_0
    db      'G', 'N', 'U', 0        ; n_name
    ; --- descriptor: one property ---
    dd      0xc0000002              ; pr_type:   GNU_PROPERTY_X86_FEATURE_1_AND
    dd      0x00000004              ; pr_datasz: 4 bytes of data
    dd      0x00000003              ; pr_data:   IBT (bit 0) | SHSTK (bit 1)
    dd      0x00000000              ; padding up to the 8-byte alignment
%endif
%endif
; -- segment definition --
;
%ifdef __x86_64__
@@ -513,6 +521,19 @@ const_base:
%endif
; ENDBR64 -- place at the entry point of every function that can be reached
; through an indirect call or jump.
;
; When __CET__ is defined, the macro emits the four-byte endbr64 instruction
; (F3 0F 1E FA) as raw data; otherwise it expands to nothing, so call sites
; can use it unconditionally.
; NOTE(review): the raw bytes are presumably used so that assemblers without
; the endbr64 mnemonic can still build the file -- confirm.
%imacro ENDBR64 0
%ifdef __CET__
    db      0xf3, 0x0f, 0x1e, 0xfa
%endif
%endmacro
; --------------------------------------------------------------------------
; Defines picked up from the C headers
;

View File

@@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)
EXTN(jsimd_rgb_ycc_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)
EXTN(jsimd_rgb_ycc_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)
EXTN(jsimd_rgb_gray_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)
EXTN(jsimd_rgb_gray_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -261,6 +261,7 @@ times 1 << 15 db 16
GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)
EXTN(jsimd_huff_encode_one_block_sse2):
ENDBR64
push rbp
mov rbp, rsp

View File

@@ -283,6 +283,7 @@
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
@@ -446,6 +447,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)
EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits

View File

@@ -44,6 +44,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)
EXTN(jsimd_h2v1_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
@@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)
EXTN(jsimd_h2v2_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6

View File

@@ -43,6 +43,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)
EXTN(jsimd_h2v1_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
@@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)
EXTN(jsimd_h2v2_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6

View File

@@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)
EXTN(jsimd_ycc_rgb_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)
EXTN(jsimd_ycc_rgb_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)
EXTN(jsimd_h2v1_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)
EXTN(jsimd_h2v2_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4

View File

@@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)
EXTN(jsimd_h2v1_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)
EXTN(jsimd_h2v2_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4

View File

@@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)
EXTN(jsimd_h2v1_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
PUSH_XMM 3
@@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)
EXTN(jsimd_h2v2_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@@ -524,6 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)
EXTN(jsimd_h2v1_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
@@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)
EXTN(jsimd_h2v2_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4

View File

@@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)
EXTN(jsimd_h2v1_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
@@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)
EXTN(jsimd_h2v2_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@@ -497,6 +499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)
EXTN(jsimd_h2v1_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
@@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)
EXTN(jsimd_h2v2_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4

View File

@@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
GLOBAL_FUNCTION(jsimd_fdct_float_sse)
EXTN(jsimd_fdct_float_sse):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)
EXTN(jsimd_fdct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)
EXTN(jsimd_fdct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 1

View File

@@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)
EXTN(jsimd_fdct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -76,6 +76,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_float_sse2)
EXTN(jsimd_idct_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)
EXTN(jsimd_idct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -282,6 +282,7 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_idct_islow_avx2)
EXTN(jsimd_idct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp ; rbp = aligned rbp
PUSH_XMM 4

View File

@@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_islow_sse2)
EXTN(jsimd_idct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15

View File

@@ -116,6 +116,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)
EXTN(jsimd_idct_4x4_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
@@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2):
GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)
EXTN(jsimd_idct_2x2_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4

View File

@@ -37,6 +37,7 @@
GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)
EXTN(jsimd_convsamp_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
@@ -109,6 +110,7 @@ EXTN(jsimd_convsamp_float_sse2):
GLOBAL_FUNCTION(jsimd_quantize_float_sse2)
EXTN(jsimd_quantize_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3

View File

@@ -38,6 +38,7 @@
GLOBAL_FUNCTION(jsimd_convsamp_avx2)
EXTN(jsimd_convsamp_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2):
GLOBAL_FUNCTION(jsimd_quantize_avx2)
EXTN(jsimd_quantize_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3

View File

@@ -37,6 +37,7 @@
GLOBAL_FUNCTION(jsimd_convsamp_sse2)
EXTN(jsimd_convsamp_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
@@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_sse2):
GLOBAL_FUNCTION(jsimd_quantize_sse2)
EXTN(jsimd_quantize_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3