diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 542635a4..ffae8f8e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -149,7 +149,7 @@ jobs: mkdir build pushd build cmake -G"Unix Makefiles" -DWITH_JPEG8=1 \ - -DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings' \ + -DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings -fcf-protection' \ .. 
export NUMCPUS=`grep -c '^processor' /proc/cpuinfo` make -j$NUMCPUS --load-average=$NUMCPUS diff --git a/BUILDING.md b/BUILDING.md index f6ec42e1..d06bce86 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -14,6 +14,8 @@ Build Requirements (if building x86 or x86-64 SIMD extensions) * If using NASM, 2.13 or later is required. * If using Yasm, 1.2.0 or later is required. + * NASM 2.15 or later is required if building libjpeg-turbo with Intel + Control-flow Enforcement Technology (CET) support. * If building on macOS, NASM or Yasm can be obtained from [MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/). - NOTE: Currently, if it is desirable to hide the SIMD function symbols in diff --git a/ChangeLog.md b/ChangeLog.md index 3982dcd8..c4872e27 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -8,6 +8,10 @@ libjpeg-turbo components to depend on the Visual C++ run-time DLL when built with Visual C++ and CMake 3.15 or later, regardless of value of the `WITH_CRT_DLL` CMake variable. +2. The x86-64 SIMD extensions now include support for Intel Control-flow +Enforcement Technology (CET), which is enabled automatically if CET is enabled +in the C compiler. 
+ 3.0.2 ===== diff --git a/simd/CMakeLists.txt b/simd/CMakeLists.txt index 60249001..02379559 100644 --- a/simd/CMakeLists.txt +++ b/simd/CMakeLists.txt @@ -96,6 +96,18 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED)) set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC") endif() +if(CPU_TYPE STREQUAL "x86_64" AND CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf") + check_c_source_compiles(" + #if !defined(__CET__) || (__CET__ & 3) == 0 /* defined() first: no -Wundef hit when CET is off */ + #error \"CET not enabled\" + #endif + int main(void) { return 0; }" HAVE_CET) + + if(HAVE_CET) + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__") + endif() +endif() + string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}") message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}") diff --git a/simd/nasm/jsimdext.inc b/simd/nasm/jsimdext.inc index 2da5017e..b5341ed2 100644 --- a/simd/nasm/jsimdext.inc +++ b/simd/nasm/jsimdext.inc @@ -76,6 +76,14 @@ ; mark stack as non-executable section .note.GNU-stack noalloc noexec nowrite progbits +%ifdef __CET__ +%ifdef __x86_64__ +section .note.gnu.property note alloc noexec align=8 + dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47 + dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000 +%endif +%endif + ; -- segment definition -- ; %ifdef __x86_64__ @@ -513,6 +521,19 @@ const_base: %endif +%ifdef __CET__ + +%imacro ENDBR64 0 + dd 0xfa1e0ff3 +%endmacro + +%else + +%imacro ENDBR64 0 +%endmacro + +%endif + ; -------------------------------------------------------------------------- ; Defines picked up from the C headers ; diff --git a/simd/x86_64/jccolext-avx2.asm b/simd/x86_64/jccolext-avx2.asm index 2b29add4..39e6f207 100644 --- a/simd/x86_64/jccolext-avx2.asm +++ b/simd/x86_64/jccolext-avx2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2) EXTN(jsimd_rgb_ycc_convert_avx2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jccolext-sse2.asm
b/simd/x86_64/jccolext-sse2.asm index 18b367eb..2073988d 100644 --- a/simd/x86_64/jccolext-sse2.asm +++ b/simd/x86_64/jccolext-sse2.asm @@ -40,6 +40,7 @@ GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2) EXTN(jsimd_rgb_ycc_convert_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jcgryext-avx2.asm b/simd/x86_64/jcgryext-avx2.asm index d780336d..d2ae6d63 100644 --- a/simd/x86_64/jcgryext-avx2.asm +++ b/simd/x86_64/jcgryext-avx2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2) EXTN(jsimd_rgb_gray_convert_avx2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jcgryext-sse2.asm b/simd/x86_64/jcgryext-sse2.asm index 4789fccf..3c2834e9 100644 --- a/simd/x86_64/jcgryext-sse2.asm +++ b/simd/x86_64/jcgryext-sse2.asm @@ -40,6 +40,7 @@ GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2) EXTN(jsimd_rgb_gray_convert_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jchuff-sse2.asm b/simd/x86_64/jchuff-sse2.asm index fca3e94d..39aa2465 100644 --- a/simd/x86_64/jchuff-sse2.asm +++ b/simd/x86_64/jchuff-sse2.asm @@ -261,6 +261,7 @@ times 1 << 15 db 16 GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2) EXTN(jsimd_huff_encode_one_block_sse2): + ENDBR64 push rbp mov rbp, rsp diff --git a/simd/x86_64/jcphuff-sse2.asm b/simd/x86_64/jcphuff-sse2.asm index 1d47b9a1..0e274046 100644 --- a/simd/x86_64/jcphuff-sse2.asm +++ b/simd/x86_64/jcphuff-sse2.asm @@ -283,6 +283,7 @@ GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2) EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): + ENDBR64 push rbp mov rbp, rsp and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits @@ -446,6 +447,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2): GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2) EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2): + ENDBR64 push rbp mov rbp, rsp and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits diff --git a/simd/x86_64/jcsample-avx2.asm b/simd/x86_64/jcsample-avx2.asm index ff13862b..fede6b38 100644 --- 
a/simd/x86_64/jcsample-avx2.asm +++ b/simd/x86_64/jcsample-avx2.asm @@ -44,6 +44,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2) EXTN(jsimd_h2v1_downsample_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 6 @@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2) EXTN(jsimd_h2v2_downsample_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 6 diff --git a/simd/x86_64/jcsample-sse2.asm b/simd/x86_64/jcsample-sse2.asm index 44384466..0a0ee65e 100644 --- a/simd/x86_64/jcsample-sse2.asm +++ b/simd/x86_64/jcsample-sse2.asm @@ -43,6 +43,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2) EXTN(jsimd_h2v1_downsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 6 @@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2) EXTN(jsimd_h2v2_downsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 6 diff --git a/simd/x86_64/jdcolext-avx2.asm b/simd/x86_64/jdcolext-avx2.asm index 5b9d60ad..a8384cb5 100644 --- a/simd/x86_64/jdcolext-avx2.asm +++ b/simd/x86_64/jdcolext-avx2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2) EXTN(jsimd_ycc_rgb_convert_avx2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jdcolext-sse2.asm b/simd/x86_64/jdcolext-sse2.asm index 33eb7abd..bfb59abf 100644 --- a/simd/x86_64/jdcolext-sse2.asm +++ b/simd/x86_64/jdcolext-sse2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2) EXTN(jsimd_ycc_rgb_convert_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jdmrgext-avx2.asm b/simd/x86_64/jdmrgext-avx2.asm index d19c5f4b..3392f3a3 100644 --- a/simd/x86_64/jdmrgext-avx2.asm +++ b/simd/x86_64/jdmrgext-avx2.asm @@ -42,6 +42,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2) EXTN(jsimd_h2v1_merged_upsample_avx2): + ENDBR64 push rbp mov rbp, rsp push r15 @@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2) EXTN(jsimd_h2v2_merged_upsample_avx2): + 
ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 diff --git a/simd/x86_64/jdmrgext-sse2.asm b/simd/x86_64/jdmrgext-sse2.asm index db934201..901db984 100644 --- a/simd/x86_64/jdmrgext-sse2.asm +++ b/simd/x86_64/jdmrgext-sse2.asm @@ -41,6 +41,7 @@ GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2) EXTN(jsimd_h2v1_merged_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 @@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2) EXTN(jsimd_h2v2_merged_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 diff --git a/simd/x86_64/jdsample-avx2.asm b/simd/x86_64/jdsample-avx2.asm index e88778e2..017427a1 100644 --- a/simd/x86_64/jdsample-avx2.asm +++ b/simd/x86_64/jdsample-avx2.asm @@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2) EXTN(jsimd_h2v1_fancy_upsample_avx2): + ENDBR64 push rbp mov rbp, rsp PUSH_XMM 3 @@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2) EXTN(jsimd_h2v2_fancy_upsample_avx2): + ENDBR64 push rbp mov rbp, rsp push r15 @@ -524,6 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2) EXTN(jsimd_h2v1_upsample_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 @@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2) EXTN(jsimd_h2v2_upsample_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 diff --git a/simd/x86_64/jdsample-sse2.asm b/simd/x86_64/jdsample-sse2.asm index d590b0cd..95c4d4c9 100644 --- a/simd/x86_64/jdsample-sse2.asm +++ b/simd/x86_64/jdsample-sse2.asm @@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8 GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2) EXTN(jsimd_h2v1_fancy_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 @@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2) EXTN(jsimd_h2v2_fancy_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 @@ -497,6 
+499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2) EXTN(jsimd_h2v1_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 @@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2): GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2) EXTN(jsimd_h2v2_upsample_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 diff --git a/simd/x86_64/jfdctflt-sse.asm b/simd/x86_64/jfdctflt-sse.asm index d80610c0..cf46d93d 100644 --- a/simd/x86_64/jfdctflt-sse.asm +++ b/simd/x86_64/jfdctflt-sse.asm @@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643 GLOBAL_FUNCTION(jsimd_fdct_float_sse) EXTN(jsimd_fdct_float_sse): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jfdctfst-sse2.asm b/simd/x86_64/jfdctfst-sse2.asm index fa6e4e5e..cdc62365 100644 --- a/simd/x86_64/jfdctfst-sse2.asm +++ b/simd/x86_64/jfdctfst-sse2.asm @@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2) EXTN(jsimd_fdct_ifast_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jfdctint-avx2.asm b/simd/x86_64/jfdctint-avx2.asm index 22c5fa9b..b6b4c73a 100644 --- a/simd/x86_64/jfdctint-avx2.asm +++ b/simd/x86_64/jfdctint-avx2.asm @@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_fdct_islow_avx2) EXTN(jsimd_fdct_islow_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 1 diff --git a/simd/x86_64/jfdctint-sse2.asm b/simd/x86_64/jfdctint-sse2.asm index 4b2e0f2f..44e7cd05 100644 --- a/simd/x86_64/jfdctint-sse2.asm +++ b/simd/x86_64/jfdctint-sse2.asm @@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1) GLOBAL_FUNCTION(jsimd_fdct_islow_sse2) EXTN(jsimd_fdct_islow_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jidctflt-sse2.asm b/simd/x86_64/jidctflt-sse2.asm index fd448ceb..c7cb39a0 100644 --- a/simd/x86_64/jidctflt-sse2.asm +++ b/simd/x86_64/jidctflt-sse2.asm @@ -76,6 +76,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE 
GLOBAL_FUNCTION(jsimd_idct_float_sse2) EXTN(jsimd_idct_float_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jidctfst-sse2.asm b/simd/x86_64/jidctfst-sse2.asm index 55554c59..fd3bc32c 100644 --- a/simd/x86_64/jidctfst-sse2.asm +++ b/simd/x86_64/jidctfst-sse2.asm @@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_ifast_sse2) EXTN(jsimd_idct_ifast_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jidctint-avx2.asm b/simd/x86_64/jidctint-avx2.asm index 487c5fbc..84d125bd 100644 --- a/simd/x86_64/jidctint-avx2.asm +++ b/simd/x86_64/jidctint-avx2.asm @@ -282,6 +282,7 @@ PW_1_NEG1 times 8 dw 1 GLOBAL_FUNCTION(jsimd_idct_islow_avx2) EXTN(jsimd_idct_islow_avx2): + ENDBR64 push rbp mov rbp, rsp ; rbp = aligned rbp PUSH_XMM 4 diff --git a/simd/x86_64/jidctint-sse2.asm b/simd/x86_64/jidctint-sse2.asm index 87a0067f..3f098b2c 100644 --- a/simd/x86_64/jidctint-sse2.asm +++ b/simd/x86_64/jidctint-sse2.asm @@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_islow_sse2) EXTN(jsimd_idct_islow_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 diff --git a/simd/x86_64/jidctred-sse2.asm b/simd/x86_64/jidctred-sse2.asm index cf92accb..2657cf3c 100644 --- a/simd/x86_64/jidctred-sse2.asm +++ b/simd/x86_64/jidctred-sse2.asm @@ -116,6 +116,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE GLOBAL_FUNCTION(jsimd_idct_4x4_sse2) EXTN(jsimd_idct_4x4_sse2): + ENDBR64 push rbp mov rbp, rsp push r15 @@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2): GLOBAL_FUNCTION(jsimd_idct_2x2_sse2) EXTN(jsimd_idct_2x2_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 4 diff --git a/simd/x86_64/jquantf-sse2.asm b/simd/x86_64/jquantf-sse2.asm index 0d13a68c..8bd79662 100644 --- a/simd/x86_64/jquantf-sse2.asm +++ b/simd/x86_64/jquantf-sse2.asm @@ -37,6 +37,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_float_sse2) EXTN(jsimd_convsamp_float_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3 @@ -109,6 +110,7 @@ 
EXTN(jsimd_convsamp_float_sse2): GLOBAL_FUNCTION(jsimd_quantize_float_sse2) EXTN(jsimd_quantize_float_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3 diff --git a/simd/x86_64/jquanti-avx2.asm b/simd/x86_64/jquanti-avx2.asm index d4c06c1b..c8ebd796 100644 --- a/simd/x86_64/jquanti-avx2.asm +++ b/simd/x86_64/jquanti-avx2.asm @@ -38,6 +38,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_avx2) EXTN(jsimd_convsamp_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3 @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2): GLOBAL_FUNCTION(jsimd_quantize_avx2) EXTN(jsimd_quantize_avx2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3 diff --git a/simd/x86_64/jquanti-sse2.asm b/simd/x86_64/jquanti-sse2.asm index 5898c4c4..352d7405 100644 --- a/simd/x86_64/jquanti-sse2.asm +++ b/simd/x86_64/jquanti-sse2.asm @@ -37,6 +37,7 @@ GLOBAL_FUNCTION(jsimd_convsamp_sse2) EXTN(jsimd_convsamp_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3 @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_sse2): GLOBAL_FUNCTION(jsimd_quantize_sse2) EXTN(jsimd_quantize_sse2): + ENDBR64 push rbp mov rbp, rsp COLLECT_ARGS 3