The IJG convention is to format copyright notices as: Copyright (C) YYYY, Owner. We try to maintain this convention for any code that is part of the libjpeg API library (with the exception of preserving the copyright notices from Cendio's code verbatim, since those predate libjpeg-turbo.) Note that the phrase "All Rights Reserved" is no longer necessary, since all Buenos Aires Convention signatories signed onto the Berne Convention in 2000. However, our convention is to retain this phrase for any files that have a self-contained copyright header but to leave it off of any files that refer to another file for conditions of distribution and use. For instance, all of the non-SIMD files in the libjpeg API library refer to README.ijg, and the copyright message in that file contains "All Rights Reserved", so it is unnecessary to add it to the individual files. The TurboJPEG code retains my preferred formatting convention for copyright notices, which is based on that of VirtualGL (where the TurboJPEG API originated.)
376 lines
12 KiB
PHP
376 lines
12 KiB
PHP
;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
;    claim that you wrote the original software. If you use this software
;    in a product, an acknowledgment in the product documentation would be
;    appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
;    misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]

; ==========================================================================
;  System-dependent configurations
;
;  The first of WIN32 / WIN64 / OBJ32 / ELF / AOUT / MACHO that is defined
;  (normally with -D on the assembler command line) selects a branch; when
;  none of them is defined, the generic fallback at the bottom is used.
;
;  Every branch defines:
;    SEG_TEXT    section name and attributes used for code
;    SEG_CONST   section name and attributes used for constant data
;  Some branches additionally define:
;    EXTN(name)  external symbol spelling (WIN64 and ELF: no decoration)
;    GOT_SYMBOL  base symbol for position-independent code (ELF, AOUT, MACHO)
;    PIC         forced on for MACHO

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use32 class=CODE
%define SEG_CONST   .rdata align=16 public use32 class=CONST
%endif

%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;
%ifdef __YASM_VER__
%define SEG_TEXT    .text  align=16
%define SEG_CONST   .rdata align=16
%else
%define SEG_TEXT    .text  align=16 public use64 class=CODE
%define SEG_CONST   .rdata align=16 public use64 class=CONST
%endif
%define EXTN(name)  name                ; foo() -> foo

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
%define SEG_TEXT    _text align=16 public use32 class=CODE
%define SEG_CONST   _data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;
%ifdef __x86_64__
%define SEG_TEXT    .text   progbits align=16
%define SEG_CONST   .rodata progbits align=16
%else
%define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
%define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
%endif

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_   ; ELF supports PIC
%define EXTN(name)  name                    ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

; To make the code position-independent, append -DPIC to the commandline
;
%define GOT_SYMBOL  __GLOBAL_OFFSET_TABLE_  ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
%define SEG_TEXT    .text  ;align=16    ; nasm doesn't accept align=16. why?
%define SEG_CONST   .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
%define PIC
%define GOT_SYMBOL  _MACHO_PIC_         ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
%define SEG_TEXT    .text
%define SEG_CONST   .data

%endif  ; ----------------------------------------------

; ==========================================================================

; --------------------------------------------------------------------------
;  Common types
;
;  Each type name expands to a NASM operand-size keyword so that it can be
;  written in front of a memory operand (e.g. "POINTER [esp]").  The
;  SIZEOF_* and *_BIT names on the right-hand side are defined further down
;  in this file; %define bodies are expanded at the point of use, so the
;  forward references are harmless.
;
%ifdef __x86_64__
%define POINTER                 qword           ; general pointer type
%define SIZEOF_POINTER          SIZEOF_QWORD    ; sizeof(POINTER)
%define POINTER_BIT             QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
%define POINTER                 dword           ; general pointer type
%define SIZEOF_POINTER          SIZEOF_DWORD    ; sizeof(POINTER)
%define POINTER_BIT             DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

%define INT                     dword           ; signed integer type
%define SIZEOF_INT              SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT                 DWORD_BIT       ; sizeof(INT)*BYTE_BIT

%define FP32                    dword           ; IEEE754 single
%define SIZEOF_FP32             SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT                DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

%define MMWORD                  qword           ; int64  (MMX register)
%define SIZEOF_MMWORD           SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT              QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now we have to define XMMWORD as blank.
; (XMMWORD therefore expands to nothing; only its SIZEOF_/_BIT companions
; carry the 128-bit size.)
%define XMMWORD                                 ; int128 (SSE register)
%define SIZEOF_XMMWORD          SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT             OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Similar hacks for when we load a dword or MMWORD into an xmm# register
; (both expand to nothing as well)
%define XMM_DWORD
%define XMM_MMWORD

; Sizes in bytes of the basic operand widths.
%define SIZEOF_BYTE             1               ; sizeof(BYTE)
%define SIZEOF_WORD             2               ; sizeof(WORD)
%define SIZEOF_DWORD            4               ; sizeof(DWORD)
%define SIZEOF_QWORD            8               ; sizeof(QWORD)
%define SIZEOF_OWORD            16              ; sizeof(OWORD)

; The same widths expressed in bits.
%define BYTE_BIT                8               ; CHAR_BIT in C
%define WORD_BIT                16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT               32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT               64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT               128             ; sizeof(OWORD)*BYTE_BIT

; --------------------------------------------------------------------------
;  External Symbol Name
;
;  EXTN(name) spells the assembler-level name of an external symbol.
;  If nothing has defined EXTN before this point, fall back to prefixing the
;  name with a single underscore.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name          ; foo() -> _foo
%endif

; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  PIC can only be honoured when a GOT_SYMBOL is known; without one, any
;  PIC request is dropped here so that the non-PIC macro set is selected.
;
%ifndef GOT_SYMBOL
%undef  PIC
%endif

; Interface provided by this conditional (same names in every variant):
;   GOTOFF(got,sym)  address expression for <sym> given the base held in <got>
;   get_GOT reg      load the PIC base into <reg>
;   pushpic reg      push <reg>          (expands to nothing without PIC)
;   poppic  reg      pop  <reg>          (expands to nothing without PIC)
;   movpic  dst,src  mov  <dst>,<src>    (expands to nothing without PIC)
; The PIC variants of get_GOT are written with 32-bit registers (esp, ecx,
; ebp).

%ifdef PIC      ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.
;
; const_base labels the position in SEG_CONST at the point where this file
; is included, and constants are addressed relative to it.  Note that
; SEG_CONST is left as the current section afterwards.

        SECTION SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant)
;   Loads the run-time address of const_base into <reg>.
;   Clobbers ecx and the flags; ebp is saved and restored.
;
;   How it works:
;     1. "call %%geteip" returns with ecx = address of the following "add".
;     2. The "add" advances ecx by (%%ref - $), i.e. to the run-time address
;        of %%ref.
;     3. The two "db" bytes followed by the E9 opcode and rel32 operand that
;        "jmp near const_base" assembles to are executed as a single
;        "lea r32, [ecx+ebp*8+disp32]" whose disp32 is (const_base - %%ref).
;        ebp is zeroed beforehand so the scaled index contributes nothing.
%imacro get_GOT 1
        ; NOTE: this macro destroys ecx register.
        call    %%geteip
        add     ecx, byte (%%ref - $)
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else  ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif ; (%1 == ebx)
        pop     ebp
%endmacro

%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (GOT-based variant)
;   Declares GOT_SYMBOL external, reads the return address of a local call
;   into <reg> and adds the "wrt ..gotpc" displacement to it.
;   Modifies <reg> and the flags only.
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]
        ret
%%done:
%endmacro

%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------

; Save / restore / copy the PIC base register.
%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else   ; !PIC -----------------------------------------

; Without PIC a symbol is addressed directly and the helper macros emit no
; code at all.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif  ;  PIC -----------------------------------------

; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  MSKLE(x,y)  mask that is non-zero (all low bits set) when x <= y and
;              0 otherwise; x and y are taken modulo 0x10000.
;  FILLB(b,n)  number of bytes needed to advance position <b> to the next
;              multiple of <n>, measured from the section start ($$);
;              <n> must be a power of two.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

; alignx boundary [, maxfill]
;   Pads with executable filler up to <boundary>, but only if the padding
;   required at the macro's start (%%bs) does not exceed <maxfill>
;   (default 0xFFFF).
;   * If 16 or more bytes are still needed, everything is filled with
;     one-byte nops.
;   * Otherwise the remainder is filled greedily with the multi-byte
;     fillers below, each "times" count being re-evaluated at the current
;     position ($).
;   NOTE(review): the multi-byte fillers are 32-bit register forms
;   (lea/mov on ebx/ebp).  In 64-bit mode a write to a 32-bit register
;   zero-extends into the full 64-bit register, so confirm these fillers are
;   acceptable wherever alignx is expanded on a fall-through path in
;   __x86_64__ code.
%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                               ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                          ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                               ; nop
%endmacro

; Align the next data on {2,4,8,16,..}-byte boundary.
;
; alignz boundary
;   Thin wrapper around "align" that pads with zero bytes.
%imacro alignz 1.nolist
        align   %1, db 0                ; filling zeros
%endmacro

; --------------------------------------------------------------------------
;  64-bit argument marshalling
;
;  collect_args    saves registers and copies the incoming arguments into
;                  r10, r11, r12, r13, r14, r15 (first to sixth argument)
;  uncollect_args  restores exactly what the matching collect_args saved;
;                  the stack pointer must be back where collect_args left it
;
%ifdef __x86_64__

%ifdef WIN64

; WIN64 variant: the first four arguments arrive in rcx, rdx, r8, r9; the
; fifth and sixth are read from [rax+48] and [rax+56].
; NOTE(review): this relies on the caller having loaded rax before invoking
; the macro - presumably "mov rax, rsp" right after one push on entry, which
; would place the fifth argument at +48 (pushed value, return address,
; 32-byte shadow area).  Confirm against the call sites.
; NOTE(review): "movaps" faults on an unaligned address, so rsp must be
; 16-byte aligned where xmm6/xmm7 are stored - this depends on the caller's
; stack state on entry to the macro.
; Saved here: r12-r15, rsi, rdi, xmm6, xmm7.  r10 and r11 are overwritten
; without being saved.
%imacro collect_args 0
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rcx                ; 1st argument
        mov     r11, rdx                ; 2nd argument
        mov     r12, r8                 ; 3rd argument
        mov     r13, r9                 ; 4th argument
        mov     r14, [rax+48]           ; 5th argument (see note above)
        mov     r15, [rax+56]           ; 6th argument (see note above)
        push    rsi
        push    rdi
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm6
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm7
%endmacro

; Undo collect_args in reverse order.
%imacro uncollect_args 0
        movaps  xmm7, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        movaps  xmm6, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        pop     rdi
        pop     rsi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
%endmacro

%else

; Non-WIN64 variant: the six arguments arrive in rdi, rsi, rdx, rcx, r8, r9.
; r10-r15 are all saved before being overwritten.
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi                ; 1st argument
        mov     r11, rsi                ; 2nd argument
        mov     r12, rdx                ; 3rd argument
        mov     r13, rcx                ; 4th argument
        mov     r14, r8                 ; 5th argument
        mov     r15, r9                 ; 6th argument
%endmacro

; Undo collect_args in reverse order.
%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%endif

%endif

; --------------------------------------------------------------------------
;  Defines picked up from the C headers
;
;  jsimdcfg.inc is a separate file that must be reachable through the
;  assembler's include path.
;
%include "jsimdcfg.inc"

; --------------------------------------------------------------------------
|