1 ;==----------------------------------------------------------------------------
2 ; Efficient _bswap16/32/64 implementations.
3 ; Dual mode (32/64 bit) assembly source.
4 ; Copyright (c) Love Nystrom
5 ;==----------------------------------------------------------------------------
7 TITLE Efficient _bswap16/32/64 implementations for 32 and 64 bit targets.
13 .686P ; Change this if you need to
14 .XMM ; Change this if you need to
18 ;==============================================================================
19 ; Common x86/x64 - Constants and parameterless functions
20 ;==============================================================================
23 ; No common code or data.
26 ;==============================================================================
27 IF (_WIN64 eq 0) ; 32 bit version
28 ;==============================================================================
29 ECHO === 32-BIT ASSEMBLY (ML) ===
33 ;; Vanilla byte order swappers (x86)
35 PUBLIC __bswap16 ; extern "C" UINT16 _cdecl _bswap16( UINT16 W )
36 PUBLIC __bswap32 ; extern "C" UINT32 _cdecl _bswap32( UINT32 Val )
37 PUBLIC __bswap64 ; extern "C" UINT64 _cdecl _bswap64( UINT64 Val )
40 mov ax, word ptr [esp+4]
46 mov eax, dword ptr [esp+4]
52 mov edx, dword ptr [esp+4]
53 mov eax, dword ptr [esp+8]
59 ;; COLORREF / RGBQUAD byte order swappers (x86)
61 PUBLIC __swapRGB ; extern "C" DWORD __cdecl _swapRGB( DWORD rgb );
62 PUBLIC __swapRGB2 ; extern "C" DWORD __cdecl _swapRGB2( DWORD rgb );
64 __swapRGB PROC ; Preserves the high byte
65 mov eax, dword ptr [esp+4] ; eax = rgb: the sole cdecl argument, in the first stack slot above the return address
74 __swapRGB2 PROC ; Clears the high byte
75 mov eax, dword ptr [esp+4] ; eax = rgb: the sole cdecl argument, in the first stack slot above the return address
83 ;==============================================================================
85 ;==============================================================================
86 ECHO === 64-BIT ASSEMBLY (ML64) ===
88 ; Extern C assembly routines do NOT get an added underscore with MSVC + ML64.
89 ; Hence the x64 assembly routines must be named *_exactly as the C prototypes_*.
91 ; Fastcall is used regardless of prototype declaration!
92 ; Arguments -> RCX, RDX, R8, R9, then stack.
94 ; The four register args are backed by unused stack cells.
95 ; Ergo, after the standard prologue the fifth argument is at [RBP+48], i.e. [rbp+30h].
97 ; Normal fastcall stack cleanup convention (callee pops its stack args) is *not used*.
98 ; Functions end with 'ret 0' even if they had stack args.
100 ; RAX, RCX, RDX, R8, R9, R10, R11 are considered volatile.
101 ; RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are nonvolatile
102 ; and must be saved and restored by any function that uses them.
104 ; frame$ = 10h ; Offset from rbp to first shadow arg after 'enter 0,0'
105 ; x64 fastcall arguments:
106 ; rcx = arg1 (rbp+10h)
107 ; rdx = arg2 (rbp+18h)
108 ; r8 = arg3 (rbp+20h)
109 ; r9 = arg4 (rbp+28h)
110 ; [rbp+30h] = arg5 (rbp+30h)
114 ;; Vanilla byte order swappers (x64)
116 PUBLIC _bswap16 ; extern "C" UINT16 _bswap16( UINT16 W )
117 PUBLIC _bswap32 ; extern "C" UINT32 _bswap32( UINT32 Val )
118 PUBLIC _bswap64 ; extern "C" UINT64 _bswap64( UINT64 Val )
138 ;; COLORREF / RGBQUAD byte order swappers (x64)
140 PUBLIC _swapRGB ; extern "C" DWORD __cdecl _swapRGB( DWORD rgb );
141 PUBLIC _swapRGB2 ; extern "C" DWORD __cdecl _swapRGB2( DWORD rgb );
143 _swapRGB PROC ; Preserves the high byte
152 _swapRGB2 PROC ; Clears the high byte