3 ; This file contains both 32 and 64 bit versions of the functions.
4 ; Copyright (c) Love Nystrom.
6 TITLE 32/64 bit assembly routines.
17 ;==============================================================================
18 ; Common x86/x64 - Constants and parameterless functions
19 ;==============================================================================
22 ; Common code and data...
25 ;==============================================================================
26 IF (_WIN64 eq 0) ; 32 bit version
27 ;==============================================================================
28 ECHO === 32-BIT ASSEMBLY (ML) ===
31 ; Argument-passing: Right to left. By value, unless pointer or reference.
32 ; Stack-maintenance: Called function pops the stack.
33 ; Name-decoration: An underscore (_) is prefixed to the name. The name is
34 ; followed by the at sign (@) followed by the number of bytes (in decimal)
35 ; in the argument list. In other words, a function declared as
36 ; int func( int a, double b ); is decorated as: _func@12
42 ; EXTERN_C COLORREF GradientColor( COLORREF c1, COLORREF c2, WORD Ix, WORD Length )
44 ; GradientColor produces a smooth color crossfade from c1 to c2.
45 ; Length is the desired run length from c1 to c2, and Ix is the 0-based step nr.
47 ; BYTE r1 = GetRValue( c1 );
48 ; BYTE g1 = GetGValue( c1 );
49 ; BYTE b1 = GetBValue( c1 );
50 ; int dr = int(GetRValue( c2 )) - r1;
51 ; int dg = int(GetGValue( c2 )) - g1;
52 ; int db = int(GetBValue( c2 )) - b1;
53 ; int N = Length-1; // Divisor needs to comply with Ix 0..Length-1
54 ; int r = int(r1) + ((dr * int(Ix)) / N);
55 ; int g = int(g1) + ((dg * int(Ix)) / N);
56 ; int b = int(b1) + ((db * int(Ix)) / N);
57 ; return RGB( byte(r), byte(g), byte(b) );
59 ; PONDER: This could probably get much faster by use of some SIMD ops.
60 ; AGH, can't use SIMD since there's no packed div..
; 32-bit GradientColor: per-channel linear crossfade between two COLORREFs,
; following the C reference in the comment block preceding this procedure:
; component = c1.comp + ((c2.comp - c1.comp) * Ix) / N, with N = Length-1.
; Stack arguments used here: [ebp+12] = c2, [ebp+16] = Ix, [ebp+20] = Length.
; (c1 is presumably at [ebp+8] -- TODO confirm against the frame setup.)
; Locals: [esp] = N (divisor), [esp+4] = Result (assembled bytewise).
; NOTE(review): per the comments N = Length-1, so Length == 1 would make the
; idiv instructions below divide by zero -- confirm callers pass Length >= 2.
62 _GradientColor PROC ;; Implementation 1, plain CPU, no MMX
66 sub esp, 8 ; Reserve two dword locals: [esp] = N, [esp+4] = Result
69 mov ecx, [ebp+12] ; ecx = c2
70 ; int N = Length-1; // Divisor needs to comply with Ix 0..Length-1
71 mov eax, [ebp+20] ; eax = Length
73 mov [esp], eax ; [esp] = N = Length-1
75 mov [esp+4], eax ; [esp+4] = Result
78 ; BYTE r1 = GetRValue( c1 );
79 ; int dr = int(GetRValue( c2 )) - r1;
83 ; int r = int(r1) + ((dr * int(Ix)) / N);
84 imul word ptr [ebp+16] ; DX:AX = AX * Ix (signed 16-bit multiply)
85 idiv word ptr [esp] ; AX = DX:AX / N (signed), DX = remainder
87 mov byte ptr [esp+4], al ; Result byte 0 = r
94 imul word ptr [ebp+16] ; DX:AX = AX * Ix (signed 16-bit multiply)
95 idiv word ptr [esp] ; AX = DX:AX / N (signed), DX = remainder
97 mov byte ptr [esp+5], al ; Result byte 1 = g
104 imul word ptr [ebp+16] ; DX:AX = AX * Ix (signed 16-bit multiply)
105 idiv word ptr [esp] ; AX = DX:AX / N (signed), DX = remainder
107 mov byte ptr [esp+6], al ; Result byte 2 = b
109 mov eax, [esp+4] ; eax = Result
117 IF 0 ;; FIXME: Saturation problem in 'div bh' when result would be > 255
118 ;; Meanwhile use the C++ version in GdiUtil.cpp
; ScaleColorRef (currently assembled out by the enclosing IF 0):
; scales the three color components of rgb by mul/div, one byte at a time,
; using the 8-bit 'mul' and 'div' instructions, and packs the scaled bytes
; into edx.
; Known defect (see the FIXME on the IF 0 line): 'div bh' raises a divide
; error when the quotient does not fit in AL, i.e. component*mul/div > 255.
120 PUBLIC _ScaleColorRef@12 ; COLORREF __stdcall ScaleColorRef( COLORREF rgb, BYTE mul, BYTE div );
122 _ScaleColorRef@12 PROC ; Linetest OK
123 ; Sadly, it can't be done in MMX since there's no packed div.
127 mov esi, [esp+12] ; esi = rgb (offset presumably accounts for 8 bytes of saved registers -- TODO confirm)
128 xor edx, edx ; edx = 0, accumulates the scaled result
129 mov bl, [esp+16] ; bl = mul
130 mov bh, [esp+20] ; bh = div
136 ; If mul+div <= 2, return color unmodified.
137 ; E.g: mul or div == 0, mul and div == 1
141 mov ecx, 3 ; Loop count: three color components
143 mov eax, esi ; Get current color component into AL
144 or al, al ; Set ZF if the color component is zero
145 jz L2 ; Skip muldiv for a zero color component
146 mul bl ; AX <- AL * mul (unsigned 8x8 -> 16 bit)
147 cmp bh, 1 ; Test divisor
148 jbe L2 ; div <= 1 (unsigned), so don't divide
149 div bh ; AL <- AX / div, AH <- AX % div; faults if quotient > 255
151 mov dl, al ; Put scaled color component in low byte of result
152 shl edx, 8 ; Shift scaled components up one byte
153 shr esi, 8 ; Shift source down to the next color component
157 shr eax, 8 ; Undo the last color shift
162 _ScaleColorRef@12 ENDP
167 ;==============================================================================
168 ELSE ; 64 bit version
169 ;==============================================================================
170 ECHO === 64-BIT ASSEMBLY (ML64) ===
172 ; Extern C assembly routines do NOT get an added underscore with MSVC + ML64.
173 ; Hence the x64 assembly routines must be named _exactly as the C prototypes_,
174 ; or, in case of C++ class members, the mangled C++ identifiers.
176 ; Fastcall is used regardless of prototype declaration!
177 ; Arguments -> RCX, RDX, R8, R9, then stack.
179 ; The four register args are backed by unused stack cells.
180 ; Ergo, after std prologue the fifth argument is at [RBP+48].
182 ; Normal fastcall stack cleanup convention (function pop args) is *not used*.
183 ; Functions end with 'ret 0' even if they had stack args.
185 ; RAX, RCX, RDX, R8, R9, R10, R11 are considered volatile.
186 ; RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are nonvolatile
187 ; and must be saved and restored by a function that uses them.
189 ; frame$ = 10h ; Offset from rbp to first shadow arg after 'enter 0,0'
190 ; x64 fastcall arguments:
191 ; rcx = arg1 (rbp+10h)
192 ; rdx = arg2 (rbp+18h)
193 ; r8 = arg3 (rbp+20h)
194 ; r9 = arg4 (rbp+28h)
201 ; COLORREF GradientColor( COLORREF c1, COLORREF c2, WORD Ix, WORD Length )
; x64 version. Register arguments per the convention described above:
; ecx = c1, edx = c2, r8w = Ix, r9w = Length.
; The result is assembled bytewise in a dword local at [rsp] and returned in eax.
; All stack accesses must use rsp as the base register: a 32-bit base (esp)
; would make the CPU use only the low 32 bits of the stack pointer.
209 mov r10d, edx ; r10d = c2 (dx is used as scratch below)
210 sub rsp, 8 ; [rsp] = result (need bytewise access).
212 dec r9w ; r9w = N = Length-1
214 mov [rsp], eax ; [rsp] = Result
219 sub ax, dx ; ax = dr = r2-r1
223 mov byte ptr [rsp], al ; Result byte 0 = R
233 mov byte ptr [rsp+1], al ; Result byte 1 = G
243 mov byte ptr [rsp+2], al ; Result byte 2 = B (was [esp+2]: truncated 32-bit stack address)
245 mov eax, [rsp] ; eax = Result
248 ret ; Caller balances the stack