VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/asm/ASMMemFirstMismatchingU8.asm@ 93943

Last change on this file since 93943 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 8.5 KB
Line 
1; $Id: ASMMemFirstMismatchingU8.asm 93115 2022-01-01 11:31:46Z vboxsync $
2;; @file
3; IPRT - ASMMemFirstMismatchingU8().
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28;*******************************************************************************
29;* Header Files *
30;*******************************************************************************
31%define RT_ASM_WITH_SEH64
32%include "iprt/asmdefs.mac"
33
34
35BEGINCODE
36
;;
; ASMMemFirstNonZero - ASMMemFirstMismatchingU8 specialised for u8 == 0.
;
; This entry point builds exactly the same frame / saved-register state as
; the prologue of ASMMemFirstMismatchingU8, loads pv and cb into the
; registers the generic scanner expects, zeroes the fill pattern and then
; jumps into the generic function at its .is_all_zero_joining label.  The
; exit paths of ASMMemFirstMismatchingU8 undo the prologue set up here, so
; the two prologues have to stay identical.
;
; Argument locations, as loaded below:
;   64-bit MSC:  rcx = pv, rdx = cb
;   64-bit GCC:  rdi = pv, rsi = cb
;   32-bit:      [ebp + 08h] = pv, [ebp + 0ch] = cb
;   16-bit:      [bp + 04h] = pv (far pointer), [bp + 08h] = cb
;
RT_BEGINPROC ASMMemFirstNonZero
        ;
        ; Prologue (keep in sync with ASMMemFirstMismatchingU8).
        ;
%if ARCH_BITS != 64
        push    xBP
        mov     xBP, xSP
        push    xDI
 %if ARCH_BITS == 16
        push    es
 %endif
%elifdef ASM_CALL64_MSC
        mov     r9, rdi                 ; rdi is callee-saved for MSC; park it in r9
%endif
SEH64_END_PROLOGUE

        ;
        ; Load the scanner registers: xDI = pv, xCX = cb, xAX = 0 (the pattern).
        ; An empty block goes straight to the generic "all same" exit.
        ;
%if ARCH_BITS == 64
 %ifdef ASM_CALL64_MSC
        mov     rdi, rcx                ; pv
        mov     rcx, rdx                ; cb
 %else
        mov     rcx, rsi                ; cb (pv already lives in rdi)
 %endif
        jrcxz   RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return_all_same)
        xor     eax, eax                ; pattern = 0

%elif ARCH_BITS == 32
        mov     ecx, [ebp + 0ch]        ; cb
        jecxz   RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return_all_same)
        mov     edi, [ebp + 08h]        ; pv
        xor     eax, eax                ; pattern = 0

%elif ARCH_BITS == 16
        mov     cx, [bp + 08h]          ; cb
        jcxz    RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.return16_all_same)
        les     di, [bp + 04h]          ; pv (far) -> es:di
        xor     ax, ax                  ; pattern = 0

%else
 %error "Invalid ARCH_BITS value"
%endif

        ;
        ; Continue in the generic scanner.
        ;
        jmp     RT_CONCAT(NAME(ASMMemFirstMismatchingU8),.is_all_zero_joining)
ENDPROC ASMMemFirstNonZero
93
94
;;
; Inverted memchr.
;
; Scans a memory block for the first byte that differs from @a u8.  The
; bulk of the block is compared a native word at a time (repe scasq/scasd/
; scasw) against @a u8 replicated into every byte of xAX; any head or tail
; bytes, and the word in which a mismatch was detected, are compared byte
; by byte (scasb).
;
; @returns Pointer to the byte which doesn't equal u8.
;          (16-bit: far pointer in dx:ax.)
; @returns NULL if all equal to u8, or if cb is zero.
;
; @param   msc:rcx  gcc:rdi  pv     Pointer to the memory block.
; @param   msc:rdx  gcc:rsi  cb     Number of bytes in the block.  This MUST be aligned on 32-bit!
; @param   msc:r8b  gcc:dl   u8     The value it's supposed to be filled with.
;
; 32-bit builds read the arguments from [ebp + 08h] (pv), [ebp + 0ch] (cb)
; and [ebp + 10h] (u8); 16-bit builds from [bp + 04h] (far pv), [bp + 08h]
; (cb) and [bp + 0ah] (u8).
;
; NOTE(review): the code below scans any tail bytes left over after the
; word scan, so it does not appear to depend on the 32-bit alignment of cb
; stated above - confirm before relaxing the documented contract.
;
; ASMMemFirstNonZero jumps into this function at .is_all_zero_joining (and
; at the "all same" exits) after executing an identical prologue; keep the
; prologue and the exit sequences in sync with it.
;
; @cproto DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
;
RT_BEGINPROC ASMMemFirstMismatchingU8
        ;
        ; Prologue.
        ;
%if ARCH_BITS != 64
        push    xBP
        mov     xBP, xSP
        push    xDI
 %if ARCH_BITS == 16
        push    es
 %endif
%elifdef ASM_CALL64_MSC
        mov     r9, rdi                 ; save rdi in r9
%endif
SEH64_END_PROLOGUE

%if ARCH_BITS != 16
        ;
        ; The 32-bit and 64-bit variant of the code.
        ;

        ; Normalize input; rdi=pv, rcx=cb, rax=eight-times-u8
 %if ARCH_BITS == 64
  %ifdef ASM_CALL64_MSC
        mov     rdi, rcx
        mov     rcx, rdx
        jrcxz   .return_all_same
        movzx   r8d, r8b
        mov     rax, qword 0101010101010101h
        imul    rax, r8                 ; replicate u8 into all eight bytes
  %else
        mov     rcx, rsi
        jrcxz   .return_all_same
        movzx   edx, dl
        mov     rax, qword 0101010101010101h
        imul    rax, rdx                ; replicate u8 into all eight bytes
  %endif

 %elif ARCH_BITS == 32
        mov     ecx, [ebp + 0ch]
        jecxz   .return_all_same
        mov     edi, [ebp + 08h]
        movzx   eax, byte [ebp + 10h]
        mov     ah, al                  ; ax  = u8:u8
        movzx   edx, ax
        shl     eax, 16
        or      eax, edx                ; eax = u8:u8:u8:u8
 %else
  %error "Invalid ARCH_BITS value"
 %endif

        ; Entry point for ASMMemFirstNonZero: xDI=pv, xCX=cb (non-zero), xAX=pattern.
.is_all_zero_joining:
        cld

        ; Unaligned pointer? Align it (elsewhere).
        test    edi, xCB - 1
        jnz     .unaligned_pv
.aligned_pv:

        ; Do the dword/qword scan.
        mov     edx, xCB - 1
        and     edx, ecx                ; Remaining bytes for tail scan
 %if ARCH_BITS == 64
        shr     xCX, 3
        repe scasq
 %else
        shr     xCX, 2
        repe scasd
 %endif
        jne     .multibyte_mismatch

        ; Prep for tail scan.
        mov     ecx, edx                ; edx < xCB, so the 32-bit move is lossless

        ;
        ; Byte by byte scan.
        ;
.byte_by_byte:
        repe scasb
        jne     .return_xDI

.return_all_same:
        xor     eax, eax
 %ifdef ASM_CALL64_MSC
        mov     rdi, r9                 ; restore rdi
 %elif ARCH_BITS == 32
        pop     edi
        leave
 %endif
        ret

        ; Return after byte scan mismatch.
.return_xDI:
        lea     xAX, [xDI - 1]          ; scasb already stepped past the mismatching byte
 %ifdef ASM_CALL64_MSC
        mov     rdi, r9                 ; restore rdi
 %elif ARCH_BITS == 32
        pop     edi
        leave
 %endif
        ret

        ;
        ; Multibyte mismatch.  We rewind and do a byte scan of the remainder.
        ; (can't just search the qword as the buffer must be considered volatile).
        ;
        ; The byte count is rebuilt as (words left + 1) * xCB + tail bytes.  The
        ; OR must be done at full register width: a 32-bit 'or ecx, edx' would
        ; clear bits 63:32 of rcx in 64-bit mode, and if the low 32 bits of the
        ; count were zero it would leave rcx = 0 with ZF set, making the
        ; following 'repe scasb' a no-op and the function return NULL despite
        ; the mismatch.  The upper half of rdx is zero here because edx was
        ; loaded with a 32-bit mov at .aligned_pv.
        ;
.multibyte_mismatch:
        lea     xDI, [xDI - xCB]
        lea     xCX, [xCX * xCB + xCB]
        or      xCX, xDX
        jmp     .byte_by_byte

        ;
        ; Unaligned pointer.  If it's worth it, align the pointer, but if the
        ; memory block is too small do the byte scan variant.
        ;
.unaligned_pv:
        cmp     xCX, 4*xCB              ; 4 steps seems reasonable.
        jbe     .byte_by_byte

        ; Unrolled buffer realignment (at most xCB - 1 single byte steps).
 %if ARCH_BITS == 64
        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv
 %endif

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        test    edi, xCB - 1
        jz      .aligned_pv

        dec     xCX
        scasb
        jne     .return_xDI
        jmp     .aligned_pv


%else ; ARCH_BITS == 16

        ;
        ; The 16-bit variant of the code is a little simpler since we're
        ; working with two byte words in the 'fast' scan.  We also keep
        ; this separate from the 32-bit/64-bit code because that allows
        ; avoid a few rex prefixes here and there by using extended
        ; registers (e??) where we don't care about the whole register.
        ;
CPU 8086

        ; Load input parameters.
        mov     cx, [bp + 08h]          ; cb
        jcxz    .return16_all_same
        les     di, [bp + 04h]          ; pv (far)
        mov     al, [bp + 0ah]          ; u8
        mov     ah, al                  ; ax = u8:u8

        ; Entry point for ASMMemFirstNonZero: es:di=pv, cx=cb (non-zero), ax=pattern.
.is_all_zero_joining:
        cld

        ; Align the pointer.
        test    di, 1
        jz      .word_scan

        dec     cx
        scasb
        jne     .return16_di
        jcxz    .return16_all_same

        ; Scan word-by-word.
.word_scan:
        mov     dx, cx                  ; dx = bytes left at start of word scan
        shr     cx, 1
        repe scasw
        jne     .word_mismatch

        ; do we have a tail byte?
        test    dl, 1
        jz      .return16_all_same
        scasb
        jne     .return16_di

.return16_all_same:
        xor     ax, ax
        xor     dx, dx
.return16:
        pop     es
        pop     di
        pop     bp
        ret

.word_mismatch:
        ; back up a word.
        inc     cx
        sub     di, 2

        ; Do byte-by-byte scanning of the rest of the buffer.
        ; The tail-byte bit comes from dx, the count saved at .word_scan, and
        ; not from the original cb argument: when the alignment step above
        ; consumed a byte the two have opposite parity and using cb would
        ; make the rebuilt byte count off by one.
        shl     cx, 1
        and     dl, 1
        or      cl, dl
        repe scasb
        je      .return16_all_same

.return16_di:
        mov     ax, di
        dec     ax                      ; scasb already stepped past the mismatching byte
        mov     dx, es
        jmp     .return16

%endif ; ARCH_BITS == 16
ENDPROC ASMMemFirstMismatchingU8
345
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette