VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.3/engines/asm/e_padlock-x86_64.pl@ 102797

Last change on this file since 102797 was 101211, checked in by vboxsync, 15 months ago

openssl-3.1.3: Applied and adjusted our OpenSSL changes to 3.1.2. bugref:10527

File size: 13.1 KB
Line 
#! /usr/bin/env perl
# Copyright 2011-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for Padlock engine. See even e_padlock-x86.pl for
# details.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Win64 ABI is selected by flavour (nasm/masm/mingw64) or by .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate the perlasm translator next to this script or in the crypto tree.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# All generated code is piped through the translator, which writes $output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

$code=".text\n";

%PADLOCK_PREFETCH=(ecb=>128, cbc=>64, ctr32=>32);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20

# Register roles used by the xcrypt loops below.
$ctx="%rdx";
$out="%rdi";
$inp="%rsi";
$len="%rcx";
$chunk="%rbx";

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
                                 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
# Stand-alone helper routines shared by every mode: CPUID capability probe,
# key byte-swap, context verification, single-block AES and the hardware
# RNG/SHA entry points. Emitted verbatim (heredoc content is the program's
# output — only Perl-level comments may be added outside it).
$code.=<<___;
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8
	xor	%eax,%eax
	cpuid
	xor	%eax,%eax
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lzhaoxin
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmp	\$`"0x".unpack("H*",'hS  ')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'hgna')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'  ia')`,%ecx
	jne	.Lnoluck
.LzhaoxinEnd:
	mov	\$0xC0000000,%eax
	cpuid
	mov	%eax,%edx
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx
	ret
.size	padlock_capability,.-padlock_capability

.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx
	inc	%edx
	shl	\$2,%edx
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap

.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp
	ret
.size	padlock_verify_context,.-padlock_verify_context

.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx

.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key

.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block

.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore

.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,\@function,3
.align	16
padlock_sha1_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_blocks,.-padlock_sha1_blocks

.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,\@function,3
.align	16
padlock_sha256_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_blocks,.-padlock_sha256_blocks

.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,\@function,3
.align	16
padlock_sha512_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
	.byte	0xf3,0x0f,0xa6,0xe0	# rep xha512
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	ret
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
___

# generate_mode($mode, $opcode) — appends to $code one complete
# padlock_${mode}_encrypt routine built around the given xcrypt opcode
# byte. Handles the misaligned-in/out bounce buffer on the stack, the
# PADLOCK_CHUNK splitting, the ctr32 counter-wrap workaround and the
# prefetch-errata tails for the modes listed in %PADLOCK_PREFETCH.
sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#		struct padlock_cipher_data *ctx, size_t len);
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	mov	\$$PADLOCK_CHUNK,%rax
	cmovz	%rax,$chunk		# chunk=chunk?:PADLOCK_CHUNK
___
$code.=<<___ if ($mode eq "ctr32");
.L${mode}_reenter:
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$`$PADLOCK_CHUNK/16-1`,%eax
	mov	\$$PADLOCK_CHUNK,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross PADLOCK_CHUNK
	cmovbe	$len,$chunk
___
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	cmp	$chunk,$len
	ja	.L${mode}_loop
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax		# mask=distance<prefetch?-prefetch:-1
	and	%rax,$chunk
	jz	.L${mode}_unaligned_tail
___
$code.=<<___;
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	cmp	$len,$chunk		# ctr32 artefact
	cmova	$len,$chunk		# ctr32 artefact
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	test	\$0xffff0000,%eax
	jnz	.L${mode}_no_carry
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)
.L${mode}_no_carry:
___
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	lea	(%rsp),$inp
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk
___
	# Modes without the prefetch erratum just loop; the others need the
	# unaligned-tail bounce below to keep the prefetcher inside the page.
	if (!$PADLOCK_PREFETCH{$mode}) {
$code.=<<___;
	jnz	.L${mode}_loop
___
	} else {
$code.=<<___;
	jz	.L${mode}_break
	cmp	$chunk,$len
	jae	.L${mode}_loop
___
$code.=<<___ if ($mode eq "ctr32");
	mov	$len,$chunk
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax
	and	%rax,$chunk
	jnz	.L${mode}_loop
___
$code.=<<___;
.L${mode}_unaligned_tail:
	xor	%eax,%eax
	cmp	%rsp,%rbp
	cmove	$len,%rax
	mov	$out,%r8		# save parameters
	mov	$len,$chunk
	sub	%rax,%rsp		# alloca
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	mov	%rsp,$inp
	mov	%r8, $out		# restore parameters
	mov	$chunk,$len
	jmp	.L${mode}_loop
.align	16
.L${mode}_break:
___
	}
$code.=<<___;
	cmp	%rbp,%rsp
	je	.L${mode}_done

	pxor	%xmm0,%xmm0
	lea	(%rsp),%rax
.L${mode}_bzero:
	movaps	%xmm0,(%rax)
	lea	16(%rax),%rax
	cmp	%rax,%rbp
	ja	.L${mode}_bzero

.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
___
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$0xffff,%eax
	mov	\$`16*0x10000`,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross 2^16
	cmovbe	$len,$chunk
	jbe	.L${mode}_aligned_skip

.L${mode}_aligned_loop:
	mov	$len,%r10		# save parameters
	mov	$chunk,$len
	mov	$chunk,%r11

	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*

	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)

	mov	%r10,$len		# restore parameters
	sub	%r11,$len
	mov	\$`16*0x10000`,$chunk
	jz	.L${mode}_exit
	cmp	$chunk,$len
	jae	.L${mode}_aligned_loop

.L${mode}_aligned_skip:
___
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	lea	($inp,$len),%rbp
	neg	%rbp
	and	\$0xfff,%rbp		# distance to page boundary
	xor	%eax,%eax
	cmp	\$$PADLOCK_PREFETCH{$mode},%rbp
	mov	\$$PADLOCK_PREFETCH{$mode}-1,%rbp
	cmovae	%rax,%rbp
	and	$len,%rbp		# remainder
	sub	%rbp,$len
	jz	.L${mode}_aligned_tail
___
$code.=<<___;
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	test	%rbp,%rbp		# check remainder
	jz	.L${mode}_exit

.L${mode}_aligned_tail:
	mov	$out,%r8
	mov	%rbp,$chunk
	mov	%rbp,$len
	lea	(%rsp),%rbp
	sub	$len,%rsp
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	lea	(%r8),$out
	lea	(%rsp),$inp
	mov	$chunk,$len
	jmp	.L${mode}_loop
___
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}

# Instantiate one routine per cipher mode; the second argument is the
# xcrypt opcode byte passed through to the .byte sequence above.
&generate_mode("ecb",0xc8);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...

$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___
# Evaluate the remaining `...` compile-time expressions, then ship the
# whole module through the xlate pipe opened above.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

close STDOUT;
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette