VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/genasm-macosx/aesni-x86_64.S@ 94081

Last change on this file since 94081 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 80.1 KB
Line 
1.text
2
# _aesni_encrypt -- encrypt a single 16-byte block with AES-NI.
#   rdi = source block (16 bytes are read)
#   rsi = destination block (16 bytes are written)
#   rdx = key structure: 16-byte round keys start at offset 0, and the
#         dword at offset 240 is the number of aesenc rounds executed
#         before the closing aesenclast
# Presumably called from C as (in, out, key) under the SysV AMD64
# convention -- confirm against the C prototype.
# Modifies eax, rdx, flags and xmm0-xmm2; the three xmm registers are
# zeroed before returning.
# The AES instructions are emitted as raw .byte sequences; the mnemonic
# each one encodes is given in the trailing comment.
3.globl _aesni_encrypt
4
5.p2align 4
6_aesni_encrypt:
7
8 movups (%rdi),%xmm2  # xmm2 = input block
9 movl 240(%rdx),%eax  # eax = round-loop count
10 movups (%rdx),%xmm0  # xmm0 = round key 0
11 movups 16(%rdx),%xmm1  # xmm1 = round key 1
12 leaq 32(%rdx),%rdx  # rdx -> round key 2
13 xorps %xmm0,%xmm2  # block ^= round key 0
14L$oop_enc1_1:
15.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
16 decl %eax  # sets ZF for the jnz below
17 movups (%rdx),%xmm1  # next round key (flags unaffected)
18 leaq 16(%rdx),%rdx
19 jnz L$oop_enc1_1
20.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
21 pxor %xmm0,%xmm0  # clear the round-key registers
22 pxor %xmm1,%xmm1
23 movups %xmm2,(%rsi)  # store the result
24 pxor %xmm2,%xmm2  # clear the data register
25 .byte 0xf3,0xc3  # rep ret
26
27
28
# _aesni_decrypt -- decrypt a single 16-byte block with AES-NI.
#   rdi = source block (16 bytes are read)
#   rsi = destination block (16 bytes are written)
#   rdx = key structure: 16-byte round keys start at offset 0 and are
#         consumed in ascending address order; the dword at offset 240
#         is the number of aesdec rounds before the closing aesdeclast
# Presumably called from C as (in, out, key) under the SysV AMD64
# convention -- confirm against the C prototype.
# Modifies eax, rdx, flags and xmm0-xmm2; the three xmm registers are
# zeroed before returning.
29.globl _aesni_decrypt
30
31.p2align 4
32_aesni_decrypt:
33
34 movups (%rdi),%xmm2  # xmm2 = input block
35 movl 240(%rdx),%eax  # eax = round-loop count
36 movups (%rdx),%xmm0  # xmm0 = round key 0
37 movups 16(%rdx),%xmm1  # xmm1 = round key 1
38 leaq 32(%rdx),%rdx  # rdx -> round key 2
39 xorps %xmm0,%xmm2  # block ^= round key 0
40L$oop_dec1_2:
41.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
42 decl %eax  # sets ZF for the jnz below
43 movups (%rdx),%xmm1  # next round key (flags unaffected)
44 leaq 16(%rdx),%rdx
45 jnz L$oop_dec1_2
46.byte 102,15,56,223,209  # aesdeclast %xmm1,%xmm2
47 pxor %xmm0,%xmm0  # clear the round-key registers
48 pxor %xmm1,%xmm1
49 movups %xmm2,(%rsi)  # store the result
50 pxor %xmm2,%xmm2  # clear the data register
51 .byte 0xf3,0xc3  # rep ret
52
53
54
# _aesni_encrypt2 -- helper (no .globl): encrypt two blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2, xmm3 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count (callers in this file load it from
#              offset 240 of the key structure)
#   Out: xmm2, xmm3 = encrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Indexing: rcx is moved to key + 32 + 16*count and rax runs upward from
# 16 - 16*count in steps of 32, so the addq that advances the index also
# yields the loop-exit flag. The loop ends only when rax lands exactly on
# zero, which requires an odd count.
# xmm1 carries the odd-numbered round keys and xmm0 the even-numbered ones.
55.p2align 4
56_aesni_encrypt2:
57
58 movups (%rcx),%xmm0  # xmm0 = round key 0
59 shll $4,%eax  # eax = count * 16
60 movups 16(%rcx),%xmm1  # xmm1 = round key 1
61 xorps %xmm0,%xmm2  # blocks ^= round key 0
62 xorps %xmm0,%xmm3
63 movups 32(%rcx),%xmm0  # xmm0 = round key 2
64 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
65 negq %rax
66 addq $16,%rax  # rax = 16 - 16*count
67
68L$enc_loop2:
69.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
70.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
71 movups (%rcx,%rax,1),%xmm1  # next odd round key
72 addq $32,%rax  # two keys per pass; ZF set on the last pass
73.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
74.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
75 movups -16(%rcx,%rax,1),%xmm0  # next even round key (flags unaffected)
76 jnz L$enc_loop2
77
78.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
79.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
80.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
81.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
82 .byte 0xf3,0xc3  # rep ret
83
84
85
# _aesni_decrypt2 -- helper (no .globl): decrypt two blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2, xmm3 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2, xmm3 = decrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Indexing: rcx is moved to key + 32 + 16*count and rax runs upward from
# 16 - 16*count in steps of 32; the loop ends when rax reaches zero,
# which requires an odd count.
86.p2align 4
87_aesni_decrypt2:
88
89 movups (%rcx),%xmm0  # xmm0 = round key 0
90 shll $4,%eax  # eax = count * 16
91 movups 16(%rcx),%xmm1  # xmm1 = round key 1
92 xorps %xmm0,%xmm2  # blocks ^= round key 0
93 xorps %xmm0,%xmm3
94 movups 32(%rcx),%xmm0  # xmm0 = round key 2
95 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
96 negq %rax
97 addq $16,%rax  # rax = 16 - 16*count
98
99L$dec_loop2:
100.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
101.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
102 movups (%rcx,%rax,1),%xmm1  # next odd round key
103 addq $32,%rax  # two keys per pass; ZF set on the last pass
104.byte 102,15,56,222,208  # aesdec %xmm0,%xmm2
105.byte 102,15,56,222,216  # aesdec %xmm0,%xmm3
106 movups -16(%rcx,%rax,1),%xmm0  # next even round key (flags unaffected)
107 jnz L$dec_loop2
108
109.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
110.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
111.byte 102,15,56,223,208  # aesdeclast %xmm0,%xmm2
112.byte 102,15,56,223,216  # aesdeclast %xmm0,%xmm3
113 .byte 0xf3,0xc3  # rep ret
114
115
116
# _aesni_encrypt3 -- helper (no .globl): encrypt three blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm4 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm4 = encrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Same negative-index loop as the two-block helper: rax climbs from
# 16 - 16*count to zero in steps of 32 (odd count required).
117.p2align 4
118_aesni_encrypt3:
119
120 movups (%rcx),%xmm0  # xmm0 = round key 0
121 shll $4,%eax  # eax = count * 16
122 movups 16(%rcx),%xmm1  # xmm1 = round key 1
123 xorps %xmm0,%xmm2  # blocks ^= round key 0
124 xorps %xmm0,%xmm3
125 xorps %xmm0,%xmm4
126 movups 32(%rcx),%xmm0  # xmm0 = round key 2
127 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
128 negq %rax
129 addq $16,%rax  # rax = 16 - 16*count
130
131L$enc_loop3:
132.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
133.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
134.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
135 movups (%rcx,%rax,1),%xmm1  # next odd round key
136 addq $32,%rax  # ZF set on the last pass
137.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
138.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
139.byte 102,15,56,220,224  # aesenc %xmm0,%xmm4
140 movups -16(%rcx,%rax,1),%xmm0  # next even round key
141 jnz L$enc_loop3
142
143.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
144.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
145.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
146.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
147.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
148.byte 102,15,56,221,224  # aesenclast %xmm0,%xmm4
149 .byte 0xf3,0xc3  # rep ret
150
151
152
# _aesni_decrypt3 -- helper (no .globl): decrypt three blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm4 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm4 = decrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Same negative-index loop as the two-block helper: rax climbs from
# 16 - 16*count to zero in steps of 32 (odd count required).
153.p2align 4
154_aesni_decrypt3:
155
156 movups (%rcx),%xmm0  # xmm0 = round key 0
157 shll $4,%eax  # eax = count * 16
158 movups 16(%rcx),%xmm1  # xmm1 = round key 1
159 xorps %xmm0,%xmm2  # blocks ^= round key 0
160 xorps %xmm0,%xmm3
161 xorps %xmm0,%xmm4
162 movups 32(%rcx),%xmm0  # xmm0 = round key 2
163 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
164 negq %rax
165 addq $16,%rax  # rax = 16 - 16*count
166
167L$dec_loop3:
168.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
169.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
170.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
171 movups (%rcx,%rax,1),%xmm1  # next odd round key
172 addq $32,%rax  # ZF set on the last pass
173.byte 102,15,56,222,208  # aesdec %xmm0,%xmm2
174.byte 102,15,56,222,216  # aesdec %xmm0,%xmm3
175.byte 102,15,56,222,224  # aesdec %xmm0,%xmm4
176 movups -16(%rcx,%rax,1),%xmm0  # next even round key
177 jnz L$dec_loop3
178
179.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
180.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
181.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
182.byte 102,15,56,223,208  # aesdeclast %xmm0,%xmm2
183.byte 102,15,56,223,216  # aesdeclast %xmm0,%xmm3
184.byte 102,15,56,223,224  # aesdeclast %xmm0,%xmm4
185 .byte 0xf3,0xc3  # rep ret
186
187
188
# _aesni_encrypt4 -- helper (no .globl): encrypt four blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm5 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm5 = encrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Same negative-index loop as the two-block helper: rax climbs from
# 16 - 16*count to zero in steps of 32 (odd count required).
189.p2align 4
190_aesni_encrypt4:
191
192 movups (%rcx),%xmm0  # xmm0 = round key 0
193 shll $4,%eax  # eax = count * 16
194 movups 16(%rcx),%xmm1  # xmm1 = round key 1
195 xorps %xmm0,%xmm2  # blocks ^= round key 0
196 xorps %xmm0,%xmm3
197 xorps %xmm0,%xmm4
198 xorps %xmm0,%xmm5
199 movups 32(%rcx),%xmm0  # xmm0 = round key 2
200 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
201 negq %rax
202.byte 0x0f,0x1f,0x00  # nopl (%rax): 3-byte no-op filler
203 addq $16,%rax  # rax = 16 - 16*count
204
205L$enc_loop4:
206.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
207.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
208.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
209.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
210 movups (%rcx,%rax,1),%xmm1  # next odd round key
211 addq $32,%rax  # ZF set on the last pass
212.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
213.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
214.byte 102,15,56,220,224  # aesenc %xmm0,%xmm4
215.byte 102,15,56,220,232  # aesenc %xmm0,%xmm5
216 movups -16(%rcx,%rax,1),%xmm0  # next even round key
217 jnz L$enc_loop4
218
219.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
220.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
221.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
222.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
223.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
224.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
225.byte 102,15,56,221,224  # aesenclast %xmm0,%xmm4
226.byte 102,15,56,221,232  # aesenclast %xmm0,%xmm5
227 .byte 0xf3,0xc3  # rep ret
228
229
230
# _aesni_decrypt4 -- helper (no .globl): decrypt four blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm5 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm5 = decrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# Same negative-index loop as the two-block helper: rax climbs from
# 16 - 16*count to zero in steps of 32 (odd count required).
231.p2align 4
232_aesni_decrypt4:
233
234 movups (%rcx),%xmm0  # xmm0 = round key 0
235 shll $4,%eax  # eax = count * 16
236 movups 16(%rcx),%xmm1  # xmm1 = round key 1
237 xorps %xmm0,%xmm2  # blocks ^= round key 0
238 xorps %xmm0,%xmm3
239 xorps %xmm0,%xmm4
240 xorps %xmm0,%xmm5
241 movups 32(%rcx),%xmm0  # xmm0 = round key 2
242 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
243 negq %rax
244.byte 0x0f,0x1f,0x00  # nopl (%rax): 3-byte no-op filler
245 addq $16,%rax  # rax = 16 - 16*count
246
247L$dec_loop4:
248.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
249.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
250.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
251.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
252 movups (%rcx,%rax,1),%xmm1  # next odd round key
253 addq $32,%rax  # ZF set on the last pass
254.byte 102,15,56,222,208  # aesdec %xmm0,%xmm2
255.byte 102,15,56,222,216  # aesdec %xmm0,%xmm3
256.byte 102,15,56,222,224  # aesdec %xmm0,%xmm4
257.byte 102,15,56,222,232  # aesdec %xmm0,%xmm5
258 movups -16(%rcx,%rax,1),%xmm0  # next even round key
259 jnz L$dec_loop4
260
261.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
262.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
263.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
264.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
265.byte 102,15,56,223,208  # aesdeclast %xmm0,%xmm2
266.byte 102,15,56,223,216  # aesdeclast %xmm0,%xmm3
267.byte 102,15,56,223,224  # aesdeclast %xmm0,%xmm4
268.byte 102,15,56,223,232  # aesdeclast %xmm0,%xmm5
269 .byte 0xf3,0xc3  # rep ret
270
271
272
# _aesni_encrypt6 -- helper (no .globl): encrypt six blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm7 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm7 = encrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# The prologue overlaps the key-0 XOR of the later blocks with the first
# aesenc of xmm2-xmm4, then jumps into the middle of the loop at
# L$enc_loop6_enter to finish that round for xmm5-xmm7.
# L$enc_loop6 is also a direct call target from
# _aesni_ctr32_encrypt_blocks, so the register state expected at that
# label must not be changed.
273.p2align 4
274_aesni_encrypt6:
275
276 movups (%rcx),%xmm0  # xmm0 = round key 0
277 shll $4,%eax  # eax = count * 16
278 movups 16(%rcx),%xmm1  # xmm1 = round key 1
279 xorps %xmm0,%xmm2  # blocks ^= round key 0 (interleaved below)
280 pxor %xmm0,%xmm3
281 pxor %xmm0,%xmm4
282.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
283 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
284 negq %rax  # rax = -16*count
285.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
286 pxor %xmm0,%xmm5
287 pxor %xmm0,%xmm6
288.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
289 pxor %xmm0,%xmm7
290 movups (%rcx,%rax,1),%xmm0  # xmm0 = round key 2
291 addq $16,%rax  # rax = 16 - 16*count
292 jmp L$enc_loop6_enter
293.p2align 4
294L$enc_loop6:
295.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
296.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
297.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
298L$enc_loop6_enter:
299.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
300.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
301.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
302 movups (%rcx,%rax,1),%xmm1  # next odd round key
303 addq $32,%rax  # ZF set on the last pass
304.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
305.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
306.byte 102,15,56,220,224  # aesenc %xmm0,%xmm4
307.byte 102,15,56,220,232  # aesenc %xmm0,%xmm5
308.byte 102,15,56,220,240  # aesenc %xmm0,%xmm6
309.byte 102,15,56,220,248  # aesenc %xmm0,%xmm7
310 movups -16(%rcx,%rax,1),%xmm0  # next even round key
311 jnz L$enc_loop6
312
313.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
314.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
315.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
316.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
317.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
318.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
319.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
320.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
321.byte 102,15,56,221,224  # aesenclast %xmm0,%xmm4
322.byte 102,15,56,221,232  # aesenclast %xmm0,%xmm5
323.byte 102,15,56,221,240  # aesenclast %xmm0,%xmm6
324.byte 102,15,56,221,248  # aesenclast %xmm0,%xmm7
325 .byte 0xf3,0xc3  # rep ret
326
327
328
# _aesni_decrypt6 -- helper (no .globl): decrypt six blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm7 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm7 = decrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# The prologue overlaps the key-0 XOR of the later blocks with the first
# aesdec of xmm2-xmm4, then jumps into the middle of the loop at
# L$dec_loop6_enter to finish that round for xmm5-xmm7.
# NOTE(review): the inner labels may be entered from routines outside
# this view -- check for other references before altering them.
329.p2align 4
330_aesni_decrypt6:
331
332 movups (%rcx),%xmm0  # xmm0 = round key 0
333 shll $4,%eax  # eax = count * 16
334 movups 16(%rcx),%xmm1  # xmm1 = round key 1
335 xorps %xmm0,%xmm2  # blocks ^= round key 0 (interleaved below)
336 pxor %xmm0,%xmm3
337 pxor %xmm0,%xmm4
338.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
339 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
340 negq %rax  # rax = -16*count
341.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
342 pxor %xmm0,%xmm5
343 pxor %xmm0,%xmm6
344.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
345 pxor %xmm0,%xmm7
346 movups (%rcx,%rax,1),%xmm0  # xmm0 = round key 2
347 addq $16,%rax  # rax = 16 - 16*count
348 jmp L$dec_loop6_enter
349.p2align 4
350L$dec_loop6:
351.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
352.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
353.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
354L$dec_loop6_enter:
355.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
356.byte 102,15,56,222,241  # aesdec %xmm1,%xmm6
357.byte 102,15,56,222,249  # aesdec %xmm1,%xmm7
358 movups (%rcx,%rax,1),%xmm1  # next odd round key
359 addq $32,%rax  # ZF set on the last pass
360.byte 102,15,56,222,208  # aesdec %xmm0,%xmm2
361.byte 102,15,56,222,216  # aesdec %xmm0,%xmm3
362.byte 102,15,56,222,224  # aesdec %xmm0,%xmm4
363.byte 102,15,56,222,232  # aesdec %xmm0,%xmm5
364.byte 102,15,56,222,240  # aesdec %xmm0,%xmm6
365.byte 102,15,56,222,248  # aesdec %xmm0,%xmm7
366 movups -16(%rcx,%rax,1),%xmm0  # next even round key
367 jnz L$dec_loop6
368
369.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
370.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
371.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
372.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
373.byte 102,15,56,222,241  # aesdec %xmm1,%xmm6
374.byte 102,15,56,222,249  # aesdec %xmm1,%xmm7
375.byte 102,15,56,223,208  # aesdeclast %xmm0,%xmm2
376.byte 102,15,56,223,216  # aesdeclast %xmm0,%xmm3
377.byte 102,15,56,223,224  # aesdeclast %xmm0,%xmm4
378.byte 102,15,56,223,232  # aesdeclast %xmm0,%xmm5
379.byte 102,15,56,223,240  # aesdeclast %xmm0,%xmm6
380.byte 102,15,56,223,248  # aesdeclast %xmm0,%xmm7
381 .byte 0xf3,0xc3  # rep ret
382
383
384
# _aesni_encrypt8 -- helper (no .globl): encrypt eight blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm9 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm9 = encrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# The prologue overlaps the key-0 XOR with the first aesenc of xmm2 and
# xmm3, then jumps to L$enc_loop8_inner to finish that round for
# xmm4-xmm9.
# L$enc_loop8_enter is also a direct call target from
# _aesni_ctr32_encrypt_blocks, so the register state expected at that
# label must not be changed.
# Sequences with the extra 68 (0x44, REX.R) byte address xmm8 and xmm9.
385.p2align 4
386_aesni_encrypt8:
387
388 movups (%rcx),%xmm0  # xmm0 = round key 0
389 shll $4,%eax  # eax = count * 16
390 movups 16(%rcx),%xmm1  # xmm1 = round key 1
391 xorps %xmm0,%xmm2  # blocks ^= round key 0 (interleaved below)
392 xorps %xmm0,%xmm3
393 pxor %xmm0,%xmm4
394 pxor %xmm0,%xmm5
395 pxor %xmm0,%xmm6
396 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
397 negq %rax  # rax = -16*count
398.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
399 pxor %xmm0,%xmm7
400 pxor %xmm0,%xmm8
401.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
402 pxor %xmm0,%xmm9
403 movups (%rcx,%rax,1),%xmm0  # xmm0 = round key 2
404 addq $16,%rax  # rax = 16 - 16*count
405 jmp L$enc_loop8_inner
406.p2align 4
407L$enc_loop8:
408.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
409.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
410L$enc_loop8_inner:
411.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
412.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
413.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
414.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
415.byte 102,68,15,56,220,193  # aesenc %xmm1,%xmm8
416.byte 102,68,15,56,220,201  # aesenc %xmm1,%xmm9
417L$enc_loop8_enter:
418 movups (%rcx,%rax,1),%xmm1  # next odd round key
419 addq $32,%rax  # ZF set on the last pass
420.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
421.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
422.byte 102,15,56,220,224  # aesenc %xmm0,%xmm4
423.byte 102,15,56,220,232  # aesenc %xmm0,%xmm5
424.byte 102,15,56,220,240  # aesenc %xmm0,%xmm6
425.byte 102,15,56,220,248  # aesenc %xmm0,%xmm7
426.byte 102,68,15,56,220,192  # aesenc %xmm0,%xmm8
427.byte 102,68,15,56,220,200  # aesenc %xmm0,%xmm9
428 movups -16(%rcx,%rax,1),%xmm0  # next even round key
429 jnz L$enc_loop8
430
431.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
432.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
433.byte 102,15,56,220,225  # aesenc %xmm1,%xmm4
434.byte 102,15,56,220,233  # aesenc %xmm1,%xmm5
435.byte 102,15,56,220,241  # aesenc %xmm1,%xmm6
436.byte 102,15,56,220,249  # aesenc %xmm1,%xmm7
437.byte 102,68,15,56,220,193  # aesenc %xmm1,%xmm8
438.byte 102,68,15,56,220,201  # aesenc %xmm1,%xmm9
439.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
440.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
441.byte 102,15,56,221,224  # aesenclast %xmm0,%xmm4
442.byte 102,15,56,221,232  # aesenclast %xmm0,%xmm5
443.byte 102,15,56,221,240  # aesenclast %xmm0,%xmm6
444.byte 102,15,56,221,248  # aesenclast %xmm0,%xmm7
445.byte 102,68,15,56,221,192  # aesenclast %xmm0,%xmm8
446.byte 102,68,15,56,221,200  # aesenclast %xmm0,%xmm9
447 .byte 0xf3,0xc3  # rep ret
448
449
450
# _aesni_decrypt8 -- helper (no .globl): decrypt eight blocks held in
# registers. Register-based private convention, not a C entry point.
#   In:  xmm2-xmm9 = data blocks
#        rcx = address of round key 0
#        eax = round-loop count
#   Out: xmm2-xmm9 = decrypted blocks
#   Modifies rax, rcx, xmm0, xmm1, flags. Memory is only read.
# The prologue overlaps the key-0 XOR with the first aesdec of xmm2 and
# xmm3, then jumps to L$dec_loop8_inner to finish that round for
# xmm4-xmm9.
# Sequences with the extra 68 (0x44, REX.R) byte address xmm8 and xmm9.
# NOTE(review): the inner labels may be entered from routines outside
# this view -- check for other references before altering them.
451.p2align 4
452_aesni_decrypt8:
453
454 movups (%rcx),%xmm0  # xmm0 = round key 0
455 shll $4,%eax  # eax = count * 16
456 movups 16(%rcx),%xmm1  # xmm1 = round key 1
457 xorps %xmm0,%xmm2  # blocks ^= round key 0 (interleaved below)
458 xorps %xmm0,%xmm3
459 pxor %xmm0,%xmm4
460 pxor %xmm0,%xmm5
461 pxor %xmm0,%xmm6
462 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
463 negq %rax  # rax = -16*count
464.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
465 pxor %xmm0,%xmm7
466 pxor %xmm0,%xmm8
467.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
468 pxor %xmm0,%xmm9
469 movups (%rcx,%rax,1),%xmm0  # xmm0 = round key 2
470 addq $16,%rax  # rax = 16 - 16*count
471 jmp L$dec_loop8_inner
472.p2align 4
473L$dec_loop8:
474.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
475.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
476L$dec_loop8_inner:
477.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
478.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
479.byte 102,15,56,222,241  # aesdec %xmm1,%xmm6
480.byte 102,15,56,222,249  # aesdec %xmm1,%xmm7
481.byte 102,68,15,56,222,193  # aesdec %xmm1,%xmm8
482.byte 102,68,15,56,222,201  # aesdec %xmm1,%xmm9
483L$dec_loop8_enter:
484 movups (%rcx,%rax,1),%xmm1  # next odd round key
485 addq $32,%rax  # ZF set on the last pass
486.byte 102,15,56,222,208  # aesdec %xmm0,%xmm2
487.byte 102,15,56,222,216  # aesdec %xmm0,%xmm3
488.byte 102,15,56,222,224  # aesdec %xmm0,%xmm4
489.byte 102,15,56,222,232  # aesdec %xmm0,%xmm5
490.byte 102,15,56,222,240  # aesdec %xmm0,%xmm6
491.byte 102,15,56,222,248  # aesdec %xmm0,%xmm7
492.byte 102,68,15,56,222,192  # aesdec %xmm0,%xmm8
493.byte 102,68,15,56,222,200  # aesdec %xmm0,%xmm9
494 movups -16(%rcx,%rax,1),%xmm0  # next even round key
495 jnz L$dec_loop8
496
497.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
498.byte 102,15,56,222,217  # aesdec %xmm1,%xmm3
499.byte 102,15,56,222,225  # aesdec %xmm1,%xmm4
500.byte 102,15,56,222,233  # aesdec %xmm1,%xmm5
501.byte 102,15,56,222,241  # aesdec %xmm1,%xmm6
502.byte 102,15,56,222,249  # aesdec %xmm1,%xmm7
503.byte 102,68,15,56,222,193  # aesdec %xmm1,%xmm8
504.byte 102,68,15,56,222,201  # aesdec %xmm1,%xmm9
505.byte 102,15,56,223,208  # aesdeclast %xmm0,%xmm2
506.byte 102,15,56,223,216  # aesdeclast %xmm0,%xmm3
507.byte 102,15,56,223,224  # aesdeclast %xmm0,%xmm4
508.byte 102,15,56,223,232  # aesdeclast %xmm0,%xmm5
509.byte 102,15,56,223,240  # aesdeclast %xmm0,%xmm6
510.byte 102,15,56,223,248  # aesdeclast %xmm0,%xmm7
511.byte 102,68,15,56,223,192  # aesdeclast %xmm0,%xmm8
512.byte 102,68,15,56,223,200  # aesdeclast %xmm0,%xmm9
513 .byte 0xf3,0xc3  # rep ret
514
515
# _aesni_ecb_encrypt -- process a buffer one independent 16-byte block at
# a time, encrypting or decrypting according to a flag.
#   rdi = source buffer
#   rsi = destination buffer
#   rdx = length in bytes; rounded down to a multiple of 16, and the
#         routine returns at once when the rounded length is zero
#   rcx = key structure (round keys at offset 0, loop count at offset 240)
#   r8d = direction: non-zero takes the encrypt path, zero the decrypt path
# Presumably called from C as (in, out, length, key, enc) under the SysV
# AMD64 convention -- confirm against the C prototype.
# Working registers: r11 and r10d hold copies of the key pointer and the
# loop count, reloaded into rcx and eax before each helper call because
# the multi-block helpers overwrite both.
# Bulk data goes through the eight-block helper (0x80 bytes per call);
# a remainder of one to seven blocks is dispatched on the byte count.
# The decrypt path zeroes each data register after storing it, the
# encrypt path does not. xmm0 and xmm1 are zeroed on every exit.
516.globl _aesni_ecb_encrypt
517
518.p2align 4
519_aesni_ecb_encrypt:
520
521 andq $-16,%rdx  # drop any partial trailing block
522 jz L$ecb_ret  # nothing left to process
523
524 movl 240(%rcx),%eax  # eax = round-loop count
525 movups (%rcx),%xmm0  # xmm0 = round key 0
526 movq %rcx,%r11  # r11 = saved key pointer
527 movl %eax,%r10d  # r10d = saved loop count
528 testl %r8d,%r8d
529 jz L$ecb_decrypt  # flag == 0 selects decryption
530
531 cmpq $0x80,%rdx
532 jb L$ecb_enc_tail  # fewer than eight blocks in total
533
# Load the first eight blocks and enter the loop at the helper call.
534 movdqu (%rdi),%xmm2
535 movdqu 16(%rdi),%xmm3
536 movdqu 32(%rdi),%xmm4
537 movdqu 48(%rdi),%xmm5
538 movdqu 64(%rdi),%xmm6
539 movdqu 80(%rdi),%xmm7
540 movdqu 96(%rdi),%xmm8
541 movdqu 112(%rdi),%xmm9
542 leaq 128(%rdi),%rdi
543 subq $0x80,%rdx
544 jmp L$ecb_enc_loop8_enter
545.p2align 4
# Loop body: store the eight results of the previous pass while loading
# the next eight inputs, and restore rcx/eax for the helper.
546L$ecb_enc_loop8:
547 movups %xmm2,(%rsi)
548 movq %r11,%rcx  # rcx = key pointer again
549 movdqu (%rdi),%xmm2
550 movl %r10d,%eax  # eax = loop count again
551 movups %xmm3,16(%rsi)
552 movdqu 16(%rdi),%xmm3
553 movups %xmm4,32(%rsi)
554 movdqu 32(%rdi),%xmm4
555 movups %xmm5,48(%rsi)
556 movdqu 48(%rdi),%xmm5
557 movups %xmm6,64(%rsi)
558 movdqu 64(%rdi),%xmm6
559 movups %xmm7,80(%rsi)
560 movdqu 80(%rdi),%xmm7
561 movups %xmm8,96(%rsi)
562 movdqu 96(%rdi),%xmm8
563 movups %xmm9,112(%rsi)
564 leaq 128(%rsi),%rsi
565 movdqu 112(%rdi),%xmm9
566 leaq 128(%rdi),%rdi
567L$ecb_enc_loop8_enter:
568
569 call _aesni_encrypt8
570
571 subq $0x80,%rdx
572 jnc L$ecb_enc_loop8  # no borrow: another full group of eight remains
573
# Borrow: flush the last eight results, then undo the final subtraction
# so rdx holds the remaining byte count (0 to 0x70).
574 movups %xmm2,(%rsi)
575 movq %r11,%rcx  # restore key pointer for the tail
576 movups %xmm3,16(%rsi)
577 movl %r10d,%eax  # restore loop count for the tail
578 movups %xmm4,32(%rsi)
579 movups %xmm5,48(%rsi)
580 movups %xmm6,64(%rsi)
581 movups %xmm7,80(%rsi)
582 movups %xmm8,96(%rsi)
583 movups %xmm9,112(%rsi)
584 leaq 128(%rsi),%rsi
585 addq $0x80,%rdx
586 jz L$ecb_ret
587
# Tail dispatch for 1-7 blocks. Each cmpq feeds both the jb and the je
# that follows the intervening movups (movups leaves flags untouched).
588L$ecb_enc_tail:
589 movups (%rdi),%xmm2
590 cmpq $0x20,%rdx
591 jb L$ecb_enc_one
592 movups 16(%rdi),%xmm3
593 je L$ecb_enc_two
594 movups 32(%rdi),%xmm4
595 cmpq $0x40,%rdx
596 jb L$ecb_enc_three
597 movups 48(%rdi),%xmm5
598 je L$ecb_enc_four
599 movups 64(%rdi),%xmm6
600 cmpq $0x60,%rdx
601 jb L$ecb_enc_five
602 movups 80(%rdi),%xmm7
603 je L$ecb_enc_six
# Seven blocks: run the eight-block helper with a zero dummy in xmm9 and
# store only the first seven results.
604 movdqu 96(%rdi),%xmm8
605 xorps %xmm9,%xmm9
606 call _aesni_encrypt8
607 movups %xmm2,(%rsi)
608 movups %xmm3,16(%rsi)
609 movups %xmm4,32(%rsi)
610 movups %xmm5,48(%rsi)
611 movups %xmm6,64(%rsi)
612 movups %xmm7,80(%rsi)
613 movups %xmm8,96(%rsi)
614 jmp L$ecb_ret
615.p2align 4
# One block: inline single-block round loop, no helper call.
616L$ecb_enc_one:
617 movups (%rcx),%xmm0  # xmm0 = round key 0
618 movups 16(%rcx),%xmm1  # xmm1 = round key 1
619 leaq 32(%rcx),%rcx  # rcx -> round key 2
620 xorps %xmm0,%xmm2  # block ^= round key 0
621L$oop_enc1_3:
622.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
623 decl %eax  # sets ZF for the jnz below
624 movups (%rcx),%xmm1  # next round key
625 leaq 16(%rcx),%rcx
626 jnz L$oop_enc1_3
627.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
628 movups %xmm2,(%rsi)
629 jmp L$ecb_ret
630.p2align 4
631L$ecb_enc_two:
632 call _aesni_encrypt2
633 movups %xmm2,(%rsi)
634 movups %xmm3,16(%rsi)
635 jmp L$ecb_ret
636.p2align 4
637L$ecb_enc_three:
638 call _aesni_encrypt3
639 movups %xmm2,(%rsi)
640 movups %xmm3,16(%rsi)
641 movups %xmm4,32(%rsi)
642 jmp L$ecb_ret
643.p2align 4
644L$ecb_enc_four:
645 call _aesni_encrypt4
646 movups %xmm2,(%rsi)
647 movups %xmm3,16(%rsi)
648 movups %xmm4,32(%rsi)
649 movups %xmm5,48(%rsi)
650 jmp L$ecb_ret
651.p2align 4
# Five blocks: six-block helper with a zero dummy in xmm7.
652L$ecb_enc_five:
653 xorps %xmm7,%xmm7
654 call _aesni_encrypt6
655 movups %xmm2,(%rsi)
656 movups %xmm3,16(%rsi)
657 movups %xmm4,32(%rsi)
658 movups %xmm5,48(%rsi)
659 movups %xmm6,64(%rsi)
660 jmp L$ecb_ret
661.p2align 4
662L$ecb_enc_six:
663 call _aesni_encrypt6
664 movups %xmm2,(%rsi)
665 movups %xmm3,16(%rsi)
666 movups %xmm4,32(%rsi)
667 movups %xmm5,48(%rsi)
668 movups %xmm6,64(%rsi)
669 movups %xmm7,80(%rsi)
670 jmp L$ecb_ret
671
672.p2align 4
# Decrypt path: same structure as the encrypt path, using the decrypt
# helpers and zeroing every data register once it has been stored.
673L$ecb_decrypt:
674 cmpq $0x80,%rdx
675 jb L$ecb_dec_tail  # fewer than eight blocks in total
676
677 movdqu (%rdi),%xmm2
678 movdqu 16(%rdi),%xmm3
679 movdqu 32(%rdi),%xmm4
680 movdqu 48(%rdi),%xmm5
681 movdqu 64(%rdi),%xmm6
682 movdqu 80(%rdi),%xmm7
683 movdqu 96(%rdi),%xmm8
684 movdqu 112(%rdi),%xmm9
685 leaq 128(%rdi),%rdi
686 subq $0x80,%rdx
687 jmp L$ecb_dec_loop8_enter
688.p2align 4
# Loop body: store previous results while loading the next eight inputs.
689L$ecb_dec_loop8:
690 movups %xmm2,(%rsi)
691 movq %r11,%rcx  # rcx = key pointer again
692 movdqu (%rdi),%xmm2
693 movl %r10d,%eax  # eax = loop count again
694 movups %xmm3,16(%rsi)
695 movdqu 16(%rdi),%xmm3
696 movups %xmm4,32(%rsi)
697 movdqu 32(%rdi),%xmm4
698 movups %xmm5,48(%rsi)
699 movdqu 48(%rdi),%xmm5
700 movups %xmm6,64(%rsi)
701 movdqu 64(%rdi),%xmm6
702 movups %xmm7,80(%rsi)
703 movdqu 80(%rdi),%xmm7
704 movups %xmm8,96(%rsi)
705 movdqu 96(%rdi),%xmm8
706 movups %xmm9,112(%rsi)
707 leaq 128(%rsi),%rsi
708 movdqu 112(%rdi),%xmm9
709 leaq 128(%rdi),%rdi
710L$ecb_dec_loop8_enter:
711
712 call _aesni_decrypt8
713
714 movups (%r11),%xmm0  # reload round key 0 into xmm0
715 subq $0x80,%rdx
716 jnc L$ecb_dec_loop8  # no borrow: another full group of eight remains
717
# Borrow: flush and zero the last eight results, then restore rdx to the
# remaining byte count.
718 movups %xmm2,(%rsi)
719 pxor %xmm2,%xmm2
720 movq %r11,%rcx  # restore key pointer for the tail
721 movups %xmm3,16(%rsi)
722 pxor %xmm3,%xmm3
723 movl %r10d,%eax  # restore loop count for the tail
724 movups %xmm4,32(%rsi)
725 pxor %xmm4,%xmm4
726 movups %xmm5,48(%rsi)
727 pxor %xmm5,%xmm5
728 movups %xmm6,64(%rsi)
729 pxor %xmm6,%xmm6
730 movups %xmm7,80(%rsi)
731 pxor %xmm7,%xmm7
732 movups %xmm8,96(%rsi)
733 pxor %xmm8,%xmm8
734 movups %xmm9,112(%rsi)
735 pxor %xmm9,%xmm9
736 leaq 128(%rsi),%rsi
737 addq $0x80,%rdx
738 jz L$ecb_ret
739
# Tail dispatch for 1-7 blocks; flag reuse as in the encrypt tail.
740L$ecb_dec_tail:
741 movups (%rdi),%xmm2
742 cmpq $0x20,%rdx
743 jb L$ecb_dec_one
744 movups 16(%rdi),%xmm3
745 je L$ecb_dec_two
746 movups 32(%rdi),%xmm4
747 cmpq $0x40,%rdx
748 jb L$ecb_dec_three
749 movups 48(%rdi),%xmm5
750 je L$ecb_dec_four
751 movups 64(%rdi),%xmm6
752 cmpq $0x60,%rdx
753 jb L$ecb_dec_five
754 movups 80(%rdi),%xmm7
755 je L$ecb_dec_six
# Seven blocks: eight-block helper with a zero dummy in xmm9.
756 movups 96(%rdi),%xmm8
757 movups (%rcx),%xmm0  # xmm0 = round key 0
758 xorps %xmm9,%xmm9
759 call _aesni_decrypt8
760 movups %xmm2,(%rsi)
761 pxor %xmm2,%xmm2
762 movups %xmm3,16(%rsi)
763 pxor %xmm3,%xmm3
764 movups %xmm4,32(%rsi)
765 pxor %xmm4,%xmm4
766 movups %xmm5,48(%rsi)
767 pxor %xmm5,%xmm5
768 movups %xmm6,64(%rsi)
769 pxor %xmm6,%xmm6
770 movups %xmm7,80(%rsi)
771 pxor %xmm7,%xmm7
772 movups %xmm8,96(%rsi)
773 pxor %xmm8,%xmm8
774 pxor %xmm9,%xmm9  # zero the dummy eighth block as well
775 jmp L$ecb_ret
776.p2align 4
# One block: inline single-block round loop, no helper call.
777L$ecb_dec_one:
778 movups (%rcx),%xmm0  # xmm0 = round key 0
779 movups 16(%rcx),%xmm1  # xmm1 = round key 1
780 leaq 32(%rcx),%rcx  # rcx -> round key 2
781 xorps %xmm0,%xmm2  # block ^= round key 0
782L$oop_dec1_4:
783.byte 102,15,56,222,209  # aesdec %xmm1,%xmm2
784 decl %eax  # sets ZF for the jnz below
785 movups (%rcx),%xmm1  # next round key
786 leaq 16(%rcx),%rcx
787 jnz L$oop_dec1_4
788.byte 102,15,56,223,209  # aesdeclast %xmm1,%xmm2
789 movups %xmm2,(%rsi)
790 pxor %xmm2,%xmm2
791 jmp L$ecb_ret
792.p2align 4
793L$ecb_dec_two:
794 call _aesni_decrypt2
795 movups %xmm2,(%rsi)
796 pxor %xmm2,%xmm2
797 movups %xmm3,16(%rsi)
798 pxor %xmm3,%xmm3
799 jmp L$ecb_ret
800.p2align 4
801L$ecb_dec_three:
802 call _aesni_decrypt3
803 movups %xmm2,(%rsi)
804 pxor %xmm2,%xmm2
805 movups %xmm3,16(%rsi)
806 pxor %xmm3,%xmm3
807 movups %xmm4,32(%rsi)
808 pxor %xmm4,%xmm4
809 jmp L$ecb_ret
810.p2align 4
811L$ecb_dec_four:
812 call _aesni_decrypt4
813 movups %xmm2,(%rsi)
814 pxor %xmm2,%xmm2
815 movups %xmm3,16(%rsi)
816 pxor %xmm3,%xmm3
817 movups %xmm4,32(%rsi)
818 pxor %xmm4,%xmm4
819 movups %xmm5,48(%rsi)
820 pxor %xmm5,%xmm5
821 jmp L$ecb_ret
822.p2align 4
# Five blocks: six-block helper with a zero dummy in xmm7.
823L$ecb_dec_five:
824 xorps %xmm7,%xmm7
825 call _aesni_decrypt6
826 movups %xmm2,(%rsi)
827 pxor %xmm2,%xmm2
828 movups %xmm3,16(%rsi)
829 pxor %xmm3,%xmm3
830 movups %xmm4,32(%rsi)
831 pxor %xmm4,%xmm4
832 movups %xmm5,48(%rsi)
833 pxor %xmm5,%xmm5
834 movups %xmm6,64(%rsi)
835 pxor %xmm6,%xmm6
836 pxor %xmm7,%xmm7  # zero the dummy sixth block as well
837 jmp L$ecb_ret
838.p2align 4
# Six blocks: falls through into L$ecb_ret after the last store.
839L$ecb_dec_six:
840 call _aesni_decrypt6
841 movups %xmm2,(%rsi)
842 pxor %xmm2,%xmm2
843 movups %xmm3,16(%rsi)
844 pxor %xmm3,%xmm3
845 movups %xmm4,32(%rsi)
846 pxor %xmm4,%xmm4
847 movups %xmm5,48(%rsi)
848 pxor %xmm5,%xmm5
849 movups %xmm6,64(%rsi)
850 pxor %xmm6,%xmm6
851 movups %xmm7,80(%rsi)
852 pxor %xmm7,%xmm7
853
# Common exit: zero the two round-key registers and return.
854L$ecb_ret:
855 xorps %xmm0,%xmm0
856 pxor %xmm1,%xmm1
857 .byte 0xf3,0xc3  # rep ret
858
859
# _aesni_ccm64_encrypt_blocks -- for each 16-byte block, run a counter
# block and a running accumulator block through the cipher side by side:
#   output      = input ^ E(counter)
#   accumulator = E(accumulator ^ input)
# In CCM terms the accumulator is presumably the CBC-MAC state -- confirm
# against the caller.
#   rdi = source buffer, advanced 16 bytes per block
#   rsi = destination buffer, advanced 16 bytes per block
#   rdx = number of blocks; decremented and tested only after a block has
#         been processed, so it must be at least 1 on entry
#   rcx = key structure (round keys at offset 0, loop count at offset 240)
#   r8  = counter block, 16 bytes (read only)
#   r9  = accumulator block, 16 bytes (read on entry, rewritten on exit)
# Presumably called from C as (in, out, blocks, key, ivec, cmac) under the
# SysV AMD64 convention -- confirm against the C prototype.
# L$increment64 and L$bswap_mask are constants defined elsewhere in the
# file; they are read with movdqa, so they must be 16-byte aligned.
# Modifies rax, rcx, rdx, rdi, rsi, r10, r11, flags, xmm0-xmm3 and
# xmm6-xmm9.
860.globl _aesni_ccm64_encrypt_blocks
861
862.p2align 4
863_aesni_ccm64_encrypt_blocks:
864 movl 240(%rcx),%eax  # eax = round-loop count
865 movdqu (%r8),%xmm6  # xmm6 = counter block
866 movdqa L$increment64(%rip),%xmm9  # xmm9 = per-block counter increment
867 movdqa L$bswap_mask(%rip),%xmm7  # xmm7 = pshufb shuffle mask
868
869 shll $4,%eax  # eax = count * 16
870 movl $16,%r10d
871 leaq 0(%rcx),%r11  # r11 = address of round key 0
872 movdqu (%r9),%xmm3  # xmm3 = accumulator
873 movdqa %xmm6,%xmm2  # xmm2 = counter block to encrypt first
874 leaq 32(%rcx,%rax,1),%rcx  # rcx = key + 32 + 16*count
875.byte 102,15,56,0,247  # pshufb %xmm7,%xmm6 (presumably byte-reverses the counter for paddq)
876 subq %rax,%r10  # r10 = 16 - 16*count, the loop start index
877 jmp L$ccm64_enc_outer
878.p2align 4
879L$ccm64_enc_outer:
880 movups (%r11),%xmm0  # xmm0 = round key 0
881 movq %r10,%rax  # reset the round-key index
882 movups (%rdi),%xmm8  # xmm8 = input block
883
884 xorps %xmm0,%xmm2  # counter ^= round key 0
885 movups 16(%r11),%xmm1  # xmm1 = round key 1
886 xorps %xmm8,%xmm0  # xmm0 = round key 0 ^ input
887 xorps %xmm0,%xmm3  # accumulator ^= input ^ round key 0
888 movups 32(%r11),%xmm0  # xmm0 = round key 2
889
890L$ccm64_enc2_loop:
891.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
892.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
893 movups (%rcx,%rax,1),%xmm1  # next odd round key
894 addq $32,%rax  # ZF set on the last pass
895.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
896.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
897 movups -16(%rcx,%rax,1),%xmm0  # next even round key
898 jnz L$ccm64_enc2_loop
899.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
900.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
901 paddq %xmm9,%xmm6  # step the counter
902 decq %rdx  # ZF consumed by the jnz at the end of the block
903.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
904.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
905
906 leaq 16(%rdi),%rdi
907 xorps %xmm2,%xmm8  # output = input ^ E(counter)
908 movdqa %xmm6,%xmm2  # next counter value
909 movups %xmm8,(%rsi)
910.byte 102,15,56,0,215  # pshufb %xmm7,%xmm2 (shuffle the next counter back)
911 leaq 16(%rsi),%rsi
912 jnz L$ccm64_enc_outer  # nothing since decq has altered flags
913
# Done: zero the working registers and write the accumulator back.
914 pxor %xmm0,%xmm0
915 pxor %xmm1,%xmm1
916 pxor %xmm2,%xmm2
917 movups %xmm3,(%r9)
918 pxor %xmm3,%xmm3
919 pxor %xmm8,%xmm8
920 pxor %xmm6,%xmm6
921 .byte 0xf3,0xc3  # rep ret
922
# _aesni_ccm64_decrypt_blocks -- counterpart of the ccm64 encrypt routine:
#   output      = input ^ E(counter)
#   accumulator = E(accumulator ^ output)
# Only the forward cipher (aesenc/aesenclast) is used. The first counter
# block is encrypted alone; afterwards each pass encrypts the next counter
# together with the accumulator update for the block just produced, and
# the last accumulator update runs alone at L$ccm64_dec_break.
# In CCM terms the accumulator is presumably the CBC-MAC state -- confirm
# against the caller.
#   rdi = source buffer, advanced 16 bytes per block
#   rsi = destination buffer, advanced 16 bytes per block
#   rdx = number of blocks; one block is always processed before the
#         count is tested, so it must be at least 1 on entry
#   rcx = key structure (round keys at offset 0, loop count at offset 240)
#   r8  = counter block, 16 bytes (read only)
#   r9  = accumulator block, 16 bytes (read on entry, rewritten on exit)
# Presumably called from C as (in, out, blocks, key, ivec, cmac) under the
# SysV AMD64 convention -- confirm against the C prototype.
# L$increment64 and L$bswap_mask are constants defined elsewhere in the
# file; they are read with movdqa, so they must be 16-byte aligned.
# Modifies rax, rcx, rdx, rdi, rsi, r10, r11, flags, xmm0-xmm3 and
# xmm6-xmm9.
923.globl _aesni_ccm64_decrypt_blocks
924
925.p2align 4
926_aesni_ccm64_decrypt_blocks:
927 movl 240(%rcx),%eax  # eax = round-loop count
928 movups (%r8),%xmm6  # xmm6 = counter block
929 movdqu (%r9),%xmm3  # xmm3 = accumulator
930 movdqa L$increment64(%rip),%xmm9  # xmm9 = per-block counter increment
931 movdqa L$bswap_mask(%rip),%xmm7  # xmm7 = pshufb shuffle mask
932
933 movaps %xmm6,%xmm2  # xmm2 = first counter block to encrypt
934 movl %eax,%r10d  # r10d = saved loop count
935 movq %rcx,%r11  # r11 = saved key pointer
936.byte 102,15,56,0,247  # pshufb %xmm7,%xmm6 (presumably byte-reverses the counter for paddq)
# Encrypt the first counter block on its own.
937 movups (%rcx),%xmm0  # xmm0 = round key 0
938 movups 16(%rcx),%xmm1  # xmm1 = round key 1
939 leaq 32(%rcx),%rcx  # rcx -> round key 2
940 xorps %xmm0,%xmm2  # counter ^= round key 0
941L$oop_enc1_5:
942.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
943 decl %eax  # sets ZF for the jnz below
944 movups (%rcx),%xmm1  # next round key
945 leaq 16(%rcx),%rcx
946 jnz L$oop_enc1_5
947.byte 102,15,56,221,209  # aesenclast %xmm1,%xmm2
# Set up the negative-index addressing used by the two-block round loop.
948 shll $4,%r10d  # r10d = count * 16
949 movl $16,%eax
950 movups (%rdi),%xmm8  # xmm8 = first input block
951 paddq %xmm9,%xmm6  # step the counter
952 leaq 16(%rdi),%rdi
953 subq %r10,%rax  # rax = 16 - 16*count
954 leaq 32(%r11,%r10,1),%rcx  # rcx = key + 32 + 16*count
955 movq %rax,%r10  # r10 = loop start index
956 jmp L$ccm64_dec_outer
957.p2align 4
958L$ccm64_dec_outer:
959 xorps %xmm2,%xmm8  # output = input ^ E(counter)
960 movdqa %xmm6,%xmm2  # next counter value
961 movups %xmm8,(%rsi)
962 leaq 16(%rsi),%rsi
963.byte 102,15,56,0,215  # pshufb %xmm7,%xmm2 (shuffle the next counter back)
964
965 subq $1,%rdx
966 jz L$ccm64_dec_break  # that was the last block
967
968 movups (%r11),%xmm0  # xmm0 = round key 0
969 movq %r10,%rax  # reset the round-key index
970 movups 16(%r11),%xmm1  # xmm1 = round key 1
971 xorps %xmm0,%xmm8  # output ^ round key 0
972 xorps %xmm0,%xmm2  # counter ^= round key 0
973 xorps %xmm8,%xmm3  # accumulator ^= output ^ round key 0
974 movups 32(%r11),%xmm0  # xmm0 = round key 2
975 jmp L$ccm64_dec2_loop
976.p2align 4
977L$ccm64_dec2_loop:
978.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
979.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
980 movups (%rcx,%rax,1),%xmm1  # next odd round key
981 addq $32,%rax  # ZF set on the last pass
982.byte 102,15,56,220,208  # aesenc %xmm0,%xmm2
983.byte 102,15,56,220,216  # aesenc %xmm0,%xmm3
984 movups -16(%rcx,%rax,1),%xmm0  # next even round key
985 jnz L$ccm64_dec2_loop
986 movups (%rdi),%xmm8  # xmm8 = next input block
987 paddq %xmm9,%xmm6  # step the counter
988.byte 102,15,56,220,209  # aesenc %xmm1,%xmm2
989.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
990.byte 102,15,56,221,208  # aesenclast %xmm0,%xmm2
991.byte 102,15,56,221,216  # aesenclast %xmm0,%xmm3
992 leaq 16(%rdi),%rdi
993 jmp L$ccm64_dec_outer
994
995.p2align 4
# Final accumulator update for the last output block, single-block loop.
996L$ccm64_dec_break:
997
998 movl 240(%r11),%eax  # eax = round-loop count
999 movups (%r11),%xmm0  # xmm0 = round key 0
1000 movups 16(%r11),%xmm1  # xmm1 = round key 1
1001 xorps %xmm0,%xmm8  # output ^ round key 0
1002 leaq 32(%r11),%r11  # r11 -> round key 2
1003 xorps %xmm8,%xmm3  # accumulator ^= output ^ round key 0
1004L$oop_enc1_6:
1005.byte 102,15,56,220,217  # aesenc %xmm1,%xmm3
1006 decl %eax  # sets ZF for the jnz below
1007 movups (%r11),%xmm1  # next round key
1008 leaq 16(%r11),%r11
1009 jnz L$oop_enc1_6
1010.byte 102,15,56,221,217  # aesenclast %xmm1,%xmm3
# Done: zero the working registers and write the accumulator back.
1011 pxor %xmm0,%xmm0
1012 pxor %xmm1,%xmm1
1013 pxor %xmm2,%xmm2
1014 movups %xmm3,(%r9)
1015 pxor %xmm3,%xmm3
1016 pxor %xmm8,%xmm8
1017 pxor %xmm6,%xmm6
1018 .byte 0xf3,0xc3  # rep ret
1019
1020.globl _aesni_ctr32_encrypt_blocks
1021
1022.p2align 4
1023_aesni_ctr32_encrypt_blocks:
1024
1025 cmpq $1,%rdx
1026 jne L$ctr32_bulk
1027
1028
1029
1030 movups (%r8),%xmm2
1031 movups (%rdi),%xmm3
1032 movl 240(%rcx),%edx
1033 movups (%rcx),%xmm0
1034 movups 16(%rcx),%xmm1
1035 leaq 32(%rcx),%rcx
1036 xorps %xmm0,%xmm2
1037L$oop_enc1_7:
1038.byte 102,15,56,220,209
1039 decl %edx
1040 movups (%rcx),%xmm1
1041 leaq 16(%rcx),%rcx
1042 jnz L$oop_enc1_7
1043.byte 102,15,56,221,209
1044 pxor %xmm0,%xmm0
1045 pxor %xmm1,%xmm1
1046 xorps %xmm3,%xmm2
1047 pxor %xmm3,%xmm3
1048 movups %xmm2,(%rsi)
1049 xorps %xmm2,%xmm2
1050 jmp L$ctr32_epilogue
1051
1052.p2align 4
1053L$ctr32_bulk:
1054 leaq (%rsp),%r11
1055
1056 pushq %rbp
1057
1058 subq $128,%rsp
1059 andq $-16,%rsp
1060
1061
1062
1063
1064 movdqu (%r8),%xmm2
1065 movdqu (%rcx),%xmm0
1066 movl 12(%r8),%r8d
1067 pxor %xmm0,%xmm2
1068 movl 12(%rcx),%ebp
1069 movdqa %xmm2,0(%rsp)
1070 bswapl %r8d
1071 movdqa %xmm2,%xmm3
1072 movdqa %xmm2,%xmm4
1073 movdqa %xmm2,%xmm5
1074 movdqa %xmm2,64(%rsp)
1075 movdqa %xmm2,80(%rsp)
1076 movdqa %xmm2,96(%rsp)
1077 movq %rdx,%r10
1078 movdqa %xmm2,112(%rsp)
1079
1080 leaq 1(%r8),%rax
1081 leaq 2(%r8),%rdx
1082 bswapl %eax
1083 bswapl %edx
1084 xorl %ebp,%eax
1085 xorl %ebp,%edx
1086.byte 102,15,58,34,216,3
1087 leaq 3(%r8),%rax
1088 movdqa %xmm3,16(%rsp)
1089.byte 102,15,58,34,226,3
1090 bswapl %eax
1091 movq %r10,%rdx
1092 leaq 4(%r8),%r10
1093 movdqa %xmm4,32(%rsp)
1094 xorl %ebp,%eax
1095 bswapl %r10d
1096.byte 102,15,58,34,232,3
1097 xorl %ebp,%r10d
1098 movdqa %xmm5,48(%rsp)
1099 leaq 5(%r8),%r9
1100 movl %r10d,64+12(%rsp)
1101 bswapl %r9d
1102 leaq 6(%r8),%r10
1103 movl 240(%rcx),%eax
1104 xorl %ebp,%r9d
1105 bswapl %r10d
1106 movl %r9d,80+12(%rsp)
1107 xorl %ebp,%r10d
1108 leaq 7(%r8),%r9
1109 movl %r10d,96+12(%rsp)
1110 bswapl %r9d
1111 movl _OPENSSL_ia32cap_P+4(%rip),%r10d
1112 xorl %ebp,%r9d
1113 andl $71303168,%r10d
1114 movl %r9d,112+12(%rsp)
1115
1116 movups 16(%rcx),%xmm1
1117
1118 movdqa 64(%rsp),%xmm6
1119 movdqa 80(%rsp),%xmm7
1120
1121 cmpq $8,%rdx
1122 jb L$ctr32_tail
1123
1124 subq $6,%rdx
1125 cmpl $4194304,%r10d
1126 je L$ctr32_6x
1127
1128 leaq 128(%rcx),%rcx
1129 subq $2,%rdx
1130 jmp L$ctr32_loop8
1131
1132.p2align 4
1133L$ctr32_6x:
1134 shll $4,%eax
1135 movl $48,%r10d
1136 bswapl %ebp
1137 leaq 32(%rcx,%rax,1),%rcx
1138 subq %rax,%r10
1139 jmp L$ctr32_loop6
1140
1141.p2align 4
1142L$ctr32_loop6:
1143 addl $6,%r8d
1144 movups -48(%rcx,%r10,1),%xmm0
1145.byte 102,15,56,220,209
1146 movl %r8d,%eax
1147 xorl %ebp,%eax
1148.byte 102,15,56,220,217
1149.byte 0x0f,0x38,0xf1,0x44,0x24,12
1150 leal 1(%r8),%eax
1151.byte 102,15,56,220,225
1152 xorl %ebp,%eax
1153.byte 0x0f,0x38,0xf1,0x44,0x24,28
1154.byte 102,15,56,220,233
1155 leal 2(%r8),%eax
1156 xorl %ebp,%eax
1157.byte 102,15,56,220,241
1158.byte 0x0f,0x38,0xf1,0x44,0x24,44
1159 leal 3(%r8),%eax
1160.byte 102,15,56,220,249
1161 movups -32(%rcx,%r10,1),%xmm1
1162 xorl %ebp,%eax
1163
1164.byte 102,15,56,220,208
1165.byte 0x0f,0x38,0xf1,0x44,0x24,60
1166 leal 4(%r8),%eax
1167.byte 102,15,56,220,216
1168 xorl %ebp,%eax
1169.byte 0x0f,0x38,0xf1,0x44,0x24,76
1170.byte 102,15,56,220,224
1171 leal 5(%r8),%eax
1172 xorl %ebp,%eax
1173.byte 102,15,56,220,232
1174.byte 0x0f,0x38,0xf1,0x44,0x24,92
1175 movq %r10,%rax
1176.byte 102,15,56,220,240
1177.byte 102,15,56,220,248
1178 movups -16(%rcx,%r10,1),%xmm0
1179
1180 call L$enc_loop6
1181
1182 movdqu (%rdi),%xmm8
1183 movdqu 16(%rdi),%xmm9
1184 movdqu 32(%rdi),%xmm10
1185 movdqu 48(%rdi),%xmm11
1186 movdqu 64(%rdi),%xmm12
1187 movdqu 80(%rdi),%xmm13
1188 leaq 96(%rdi),%rdi
1189 movups -64(%rcx,%r10,1),%xmm1
1190 pxor %xmm2,%xmm8
1191 movaps 0(%rsp),%xmm2
1192 pxor %xmm3,%xmm9
1193 movaps 16(%rsp),%xmm3
1194 pxor %xmm4,%xmm10
1195 movaps 32(%rsp),%xmm4
1196 pxor %xmm5,%xmm11
1197 movaps 48(%rsp),%xmm5
1198 pxor %xmm6,%xmm12
1199 movaps 64(%rsp),%xmm6
1200 pxor %xmm7,%xmm13
1201 movaps 80(%rsp),%xmm7
1202 movdqu %xmm8,(%rsi)
1203 movdqu %xmm9,16(%rsi)
1204 movdqu %xmm10,32(%rsi)
1205 movdqu %xmm11,48(%rsi)
1206 movdqu %xmm12,64(%rsi)
1207 movdqu %xmm13,80(%rsi)
1208 leaq 96(%rsi),%rsi
1209
1210 subq $6,%rdx
1211 jnc L$ctr32_loop6
1212
1213 addq $6,%rdx
1214 jz L$ctr32_done
1215
1216 leal -48(%r10),%eax
1217 leaq -80(%rcx,%r10,1),%rcx
1218 negl %eax
1219 shrl $4,%eax
1220 jmp L$ctr32_tail
1221
1222.p2align 5
1223L$ctr32_loop8:
1224 addl $8,%r8d
1225 movdqa 96(%rsp),%xmm8
1226.byte 102,15,56,220,209
1227 movl %r8d,%r9d
1228 movdqa 112(%rsp),%xmm9
1229.byte 102,15,56,220,217
1230 bswapl %r9d
1231 movups 32-128(%rcx),%xmm0
1232.byte 102,15,56,220,225
1233 xorl %ebp,%r9d
1234 nop
1235.byte 102,15,56,220,233
1236 movl %r9d,0+12(%rsp)
1237 leaq 1(%r8),%r9
1238.byte 102,15,56,220,241
1239.byte 102,15,56,220,249
1240.byte 102,68,15,56,220,193
1241.byte 102,68,15,56,220,201
1242 movups 48-128(%rcx),%xmm1
1243 bswapl %r9d
1244.byte 102,15,56,220,208
1245.byte 102,15,56,220,216
1246 xorl %ebp,%r9d
1247.byte 0x66,0x90
1248.byte 102,15,56,220,224
1249.byte 102,15,56,220,232
1250 movl %r9d,16+12(%rsp)
1251 leaq 2(%r8),%r9
1252.byte 102,15,56,220,240
1253.byte 102,15,56,220,248
1254.byte 102,68,15,56,220,192
1255.byte 102,68,15,56,220,200
1256 movups 64-128(%rcx),%xmm0
1257 bswapl %r9d
1258.byte 102,15,56,220,209
1259.byte 102,15,56,220,217
1260 xorl %ebp,%r9d
1261.byte 0x66,0x90
1262.byte 102,15,56,220,225
1263.byte 102,15,56,220,233
1264 movl %r9d,32+12(%rsp)
1265 leaq 3(%r8),%r9
1266.byte 102,15,56,220,241
1267.byte 102,15,56,220,249
1268.byte 102,68,15,56,220,193
1269.byte 102,68,15,56,220,201
1270 movups 80-128(%rcx),%xmm1
1271 bswapl %r9d
1272.byte 102,15,56,220,208
1273.byte 102,15,56,220,216
1274 xorl %ebp,%r9d
1275.byte 0x66,0x90
1276.byte 102,15,56,220,224
1277.byte 102,15,56,220,232
1278 movl %r9d,48+12(%rsp)
1279 leaq 4(%r8),%r9
1280.byte 102,15,56,220,240
1281.byte 102,15,56,220,248
1282.byte 102,68,15,56,220,192
1283.byte 102,68,15,56,220,200
1284 movups 96-128(%rcx),%xmm0
1285 bswapl %r9d
1286.byte 102,15,56,220,209
1287.byte 102,15,56,220,217
1288 xorl %ebp,%r9d
1289.byte 0x66,0x90
1290.byte 102,15,56,220,225
1291.byte 102,15,56,220,233
1292 movl %r9d,64+12(%rsp)
1293 leaq 5(%r8),%r9
1294.byte 102,15,56,220,241
1295.byte 102,15,56,220,249
1296.byte 102,68,15,56,220,193
1297.byte 102,68,15,56,220,201
1298 movups 112-128(%rcx),%xmm1
1299 bswapl %r9d
1300.byte 102,15,56,220,208
1301.byte 102,15,56,220,216
1302 xorl %ebp,%r9d
1303.byte 0x66,0x90
1304.byte 102,15,56,220,224
1305.byte 102,15,56,220,232
1306 movl %r9d,80+12(%rsp)
1307 leaq 6(%r8),%r9
1308.byte 102,15,56,220,240
1309.byte 102,15,56,220,248
1310.byte 102,68,15,56,220,192
1311.byte 102,68,15,56,220,200
1312 movups 128-128(%rcx),%xmm0
1313 bswapl %r9d
1314.byte 102,15,56,220,209
1315.byte 102,15,56,220,217
1316 xorl %ebp,%r9d
1317.byte 0x66,0x90
1318.byte 102,15,56,220,225
1319.byte 102,15,56,220,233
1320 movl %r9d,96+12(%rsp)
1321 leaq 7(%r8),%r9
1322.byte 102,15,56,220,241
1323.byte 102,15,56,220,249
1324.byte 102,68,15,56,220,193
1325.byte 102,68,15,56,220,201
1326 movups 144-128(%rcx),%xmm1
1327 bswapl %r9d
1328.byte 102,15,56,220,208
1329.byte 102,15,56,220,216
1330.byte 102,15,56,220,224
1331 xorl %ebp,%r9d
1332 movdqu 0(%rdi),%xmm10
1333.byte 102,15,56,220,232
1334 movl %r9d,112+12(%rsp)
1335 cmpl $11,%eax
1336.byte 102,15,56,220,240
1337.byte 102,15,56,220,248
1338.byte 102,68,15,56,220,192
1339.byte 102,68,15,56,220,200
1340 movups 160-128(%rcx),%xmm0
1341
1342 jb L$ctr32_enc_done
1343
1344.byte 102,15,56,220,209
1345.byte 102,15,56,220,217
1346.byte 102,15,56,220,225
1347.byte 102,15,56,220,233
1348.byte 102,15,56,220,241
1349.byte 102,15,56,220,249
1350.byte 102,68,15,56,220,193
1351.byte 102,68,15,56,220,201
1352 movups 176-128(%rcx),%xmm1
1353
1354.byte 102,15,56,220,208
1355.byte 102,15,56,220,216
1356.byte 102,15,56,220,224
1357.byte 102,15,56,220,232
1358.byte 102,15,56,220,240
1359.byte 102,15,56,220,248
1360.byte 102,68,15,56,220,192
1361.byte 102,68,15,56,220,200
1362 movups 192-128(%rcx),%xmm0
1363 je L$ctr32_enc_done
1364
1365.byte 102,15,56,220,209
1366.byte 102,15,56,220,217
1367.byte 102,15,56,220,225
1368.byte 102,15,56,220,233
1369.byte 102,15,56,220,241
1370.byte 102,15,56,220,249
1371.byte 102,68,15,56,220,193
1372.byte 102,68,15,56,220,201
1373 movups 208-128(%rcx),%xmm1
1374
1375.byte 102,15,56,220,208
1376.byte 102,15,56,220,216
1377.byte 102,15,56,220,224
1378.byte 102,15,56,220,232
1379.byte 102,15,56,220,240
1380.byte 102,15,56,220,248
1381.byte 102,68,15,56,220,192
1382.byte 102,68,15,56,220,200
1383 movups 224-128(%rcx),%xmm0
1384 jmp L$ctr32_enc_done
1385
1386.p2align 4
1387L$ctr32_enc_done:
1388 movdqu 16(%rdi),%xmm11
1389 pxor %xmm0,%xmm10
1390 movdqu 32(%rdi),%xmm12
1391 pxor %xmm0,%xmm11
1392 movdqu 48(%rdi),%xmm13
1393 pxor %xmm0,%xmm12
1394 movdqu 64(%rdi),%xmm14
1395 pxor %xmm0,%xmm13
1396 movdqu 80(%rdi),%xmm15
1397 pxor %xmm0,%xmm14
1398 pxor %xmm0,%xmm15
1399.byte 102,15,56,220,209
1400.byte 102,15,56,220,217
1401.byte 102,15,56,220,225
1402.byte 102,15,56,220,233
1403.byte 102,15,56,220,241
1404.byte 102,15,56,220,249
1405.byte 102,68,15,56,220,193
1406.byte 102,68,15,56,220,201
1407 movdqu 96(%rdi),%xmm1
1408 leaq 128(%rdi),%rdi
1409
1410.byte 102,65,15,56,221,210
1411 pxor %xmm0,%xmm1
1412 movdqu 112-128(%rdi),%xmm10
1413.byte 102,65,15,56,221,219
1414 pxor %xmm0,%xmm10
1415 movdqa 0(%rsp),%xmm11
1416.byte 102,65,15,56,221,228
1417.byte 102,65,15,56,221,237
1418 movdqa 16(%rsp),%xmm12
1419 movdqa 32(%rsp),%xmm13
1420.byte 102,65,15,56,221,246
1421.byte 102,65,15,56,221,255
1422 movdqa 48(%rsp),%xmm14
1423 movdqa 64(%rsp),%xmm15
1424.byte 102,68,15,56,221,193
1425 movdqa 80(%rsp),%xmm0
1426 movups 16-128(%rcx),%xmm1
1427.byte 102,69,15,56,221,202
1428
1429 movups %xmm2,(%rsi)
1430 movdqa %xmm11,%xmm2
1431 movups %xmm3,16(%rsi)
1432 movdqa %xmm12,%xmm3
1433 movups %xmm4,32(%rsi)
1434 movdqa %xmm13,%xmm4
1435 movups %xmm5,48(%rsi)
1436 movdqa %xmm14,%xmm5
1437 movups %xmm6,64(%rsi)
1438 movdqa %xmm15,%xmm6
1439 movups %xmm7,80(%rsi)
1440 movdqa %xmm0,%xmm7
1441 movups %xmm8,96(%rsi)
1442 movups %xmm9,112(%rsi)
1443 leaq 128(%rsi),%rsi
1444
1445 subq $8,%rdx
1446 jnc L$ctr32_loop8
1447
1448 addq $8,%rdx
1449 jz L$ctr32_done
1450 leaq -128(%rcx),%rcx
1451
1452L$ctr32_tail:
1453
1454
1455 leaq 16(%rcx),%rcx
1456 cmpq $4,%rdx
1457 jb L$ctr32_loop3
1458 je L$ctr32_loop4
1459
1460
1461 shll $4,%eax
1462 movdqa 96(%rsp),%xmm8
1463 pxor %xmm9,%xmm9
1464
1465 movups 16(%rcx),%xmm0
1466.byte 102,15,56,220,209
1467.byte 102,15,56,220,217
1468 leaq 32-16(%rcx,%rax,1),%rcx
1469 negq %rax
1470.byte 102,15,56,220,225
1471 addq $16,%rax
1472 movups (%rdi),%xmm10
1473.byte 102,15,56,220,233
1474.byte 102,15,56,220,241
1475 movups 16(%rdi),%xmm11
1476 movups 32(%rdi),%xmm12
1477.byte 102,15,56,220,249
1478.byte 102,68,15,56,220,193
1479
1480 call L$enc_loop8_enter
1481
1482 movdqu 48(%rdi),%xmm13
1483 pxor %xmm10,%xmm2
1484 movdqu 64(%rdi),%xmm10
1485 pxor %xmm11,%xmm3
1486 movdqu %xmm2,(%rsi)
1487 pxor %xmm12,%xmm4
1488 movdqu %xmm3,16(%rsi)
1489 pxor %xmm13,%xmm5
1490 movdqu %xmm4,32(%rsi)
1491 pxor %xmm10,%xmm6
1492 movdqu %xmm5,48(%rsi)
1493 movdqu %xmm6,64(%rsi)
1494 cmpq $6,%rdx
1495 jb L$ctr32_done
1496
1497 movups 80(%rdi),%xmm11
1498 xorps %xmm11,%xmm7
1499 movups %xmm7,80(%rsi)
1500 je L$ctr32_done
1501
1502 movups 96(%rdi),%xmm12
1503 xorps %xmm12,%xmm8
1504 movups %xmm8,96(%rsi)
1505 jmp L$ctr32_done
1506
1507.p2align 5
1508L$ctr32_loop4:
1509.byte 102,15,56,220,209
1510 leaq 16(%rcx),%rcx
1511 decl %eax
1512.byte 102,15,56,220,217
1513.byte 102,15,56,220,225
1514.byte 102,15,56,220,233
1515 movups (%rcx),%xmm1
1516 jnz L$ctr32_loop4
1517.byte 102,15,56,221,209
1518.byte 102,15,56,221,217
1519 movups (%rdi),%xmm10
1520 movups 16(%rdi),%xmm11
1521.byte 102,15,56,221,225
1522.byte 102,15,56,221,233
1523 movups 32(%rdi),%xmm12
1524 movups 48(%rdi),%xmm13
1525
1526 xorps %xmm10,%xmm2
1527 movups %xmm2,(%rsi)
1528 xorps %xmm11,%xmm3
1529 movups %xmm3,16(%rsi)
1530 pxor %xmm12,%xmm4
1531 movdqu %xmm4,32(%rsi)
1532 pxor %xmm13,%xmm5
1533 movdqu %xmm5,48(%rsi)
1534 jmp L$ctr32_done
1535
1536.p2align 5
1537L$ctr32_loop3:
1538.byte 102,15,56,220,209
1539 leaq 16(%rcx),%rcx
1540 decl %eax
1541.byte 102,15,56,220,217
1542.byte 102,15,56,220,225
1543 movups (%rcx),%xmm1
1544 jnz L$ctr32_loop3
1545.byte 102,15,56,221,209
1546.byte 102,15,56,221,217
1547.byte 102,15,56,221,225
1548
1549 movups (%rdi),%xmm10
1550 xorps %xmm10,%xmm2
1551 movups %xmm2,(%rsi)
1552 cmpq $2,%rdx
1553 jb L$ctr32_done
1554
1555 movups 16(%rdi),%xmm11
1556 xorps %xmm11,%xmm3
1557 movups %xmm3,16(%rsi)
1558 je L$ctr32_done
1559
1560 movups 32(%rdi),%xmm12
1561 xorps %xmm12,%xmm4
1562 movups %xmm4,32(%rsi)
1563
1564L$ctr32_done:
1565 xorps %xmm0,%xmm0
1566 xorl %ebp,%ebp
1567 pxor %xmm1,%xmm1
1568 pxor %xmm2,%xmm2
1569 pxor %xmm3,%xmm3
1570 pxor %xmm4,%xmm4
1571 pxor %xmm5,%xmm5
1572 pxor %xmm6,%xmm6
1573 pxor %xmm7,%xmm7
1574 movaps %xmm0,0(%rsp)
1575 pxor %xmm8,%xmm8
1576 movaps %xmm0,16(%rsp)
1577 pxor %xmm9,%xmm9
1578 movaps %xmm0,32(%rsp)
1579 pxor %xmm10,%xmm10
1580 movaps %xmm0,48(%rsp)
1581 pxor %xmm11,%xmm11
1582 movaps %xmm0,64(%rsp)
1583 pxor %xmm12,%xmm12
1584 movaps %xmm0,80(%rsp)
1585 pxor %xmm13,%xmm13
1586 movaps %xmm0,96(%rsp)
1587 pxor %xmm14,%xmm14
1588 movaps %xmm0,112(%rsp)
1589 pxor %xmm15,%xmm15
1590 movq -8(%r11),%rbp
1591
1592 leaq (%r11),%rsp
1593
1594L$ctr32_epilogue:
1595 .byte 0xf3,0xc3
1596
1597
# _aesni_xts_encrypt: AES-XTS encryption built on AES-NI instructions.
# Register use on entry, as read by the code below:
#   rdi = input pointer, rsi = output pointer, rdx = length in bytes,
#   rcx = data key schedule (round count read from offset 240),
#   r8  = tweak key schedule (round count read from offset 240),
#   r9  = pointer to the 16-byte initial tweak value.
# NOTE(review): presumably the OpenSSL prototype
#   aesni_xts_encrypt(in, out, len, key1, key2, iv) - confirm against the C header.
# The .byte sequences are hand-encoded instructions:
#   102,15,56,220,... = aesenc, 102,15,56,221,... = aesenclast,
#   0xf3,0xc3 = rep ret.
# A 112-byte, 16-byte aligned stack scratch area holds per-block tweak values;
# it and all xmm registers are zeroed before returning.
1598.globl _aesni_xts_encrypt
1599
1600.p2align 4
1601_aesni_xts_encrypt:
1602
# Prologue: r11 keeps the entry rsp so the epilogue can restore rbp and rsp.
1603 leaq (%rsp),%r11
1604
1605 pushq %rbp
1606
1607 subq $112,%rsp
1608 andq $-16,%rsp
# Encrypt the initial tweak from (%r9) with the key schedule at r8:
# eax = round count of that key, r10d = round count of the data key.
1609 movups (%r9),%xmm2
1610 movl 240(%r8),%eax
1611 movl 240(%rcx),%r10d
1612 movups (%r8),%xmm0
1613 movups 16(%r8),%xmm1
1614 leaq 32(%r8),%r8
1615 xorps %xmm0,%xmm2
1616L$oop_enc1_8:
1617.byte 102,15,56,220,209
1618 decl %eax
1619 movups (%r8),%xmm1
1620 leaq 16(%r8),%r8
1621 jnz L$oop_enc1_8
1622.byte 102,15,56,221,209
# Data-key setup: xmm0 = round key 0, rbp = key schedule base, eax = rounds,
# r10 = 16*rounds, r9 = original length (its low 4 bits are the tail size),
# rdx = length rounded down to whole 16-byte blocks.
1623 movups (%rcx),%xmm0
1624 movq %rcx,%rbp
1625 movl %r10d,%eax
1626 shll $4,%r10d
1627 movq %rdx,%r9
1628 andq $-16,%rdx
1629
# xmm1 = round key at offset 16+16*rounds; XORed with round key 0 below and
# parked at 96(%rsp) for use by the main loop.
1630 movups 16(%rcx,%r10,1),%xmm1
1631
# Derive the tweak series. xmm15 is the running tweak; each step copies it to
# xmm10..xmm14 (pre-XORed with round key 0 in xmm0) and then advances xmm15:
# paddq doubles both 64-bit halves and the psrad/pand mask built from xmm9 XORs
# in bits selected from L$xts_magic.
# NOTE(review): presumably the GF(2^128) multiply-by-x step of XTS; the
# L$xts_magic constant is defined outside this chunk - confirm.
1632 movdqa L$xts_magic(%rip),%xmm8
1633 movdqa %xmm2,%xmm15
1634 pshufd $0x5f,%xmm2,%xmm9
1635 pxor %xmm0,%xmm1
1636 movdqa %xmm9,%xmm14
1637 paddd %xmm9,%xmm9
1638 movdqa %xmm15,%xmm10
1639 psrad $31,%xmm14
1640 paddq %xmm15,%xmm15
1641 pand %xmm8,%xmm14
1642 pxor %xmm0,%xmm10
1643 pxor %xmm14,%xmm15
1644 movdqa %xmm9,%xmm14
1645 paddd %xmm9,%xmm9
1646 movdqa %xmm15,%xmm11
1647 psrad $31,%xmm14
1648 paddq %xmm15,%xmm15
1649 pand %xmm8,%xmm14
1650 pxor %xmm0,%xmm11
1651 pxor %xmm14,%xmm15
1652 movdqa %xmm9,%xmm14
1653 paddd %xmm9,%xmm9
1654 movdqa %xmm15,%xmm12
1655 psrad $31,%xmm14
1656 paddq %xmm15,%xmm15
1657 pand %xmm8,%xmm14
1658 pxor %xmm0,%xmm12
1659 pxor %xmm14,%xmm15
1660 movdqa %xmm9,%xmm14
1661 paddd %xmm9,%xmm9
1662 movdqa %xmm15,%xmm13
1663 psrad $31,%xmm14
1664 paddq %xmm15,%xmm15
1665 pand %xmm8,%xmm14
1666 pxor %xmm0,%xmm13
1667 pxor %xmm14,%xmm15
1668 movdqa %xmm15,%xmm14
1669 psrad $31,%xmm9
1670 paddq %xmm15,%xmm15
1671 pand %xmm8,%xmm9
1672 pxor %xmm0,%xmm14
1673 pxor %xmm9,%xmm15
1674 movaps %xmm1,96(%rsp)
1675
# Fewer than six whole blocks (96 bytes): skip the main loop.
1676 subq $96,%rdx
1677 jc L$xts_enc_short
1678
# Main-loop setup: rcx = key + 32 + 16*rounds, rax = r10 = 112 - 16*rounds
# (a negative offset that the inner loop steps up to zero by 32),
# xmm1 = round key 1, r8 = address of L$xts_magic.
1679 movl $16+96,%eax
1680 leaq 32(%rbp,%r10,1),%rcx
1681 subq %r10,%rax
1682 movups 16(%rbp),%xmm1
1683 movq %rax,%r10
1684 leaq L$xts_magic(%rip),%r8
1685 jmp L$xts_enc_grandloop
1686
1687.p2align 5
# Six blocks per iteration. Each input block is XORed with (tweak ^ round key 0),
# which combines the tweak XOR with the first key addition. The stack slots
# 0..80(%rsp) receive the same values XORed with 96(%rsp), so they hold
# tweak ^ (round key at offset 16+16*rounds) and serve as the aesenclast operand.
1688L$xts_enc_grandloop:
1689 movdqu 0(%rdi),%xmm2
1690 movdqa %xmm0,%xmm8
1691 movdqu 16(%rdi),%xmm3
1692 pxor %xmm10,%xmm2
1693 movdqu 32(%rdi),%xmm4
1694 pxor %xmm11,%xmm3
1695.byte 102,15,56,220,209
1696 movdqu 48(%rdi),%xmm5
1697 pxor %xmm12,%xmm4
1698.byte 102,15,56,220,217
1699 movdqu 64(%rdi),%xmm6
1700 pxor %xmm13,%xmm5
1701.byte 102,15,56,220,225
1702 movdqu 80(%rdi),%xmm7
1703 pxor %xmm15,%xmm8
1704 movdqa 96(%rsp),%xmm9
1705 pxor %xmm14,%xmm6
1706.byte 102,15,56,220,233
1707 movups 32(%rbp),%xmm0
1708 leaq 96(%rdi),%rdi
1709 pxor %xmm8,%xmm7
1710
1711 pxor %xmm9,%xmm10
1712.byte 102,15,56,220,241
1713 pxor %xmm9,%xmm11
1714 movdqa %xmm10,0(%rsp)
1715.byte 102,15,56,220,249
1716 movups 48(%rbp),%xmm1
1717 pxor %xmm9,%xmm12
1718
1719.byte 102,15,56,220,208
1720 pxor %xmm9,%xmm13
1721 movdqa %xmm11,16(%rsp)
1722.byte 102,15,56,220,216
1723 pxor %xmm9,%xmm14
1724 movdqa %xmm12,32(%rsp)
1725.byte 102,15,56,220,224
1726.byte 102,15,56,220,232
1727 pxor %xmm9,%xmm8
1728 movdqa %xmm14,64(%rsp)
1729.byte 102,15,56,220,240
1730.byte 102,15,56,220,248
1731 movups 64(%rbp),%xmm0
1732 movdqa %xmm8,80(%rsp)
1733 pshufd $0x5f,%xmm15,%xmm9
1734 jmp L$xts_enc_loop6
1735.p2align 5
# Middle rounds: two aesenc rounds per pass on all six blocks, alternating
# round keys in xmm1 and xmm0; runs until rax reaches zero.
1736L$xts_enc_loop6:
1737.byte 102,15,56,220,209
1738.byte 102,15,56,220,217
1739.byte 102,15,56,220,225
1740.byte 102,15,56,220,233
1741.byte 102,15,56,220,241
1742.byte 102,15,56,220,249
1743 movups -64(%rcx,%rax,1),%xmm1
1744 addq $32,%rax
1745
1746.byte 102,15,56,220,208
1747.byte 102,15,56,220,216
1748.byte 102,15,56,220,224
1749.byte 102,15,56,220,232
1750.byte 102,15,56,220,240
1751.byte 102,15,56,220,248
1752 movups -80(%rcx,%rax,1),%xmm0
1753 jnz L$xts_enc_loop6
1754
# Remaining rounds, interleaved with computing the next six tweaks.
# xmm10..xmm14 are rebuilt as (round key 0 ^ new tweak); xmm15 keeps advancing.
# The previous xmm13 value is saved to 48(%rsp) before xmm13 is overwritten.
1755 movdqa (%r8),%xmm8
1756 movdqa %xmm9,%xmm14
1757 paddd %xmm9,%xmm9
1758.byte 102,15,56,220,209
1759 paddq %xmm15,%xmm15
1760 psrad $31,%xmm14
1761.byte 102,15,56,220,217
1762 pand %xmm8,%xmm14
1763 movups (%rbp),%xmm10
1764.byte 102,15,56,220,225
1765.byte 102,15,56,220,233
1766.byte 102,15,56,220,241
1767 pxor %xmm14,%xmm15
1768 movaps %xmm10,%xmm11
1769.byte 102,15,56,220,249
1770 movups -64(%rcx),%xmm1
1771
1772 movdqa %xmm9,%xmm14
1773.byte 102,15,56,220,208
1774 paddd %xmm9,%xmm9
1775 pxor %xmm15,%xmm10
1776.byte 102,15,56,220,216
1777 psrad $31,%xmm14
1778 paddq %xmm15,%xmm15
1779.byte 102,15,56,220,224
1780.byte 102,15,56,220,232
1781 pand %xmm8,%xmm14
1782 movaps %xmm11,%xmm12
1783.byte 102,15,56,220,240
1784 pxor %xmm14,%xmm15
1785 movdqa %xmm9,%xmm14
1786.byte 102,15,56,220,248
1787 movups -48(%rcx),%xmm0
1788
1789 paddd %xmm9,%xmm9
1790.byte 102,15,56,220,209
1791 pxor %xmm15,%xmm11
1792 psrad $31,%xmm14
1793.byte 102,15,56,220,217
1794 paddq %xmm15,%xmm15
1795 pand %xmm8,%xmm14
1796.byte 102,15,56,220,225
1797.byte 102,15,56,220,233
1798 movdqa %xmm13,48(%rsp)
1799 pxor %xmm14,%xmm15
1800.byte 102,15,56,220,241
1801 movaps %xmm12,%xmm13
1802 movdqa %xmm9,%xmm14
1803.byte 102,15,56,220,249
1804 movups -32(%rcx),%xmm1
1805
1806 paddd %xmm9,%xmm9
1807.byte 102,15,56,220,208
1808 pxor %xmm15,%xmm12
1809 psrad $31,%xmm14
1810.byte 102,15,56,220,216
1811 paddq %xmm15,%xmm15
1812 pand %xmm8,%xmm14
1813.byte 102,15,56,220,224
1814.byte 102,15,56,220,232
1815.byte 102,15,56,220,240
1816 pxor %xmm14,%xmm15
1817 movaps %xmm13,%xmm14
1818.byte 102,15,56,220,248
1819
1820 movdqa %xmm9,%xmm0
1821 paddd %xmm9,%xmm9
1822.byte 102,15,56,220,209
1823 pxor %xmm15,%xmm13
1824 psrad $31,%xmm0
1825.byte 102,15,56,220,217
1826 paddq %xmm15,%xmm15
1827 pand %xmm8,%xmm0
1828.byte 102,15,56,220,225
1829.byte 102,15,56,220,233
1830 pxor %xmm0,%xmm15
1831 movups (%rbp),%xmm0
1832.byte 102,15,56,220,241
1833.byte 102,15,56,220,249
1834 movups 16(%rbp),%xmm1
1835
# Final round: aesenclast with the memory operands 0..80(%rsp) prepared above
# (encoded with a ModRM/SIB rsp-relative operand). rax is reset from r10 and
# xmm0/xmm1 are reloaded with round keys 0 and 1 for the next iteration.
1836 pxor %xmm15,%xmm14
1837.byte 102,15,56,221,84,36,0
1838 psrad $31,%xmm9
1839 paddq %xmm15,%xmm15
1840.byte 102,15,56,221,92,36,16
1841.byte 102,15,56,221,100,36,32
1842 pand %xmm8,%xmm9
1843 movq %r10,%rax
1844.byte 102,15,56,221,108,36,48
1845.byte 102,15,56,221,116,36,64
1846.byte 102,15,56,221,124,36,80
1847 pxor %xmm9,%xmm15
1848
# Store six output blocks and loop while at least 96 bytes remain.
1849 leaq 96(%rsi),%rsi
1850 movups %xmm2,-96(%rsi)
1851 movups %xmm3,-80(%rsi)
1852 movups %xmm4,-64(%rsi)
1853 movups %xmm5,-48(%rsi)
1854 movups %xmm6,-32(%rsi)
1855 movups %xmm7,-16(%rsi)
1856 subq $96,%rdx
1857 jnc L$xts_enc_grandloop
1858
# Recover eax = round count from r10 (112 - r10 = 16*rounds) and rcx = key base.
1859 movl $16+96,%eax
1860 subl %r10d,%eax
1861 movq %rbp,%rcx
1862 shrl $4,%eax
1863
# Zero to five whole blocks remain. eax = rounds (kept in r10d as well).
# The pxor with xmm0 (round key 0) strips the key from each tweak as needed,
# then the code dispatches on the remaining byte count.
1864L$xts_enc_short:
1865
1866 movl %eax,%r10d
1867 pxor %xmm0,%xmm10
1868 addq $96,%rdx
1869 jz L$xts_enc_done
1870
1871 pxor %xmm0,%xmm11
1872 cmpq $0x20,%rdx
1873 jb L$xts_enc_one
1874 pxor %xmm0,%xmm12
1875 je L$xts_enc_two
1876
1877 pxor %xmm0,%xmm13
1878 cmpq $0x40,%rdx
1879 jb L$xts_enc_three
1880 pxor %xmm0,%xmm14
1881 je L$xts_enc_four
1882
# Five blocks: xmm7 is zeroed and _aesni_encrypt6 (defined outside this chunk)
# is called; only xmm2..xmm6 are used afterwards. xmm10 is then set to the
# following tweak (xmm15) for the tail handling at L$xts_enc_done.
1883 movdqu (%rdi),%xmm2
1884 movdqu 16(%rdi),%xmm3
1885 movdqu 32(%rdi),%xmm4
1886 pxor %xmm10,%xmm2
1887 movdqu 48(%rdi),%xmm5
1888 pxor %xmm11,%xmm3
1889 movdqu 64(%rdi),%xmm6
1890 leaq 80(%rdi),%rdi
1891 pxor %xmm12,%xmm4
1892 pxor %xmm13,%xmm5
1893 pxor %xmm14,%xmm6
1894 pxor %xmm7,%xmm7
1895
1896 call _aesni_encrypt6
1897
1898 xorps %xmm10,%xmm2
1899 movdqa %xmm15,%xmm10
1900 xorps %xmm11,%xmm3
1901 xorps %xmm12,%xmm4
1902 movdqu %xmm2,(%rsi)
1903 xorps %xmm13,%xmm5
1904 movdqu %xmm3,16(%rsi)
1905 xorps %xmm14,%xmm6
1906 movdqu %xmm4,32(%rsi)
1907 movdqu %xmm5,48(%rsi)
1908 movdqu %xmm6,64(%rsi)
1909 leaq 80(%rsi),%rsi
1910 jmp L$xts_enc_done
1911
1912.p2align 4
# One block: XOR with tweak, inline single-block encryption, XOR with tweak.
# xmm10 is left holding the next tweak (xmm11).
1913L$xts_enc_one:
1914 movups (%rdi),%xmm2
1915 leaq 16(%rdi),%rdi
1916 xorps %xmm10,%xmm2
1917 movups (%rcx),%xmm0
1918 movups 16(%rcx),%xmm1
1919 leaq 32(%rcx),%rcx
1920 xorps %xmm0,%xmm2
1921L$oop_enc1_9:
1922.byte 102,15,56,220,209
1923 decl %eax
1924 movups (%rcx),%xmm1
1925 leaq 16(%rcx),%rcx
1926 jnz L$oop_enc1_9
1927.byte 102,15,56,221,209
1928 xorps %xmm10,%xmm2
1929 movdqa %xmm11,%xmm10
1930 movups %xmm2,(%rsi)
1931 leaq 16(%rsi),%rsi
1932 jmp L$xts_enc_done
1933
1934.p2align 4
# Two blocks via _aesni_encrypt2; xmm10 is left holding the next tweak (xmm12).
1935L$xts_enc_two:
1936 movups (%rdi),%xmm2
1937 movups 16(%rdi),%xmm3
1938 leaq 32(%rdi),%rdi
1939 xorps %xmm10,%xmm2
1940 xorps %xmm11,%xmm3
1941
1942 call _aesni_encrypt2
1943
1944 xorps %xmm10,%xmm2
1945 movdqa %xmm12,%xmm10
1946 xorps %xmm11,%xmm3
1947 movups %xmm2,(%rsi)
1948 movups %xmm3,16(%rsi)
1949 leaq 32(%rsi),%rsi
1950 jmp L$xts_enc_done
1951
1952.p2align 4
# Three blocks via _aesni_encrypt3 (defined outside this chunk);
# xmm10 is left holding the next tweak (xmm13).
1953L$xts_enc_three:
1954 movups (%rdi),%xmm2
1955 movups 16(%rdi),%xmm3
1956 movups 32(%rdi),%xmm4
1957 leaq 48(%rdi),%rdi
1958 xorps %xmm10,%xmm2
1959 xorps %xmm11,%xmm3
1960 xorps %xmm12,%xmm4
1961
1962 call _aesni_encrypt3
1963
1964 xorps %xmm10,%xmm2
1965 movdqa %xmm13,%xmm10
1966 xorps %xmm11,%xmm3
1967 xorps %xmm12,%xmm4
1968 movups %xmm2,(%rsi)
1969 movups %xmm3,16(%rsi)
1970 movups %xmm4,32(%rsi)
1971 leaq 48(%rsi),%rsi
1972 jmp L$xts_enc_done
1973
1974.p2align 4
# Four blocks via _aesni_encrypt4 (defined outside this chunk);
# xmm10 is left holding the next tweak (xmm14).
1975L$xts_enc_four:
1976 movups (%rdi),%xmm2
1977 movups 16(%rdi),%xmm3
1978 movups 32(%rdi),%xmm4
1979 xorps %xmm10,%xmm2
1980 movups 48(%rdi),%xmm5
1981 leaq 64(%rdi),%rdi
1982 xorps %xmm11,%xmm3
1983 xorps %xmm12,%xmm4
1984 xorps %xmm13,%xmm5
1985
1986 call _aesni_encrypt4
1987
1988 pxor %xmm10,%xmm2
1989 movdqa %xmm14,%xmm10
1990 pxor %xmm11,%xmm3
1991 pxor %xmm12,%xmm4
1992 movdqu %xmm2,(%rsi)
1993 pxor %xmm13,%xmm5
1994 movdqu %xmm3,16(%rsi)
1995 movdqu %xmm4,32(%rsi)
1996 movdqu %xmm5,48(%rsi)
1997 leaq 64(%rsi),%rsi
1998 jmp L$xts_enc_done
1999
2000.p2align 4
# Tail handling: r9 & 15 = number of leftover bytes; none means finished.
2001L$xts_enc_done:
2002 andq $15,%r9
2003 jz L$xts_enc_ret
2004 movq %r9,%rdx
2005
# Ciphertext stealing: for each leftover byte, move the matching byte of the
# last output block forward to the tail position and put the input byte in
# its place.
2006L$xts_enc_steal:
2007 movzbl (%rdi),%eax
2008 movzbl -16(%rsi),%ecx
2009 leaq 1(%rdi),%rdi
2010 movb %al,-16(%rsi)
2011 movb %cl,0(%rsi)
2012 leaq 1(%rsi),%rsi
2013 subq $1,%rdx
2014 jnz L$xts_enc_steal
2015
# Rewind rsi and encrypt the patched block at -16(%rsi) in place with the
# tweak in xmm10, using the key base saved in rbp and rounds saved in r10d.
2016 subq %r9,%rsi
2017 movq %rbp,%rcx
2018 movl %r10d,%eax
2019
2020 movups -16(%rsi),%xmm2
2021 xorps %xmm10,%xmm2
2022 movups (%rcx),%xmm0
2023 movups 16(%rcx),%xmm1
2024 leaq 32(%rcx),%rcx
2025 xorps %xmm0,%xmm2
2026L$oop_enc1_10:
2027.byte 102,15,56,220,209
2028 decl %eax
2029 movups (%rcx),%xmm1
2030 leaq 16(%rcx),%rcx
2031 jnz L$oop_enc1_10
2032.byte 102,15,56,221,209
2033 xorps %xmm10,%xmm2
2034 movups %xmm2,-16(%rsi)
2035
# Epilogue: zero all xmm registers and the 112-byte stack scratch area,
# then restore rbp and rsp from the values captured at entry.
2036L$xts_enc_ret:
2037 xorps %xmm0,%xmm0
2038 pxor %xmm1,%xmm1
2039 pxor %xmm2,%xmm2
2040 pxor %xmm3,%xmm3
2041 pxor %xmm4,%xmm4
2042 pxor %xmm5,%xmm5
2043 pxor %xmm6,%xmm6
2044 pxor %xmm7,%xmm7
2045 movaps %xmm0,0(%rsp)
2046 pxor %xmm8,%xmm8
2047 movaps %xmm0,16(%rsp)
2048 pxor %xmm9,%xmm9
2049 movaps %xmm0,32(%rsp)
2050 pxor %xmm10,%xmm10
2051 movaps %xmm0,48(%rsp)
2052 pxor %xmm11,%xmm11
2053 movaps %xmm0,64(%rsp)
2054 pxor %xmm12,%xmm12
2055 movaps %xmm0,80(%rsp)
2056 pxor %xmm13,%xmm13
2057 movaps %xmm0,96(%rsp)
2058 pxor %xmm14,%xmm14
2059 pxor %xmm15,%xmm15
2060 movq -8(%r11),%rbp
2061
2062 leaq (%r11),%rsp
2063
2064L$xts_enc_epilogue:
2065 .byte 0xf3,0xc3
2066
2067
# _aesni_xts_decrypt: AES-XTS decryption built on AES-NI instructions.
# Register use on entry, as read by the code below:
#   rdi = input pointer, rsi = output pointer, rdx = length in bytes,
#   rcx = data key schedule (round count read from offset 240),
#   r8  = tweak key schedule (round count read from offset 240),
#   r9  = pointer to the 16-byte initial tweak value.
# NOTE(review): presumably the OpenSSL prototype
#   aesni_xts_decrypt(in, out, len, key1, key2, iv) - confirm against the C header.
# The .byte sequences are hand-encoded instructions:
#   102,15,56,220/221 = aesenc/aesenclast (tweak encryption only),
#   102,15,56,222/223 = aesdec/aesdeclast, 0xf3,0xc3 = rep ret.
# A 112-byte, 16-byte aligned stack scratch area holds per-block tweak values;
# it and all xmm registers are zeroed before returning.
2068.globl _aesni_xts_decrypt
2069
2070.p2align 4
2071_aesni_xts_decrypt:
2072
# Prologue: r11 keeps the entry rsp so the epilogue can restore rbp and rsp.
2073 leaq (%rsp),%r11
2074
2075 pushq %rbp
2076
2077 subq $112,%rsp
2078 andq $-16,%rsp
# The initial tweak is encrypted (aesenc, not aesdec) with the key at r8.
2079 movups (%r9),%xmm2
2080 movl 240(%r8),%eax
2081 movl 240(%rcx),%r10d
2082 movups (%r8),%xmm0
2083 movups 16(%r8),%xmm1
2084 leaq 32(%r8),%r8
2085 xorps %xmm0,%xmm2
2086L$oop_enc1_11:
2087.byte 102,15,56,220,209
2088 decl %eax
2089 movups (%r8),%xmm1
2090 leaq 16(%r8),%r8
2091 jnz L$oop_enc1_11
2092.byte 102,15,56,221,209
# When the length is not a multiple of 16, hold back one extra full block
# (subtract 16) so it can be processed together with the partial tail.
2093 xorl %eax,%eax
2094 testq $15,%rdx
2095 setnz %al
2096 shlq $4,%rax
2097 subq %rax,%rdx
2098
# Data-key setup: xmm0 = round key 0, rbp = key schedule base, eax = rounds,
# r10 = 16*rounds, r9 = adjusted length (low 4 bits = tail size),
# rdx = adjusted length rounded down to whole blocks.
2099 movups (%rcx),%xmm0
2100 movq %rcx,%rbp
2101 movl %r10d,%eax
2102 shll $4,%r10d
2103 movq %rdx,%r9
2104 andq $-16,%rdx
2105
# xmm1 = round key at offset 16+16*rounds; XORed with round key 0 below and
# parked at 96(%rsp) for use by the main loop.
2106 movups 16(%rcx,%r10,1),%xmm1
2107
# Derive the tweak series. xmm15 is the running tweak; each step copies it to
# xmm10..xmm14 (pre-XORed with round key 0 in xmm0) and then advances xmm15:
# paddq doubles both 64-bit halves and the psrad/pand mask built from xmm9 XORs
# in bits selected from L$xts_magic.
# NOTE(review): presumably the GF(2^128) multiply-by-x step of XTS; the
# L$xts_magic constant is defined outside this chunk - confirm.
2108 movdqa L$xts_magic(%rip),%xmm8
2109 movdqa %xmm2,%xmm15
2110 pshufd $0x5f,%xmm2,%xmm9
2111 pxor %xmm0,%xmm1
2112 movdqa %xmm9,%xmm14
2113 paddd %xmm9,%xmm9
2114 movdqa %xmm15,%xmm10
2115 psrad $31,%xmm14
2116 paddq %xmm15,%xmm15
2117 pand %xmm8,%xmm14
2118 pxor %xmm0,%xmm10
2119 pxor %xmm14,%xmm15
2120 movdqa %xmm9,%xmm14
2121 paddd %xmm9,%xmm9
2122 movdqa %xmm15,%xmm11
2123 psrad $31,%xmm14
2124 paddq %xmm15,%xmm15
2125 pand %xmm8,%xmm14
2126 pxor %xmm0,%xmm11
2127 pxor %xmm14,%xmm15
2128 movdqa %xmm9,%xmm14
2129 paddd %xmm9,%xmm9
2130 movdqa %xmm15,%xmm12
2131 psrad $31,%xmm14
2132 paddq %xmm15,%xmm15
2133 pand %xmm8,%xmm14
2134 pxor %xmm0,%xmm12
2135 pxor %xmm14,%xmm15
2136 movdqa %xmm9,%xmm14
2137 paddd %xmm9,%xmm9
2138 movdqa %xmm15,%xmm13
2139 psrad $31,%xmm14
2140 paddq %xmm15,%xmm15
2141 pand %xmm8,%xmm14
2142 pxor %xmm0,%xmm13
2143 pxor %xmm14,%xmm15
2144 movdqa %xmm15,%xmm14
2145 psrad $31,%xmm9
2146 paddq %xmm15,%xmm15
2147 pand %xmm8,%xmm9
2148 pxor %xmm0,%xmm14
2149 pxor %xmm9,%xmm15
2150 movaps %xmm1,96(%rsp)
2151
# Fewer than six whole blocks (96 bytes): skip the main loop.
2152 subq $96,%rdx
2153 jc L$xts_dec_short
2154
# Main-loop setup: rcx = key + 32 + 16*rounds, rax = r10 = 112 - 16*rounds
# (a negative offset that the inner loop steps up to zero by 32),
# xmm1 = round key 1, r8 = address of L$xts_magic.
2155 movl $16+96,%eax
2156 leaq 32(%rbp,%r10,1),%rcx
2157 subq %r10,%rax
2158 movups 16(%rbp),%xmm1
2159 movq %rax,%r10
2160 leaq L$xts_magic(%rip),%r8
2161 jmp L$xts_dec_grandloop
2162
2163.p2align 5
# Six blocks per iteration. Each input block is XORed with (tweak ^ round key 0),
# which combines the tweak XOR with the first key addition. The stack slots
# 0..80(%rsp) receive the same values XORed with 96(%rsp), so they hold
# tweak ^ (round key at offset 16+16*rounds) and serve as the aesdeclast operand.
2164L$xts_dec_grandloop:
2165 movdqu 0(%rdi),%xmm2
2166 movdqa %xmm0,%xmm8
2167 movdqu 16(%rdi),%xmm3
2168 pxor %xmm10,%xmm2
2169 movdqu 32(%rdi),%xmm4
2170 pxor %xmm11,%xmm3
2171.byte 102,15,56,222,209
2172 movdqu 48(%rdi),%xmm5
2173 pxor %xmm12,%xmm4
2174.byte 102,15,56,222,217
2175 movdqu 64(%rdi),%xmm6
2176 pxor %xmm13,%xmm5
2177.byte 102,15,56,222,225
2178 movdqu 80(%rdi),%xmm7
2179 pxor %xmm15,%xmm8
2180 movdqa 96(%rsp),%xmm9
2181 pxor %xmm14,%xmm6
2182.byte 102,15,56,222,233
2183 movups 32(%rbp),%xmm0
2184 leaq 96(%rdi),%rdi
2185 pxor %xmm8,%xmm7
2186
2187 pxor %xmm9,%xmm10
2188.byte 102,15,56,222,241
2189 pxor %xmm9,%xmm11
2190 movdqa %xmm10,0(%rsp)
2191.byte 102,15,56,222,249
2192 movups 48(%rbp),%xmm1
2193 pxor %xmm9,%xmm12
2194
2195.byte 102,15,56,222,208
2196 pxor %xmm9,%xmm13
2197 movdqa %xmm11,16(%rsp)
2198.byte 102,15,56,222,216
2199 pxor %xmm9,%xmm14
2200 movdqa %xmm12,32(%rsp)
2201.byte 102,15,56,222,224
2202.byte 102,15,56,222,232
2203 pxor %xmm9,%xmm8
2204 movdqa %xmm14,64(%rsp)
2205.byte 102,15,56,222,240
2206.byte 102,15,56,222,248
2207 movups 64(%rbp),%xmm0
2208 movdqa %xmm8,80(%rsp)
2209 pshufd $0x5f,%xmm15,%xmm9
2210 jmp L$xts_dec_loop6
2211.p2align 5
# Middle rounds: two aesdec rounds per pass on all six blocks, alternating
# round keys in xmm1 and xmm0; runs until rax reaches zero.
2212L$xts_dec_loop6:
2213.byte 102,15,56,222,209
2214.byte 102,15,56,222,217
2215.byte 102,15,56,222,225
2216.byte 102,15,56,222,233
2217.byte 102,15,56,222,241
2218.byte 102,15,56,222,249
2219 movups -64(%rcx,%rax,1),%xmm1
2220 addq $32,%rax
2221
2222.byte 102,15,56,222,208
2223.byte 102,15,56,222,216
2224.byte 102,15,56,222,224
2225.byte 102,15,56,222,232
2226.byte 102,15,56,222,240
2227.byte 102,15,56,222,248
2228 movups -80(%rcx,%rax,1),%xmm0
2229 jnz L$xts_dec_loop6
2230
# Remaining rounds, interleaved with computing the next six tweaks.
# xmm10..xmm14 are rebuilt as (round key 0 ^ new tweak); xmm15 keeps advancing.
# The previous xmm13 value is saved to 48(%rsp) before xmm13 is overwritten.
2231 movdqa (%r8),%xmm8
2232 movdqa %xmm9,%xmm14
2233 paddd %xmm9,%xmm9
2234.byte 102,15,56,222,209
2235 paddq %xmm15,%xmm15
2236 psrad $31,%xmm14
2237.byte 102,15,56,222,217
2238 pand %xmm8,%xmm14
2239 movups (%rbp),%xmm10
2240.byte 102,15,56,222,225
2241.byte 102,15,56,222,233
2242.byte 102,15,56,222,241
2243 pxor %xmm14,%xmm15
2244 movaps %xmm10,%xmm11
2245.byte 102,15,56,222,249
2246 movups -64(%rcx),%xmm1
2247
2248 movdqa %xmm9,%xmm14
2249.byte 102,15,56,222,208
2250 paddd %xmm9,%xmm9
2251 pxor %xmm15,%xmm10
2252.byte 102,15,56,222,216
2253 psrad $31,%xmm14
2254 paddq %xmm15,%xmm15
2255.byte 102,15,56,222,224
2256.byte 102,15,56,222,232
2257 pand %xmm8,%xmm14
2258 movaps %xmm11,%xmm12
2259.byte 102,15,56,222,240
2260 pxor %xmm14,%xmm15
2261 movdqa %xmm9,%xmm14
2262.byte 102,15,56,222,248
2263 movups -48(%rcx),%xmm0
2264
2265 paddd %xmm9,%xmm9
2266.byte 102,15,56,222,209
2267 pxor %xmm15,%xmm11
2268 psrad $31,%xmm14
2269.byte 102,15,56,222,217
2270 paddq %xmm15,%xmm15
2271 pand %xmm8,%xmm14
2272.byte 102,15,56,222,225
2273.byte 102,15,56,222,233
2274 movdqa %xmm13,48(%rsp)
2275 pxor %xmm14,%xmm15
2276.byte 102,15,56,222,241
2277 movaps %xmm12,%xmm13
2278 movdqa %xmm9,%xmm14
2279.byte 102,15,56,222,249
2280 movups -32(%rcx),%xmm1
2281
2282 paddd %xmm9,%xmm9
2283.byte 102,15,56,222,208
2284 pxor %xmm15,%xmm12
2285 psrad $31,%xmm14
2286.byte 102,15,56,222,216
2287 paddq %xmm15,%xmm15
2288 pand %xmm8,%xmm14
2289.byte 102,15,56,222,224
2290.byte 102,15,56,222,232
2291.byte 102,15,56,222,240
2292 pxor %xmm14,%xmm15
2293 movaps %xmm13,%xmm14
2294.byte 102,15,56,222,248
2295
2296 movdqa %xmm9,%xmm0
2297 paddd %xmm9,%xmm9
2298.byte 102,15,56,222,209
2299 pxor %xmm15,%xmm13
2300 psrad $31,%xmm0
2301.byte 102,15,56,222,217
2302 paddq %xmm15,%xmm15
2303 pand %xmm8,%xmm0
2304.byte 102,15,56,222,225
2305.byte 102,15,56,222,233
2306 pxor %xmm0,%xmm15
2307 movups (%rbp),%xmm0
2308.byte 102,15,56,222,241
2309.byte 102,15,56,222,249
2310 movups 16(%rbp),%xmm1
2311
# Final round: aesdeclast with the memory operands 0..80(%rsp) prepared above
# (encoded with a ModRM/SIB rsp-relative operand). rax is reset from r10 and
# xmm0/xmm1 are reloaded with round keys 0 and 1 for the next iteration.
2312 pxor %xmm15,%xmm14
2313.byte 102,15,56,223,84,36,0
2314 psrad $31,%xmm9
2315 paddq %xmm15,%xmm15
2316.byte 102,15,56,223,92,36,16
2317.byte 102,15,56,223,100,36,32
2318 pand %xmm8,%xmm9
2319 movq %r10,%rax
2320.byte 102,15,56,223,108,36,48
2321.byte 102,15,56,223,116,36,64
2322.byte 102,15,56,223,124,36,80
2323 pxor %xmm9,%xmm15
2324
# Store six output blocks and loop while at least 96 bytes remain.
2325 leaq 96(%rsi),%rsi
2326 movups %xmm2,-96(%rsi)
2327 movups %xmm3,-80(%rsi)
2328 movups %xmm4,-64(%rsi)
2329 movups %xmm5,-48(%rsi)
2330 movups %xmm6,-32(%rsi)
2331 movups %xmm7,-16(%rsi)
2332 subq $96,%rdx
2333 jnc L$xts_dec_grandloop
2334
# Recover eax = round count from r10 (112 - r10 = 16*rounds) and rcx = key base.
2335 movl $16+96,%eax
2336 subl %r10d,%eax
2337 movq %rbp,%rcx
2338 shrl $4,%eax
2339
# Zero to five whole blocks remain. eax = rounds (kept in r10d as well).
# Unlike the encrypt routine, two tweaks (xmm10 and xmm11) are stripped of
# round key 0 before the zero-blocks exit, because the tail handling at
# L$xts_dec_done2 uses both.
2340L$xts_dec_short:
2341
2342 movl %eax,%r10d
2343 pxor %xmm0,%xmm10
2344 pxor %xmm0,%xmm11
2345 addq $96,%rdx
2346 jz L$xts_dec_done
2347
2348 pxor %xmm0,%xmm12
2349 cmpq $0x20,%rdx
2350 jb L$xts_dec_one
2351 pxor %xmm0,%xmm13
2352 je L$xts_dec_two
2353
2354 pxor %xmm0,%xmm14
2355 cmpq $0x40,%rdx
2356 jb L$xts_dec_three
2357 je L$xts_dec_four
2358
# Five blocks via _aesni_decrypt6 (defined outside this chunk); only
# xmm2..xmm6 are stored afterwards.
2359 movdqu (%rdi),%xmm2
2360 movdqu 16(%rdi),%xmm3
2361 movdqu 32(%rdi),%xmm4
2362 pxor %xmm10,%xmm2
2363 movdqu 48(%rdi),%xmm5
2364 pxor %xmm11,%xmm3
2365 movdqu 64(%rdi),%xmm6
2366 leaq 80(%rdi),%rdi
2367 pxor %xmm12,%xmm4
2368 pxor %xmm13,%xmm5
2369 pxor %xmm14,%xmm6
2370
2371 call _aesni_decrypt6
2372
# While storing, build in xmm11 a mask from the sign bits of xmm15
# (pcmpgtd against zero, then pshufd) for one more tweak step.
2373 xorps %xmm10,%xmm2
2374 xorps %xmm11,%xmm3
2375 xorps %xmm12,%xmm4
2376 movdqu %xmm2,(%rsi)
2377 xorps %xmm13,%xmm5
2378 movdqu %xmm3,16(%rsi)
2379 xorps %xmm14,%xmm6
2380 movdqu %xmm4,32(%rsi)
2381 pxor %xmm14,%xmm14
2382 movdqu %xmm5,48(%rsi)
2383 pcmpgtd %xmm15,%xmm14
2384 movdqu %xmm6,64(%rsi)
2385 leaq 80(%rsi),%rsi
2386 pshufd $0x13,%xmm14,%xmm11
2387 andq $15,%r9
2388 jz L$xts_dec_ret
2389
# A tail exists: xmm10 = current tweak (xmm15), xmm11 = the tweak after it.
# NOTE(review): relies on xmm8 still holding the L$xts_magic value after the
# call to _aesni_decrypt6 - confirm that helper leaves xmm8 untouched.
2390 movdqa %xmm15,%xmm10
2391 paddq %xmm15,%xmm15
2392 pand %xmm8,%xmm11
2393 pxor %xmm15,%xmm11
2394 jmp L$xts_dec_done2
2395
2396.p2align 4
# One block, decrypted inline. Afterwards xmm10/xmm11 hold the next two tweaks.
2397L$xts_dec_one:
2398 movups (%rdi),%xmm2
2399 leaq 16(%rdi),%rdi
2400 xorps %xmm10,%xmm2
2401 movups (%rcx),%xmm0
2402 movups 16(%rcx),%xmm1
2403 leaq 32(%rcx),%rcx
2404 xorps %xmm0,%xmm2
2405L$oop_dec1_12:
2406.byte 102,15,56,222,209
2407 decl %eax
2408 movups (%rcx),%xmm1
2409 leaq 16(%rcx),%rcx
2410 jnz L$oop_dec1_12
2411.byte 102,15,56,223,209
2412 xorps %xmm10,%xmm2
2413 movdqa %xmm11,%xmm10
2414 movups %xmm2,(%rsi)
2415 movdqa %xmm12,%xmm11
2416 leaq 16(%rsi),%rsi
2417 jmp L$xts_dec_done
2418
2419.p2align 4
# Two blocks via _aesni_decrypt2. Afterwards xmm10/xmm11 hold the next two tweaks.
2420L$xts_dec_two:
2421 movups (%rdi),%xmm2
2422 movups 16(%rdi),%xmm3
2423 leaq 32(%rdi),%rdi
2424 xorps %xmm10,%xmm2
2425 xorps %xmm11,%xmm3
2426
2427 call _aesni_decrypt2
2428
2429 xorps %xmm10,%xmm2
2430 movdqa %xmm12,%xmm10
2431 xorps %xmm11,%xmm3
2432 movdqa %xmm13,%xmm11
2433 movups %xmm2,(%rsi)
2434 movups %xmm3,16(%rsi)
2435 leaq 32(%rsi),%rsi
2436 jmp L$xts_dec_done
2437
2438.p2align 4
# Three blocks via _aesni_decrypt3 (defined outside this chunk).
# Afterwards xmm10/xmm11 hold the next two tweaks.
2439L$xts_dec_three:
2440 movups (%rdi),%xmm2
2441 movups 16(%rdi),%xmm3
2442 movups 32(%rdi),%xmm4
2443 leaq 48(%rdi),%rdi
2444 xorps %xmm10,%xmm2
2445 xorps %xmm11,%xmm3
2446 xorps %xmm12,%xmm4
2447
2448 call _aesni_decrypt3
2449
2450 xorps %xmm10,%xmm2
2451 movdqa %xmm13,%xmm10
2452 xorps %xmm11,%xmm3
2453 movdqa %xmm14,%xmm11
2454 xorps %xmm12,%xmm4
2455 movups %xmm2,(%rsi)
2456 movups %xmm3,16(%rsi)
2457 movups %xmm4,32(%rsi)
2458 leaq 48(%rsi),%rsi
2459 jmp L$xts_dec_done
2460
2461.p2align 4
# Four blocks via _aesni_decrypt4 (defined outside this chunk).
# Afterwards xmm10/xmm11 hold the next two tweaks.
2462L$xts_dec_four:
2463 movups (%rdi),%xmm2
2464 movups 16(%rdi),%xmm3
2465 movups 32(%rdi),%xmm4
2466 xorps %xmm10,%xmm2
2467 movups 48(%rdi),%xmm5
2468 leaq 64(%rdi),%rdi
2469 xorps %xmm11,%xmm3
2470 xorps %xmm12,%xmm4
2471 xorps %xmm13,%xmm5
2472
2473 call _aesni_decrypt4
2474
2475 pxor %xmm10,%xmm2
2476 movdqa %xmm14,%xmm10
2477 pxor %xmm11,%xmm3
2478 movdqa %xmm15,%xmm11
2479 pxor %xmm12,%xmm4
2480 movdqu %xmm2,(%rsi)
2481 pxor %xmm13,%xmm5
2482 movdqu %xmm3,16(%rsi)
2483 movdqu %xmm4,32(%rsi)
2484 movdqu %xmm5,48(%rsi)
2485 leaq 64(%rsi),%rsi
2486 jmp L$xts_dec_done
2487
2488.p2align 4
# Tail handling: r9 & 15 = number of leftover bytes; none means finished.
2489L$xts_dec_done:
2490 andq $15,%r9
2491 jz L$xts_dec_ret
# First decrypt the held-back full block at (%rdi) with the later tweak
# (xmm11) and store it at (%rsi).
2492L$xts_dec_done2:
2493 movq %r9,%rdx
2494 movq %rbp,%rcx
2495 movl %r10d,%eax
2496
2497 movups (%rdi),%xmm2
2498 xorps %xmm11,%xmm2
2499 movups (%rcx),%xmm0
2500 movups 16(%rcx),%xmm1
2501 leaq 32(%rcx),%rcx
2502 xorps %xmm0,%xmm2
2503L$oop_dec1_13:
2504.byte 102,15,56,222,209
2505 decl %eax
2506 movups (%rcx),%xmm1
2507 leaq 16(%rcx),%rcx
2508 jnz L$oop_dec1_13
2509.byte 102,15,56,223,209
2510 xorps %xmm11,%xmm2
2511 movups %xmm2,(%rsi)
2512
# Ciphertext stealing: for each leftover byte, move the byte just written at
# (%rsi) to the tail position 16(%rsi) and replace it with the tail input
# byte from 16(%rdi).
2513L$xts_dec_steal:
2514 movzbl 16(%rdi),%eax
2515 movzbl (%rsi),%ecx
2516 leaq 1(%rdi),%rdi
2517 movb %al,(%rsi)
2518 movb %cl,16(%rsi)
2519 leaq 1(%rsi),%rsi
2520 subq $1,%rdx
2521 jnz L$xts_dec_steal
2522
# Rewind rsi and decrypt the patched block at (%rsi) in place with the
# earlier tweak (xmm10).
2523 subq %r9,%rsi
2524 movq %rbp,%rcx
2525 movl %r10d,%eax
2526
2527 movups (%rsi),%xmm2
2528 xorps %xmm10,%xmm2
2529 movups (%rcx),%xmm0
2530 movups 16(%rcx),%xmm1
2531 leaq 32(%rcx),%rcx
2532 xorps %xmm0,%xmm2
2533L$oop_dec1_14:
2534.byte 102,15,56,222,209
2535 decl %eax
2536 movups (%rcx),%xmm1
2537 leaq 16(%rcx),%rcx
2538 jnz L$oop_dec1_14
2539.byte 102,15,56,223,209
2540 xorps %xmm10,%xmm2
2541 movups %xmm2,(%rsi)
2542
# Epilogue: zero all xmm registers and the 112-byte stack scratch area,
# then restore rbp and rsp from the values captured at entry.
2543L$xts_dec_ret:
2544 xorps %xmm0,%xmm0
2545 pxor %xmm1,%xmm1
2546 pxor %xmm2,%xmm2
2547 pxor %xmm3,%xmm3
2548 pxor %xmm4,%xmm4
2549 pxor %xmm5,%xmm5
2550 pxor %xmm6,%xmm6
2551 pxor %xmm7,%xmm7
2552 movaps %xmm0,0(%rsp)
2553 pxor %xmm8,%xmm8
2554 movaps %xmm0,16(%rsp)
2555 pxor %xmm9,%xmm9
2556 movaps %xmm0,32(%rsp)
2557 pxor %xmm10,%xmm10
2558 movaps %xmm0,48(%rsp)
2559 pxor %xmm11,%xmm11
2560 movaps %xmm0,64(%rsp)
2561 pxor %xmm12,%xmm12
2562 movaps %xmm0,80(%rsp)
2563 pxor %xmm13,%xmm13
2564 movaps %xmm0,96(%rsp)
2565 pxor %xmm14,%xmm14
2566 pxor %xmm15,%xmm15
2567 movq -8(%r11),%rbp
2568
2569 leaq (%r11),%rsp
2570
2571L$xts_dec_epilogue:
2572 .byte 0xf3,0xc3
2573
2574
# _aesni_ocb_encrypt: OCB-mode bulk encryption driver built on AES-NI.
# Register and stack use on entry, as read by the code below:
#   rdi = input pointer, rsi = output pointer, rdx = number of 16-byte blocks,
#   rcx = key schedule (round count read from offset 240),
#   r8  = block number used to index the offset table,
#   r9  = pointer to the 16-byte running offset (read here, written back at exit),
#   8(%rsp)  = pointer to a table of 16-byte entries (kept in rbx),
#   16(%rsp) = pointer to the 16-byte checksum (kept in rbp, written back at exit).
# NOTE(review): presumably the OpenSSL prototype
#   aesni_ocb_encrypt(in, out, blocks, key, start_block_num, offset_i, L_, checksum)
#   - confirm against the C header.
# NOTE(review): nothing here handles a zero block count; an even block number
# always processes one block first, and xmm0 used at L$ocb_enc_done is only
# loaded by the __ocb_encrypt* helpers - callers presumably pass blocks >= 1.
# 0xf3,0xc3 = rep ret.
2575.globl _aesni_ocb_encrypt
2576
2577.p2align 5
2578_aesni_ocb_encrypt:
2579
# Prologue: save callee-saved registers; rax = entry rsp is used to fetch the
# two stack arguments.
2580 leaq (%rsp),%rax
2581 pushq %rbx
2582
2583 pushq %rbp
2584
2585 pushq %r12
2586
2587 pushq %r13
2588
2589 pushq %r14
2590
2591 movq 8(%rax),%rbx
2592 movq 8+8(%rax),%rbp
2593
# Key setup: r11 = key base, r10 = 16*rounds, xmm9 = round key 0,
# xmm1 = round key at offset 16+16*rounds. That key is XORed into both
# xmm9 and the running offset xmm15.
2594 movl 240(%rcx),%r10d
2595 movq %rcx,%r11
2596 shll $4,%r10d
2597 movups (%rcx),%xmm9
2598 movups 16(%rcx,%r10,1),%xmm1
2599
2600 movdqu (%r9),%xmm15
2601 pxor %xmm1,%xmm9
2602 pxor %xmm1,%xmm15
2603
# Loop-counter setup shared with the helpers: rcx = key + 32 + 16*rounds,
# rax = r10 = 48 - 16*rounds (negative offset stepped up to zero by 32),
# xmm1 = round key 1.
2604 movl $16+32,%eax
2605 leaq 32(%r11,%r10,1),%rcx
2606 movups 16(%r11),%xmm1
2607 subq %r10,%rax
2608 movq %rax,%r10
2609
# xmm10 = first table entry, xmm8 = running checksum.
2610 movdqu (%rbx),%xmm10
2611 movdqu (%rbp),%xmm8
2612
# An even block number gets one block processed on its own first, using the
# table entry selected by the lowest set bit of r8 (bsf), so that the
# multi-block code below always starts on an odd block number.
2613 testq $1,%r8
2614 jnz L$ocb_enc_odd
2615
2616 bsfq %r8,%r12
2617 addq $1,%r8
2618 shlq $4,%r12
2619 movdqu (%rbx,%r12,1),%xmm7
2620 movdqu (%rdi),%xmm2
2621 leaq 16(%rdi),%rdi
2622
2623 call __ocb_encrypt1
2624
2625 movdqa %xmm7,%xmm15
2626 movups %xmm2,(%rsi)
2627 leaq 16(%rsi),%rsi
2628 subq $1,%rdx
2629 jz L$ocb_enc_done
2630
# Precompute table byte offsets r12/r13/r14 = 16*bsf(r8+1), 16*bsf(r8+3),
# 16*bsf(r8+5) for the next group, and advance r8 by 6.
2631L$ocb_enc_odd:
2632 leaq 1(%r8),%r12
2633 leaq 3(%r8),%r13
2634 leaq 5(%r8),%r14
2635 leaq 6(%r8),%r8
2636 bsfq %r12,%r12
2637 bsfq %r13,%r13
2638 bsfq %r14,%r14
2639 shlq $4,%r12
2640 shlq $4,%r13
2641 shlq $4,%r14
2642
2643 subq $6,%rdx
2644 jc L$ocb_enc_short
2645 jmp L$ocb_enc_grandloop
2646
2647.p2align 5
# Main loop: six blocks per iteration through __ocb_encrypt6.
2648L$ocb_enc_grandloop:
2649 movdqu 0(%rdi),%xmm2
2650 movdqu 16(%rdi),%xmm3
2651 movdqu 32(%rdi),%xmm4
2652 movdqu 48(%rdi),%xmm5
2653 movdqu 64(%rdi),%xmm6
2654 movdqu 80(%rdi),%xmm7
2655 leaq 96(%rdi),%rdi
2656
2657 call __ocb_encrypt6
2658
2659 movups %xmm2,0(%rsi)
2660 movups %xmm3,16(%rsi)
2661 movups %xmm4,32(%rsi)
2662 movups %xmm5,48(%rsi)
2663 movups %xmm6,64(%rsi)
2664 movups %xmm7,80(%rsi)
2665 leaq 96(%rsi),%rsi
2666 subq $6,%rdx
2667 jnc L$ocb_enc_grandloop
2668
# Zero to five blocks remain: load them progressively and dispatch on the count.
2669L$ocb_enc_short:
2670 addq $6,%rdx
2671 jz L$ocb_enc_done
2672
2673 movdqu 0(%rdi),%xmm2
2674 cmpq $2,%rdx
2675 jb L$ocb_enc_one
2676 movdqu 16(%rdi),%xmm3
2677 je L$ocb_enc_two
2678
2679 movdqu 32(%rdi),%xmm4
2680 cmpq $4,%rdx
2681 jb L$ocb_enc_three
2682 movdqu 48(%rdi),%xmm5
2683 je L$ocb_enc_four
2684
# Five blocks: the sixth lane (xmm7) is zeroed, so it adds nothing to the
# checksum, and its output is discarded. xmm14 (fifth offset) becomes the
# running offset.
2685 movdqu 64(%rdi),%xmm6
2686 pxor %xmm7,%xmm7
2687
2688 call __ocb_encrypt6
2689
2690 movdqa %xmm14,%xmm15
2691 movups %xmm2,0(%rsi)
2692 movups %xmm3,16(%rsi)
2693 movups %xmm4,32(%rsi)
2694 movups %xmm5,48(%rsi)
2695 movups %xmm6,64(%rsi)
2696
2697 jmp L$ocb_enc_done
2698
2699.p2align 4
# One block: uses the first table entry (xmm10) as the offset increment.
2700L$ocb_enc_one:
2701 movdqa %xmm10,%xmm7
2702
2703 call __ocb_encrypt1
2704
2705 movdqa %xmm7,%xmm15
2706 movups %xmm2,0(%rsi)
2707 jmp L$ocb_enc_done
2708
2709.p2align 4
# Two blocks: unused lanes xmm4/xmm5 are zeroed; xmm11 (second offset)
# becomes the running offset.
2710L$ocb_enc_two:
2711 pxor %xmm4,%xmm4
2712 pxor %xmm5,%xmm5
2713
2714 call __ocb_encrypt4
2715
2716 movdqa %xmm11,%xmm15
2717 movups %xmm2,0(%rsi)
2718 movups %xmm3,16(%rsi)
2719
2720 jmp L$ocb_enc_done
2721
2722.p2align 4
# Three blocks: unused lane xmm5 is zeroed; xmm12 becomes the running offset.
2723L$ocb_enc_three:
2724 pxor %xmm5,%xmm5
2725
2726 call __ocb_encrypt4
2727
2728 movdqa %xmm12,%xmm15
2729 movups %xmm2,0(%rsi)
2730 movups %xmm3,16(%rsi)
2731 movups %xmm4,32(%rsi)
2732
2733 jmp L$ocb_enc_done
2734
2735.p2align 4
# Four blocks: xmm13 becomes the running offset.
2736L$ocb_enc_four:
2737 call __ocb_encrypt4
2738
2739 movdqa %xmm13,%xmm15
2740 movups %xmm2,0(%rsi)
2741 movups %xmm3,16(%rsi)
2742 movups %xmm4,32(%rsi)
2743 movups %xmm5,48(%rsi)
2744
# Write back checksum and running offset. The pxor with xmm0 undoes the key
# mixed into the offset; __ocb_encrypt6 and __ocb_encrypt4 return with xmm0
# holding the round key at offset 16+16*rounds.
# NOTE(review): __ocb_encrypt1 presumably leaves xmm0 the same way - confirm.
2745L$ocb_enc_done:
2746 pxor %xmm0,%xmm15
2747 movdqu %xmm8,(%rbp)
2748 movdqu %xmm15,(%r9)
2749
# Zero all xmm registers before returning.
2750 xorps %xmm0,%xmm0
2751 pxor %xmm1,%xmm1
2752 pxor %xmm2,%xmm2
2753 pxor %xmm3,%xmm3
2754 pxor %xmm4,%xmm4
2755 pxor %xmm5,%xmm5
2756 pxor %xmm6,%xmm6
2757 pxor %xmm7,%xmm7
2758 pxor %xmm8,%xmm8
2759 pxor %xmm9,%xmm9
2760 pxor %xmm10,%xmm10
2761 pxor %xmm11,%xmm11
2762 pxor %xmm12,%xmm12
2763 pxor %xmm13,%xmm13
2764 pxor %xmm14,%xmm14
2765 pxor %xmm15,%xmm15
# Epilogue: rax = rsp + 40 is the entry rsp; restore the five pushed registers.
2766 leaq 40(%rsp),%rax
2767
2768 movq -40(%rax),%r14
2769
2770 movq -32(%rax),%r13
2771
2772 movq -24(%rax),%r12
2773
2774 movq -16(%rax),%rbp
2775
2776 movq -8(%rax),%rbx
2777
2778 leaq (%rax),%rsp
2779
2780L$ocb_enc_epilogue:
2781 .byte 0xf3,0xc3
2782
2783
2784
2785
# __ocb_encrypt6: internal helper that OCB-encrypts six blocks held in
# xmm2..xmm7, in place. Not a C-callable function; it relies on register state
# set up by _aesni_ocb_encrypt:
#   rbx = table base, r12/r13/r14 = byte offsets into the table,
#   r8 = block number (advanced by 6 here), r11 = key base,
#   rcx = key + 32 + 16*rounds, rax = r10 = negative round-key loop offset,
#   xmm1 = round key 1, xmm8 = checksum, xmm10 = first table entry,
#   xmm9 = round key 0 ^ final round key, xmm15 = running offset ^ final round key.
# On return: xmm2..xmm7 = outputs, xmm8 has the six input blocks XORed in,
#   xmm10..xmm15 = the six per-block offsets ^ final round key,
#   r12/r13/r14 = table byte offsets for the next group, rax = r10,
#   xmm0 = final round key, xmm1 = round key 1, xmm10 reloaded from (%rbx).
# .byte 102,15,56,220,... = aesenc, 102,65,15,56,221,... = aesenclast with
# xmm10..xmm15 as source; 0xf3,0xc3 = rep ret.
2786.p2align 5
2787__ocb_encrypt6:
# Build six chained offsets by XORing table entries into the running offset.
# After the first pxor, xmm15 = offset ^ round key 0, so XORing an offset into
# a block also applies the first key addition. Each input block is XORed into
# the checksum xmm8 before it is masked.
2788 pxor %xmm9,%xmm15
2789 movdqu (%rbx,%r12,1),%xmm11
2790 movdqa %xmm10,%xmm12
2791 movdqu (%rbx,%r13,1),%xmm13
2792 movdqa %xmm10,%xmm14
2793 pxor %xmm15,%xmm10
2794 movdqu (%rbx,%r14,1),%xmm15
2795 pxor %xmm10,%xmm11
2796 pxor %xmm2,%xmm8
2797 pxor %xmm10,%xmm2
2798 pxor %xmm11,%xmm12
2799 pxor %xmm3,%xmm8
2800 pxor %xmm11,%xmm3
2801 pxor %xmm12,%xmm13
2802 pxor %xmm4,%xmm8
2803 pxor %xmm12,%xmm4
2804 pxor %xmm13,%xmm14
2805 pxor %xmm5,%xmm8
2806 pxor %xmm13,%xmm5
2807 pxor %xmm14,%xmm15
2808 pxor %xmm6,%xmm8
2809 pxor %xmm14,%xmm6
2810 pxor %xmm7,%xmm8
2811 pxor %xmm15,%xmm7
2812 movups 32(%r11),%xmm0
2813
# Start computing the table indices for the next group (bsf of r8+1, r8+3,
# r8+5) and advance r8 by 6. The pxor with xmm9 converts each offset from
# (offset ^ round key 0) to (offset ^ final round key) for the last round.
2814 leaq 1(%r8),%r12
2815 leaq 3(%r8),%r13
2816 leaq 5(%r8),%r14
2817 addq $6,%r8
2818 pxor %xmm9,%xmm10
2819 bsfq %r12,%r12
2820 bsfq %r13,%r13
2821 bsfq %r14,%r14
2822
2823.byte 102,15,56,220,209
2824.byte 102,15,56,220,217
2825.byte 102,15,56,220,225
2826.byte 102,15,56,220,233
2827 pxor %xmm9,%xmm11
2828 pxor %xmm9,%xmm12
2829.byte 102,15,56,220,241
2830 pxor %xmm9,%xmm13
2831 pxor %xmm9,%xmm14
2832.byte 102,15,56,220,249
2833 movups 48(%r11),%xmm1
2834 pxor %xmm9,%xmm15
2835
2836.byte 102,15,56,220,208
2837.byte 102,15,56,220,216
2838.byte 102,15,56,220,224
2839.byte 102,15,56,220,232
2840.byte 102,15,56,220,240
2841.byte 102,15,56,220,248
2842 movups 64(%r11),%xmm0
2843 shlq $4,%r12
2844 shlq $4,%r13
2845 jmp L$ocb_enc_loop6
2846
2847.p2align 5
# Middle rounds: two aesenc rounds per pass, alternating round keys in xmm1
# and xmm0; runs until rax reaches zero.
2848L$ocb_enc_loop6:
2849.byte 102,15,56,220,209
2850.byte 102,15,56,220,217
2851.byte 102,15,56,220,225
2852.byte 102,15,56,220,233
2853.byte 102,15,56,220,241
2854.byte 102,15,56,220,249
2855 movups (%rcx,%rax,1),%xmm1
2856 addq $32,%rax
2857
2858.byte 102,15,56,220,208
2859.byte 102,15,56,220,216
2860.byte 102,15,56,220,224
2861.byte 102,15,56,220,232
2862.byte 102,15,56,220,240
2863.byte 102,15,56,220,248
2864 movups -16(%rcx,%rax,1),%xmm0
2865 jnz L$ocb_enc_loop6
2866
# Last aesenc round, then reload round key 1 for the next call.
2867.byte 102,15,56,220,209
2868.byte 102,15,56,220,217
2869.byte 102,15,56,220,225
2870.byte 102,15,56,220,233
2871.byte 102,15,56,220,241
2872.byte 102,15,56,220,249
2873 movups 16(%r11),%xmm1
2874 shlq $4,%r14
2875
# Final round: aesenclast with (offset ^ final round key) as the round key,
# which applies the last key addition and the output offset XOR together.
# xmm10 is reloaded with the first table entry once it has been consumed,
# and rax is reset from r10.
2876.byte 102,65,15,56,221,210
2877 movdqu (%rbx),%xmm10
2878 movq %r10,%rax
2879.byte 102,65,15,56,221,219
2880.byte 102,65,15,56,221,228
2881.byte 102,65,15,56,221,237
2882.byte 102,65,15,56,221,246
2883.byte 102,65,15,56,221,255
2884 .byte 0xf3,0xc3
2885
2886
2887
2888.p2align 5
# __ocb_encrypt4 -- internal helper: AES-encrypt the four blocks held in
# %xmm2..%xmm5 in place, applying a per-block mask before and after the cipher.
#
# Registers read (set up by the caller, which is outside this view; the roles
# presumably mirror the setup done in _aesni_ocb_decrypt -- confirm):
#   %rbx        base of a table of 16-byte entries, indexed by %r12 and %r13
#   %xmm10      first mask ingredient on entry
#   %xmm15      running mask value, XORed with %xmm9 on entry
#   %xmm9       correction value XORed into every mask
#   %xmm8       accumulator: all four input blocks are XORed into it
#   %r11        key schedule base (round keys read at offsets 16..64)
#   %rcx,%rax   base and negative index for the remaining round keys
#   %r10        initial value of %rax, restored before returning
#   %xmm1       round key expected to be preloaded from 16(%r11)
# On return %xmm2..%xmm5 hold the results, %xmm10..%xmm13 the final masks.
# The .byte sequences are hand-encoded AES-NI instructions:
#   102,15,56,220,MODRM    = aesenc
#   102,65,15,56,221,MODRM = aesenclast with a source in %xmm8..%xmm15
2889__ocb_encrypt4:
2890 pxor %xmm9,%xmm15
2891 movdqu (%rbx,%r12,1),%xmm11
2892 movdqa %xmm10,%xmm12
2893 movdqu (%rbx,%r13,1),%xmm13
2894 pxor %xmm15,%xmm10    # mask for block 0
2895 pxor %xmm10,%xmm11    # mask for block 1, chained from mask 0
2896 pxor %xmm2,%xmm8    # accumulate the unmasked input block
2897 pxor %xmm10,%xmm2
2898 pxor %xmm11,%xmm12    # mask for block 2
2899 pxor %xmm3,%xmm8
2900 pxor %xmm11,%xmm3
2901 pxor %xmm12,%xmm13    # mask for block 3
2902 pxor %xmm4,%xmm8
2903 pxor %xmm12,%xmm4
2904 pxor %xmm5,%xmm8
2905 pxor %xmm13,%xmm5
2906 movups 32(%r11),%xmm0
2907
# Re-base the masks with %xmm9 so they can serve as the aesenclast operand.
2908 pxor %xmm9,%xmm10
2909 pxor %xmm9,%xmm11
2910 pxor %xmm9,%xmm12
2911 pxor %xmm9,%xmm13
2912
2913.byte 102,15,56,220,209    # aesenc %xmm1,%xmm2 (next three: %xmm3,%xmm4,%xmm5)
2914.byte 102,15,56,220,217
2915.byte 102,15,56,220,225
2916.byte 102,15,56,220,233
2917 movups 48(%r11),%xmm1
2918
2919.byte 102,15,56,220,208    # aesenc %xmm0,%xmm2 (next three: %xmm3,%xmm4,%xmm5)
2920.byte 102,15,56,220,216
2921.byte 102,15,56,220,224
2922.byte 102,15,56,220,232
2923 movups 64(%r11),%xmm0
2924 jmp L$ocb_enc_loop4
2925
2926.p2align 5
# Two rounds per iteration; %rax counts up by 32 and the loop ends when it hits 0.
2927L$ocb_enc_loop4:
2928.byte 102,15,56,220,209    # aesenc %xmm1 into %xmm2..%xmm5
2929.byte 102,15,56,220,217
2930.byte 102,15,56,220,225
2931.byte 102,15,56,220,233
2932 movups (%rcx,%rax,1),%xmm1
2933 addq $32,%rax    # sets ZF for the jnz below; the SSE/AES ops in between leave flags alone
2934
2935.byte 102,15,56,220,208    # aesenc %xmm0 into %xmm2..%xmm5
2936.byte 102,15,56,220,216
2937.byte 102,15,56,220,224
2938.byte 102,15,56,220,232
2939 movups -16(%rcx,%rax,1),%xmm0
2940 jnz L$ocb_enc_loop4
2941
2942.byte 102,15,56,220,209    # last full round with %xmm1
2943.byte 102,15,56,220,217
2944.byte 102,15,56,220,225
2945.byte 102,15,56,220,233
2946 movups 16(%r11),%xmm1    # reload the round key at offset 16 for the next call
2947 movq %r10,%rax    # reset the round-key index
2948
2949.byte 102,65,15,56,221,210    # aesenclast %xmm10,%xmm2
2950.byte 102,65,15,56,221,219    # aesenclast %xmm11,%xmm3
2951.byte 102,65,15,56,221,228    # aesenclast %xmm12,%xmm4
2952.byte 102,65,15,56,221,237    # aesenclast %xmm13,%xmm5
2953 .byte 0xf3,0xc3    # rep ret
2954
2955
2956
2957.p2align 5
# __ocb_encrypt1 -- internal helper: AES-encrypt the single block in %xmm2.
#
# Registers read (prepared by the caller, which is outside this view):
#   %xmm7       mask ingredient for this block; on return it holds the final mask
#   %xmm15      running mask value, XORed into %xmm7
#   %xmm9       correction value XORed into the mask before and after use
#   %xmm8       accumulator: the unmasked input block is XORed into it
#   %r11        key schedule base; %rcx,%rax index the later round keys
#   %r10        initial value of %rax, restored before returning
#   %xmm1       round key expected to be preloaded from 16(%r11)
# .byte 102,15,56,220,MODRM = aesenc, 102,15,56,221,MODRM = aesenclast.
2958__ocb_encrypt1:
2959 pxor %xmm15,%xmm7
2960 pxor %xmm9,%xmm7
2961 pxor %xmm2,%xmm8    # accumulate the unmasked input block
2962 pxor %xmm7,%xmm2    # apply the input mask
2963 movups 32(%r11),%xmm0
2964
2965.byte 102,15,56,220,209    # aesenc %xmm1,%xmm2
2966 movups 48(%r11),%xmm1
2967 pxor %xmm9,%xmm7    # re-base the mask for use as the aesenclast operand
2968
2969.byte 102,15,56,220,208    # aesenc %xmm0,%xmm2
2970 movups 64(%r11),%xmm0
2971 jmp L$ocb_enc_loop1
2972
2973.p2align 5
# Two rounds per iteration; ends when the index in %rax reaches 0.
2974L$ocb_enc_loop1:
2975.byte 102,15,56,220,209    # aesenc %xmm1,%xmm2
2976 movups (%rcx,%rax,1),%xmm1
2977 addq $32,%rax    # sets ZF for the jnz below
2978
2979.byte 102,15,56,220,208    # aesenc %xmm0,%xmm2
2980 movups -16(%rcx,%rax,1),%xmm0
2981 jnz L$ocb_enc_loop1
2982
2983.byte 102,15,56,220,209    # aesenc %xmm1,%xmm2
2984 movups 16(%r11),%xmm1    # reload the round key at offset 16 for the next call
2985 movq %r10,%rax    # reset the round-key index
2986
2987.byte 102,15,56,221,215    # aesenclast %xmm7,%xmm2
2988 .byte 0xf3,0xc3    # rep ret
2989
2990
# _aesni_ocb_decrypt -- exported bulk decryption routine (OCB mode, judging by
# the symbol name). Arguments as they are used below (System V AMD64 order):
#   %rdi      input, read 16 bytes per block
#   %rsi      output, written 16 bytes per block
#   %rdx      number of 16-byte blocks
#   %rcx      key schedule; a 32-bit round count is read at offset 240
#   %r8       block counter; bsf of nearby values selects table entries
#   %r9       16-byte value loaded into %xmm15 on entry and written back on
#             exit (presumably the OCB offset -- confirm)
#   8(%rsp)   base of a table of 16-byte entries, kept in %rbx
#             (presumably the OCB L_ table -- confirm)
#   16(%rsp)  16-byte value loaded into %xmm8 and written back on exit; every
#             decrypted block is XORed into it (presumably the checksum)
# Saves and restores %rbx, %rbp, %r12, %r13, %r14. All sixteen XMM registers
# are zeroed before returning.
# NOTE(review): a block count of zero does not look supported. With an odd %r8
# the pxor at L$ocb_dec_done uses an %xmm0 that was never loaded, and with an
# even %r8 one block is processed before the count is tested. Presumably
# callers always pass at least one block and a nonzero %r8 -- confirm.
2991.globl _aesni_ocb_decrypt
2992
2993.p2align 5
2994_aesni_ocb_decrypt:
2995
2996 leaq (%rsp),%rax    # remember the entry stack pointer to reach the stack arguments
2997 pushq %rbx
2998
2999 pushq %rbp
3000
3001 pushq %r12
3002
3003 pushq %r13
3004
3005 pushq %r14
3006
3007 movq 8(%rax),%rbx    # seventh argument: table base
3008 movq 8+8(%rax),%rbp    # eighth argument: 16-byte accumulator location
3009
3010 movl 240(%rcx),%r10d    # round count stored in the key schedule
3011 movq %rcx,%r11
3012 shll $4,%r10d    # count * 16 = byte offset
3013 movups (%rcx),%xmm9    # first round key
3014 movups 16(%rcx,%r10,1),%xmm1    # round key at 16 + 16*count (the final one)
3015
3016 movdqu (%r9),%xmm15
3017 pxor %xmm1,%xmm9    # %xmm9 = first key XOR final key
3018 pxor %xmm1,%xmm15    # running value kept XORed with the final key
3019
3020 movl $16+32,%eax
3021 leaq 32(%r11,%r10,1),%rcx    # %rcx = key + 32 + 16*count
3022 movups 16(%r11),%xmm1    # preload the round key the helpers expect in %xmm1
3023 subq %r10,%rax    # %rax = 48 - 16*count, the negative index the helpers count up to 0
3024 movq %rax,%r10    # keep a copy; helpers reload %rax from it
3025
3026 movdqu (%rbx),%xmm10    # table entry 0
3027 movdqu (%rbp),%xmm8
3028
3029 testq $1,%r8
3030 jnz L$ocb_dec_odd
3031
# Even block counter: handle one block alone so the main loop starts on an odd counter.
3032 bsfq %r8,%r12
3033 addq $1,%r8
3034 shlq $4,%r12    # bit index * 16 = table byte offset
3035 movdqu (%rbx,%r12,1),%xmm7
3036 movdqu (%rdi),%xmm2
3037 leaq 16(%rdi),%rdi
3038
3039 call __ocb_decrypt1
3040
3041 movdqa %xmm7,%xmm15    # mask of the block just done becomes the running value
3042 movups %xmm2,(%rsi)
3043 xorps %xmm2,%xmm8    # fold the decrypted block into the accumulator
3044 leaq 16(%rsi),%rsi
3045 subq $1,%rdx
3046 jz L$ocb_dec_done
3047
3048L$ocb_dec_odd:
# Table offsets for counter+1, +3 and +5; %r8 then advances by six.
3049 leaq 1(%r8),%r12
3050 leaq 3(%r8),%r13
3051 leaq 5(%r8),%r14
3052 leaq 6(%r8),%r8
3053 bsfq %r12,%r12
3054 bsfq %r13,%r13
3055 bsfq %r14,%r14
3056 shlq $4,%r12
3057 shlq $4,%r13
3058 shlq $4,%r14
3059
3060 subq $6,%rdx
3061 jc L$ocb_dec_short    # fewer than six blocks left
3062 jmp L$ocb_dec_grandloop
3063
3064.p2align 5
# Main loop: six blocks per iteration.
3065L$ocb_dec_grandloop:
3066 movdqu 0(%rdi),%xmm2
3067 movdqu 16(%rdi),%xmm3
3068 movdqu 32(%rdi),%xmm4
3069 movdqu 48(%rdi),%xmm5
3070 movdqu 64(%rdi),%xmm6
3071 movdqu 80(%rdi),%xmm7
3072 leaq 96(%rdi),%rdi
3073
3074 call __ocb_decrypt6
3075
3076 movups %xmm2,0(%rsi)
3077 pxor %xmm2,%xmm8
3078 movups %xmm3,16(%rsi)
3079 pxor %xmm3,%xmm8
3080 movups %xmm4,32(%rsi)
3081 pxor %xmm4,%xmm8
3082 movups %xmm5,48(%rsi)
3083 pxor %xmm5,%xmm8
3084 movups %xmm6,64(%rsi)
3085 pxor %xmm6,%xmm8
3086 movups %xmm7,80(%rsi)
3087 pxor %xmm7,%xmm8
3088 leaq 96(%rsi),%rsi
3089 subq $6,%rdx
3090 jnc L$ocb_dec_grandloop
3091
3092L$ocb_dec_short:
3093 addq $6,%rdx    # undo the last subtraction: %rdx = blocks left (0..5)
3094 jz L$ocb_dec_done
3095
# Dispatch on the remaining count; blocks are loaded as the count is narrowed down.
3096 movdqu 0(%rdi),%xmm2
3097 cmpq $2,%rdx
3098 jb L$ocb_dec_one
3099 movdqu 16(%rdi),%xmm3
3100 je L$ocb_dec_two    # still testing the cmpq $2 flags; movdqu does not change them
3101
3102 movdqu 32(%rdi),%xmm4
3103 cmpq $4,%rdx
3104 jb L$ocb_dec_three
3105 movdqu 48(%rdi),%xmm5
3106 je L$ocb_dec_four
3107
# Five blocks: run the six-block helper with a zero sixth input and ignore its sixth output.
3108 movdqu 64(%rdi),%xmm6
3109 pxor %xmm7,%xmm7
3110
3111 call __ocb_decrypt6
3112
3113 movdqa %xmm14,%xmm15    # mask of the fifth block becomes the running value
3114 movups %xmm2,0(%rsi)
3115 pxor %xmm2,%xmm8
3116 movups %xmm3,16(%rsi)
3117 pxor %xmm3,%xmm8
3118 movups %xmm4,32(%rsi)
3119 pxor %xmm4,%xmm8
3120 movups %xmm5,48(%rsi)
3121 pxor %xmm5,%xmm8
3122 movups %xmm6,64(%rsi)
3123 pxor %xmm6,%xmm8
3124
3125 jmp L$ocb_dec_done
3126
3127.p2align 4
3128L$ocb_dec_one:
3129 movdqa %xmm10,%xmm7    # table entry 0 is the mask ingredient for a single block
3130
3131 call __ocb_decrypt1
3132
3133 movdqa %xmm7,%xmm15
3134 movups %xmm2,0(%rsi)
3135 xorps %xmm2,%xmm8
3136 jmp L$ocb_dec_done
3137
3138.p2align 4
# Two blocks: four-block helper with the unused inputs zeroed.
3139L$ocb_dec_two:
3140 pxor %xmm4,%xmm4
3141 pxor %xmm5,%xmm5
3142
3143 call __ocb_decrypt4
3144
3145 movdqa %xmm11,%xmm15    # mask of the second block becomes the running value
3146 movups %xmm2,0(%rsi)
3147 xorps %xmm2,%xmm8
3148 movups %xmm3,16(%rsi)
3149 xorps %xmm3,%xmm8
3150
3151 jmp L$ocb_dec_done
3152
3153.p2align 4
# Three blocks: four-block helper with the fourth input zeroed.
3154L$ocb_dec_three:
3155 pxor %xmm5,%xmm5
3156
3157 call __ocb_decrypt4
3158
3159 movdqa %xmm12,%xmm15    # mask of the third block becomes the running value
3160 movups %xmm2,0(%rsi)
3161 xorps %xmm2,%xmm8
3162 movups %xmm3,16(%rsi)
3163 xorps %xmm3,%xmm8
3164 movups %xmm4,32(%rsi)
3165 xorps %xmm4,%xmm8
3166
3167 jmp L$ocb_dec_done
3168
3169.p2align 4
3170L$ocb_dec_four:
3171 call __ocb_decrypt4
3172
3173 movdqa %xmm13,%xmm15    # mask of the fourth block becomes the running value
3174 movups %xmm2,0(%rsi)
3175 pxor %xmm2,%xmm8
3176 movups %xmm3,16(%rsi)
3177 pxor %xmm3,%xmm8
3178 movups %xmm4,32(%rsi)
3179 pxor %xmm4,%xmm8
3180 movups %xmm5,48(%rsi)
3181 pxor %xmm5,%xmm8
3182
3183L$ocb_dec_done:
3184 pxor %xmm0,%xmm15    # %xmm0 is the last round key the helpers loaded; this undoes the entry XOR
3185 movdqu %xmm8,(%rbp)    # write back the accumulator
3186 movdqu %xmm15,(%r9)    # write back the running value
3187
# Wipe every XMM register so no key or data material is left behind.
3188 xorps %xmm0,%xmm0
3189 pxor %xmm1,%xmm1
3190 pxor %xmm2,%xmm2
3191 pxor %xmm3,%xmm3
3192 pxor %xmm4,%xmm4
3193 pxor %xmm5,%xmm5
3194 pxor %xmm6,%xmm6
3195 pxor %xmm7,%xmm7
3196 pxor %xmm8,%xmm8
3197 pxor %xmm9,%xmm9
3198 pxor %xmm10,%xmm10
3199 pxor %xmm11,%xmm11
3200 pxor %xmm12,%xmm12
3201 pxor %xmm13,%xmm13
3202 pxor %xmm14,%xmm14
3203 pxor %xmm15,%xmm15
3204 leaq 40(%rsp),%rax    # address just above the five saved registers
3205
3206 movq -40(%rax),%r14
3207
3208 movq -32(%rax),%r13
3209
3210 movq -24(%rax),%r12
3211
3212 movq -16(%rax),%rbp
3213
3214 movq -8(%rax),%rbx
3215
3216 leaq (%rax),%rsp    # drop the save area
3217
3218L$ocb_dec_epilogue:
3219 .byte 0xf3,0xc3    # rep ret
3220
3221
3222
3223
3224.p2align 5
# __ocb_decrypt6 -- internal helper of _aesni_ocb_decrypt: AES-decrypt the six
# blocks in %xmm2..%xmm7 in place, XORing a per-block mask in before the
# cipher and again in the final round.
#
# Inputs, as prepared by _aesni_ocb_decrypt:
#   %rbx            table base; %r12,%r13,%r14 are byte offsets into it
#   %xmm10          table entry 0
#   %xmm15          running mask value
#   %xmm9           first round key XOR final round key
#   %xmm1           round key from 16(%r11); %r11 = key schedule base
#   %rcx,%rax,%r10  round-key base, negative index, and the index reset value
#   %r8             block counter
# Outputs: results in %xmm2..%xmm7, masks in %xmm10..%xmm15 (%xmm15 is the
# mask of the sixth block), %r8 advanced by 6, %r12..%r14 recomputed as table
# offsets for the next group, %xmm10 reloaded with table entry 0.
# .byte 102,15,56,222,MODRM = aesdec,
# .byte 102,65,15,56,223,MODRM = aesdeclast with a source in %xmm8..%xmm15.
3225__ocb_decrypt6:
3226 pxor %xmm9,%xmm15
3227 movdqu (%rbx,%r12,1),%xmm11
3228 movdqa %xmm10,%xmm12
3229 movdqu (%rbx,%r13,1),%xmm13
3230 movdqa %xmm10,%xmm14
3231 pxor %xmm15,%xmm10    # mask 0
3232 movdqu (%rbx,%r14,1),%xmm15
3233 pxor %xmm10,%xmm11    # mask 1; each later mask chains from the one before
3234 pxor %xmm10,%xmm2
3235 pxor %xmm11,%xmm12
3236 pxor %xmm11,%xmm3
3237 pxor %xmm12,%xmm13
3238 pxor %xmm12,%xmm4
3239 pxor %xmm13,%xmm14
3240 pxor %xmm13,%xmm5
3241 pxor %xmm14,%xmm15
3242 pxor %xmm14,%xmm6
3243 pxor %xmm15,%xmm7
3244 movups 32(%r11),%xmm0
3245
# Compute the table indices for the next group while the first rounds run.
3246 leaq 1(%r8),%r12
3247 leaq 3(%r8),%r13
3248 leaq 5(%r8),%r14
3249 addq $6,%r8
3250 pxor %xmm9,%xmm10    # re-base each mask with %xmm9 for the final round (same for %xmm11..%xmm15 below)
3251 bsfq %r12,%r12
3252 bsfq %r13,%r13
3253 bsfq %r14,%r14
3254
3255.byte 102,15,56,222,209    # aesdec %xmm1 into %xmm2..%xmm7, interleaved with mask updates
3256.byte 102,15,56,222,217
3257.byte 102,15,56,222,225
3258.byte 102,15,56,222,233
3259 pxor %xmm9,%xmm11
3260 pxor %xmm9,%xmm12
3261.byte 102,15,56,222,241
3262 pxor %xmm9,%xmm13
3263 pxor %xmm9,%xmm14
3264.byte 102,15,56,222,249
3265 movups 48(%r11),%xmm1
3266 pxor %xmm9,%xmm15
3267
3268.byte 102,15,56,222,208    # aesdec %xmm0 into %xmm2..%xmm7
3269.byte 102,15,56,222,216
3270.byte 102,15,56,222,224
3271.byte 102,15,56,222,232
3272.byte 102,15,56,222,240
3273.byte 102,15,56,222,248
3274 movups 64(%r11),%xmm0
3275 shlq $4,%r12    # bit index * 16 = table byte offset
3276 shlq $4,%r13
3277 jmp L$ocb_dec_loop6
3278
3279.p2align 5
# Two rounds per iteration; ends when the index in %rax reaches 0.
3280L$ocb_dec_loop6:
3281.byte 102,15,56,222,209    # aesdec %xmm1 into %xmm2..%xmm7
3282.byte 102,15,56,222,217
3283.byte 102,15,56,222,225
3284.byte 102,15,56,222,233
3285.byte 102,15,56,222,241
3286.byte 102,15,56,222,249
3287 movups (%rcx,%rax,1),%xmm1
3288 addq $32,%rax    # sets ZF for the jnz below
3289
3290.byte 102,15,56,222,208    # aesdec %xmm0 into %xmm2..%xmm7
3291.byte 102,15,56,222,216
3292.byte 102,15,56,222,224
3293.byte 102,15,56,222,232
3294.byte 102,15,56,222,240
3295.byte 102,15,56,222,248
3296 movups -16(%rcx,%rax,1),%xmm0
3297 jnz L$ocb_dec_loop6
3298
3299.byte 102,15,56,222,209    # last full round with %xmm1
3300.byte 102,15,56,222,217
3301.byte 102,15,56,222,225
3302.byte 102,15,56,222,233
3303.byte 102,15,56,222,241
3304.byte 102,15,56,222,249
3305 movups 16(%r11),%xmm1    # reload the round key at offset 16 for the next call
3306 shlq $4,%r14
3307
3308.byte 102,65,15,56,223,210    # aesdeclast %xmm10,%xmm2
3309 movdqu (%rbx),%xmm10    # reload table entry 0 for the next group
3310 movq %r10,%rax    # reset the round-key index
3311.byte 102,65,15,56,223,219    # aesdeclast %xmm11,%xmm3
3312.byte 102,65,15,56,223,228    # aesdeclast %xmm12,%xmm4
3313.byte 102,65,15,56,223,237    # aesdeclast %xmm13,%xmm5
3314.byte 102,65,15,56,223,246    # aesdeclast %xmm14,%xmm6
3315.byte 102,65,15,56,223,255    # aesdeclast %xmm15,%xmm7
3316 .byte 0xf3,0xc3    # rep ret
3317
3318
3319
3320.p2align 5
# __ocb_decrypt4 -- internal helper of _aesni_ocb_decrypt: AES-decrypt the
# four blocks in %xmm2..%xmm5 in place with per-block masking. The callers in
# this file also use it for two and three blocks by zeroing the unused inputs.
#
# Inputs, as prepared by _aesni_ocb_decrypt:
#   %rbx            table base; %r12 and %r13 are byte offsets into it
#   %xmm10          table entry 0
#   %xmm15          running mask value
#   %xmm9           first round key XOR final round key
#   %xmm1           round key from 16(%r11); %r11 = key schedule base
#   %rcx,%rax,%r10  round-key base, negative index, and the index reset value
# Outputs: results in %xmm2..%xmm5 and the masks in %xmm10..%xmm13.
# .byte 102,15,56,222,MODRM = aesdec,
# .byte 102,65,15,56,223,MODRM = aesdeclast with a source in %xmm8..%xmm15.
3321__ocb_decrypt4:
3322 pxor %xmm9,%xmm15
3323 movdqu (%rbx,%r12,1),%xmm11
3324 movdqa %xmm10,%xmm12
3325 movdqu (%rbx,%r13,1),%xmm13
3326 pxor %xmm15,%xmm10    # mask 0
3327 pxor %xmm10,%xmm11    # mask 1
3328 pxor %xmm10,%xmm2
3329 pxor %xmm11,%xmm12    # mask 2
3330 pxor %xmm11,%xmm3
3331 pxor %xmm12,%xmm13    # mask 3
3332 pxor %xmm12,%xmm4
3333 pxor %xmm13,%xmm5
3334 movups 32(%r11),%xmm0
3335
# Re-base the masks with %xmm9 so they can serve as the aesdeclast operand.
3336 pxor %xmm9,%xmm10
3337 pxor %xmm9,%xmm11
3338 pxor %xmm9,%xmm12
3339 pxor %xmm9,%xmm13
3340
3341.byte 102,15,56,222,209    # aesdec %xmm1 into %xmm2..%xmm5
3342.byte 102,15,56,222,217
3343.byte 102,15,56,222,225
3344.byte 102,15,56,222,233
3345 movups 48(%r11),%xmm1
3346
3347.byte 102,15,56,222,208    # aesdec %xmm0 into %xmm2..%xmm5
3348.byte 102,15,56,222,216
3349.byte 102,15,56,222,224
3350.byte 102,15,56,222,232
3351 movups 64(%r11),%xmm0
3352 jmp L$ocb_dec_loop4
3353
3354.p2align 5
# Two rounds per iteration; ends when the index in %rax reaches 0.
3355L$ocb_dec_loop4:
3356.byte 102,15,56,222,209    # aesdec %xmm1 into %xmm2..%xmm5
3357.byte 102,15,56,222,217
3358.byte 102,15,56,222,225
3359.byte 102,15,56,222,233
3360 movups (%rcx,%rax,1),%xmm1
3361 addq $32,%rax    # sets ZF for the jnz below
3362
3363.byte 102,15,56,222,208    # aesdec %xmm0 into %xmm2..%xmm5
3364.byte 102,15,56,222,216
3365.byte 102,15,56,222,224
3366.byte 102,15,56,222,232
3367 movups -16(%rcx,%rax,1),%xmm0
3368 jnz L$ocb_dec_loop4
3369
3370.byte 102,15,56,222,209    # last full round with %xmm1
3371.byte 102,15,56,222,217
3372.byte 102,15,56,222,225
3373.byte 102,15,56,222,233
3374 movups 16(%r11),%xmm1    # reload the round key at offset 16 for the next call
3375 movq %r10,%rax    # reset the round-key index
3376
3377.byte 102,65,15,56,223,210    # aesdeclast %xmm10,%xmm2
3378.byte 102,65,15,56,223,219    # aesdeclast %xmm11,%xmm3
3379.byte 102,65,15,56,223,228    # aesdeclast %xmm12,%xmm4
3380.byte 102,65,15,56,223,237    # aesdeclast %xmm13,%xmm5
3381 .byte 0xf3,0xc3    # rep ret
3382
3383
3384
3385.p2align 5
# __ocb_decrypt1 -- internal helper of _aesni_ocb_decrypt: AES-decrypt the
# single block in %xmm2.
#
# Inputs, as prepared by _aesni_ocb_decrypt:
#   %xmm7           mask ingredient for this block; holds the final mask on return
#   %xmm15          running mask value, XORed into %xmm7
#   %xmm9           first round key XOR final round key
#   %xmm1           round key from 16(%r11); %r11 = key schedule base
#   %rcx,%rax,%r10  round-key base, negative index, and the index reset value
# .byte 102,15,56,222,MODRM = aesdec, 102,15,56,223,MODRM = aesdeclast.
3386__ocb_decrypt1:
3387 pxor %xmm15,%xmm7
3388 pxor %xmm9,%xmm7
3389 pxor %xmm7,%xmm2    # apply the input mask
3390 movups 32(%r11),%xmm0
3391
3392.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3393 movups 48(%r11),%xmm1
3394 pxor %xmm9,%xmm7    # re-base the mask for use as the aesdeclast operand
3395
3396.byte 102,15,56,222,208    # aesdec %xmm0,%xmm2
3397 movups 64(%r11),%xmm0
3398 jmp L$ocb_dec_loop1
3399
3400.p2align 5
# Two rounds per iteration; ends when the index in %rax reaches 0.
3401L$ocb_dec_loop1:
3402.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3403 movups (%rcx,%rax,1),%xmm1
3404 addq $32,%rax    # sets ZF for the jnz below
3405
3406.byte 102,15,56,222,208    # aesdec %xmm0,%xmm2
3407 movups -16(%rcx,%rax,1),%xmm0
3408 jnz L$ocb_dec_loop1
3409
3410.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3411 movups 16(%r11),%xmm1    # reload the round key at offset 16 for the next call
3412 movq %r10,%rax    # reset the round-key index
3413
3414.byte 102,15,56,223,215    # aesdeclast %xmm7,%xmm2
3415 .byte 0xf3,0xc3    # rep ret
3416
# _aesni_cbc_encrypt -- exported CBC-mode encrypt/decrypt routine.
# Arguments as they are used below (System V AMD64 order):
#   %rdi   input buffer
#   %rsi   output buffer
#   %rdx   length in bytes; zero returns immediately
#   %rcx   key schedule; a 32-bit round count is read at offset 240
#   %r8    16-byte chaining value (IV), read on entry and updated on exit
#   %r9d   direction: nonzero takes the encrypt path, zero the decrypt path
# Encrypt path: one block at a time; a trailing partial block is copied to the
# output, zero-padded to 16 bytes and encrypted there.
# Decrypt path: a fast path for exactly 16 bytes, otherwise a bulk path that
# handles eight or six blocks per iteration plus a tail of one to seven blocks.
# The bulk path saves %rbp and builds a 16-byte aligned scratch slot on the
# stack, with the entry %rsp kept in %r11.
# Hand-encoded instructions used here:
#   102,15,56,220/221,MODRM  aesenc / aesenclast
#   102,15,56,222/223,MODRM  aesdec / aesdeclast
#   102,68,... / 102,69,...  the same with REX prefixes to reach %xmm8..%xmm15
#   .long 0x9066A4F3         rep movsb followed by a two-byte nop
#   .long 0x9066AAF3         rep stosb followed by a two-byte nop
# NOTE(review): on the decrypt path a length that is not a multiple of 16 ends
# in L$cbc_dec_tail_partial, which copies 16 - (length mod 16) bytes of the
# last block. Presumably callers only pass whole blocks -- confirm.
3417.globl _aesni_cbc_encrypt
3418
3419.p2align 4
3420_aesni_cbc_encrypt:
3421
3422 testq %rdx,%rdx
3423 jz L$cbc_ret    # nothing to do for zero length
3424
3425 movl 240(%rcx),%r10d    # round count, kept in %r10d for reloading
3426 movq %rcx,%r11    # keep the key pointer; %rcx is advanced while walking round keys
3427 testl %r9d,%r9d
3428 jz L$cbc_decrypt
3429
# ---- encrypt ----
3430 movups (%r8),%xmm2    # chaining value
3431 movl %r10d,%eax
3432 cmpq $16,%rdx
3433 jb L$cbc_enc_tail    # less than one full block
3434 subq $16,%rdx
3435 jmp L$cbc_enc_loop
3436.p2align 4
3437L$cbc_enc_loop:
3438 movups (%rdi),%xmm3
3439 leaq 16(%rdi),%rdi
3440
3441 movups (%rcx),%xmm0
3442 movups 16(%rcx),%xmm1
3443 xorps %xmm0,%xmm3    # input XOR first round key
3444 leaq 32(%rcx),%rcx
3445 xorps %xmm3,%xmm2    # ... XOR previous ciphertext (or IV)
3446L$oop_enc1_15:
3447.byte 102,15,56,220,209    # aesenc %xmm1,%xmm2
3448 decl %eax    # sets ZF for the jnz below; movups and leaq leave flags alone
3449 movups (%rcx),%xmm1
3450 leaq 16(%rcx),%rcx
3451 jnz L$oop_enc1_15
3452.byte 102,15,56,221,209    # aesenclast %xmm1,%xmm2
3453 movl %r10d,%eax    # reset round counter and key pointer for the next block
3454 movq %r11,%rcx
3455 movups %xmm2,0(%rsi)
3456 leaq 16(%rsi),%rsi
3457 subq $16,%rdx
3458 jnc L$cbc_enc_loop
3459 addq $16,%rdx    # bytes left over (0..15)
3460 jnz L$cbc_enc_tail
3461 pxor %xmm0,%xmm0    # scrub key material
3462 pxor %xmm1,%xmm1
3463 movups %xmm2,(%r8)    # last ciphertext block becomes the new chaining value
3464 pxor %xmm2,%xmm2
3465 pxor %xmm3,%xmm3
3466 jmp L$cbc_ret
3467
# Partial final block: copy it to the output, zero-pad it there to 16 bytes,
# then run the loop once more with input = output.
3468L$cbc_enc_tail:
3469 movq %rdx,%rcx
3470 xchgq %rdi,%rsi    # %rdi = output, %rsi = input for the string copy
3471.long 0x9066A4F3    # rep movsb: copy the %rcx leftover bytes
3472 movl $16,%ecx
3473 subq %rdx,%rcx    # number of padding bytes
3474 xorl %eax,%eax
3475.long 0x9066AAF3    # rep stosb: append zero bytes
3476 leaq -16(%rdi),%rdi    # back to the start of the padded block
3477 movl %r10d,%eax
3478 movq %rdi,%rsi    # encrypt it in place
3479 movq %r11,%rcx
3480 xorq %rdx,%rdx    # no bytes remain after this block
3481 jmp L$cbc_enc_loop
3482
3483.p2align 4
# ---- decrypt ----
3484L$cbc_decrypt:
3485 cmpq $16,%rdx
3486 jne L$cbc_decrypt_bulk
3487
3488
3489
# Exactly one block: no stack frame needed.
3490 movdqu (%rdi),%xmm2
3491 movdqu (%r8),%xmm3
3492 movdqa %xmm2,%xmm4    # keep the ciphertext; it becomes the next chaining value
3493 movups (%rcx),%xmm0
3494 movups 16(%rcx),%xmm1
3495 leaq 32(%rcx),%rcx
3496 xorps %xmm0,%xmm2
3497L$oop_dec1_16:
3498.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3499 decl %r10d
3500 movups (%rcx),%xmm1
3501 leaq 16(%rcx),%rcx
3502 jnz L$oop_dec1_16
3503.byte 102,15,56,223,209    # aesdeclast %xmm1,%xmm2
3504 pxor %xmm0,%xmm0
3505 pxor %xmm1,%xmm1
3506 movdqu %xmm4,(%r8)
3507 xorps %xmm3,%xmm2    # XOR with the previous chaining value
3508 pxor %xmm3,%xmm3
3509 movups %xmm2,(%rsi)
3510 pxor %xmm2,%xmm2
3511 jmp L$cbc_ret
3512.p2align 4
3513L$cbc_decrypt_bulk:
3514 leaq (%rsp),%r11    # remember the entry stack pointer
3515
3516 pushq %rbp
3517
3518 subq $16,%rsp
3519 andq $-16,%rsp    # 16-byte aligned scratch slot at (%rsp)
3520 movq %rcx,%rbp    # key pointer copy
3521 movups (%r8),%xmm10    # chaining value
3522 movl %r10d,%eax
3523 cmpq $0x50,%rdx
3524 jbe L$cbc_dec_tail    # five blocks or fewer
3525
# Load six ciphertext blocks and keep copies of the first five as chaining values.
3526 movups (%rcx),%xmm0
3527 movdqu 0(%rdi),%xmm2
3528 movdqu 16(%rdi),%xmm3
3529 movdqa %xmm2,%xmm11
3530 movdqu 32(%rdi),%xmm4
3531 movdqa %xmm3,%xmm12
3532 movdqu 48(%rdi),%xmm5
3533 movdqa %xmm4,%xmm13
3534 movdqu 64(%rdi),%xmm6
3535 movdqa %xmm5,%xmm14
3536 movdqu 80(%rdi),%xmm7
3537 movdqa %xmm6,%xmm15
3538 movl _OPENSSL_ia32cap_P+4(%rip),%r9d    # second word of the capability vector
3539 cmpq $0x70,%rdx
3540 jbe L$cbc_dec_six_or_seven
3541
# Choose the 6-block or the 8-block loop from two capability bits.
# 71303168 = 0x04400000 (bits 22 and 26), 4194304 = 0x00400000 (bit 22 only).
# NOTE(review): presumably a CPU-model heuristic (bit 22 set, bit 26 clear) -- confirm.
3542 andl $71303168,%r9d
3543 subq $0x50,%rdx
3544 cmpl $4194304,%r9d
3545 je L$cbc_dec_loop6_enter
3546 subq $0x20,%rdx
3547 leaq 112(%rcx),%rcx    # bias the key pointer; round keys are addressed as N-112(%rcx)
3548 jmp L$cbc_dec_loop8_enter
3549.p2align 4
3550L$cbc_dec_loop8:
3551 movups %xmm9,(%rsi)    # eighth plaintext block of the previous iteration
3552 leaq 16(%rsi),%rsi
3553L$cbc_dec_loop8_enter:
3554 movdqu 96(%rdi),%xmm8
3555 pxor %xmm0,%xmm2    # XOR round key 0 into all eight blocks
3556 movdqu 112(%rdi),%xmm9
3557 pxor %xmm0,%xmm3
3558 movups 16-112(%rcx),%xmm1
3559 pxor %xmm0,%xmm4
# %rbp becomes the address the next six blocks are prefetched from: %rdi+128 when
# at least 0x70 bytes remain, else %rdi itself so nothing past the input is read.
3560 movq $-1,%rbp
3561 cmpq $0x70,%rdx    # CF = (%rdx < 0x70); consumed by the adcq below
3562 pxor %xmm0,%xmm5
3563 pxor %xmm0,%xmm6
3564 pxor %xmm0,%xmm7
3565 pxor %xmm0,%xmm8
3566
3567.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3568 pxor %xmm0,%xmm9
3569 movups 32-112(%rcx),%xmm0
3570.byte 102,15,56,222,217    # aesdec %xmm1 into %xmm3..%xmm7
3571.byte 102,15,56,222,225
3572.byte 102,15,56,222,233
3573.byte 102,15,56,222,241
3574.byte 102,15,56,222,249
3575.byte 102,68,15,56,222,193    # aesdec %xmm1,%xmm8
3576 adcq $0,%rbp    # -1 + CF: 0 when fewer than 0x70 bytes remain, else -1
3577 andq $128,%rbp    # 0 or 128
3578.byte 102,68,15,56,222,201    # aesdec %xmm1,%xmm9
3579 addq %rdi,%rbp
3580 movups 48-112(%rcx),%xmm1
# From here each group of eight .byte lines is one AES round over %xmm2..%xmm9,
# alternating between the round keys held in %xmm0 and %xmm1.
3581.byte 102,15,56,222,208
3582.byte 102,15,56,222,216
3583.byte 102,15,56,222,224
3584.byte 102,15,56,222,232
3585.byte 102,15,56,222,240
3586.byte 102,15,56,222,248
3587.byte 102,68,15,56,222,192
3588.byte 102,68,15,56,222,200
3589 movups 64-112(%rcx),%xmm0
3590 nop
3591.byte 102,15,56,222,209
3592.byte 102,15,56,222,217
3593.byte 102,15,56,222,225
3594.byte 102,15,56,222,233
3595.byte 102,15,56,222,241
3596.byte 102,15,56,222,249
3597.byte 102,68,15,56,222,193
3598.byte 102,68,15,56,222,201
3599 movups 80-112(%rcx),%xmm1
3600 nop
3601.byte 102,15,56,222,208
3602.byte 102,15,56,222,216
3603.byte 102,15,56,222,224
3604.byte 102,15,56,222,232
3605.byte 102,15,56,222,240
3606.byte 102,15,56,222,248
3607.byte 102,68,15,56,222,192
3608.byte 102,68,15,56,222,200
3609 movups 96-112(%rcx),%xmm0
3610 nop
3611.byte 102,15,56,222,209
3612.byte 102,15,56,222,217
3613.byte 102,15,56,222,225
3614.byte 102,15,56,222,233
3615.byte 102,15,56,222,241
3616.byte 102,15,56,222,249
3617.byte 102,68,15,56,222,193
3618.byte 102,68,15,56,222,201
3619 movups 112-112(%rcx),%xmm1
3620 nop
3621.byte 102,15,56,222,208
3622.byte 102,15,56,222,216
3623.byte 102,15,56,222,224
3624.byte 102,15,56,222,232
3625.byte 102,15,56,222,240
3626.byte 102,15,56,222,248
3627.byte 102,68,15,56,222,192
3628.byte 102,68,15,56,222,200
3629 movups 128-112(%rcx),%xmm0
3630 nop
3631.byte 102,15,56,222,209
3632.byte 102,15,56,222,217
3633.byte 102,15,56,222,225
3634.byte 102,15,56,222,233
3635.byte 102,15,56,222,241
3636.byte 102,15,56,222,249
3637.byte 102,68,15,56,222,193
3638.byte 102,68,15,56,222,201
3639 movups 144-112(%rcx),%xmm1
3640 cmpl $11,%eax    # round count decides how many more rounds follow; flags are used by the jb and je below
3641.byte 102,15,56,222,208
3642.byte 102,15,56,222,216
3643.byte 102,15,56,222,224
3644.byte 102,15,56,222,232
3645.byte 102,15,56,222,240
3646.byte 102,15,56,222,248
3647.byte 102,68,15,56,222,192
3648.byte 102,68,15,56,222,200
3649 movups 160-112(%rcx),%xmm0
3650 jb L$cbc_dec_done    # round count below 11: remaining rounds are done at L$cbc_dec_done
3651.byte 102,15,56,222,209
3652.byte 102,15,56,222,217
3653.byte 102,15,56,222,225
3654.byte 102,15,56,222,233
3655.byte 102,15,56,222,241
3656.byte 102,15,56,222,249
3657.byte 102,68,15,56,222,193
3658.byte 102,68,15,56,222,201
3659 movups 176-112(%rcx),%xmm1
3660 nop
3661.byte 102,15,56,222,208
3662.byte 102,15,56,222,216
3663.byte 102,15,56,222,224
3664.byte 102,15,56,222,232
3665.byte 102,15,56,222,240
3666.byte 102,15,56,222,248
3667.byte 102,68,15,56,222,192
3668.byte 102,68,15,56,222,200
3669 movups 192-112(%rcx),%xmm0
3670 je L$cbc_dec_done    # round count equal to 11
3671.byte 102,15,56,222,209
3672.byte 102,15,56,222,217
3673.byte 102,15,56,222,225
3674.byte 102,15,56,222,233
3675.byte 102,15,56,222,241
3676.byte 102,15,56,222,249
3677.byte 102,68,15,56,222,193
3678.byte 102,68,15,56,222,201
3679 movups 208-112(%rcx),%xmm1
3680 nop
3681.byte 102,15,56,222,208
3682.byte 102,15,56,222,216
3683.byte 102,15,56,222,224
3684.byte 102,15,56,222,232
3685.byte 102,15,56,222,240
3686.byte 102,15,56,222,248
3687.byte 102,68,15,56,222,192
3688.byte 102,68,15,56,222,200
3689 movups 224-112(%rcx),%xmm0
3690 jmp L$cbc_dec_done
3691.p2align 4
# Final two rounds. %xmm0 holds the last round key; it is XORed into the
# chaining values so that aesdeclast applies the key and the CBC XOR together.
3692L$cbc_dec_done:
3693.byte 102,15,56,222,209    # aesdec %xmm1 over %xmm2..%xmm9, interleaved below
3694.byte 102,15,56,222,217
3695 pxor %xmm0,%xmm10
3696 pxor %xmm0,%xmm11
3697.byte 102,15,56,222,225
3698.byte 102,15,56,222,233
3699 pxor %xmm0,%xmm12
3700 pxor %xmm0,%xmm13
3701.byte 102,15,56,222,241
3702.byte 102,15,56,222,249
3703 pxor %xmm0,%xmm14
3704 pxor %xmm0,%xmm15
3705.byte 102,68,15,56,222,193
3706.byte 102,68,15,56,222,201
3707 movdqu 80(%rdi),%xmm1    # sixth ciphertext block: chaining value for block seven
3708
3709.byte 102,65,15,56,223,210    # aesdeclast %xmm10,%xmm2
3710 movdqu 96(%rdi),%xmm10    # seventh ciphertext block: chaining value for block eight
3711 pxor %xmm0,%xmm1
3712.byte 102,65,15,56,223,219    # aesdeclast %xmm11,%xmm3
3713 pxor %xmm0,%xmm10
3714 movdqu 112(%rdi),%xmm0    # eighth ciphertext block: chaining value for the next batch
3715.byte 102,65,15,56,223,228    # aesdeclast %xmm12,%xmm4
3716 leaq 128(%rdi),%rdi
3717 movdqu 0(%rbp),%xmm11    # start loading the next six ciphertext blocks via %rbp
3718.byte 102,65,15,56,223,237    # aesdeclast %xmm13,%xmm5
3719.byte 102,65,15,56,223,246    # aesdeclast %xmm14,%xmm6
3720 movdqu 16(%rbp),%xmm12
3721 movdqu 32(%rbp),%xmm13
3722.byte 102,65,15,56,223,255    # aesdeclast %xmm15,%xmm7
3723.byte 102,68,15,56,223,193    # aesdeclast %xmm1,%xmm8
3724 movdqu 48(%rbp),%xmm14
3725 movdqu 64(%rbp),%xmm15
3726.byte 102,69,15,56,223,202    # aesdeclast %xmm10,%xmm9
3727 movdqa %xmm0,%xmm10
3728 movdqu 80(%rbp),%xmm1
3729 movups -112(%rcx),%xmm0    # round key 0 again
3730
# Store seven plaintext blocks and move the prefetched ciphertext into place;
# the eighth (%xmm9) is stored at the top of the loop or after it.
3731 movups %xmm2,(%rsi)
3732 movdqa %xmm11,%xmm2
3733 movups %xmm3,16(%rsi)
3734 movdqa %xmm12,%xmm3
3735 movups %xmm4,32(%rsi)
3736 movdqa %xmm13,%xmm4
3737 movups %xmm5,48(%rsi)
3738 movdqa %xmm14,%xmm5
3739 movups %xmm6,64(%rsi)
3740 movdqa %xmm15,%xmm6
3741 movups %xmm7,80(%rsi)
3742 movdqa %xmm1,%xmm7
3743 movups %xmm8,96(%rsi)
3744 leaq 112(%rsi),%rsi
3745
3746 subq $0x80,%rdx
3747 ja L$cbc_dec_loop8
3748
3749 movaps %xmm9,%xmm2
3750 leaq -112(%rcx),%rcx    # undo the key pointer bias
3751 addq $0x70,%rdx
3752 jle L$cbc_dec_clear_tail_collected    # nothing left: %xmm2 is the last block
3753 movups %xmm9,(%rsi)
3754 leaq 16(%rsi),%rsi
3755 cmpq $0x50,%rdx
3756 jbe L$cbc_dec_tail
3757
3758 movaps %xmm11,%xmm2    # reuse the six blocks already prefetched
3759L$cbc_dec_six_or_seven:
3760 cmpq $0x60,%rdx
3761 ja L$cbc_dec_seven
3762
# Exactly six blocks remain.
3763 movaps %xmm7,%xmm8    # keep the sixth ciphertext block as the next chaining value
3764 call _aesni_decrypt6
3765 pxor %xmm10,%xmm2
3766 movaps %xmm8,%xmm10
3767 pxor %xmm11,%xmm3
3768 movdqu %xmm2,(%rsi)
3769 pxor %xmm12,%xmm4
3770 movdqu %xmm3,16(%rsi)
3771 pxor %xmm3,%xmm3    # registers are cleared as soon as their contents are stored
3772 pxor %xmm13,%xmm5
3773 movdqu %xmm4,32(%rsi)
3774 pxor %xmm4,%xmm4
3775 pxor %xmm14,%xmm6
3776 movdqu %xmm5,48(%rsi)
3777 pxor %xmm5,%xmm5
3778 pxor %xmm15,%xmm7
3779 movdqu %xmm6,64(%rsi)
3780 pxor %xmm6,%xmm6
3781 leaq 80(%rsi),%rsi
3782 movdqa %xmm7,%xmm2    # last block is handed to the common tail in %xmm2
3783 pxor %xmm7,%xmm7
3784 jmp L$cbc_dec_tail_collected
3785
3786.p2align 4
# Seven blocks remain: use the eight-block helper with a zero eighth input.
3787L$cbc_dec_seven:
3788 movups 96(%rdi),%xmm8
3789 xorps %xmm9,%xmm9
3790 call _aesni_decrypt8
3791 movups 80(%rdi),%xmm9    # sixth ciphertext block: chaining value for block seven
3792 pxor %xmm10,%xmm2
3793 movups 96(%rdi),%xmm10    # seventh ciphertext block: next chaining value
3794 pxor %xmm11,%xmm3
3795 movdqu %xmm2,(%rsi)
3796 pxor %xmm12,%xmm4
3797 movdqu %xmm3,16(%rsi)
3798 pxor %xmm3,%xmm3
3799 pxor %xmm13,%xmm5
3800 movdqu %xmm4,32(%rsi)
3801 pxor %xmm4,%xmm4
3802 pxor %xmm14,%xmm6
3803 movdqu %xmm5,48(%rsi)
3804 pxor %xmm5,%xmm5
3805 pxor %xmm15,%xmm7
3806 movdqu %xmm6,64(%rsi)
3807 pxor %xmm6,%xmm6
3808 pxor %xmm9,%xmm8
3809 movdqu %xmm7,80(%rsi)
3810 pxor %xmm7,%xmm7
3811 leaq 96(%rsi),%rsi
3812 movdqa %xmm8,%xmm2
3813 pxor %xmm8,%xmm8
3814 pxor %xmm9,%xmm9
3815 jmp L$cbc_dec_tail_collected
3816
3817.p2align 4
# Alternative main loop: six blocks per iteration via _aesni_decrypt6.
3818L$cbc_dec_loop6:
3819 movups %xmm7,(%rsi)    # sixth plaintext block of the previous iteration
3820 leaq 16(%rsi),%rsi
3821 movdqu 0(%rdi),%xmm2
3822 movdqu 16(%rdi),%xmm3
3823 movdqa %xmm2,%xmm11
3824 movdqu 32(%rdi),%xmm4
3825 movdqa %xmm3,%xmm12
3826 movdqu 48(%rdi),%xmm5
3827 movdqa %xmm4,%xmm13
3828 movdqu 64(%rdi),%xmm6
3829 movdqa %xmm5,%xmm14
3830 movdqu 80(%rdi),%xmm7
3831 movdqa %xmm6,%xmm15
3832L$cbc_dec_loop6_enter:
3833 leaq 96(%rdi),%rdi
3834 movdqa %xmm7,%xmm8    # keep the sixth ciphertext block as the next chaining value
3835
3836 call _aesni_decrypt6
3837
3838 pxor %xmm10,%xmm2
3839 movdqa %xmm8,%xmm10
3840 pxor %xmm11,%xmm3
3841 movdqu %xmm2,(%rsi)
3842 pxor %xmm12,%xmm4
3843 movdqu %xmm3,16(%rsi)
3844 pxor %xmm13,%xmm5
3845 movdqu %xmm4,32(%rsi)
3846 pxor %xmm14,%xmm6
3847 movq %rbp,%rcx    # restore key pointer and round count for the next helper call
3848 movdqu %xmm5,48(%rsi)
3849 pxor %xmm15,%xmm7
3850 movl %r10d,%eax
3851 movdqu %xmm6,64(%rsi)
3852 leaq 80(%rsi),%rsi
3853 subq $0x60,%rdx
3854 ja L$cbc_dec_loop6
3855
3856 movdqa %xmm7,%xmm2
3857 addq $0x50,%rdx
3858 jle L$cbc_dec_clear_tail_collected
3859 movups %xmm7,(%rsi)
3860 leaq 16(%rsi),%rsi
3861
# Tail: one to five blocks. %rdx is decremented by 16 per block loaded and the
# first jbe that fires selects the matching helper.
3862L$cbc_dec_tail:
3863 movups (%rdi),%xmm2
3864 subq $0x10,%rdx
3865 jbe L$cbc_dec_one
3866
3867 movups 16(%rdi),%xmm3
3868 movaps %xmm2,%xmm11
3869 subq $0x10,%rdx
3870 jbe L$cbc_dec_two
3871
3872 movups 32(%rdi),%xmm4
3873 movaps %xmm3,%xmm12
3874 subq $0x10,%rdx
3875 jbe L$cbc_dec_three
3876
3877 movups 48(%rdi),%xmm5
3878 movaps %xmm4,%xmm13
3879 subq $0x10,%rdx
3880 jbe L$cbc_dec_four
3881
# Five blocks: six-block helper with a zero sixth input.
3882 movups 64(%rdi),%xmm6
3883 movaps %xmm5,%xmm14
3884 movaps %xmm6,%xmm15
3885 xorps %xmm7,%xmm7
3886 call _aesni_decrypt6
3887 pxor %xmm10,%xmm2
3888 movaps %xmm15,%xmm10
3889 pxor %xmm11,%xmm3
3890 movdqu %xmm2,(%rsi)
3891 pxor %xmm12,%xmm4
3892 movdqu %xmm3,16(%rsi)
3893 pxor %xmm3,%xmm3
3894 pxor %xmm13,%xmm5
3895 movdqu %xmm4,32(%rsi)
3896 pxor %xmm4,%xmm4
3897 pxor %xmm14,%xmm6
3898 movdqu %xmm5,48(%rsi)
3899 pxor %xmm5,%xmm5
3900 leaq 64(%rsi),%rsi
3901 movdqa %xmm6,%xmm2
3902 pxor %xmm6,%xmm6
3903 pxor %xmm7,%xmm7
3904 subq $0x10,%rdx
3905 jmp L$cbc_dec_tail_collected
3906
3907.p2align 4
3908L$cbc_dec_one:
3909 movaps %xmm2,%xmm11    # keep the ciphertext as the next chaining value
3910 movups (%rcx),%xmm0
3911 movups 16(%rcx),%xmm1
3912 leaq 32(%rcx),%rcx
3913 xorps %xmm0,%xmm2
3914L$oop_dec1_17:
3915.byte 102,15,56,222,209    # aesdec %xmm1,%xmm2
3916 decl %eax
3917 movups (%rcx),%xmm1
3918 leaq 16(%rcx),%rcx
3919 jnz L$oop_dec1_17
3920.byte 102,15,56,223,209    # aesdeclast %xmm1,%xmm2
3921 xorps %xmm10,%xmm2
3922 movaps %xmm11,%xmm10
3923 jmp L$cbc_dec_tail_collected
3924.p2align 4
3925L$cbc_dec_two:
3926 movaps %xmm3,%xmm12
3927 call _aesni_decrypt2
3928 pxor %xmm10,%xmm2
3929 movaps %xmm12,%xmm10
3930 pxor %xmm11,%xmm3
3931 movdqu %xmm2,(%rsi)
3932 movdqa %xmm3,%xmm2
3933 pxor %xmm3,%xmm3
3934 leaq 16(%rsi),%rsi
3935 jmp L$cbc_dec_tail_collected
3936.p2align 4
3937L$cbc_dec_three:
3938 movaps %xmm4,%xmm13
3939 call _aesni_decrypt3
3940 pxor %xmm10,%xmm2
3941 movaps %xmm13,%xmm10
3942 pxor %xmm11,%xmm3
3943 movdqu %xmm2,(%rsi)
3944 pxor %xmm12,%xmm4
3945 movdqu %xmm3,16(%rsi)
3946 pxor %xmm3,%xmm3
3947 movdqa %xmm4,%xmm2
3948 pxor %xmm4,%xmm4
3949 leaq 32(%rsi),%rsi
3950 jmp L$cbc_dec_tail_collected
3951.p2align 4
3952L$cbc_dec_four:
3953 movaps %xmm5,%xmm14
3954 call _aesni_decrypt4
3955 pxor %xmm10,%xmm2
3956 movaps %xmm14,%xmm10
3957 pxor %xmm11,%xmm3
3958 movdqu %xmm2,(%rsi)
3959 pxor %xmm12,%xmm4
3960 movdqu %xmm3,16(%rsi)
3961 pxor %xmm3,%xmm3
3962 pxor %xmm13,%xmm5
3963 movdqu %xmm4,32(%rsi)
3964 pxor %xmm4,%xmm4
3965 movdqa %xmm5,%xmm2
3966 pxor %xmm5,%xmm5
3967 leaq 48(%rsi),%rsi
3968 jmp L$cbc_dec_tail_collected
3969
3970.p2align 4
# Entered from the main loops, which do not scrub as they go.
3971L$cbc_dec_clear_tail_collected:
3972 pxor %xmm3,%xmm3
3973 pxor %xmm4,%xmm4
3974 pxor %xmm5,%xmm5
3975 pxor %xmm6,%xmm6
3976 pxor %xmm7,%xmm7
3977 pxor %xmm8,%xmm8
3978 pxor %xmm9,%xmm9
# Common exit: %xmm2 = last plaintext block (not yet stored), %xmm10 = new chaining value.
3979L$cbc_dec_tail_collected:
3980 movups %xmm10,(%r8)
3981 andq $15,%rdx
3982 jnz L$cbc_dec_tail_partial
3983 movups %xmm2,(%rsi)
3984 pxor %xmm2,%xmm2
3985 jmp L$cbc_dec_ret
3986.p2align 4
# Length was not a multiple of 16: go through the stack scratch slot and copy only part of the block.
3987L$cbc_dec_tail_partial:
3988 movaps %xmm2,(%rsp)
3989 pxor %xmm2,%xmm2
3990 movq $16,%rcx
3991 movq %rsi,%rdi
3992 subq %rdx,%rcx    # byte count = 16 - (%rdx & 15)
3993 leaq (%rsp),%rsi
3994.long 0x9066A4F3    # rep movsb from the scratch slot to the output
3995 movdqa %xmm2,(%rsp)    # wipe the scratch slot (%xmm2 is zero here)
3996
3997L$cbc_dec_ret:
3998 xorps %xmm0,%xmm0
3999 pxor %xmm1,%xmm1
4000 movq -8(%r11),%rbp    # restore %rbp from where the pushq put it
4001
4002 leaq (%r11),%rsp    # restore the entry stack pointer
4003
4004L$cbc_ret:
4005 .byte 0xf3,0xc3    # rep ret
4006
4007
# _aesni_set_decrypt_key -- exported: build a decryption key schedule.
# Takes the same arguments as _aesni_set_encrypt_key (%rdi user key, %esi key
# size in bits, %rdx output schedule) and returns its status in %eax.
# It first builds the encryption schedule, then converts it in place: the
# round keys are reversed end to end and every key except the two outermost
# ones is passed through aesimc.
# .byte 0x48,0x83,0xEC,0x08 = subq $8,%rsp
# .byte 102,15,56,219,MODRM = aesimc
4008.globl _aesni_set_decrypt_key
4009
4010.p2align 4
4011_aesni_set_decrypt_key:
4012
4013.byte 0x48,0x83,0xEC,0x08    # subq $8,%rsp: keeps the stack 16-byte aligned at the call
4014
4015 call __aesni_set_encrypt_key
4016 shll $4,%esi    # %esi comes back as the round count; scale to a byte offset
4017 testl %eax,%eax
4018 jnz L$dec_key_ret    # propagate the error code unchanged
4019 leaq 16(%rdx,%rsi,1),%rdi    # %rdi = address of the last round key
4020
# Swap the first and last round keys without transforming them.
4021 movups (%rdx),%xmm0
4022 movups (%rdi),%xmm1
4023 movups %xmm0,(%rdi)
4024 movups %xmm1,(%rdx)
4025 leaq 16(%rdx),%rdx
4026 leaq -16(%rdi),%rdi
4027
# Walk inward from both ends, swapping pairs and applying aesimc to each.
4028L$dec_key_inverse:
4029 movups (%rdx),%xmm0
4030 movups (%rdi),%xmm1
4031.byte 102,15,56,219,192    # aesimc %xmm0,%xmm0
4032.byte 102,15,56,219,201    # aesimc %xmm1,%xmm1
4033 leaq 16(%rdx),%rdx
4034 leaq -16(%rdi),%rdi
4035 movups %xmm0,16(%rdi)
4036 movups %xmm1,-16(%rdx)
4037 cmpq %rdx,%rdi
4038 ja L$dec_key_inverse
4039
# The two pointers have met: transform the middle key in place.
4040 movups (%rdx),%xmm0
4041.byte 102,15,56,219,192    # aesimc %xmm0,%xmm0
4042 pxor %xmm1,%xmm1
4043 movups %xmm0,(%rdi)
4044 pxor %xmm0,%xmm0
4045L$dec_key_ret:
4046 addq $8,%rsp
4047
4048 .byte 0xf3,0xc3    # rep ret
4049
4050L$SEH_end_set_decrypt_key:
4051
# _aesni_set_encrypt_key -- exported (also reachable as the local alias
# __aesni_set_encrypt_key): expand a user key into an encryption key schedule.
# Arguments:
#   %rdi   user key bytes
#   %esi   key size in bits: 128, 192 or 256
#   %rdx   output key schedule
# Returns in %eax: 0 on success, -1 if %rdi or %rdx is null, -2 for any other
# key size. On success the round count (9, 11 or 13) is stored at offset 240
# of the schedule and is also left in %esi, which _aesni_set_decrypt_key uses.
# %xmm0..%xmm5 are zeroed before returning.
# Each key size has two code paths selected by capability bits: one built on
# aeskeygenassist plus the L$key_expansion_* helpers, and an "_alt" one built
# on pshufb and aesenclast with constants L$key_rotate, L$key_rotate192,
# L$key_rcon1 and L$key_rcon1b (defined outside this view).
# Hand-encoded instructions:
#   .byte 0x48,0x83,0xEC,0x08       subq $8,%rsp
#   .byte 102,15,58,223,MODRM,IMM   aeskeygenassist $IMM,src,%xmm1
#   .byte 102,15,56,0,MODRM         pshufb
#   .byte 102,15,56,221,MODRM       aesenclast
4052.globl _aesni_set_encrypt_key
4053
4054.p2align 4
4055_aesni_set_encrypt_key:
4056__aesni_set_encrypt_key:
4057
4058.byte 0x48,0x83,0xEC,0x08    # subq $8,%rsp: keeps the stack aligned for the helper calls
4059
4060 movq $-1,%rax    # status for null pointers
4061 testq %rdi,%rdi
4062 jz L$enc_key_ret
4063 testq %rdx,%rdx
4064 jz L$enc_key_ret
4065
# 268437504 = 0x10000800: capability bits 28 and 11 of the second word.
# The alt paths are taken when only bit 28 is set (compare with 0x10000000).
# NOTE(review): the meaning of these two bits is not visible here -- see the
# OPENSSL_ia32cap documentation.
4066 movl $268437504,%r10d
4067 movups (%rdi),%xmm0    # first 16 key bytes
4068 xorps %xmm4,%xmm4    # the shufps-based helpers rely on %xmm4 starting at zero
4069 andl _OPENSSL_ia32cap_P+4(%rip),%r10d
4070 leaq 16(%rdx),%rax    # %rax = write cursor for round keys
4071 cmpl $256,%esi
4072 je L$14rounds
4073 cmpl $192,%esi
4074 je L$12rounds
4075 cmpl $128,%esi
4076 jne L$bad_keybits
4077
# ---- 128-bit key ----
4078L$10rounds:
4079 movl $9,%esi    # round count stored in the schedule
4080 cmpl $268435456,%r10d
4081 je L$10rounds_alt
4082
4083 movups %xmm0,(%rdx)    # round key 0 is the user key itself
# Ten expansion steps; the last byte of each .byte line is the round constant.
4084.byte 102,15,58,223,200,1    # aeskeygenassist $1,%xmm0,%xmm1
4085 call L$key_expansion_128_cold
4086.byte 102,15,58,223,200,2
4087 call L$key_expansion_128
4088.byte 102,15,58,223,200,4
4089 call L$key_expansion_128
4090.byte 102,15,58,223,200,8
4091 call L$key_expansion_128
4092.byte 102,15,58,223,200,16
4093 call L$key_expansion_128
4094.byte 102,15,58,223,200,32
4095 call L$key_expansion_128
4096.byte 102,15,58,223,200,64
4097 call L$key_expansion_128
4098.byte 102,15,58,223,200,128
4099 call L$key_expansion_128
4100.byte 102,15,58,223,200,27
4101 call L$key_expansion_128
4102.byte 102,15,58,223,200,54
4103 call L$key_expansion_128
4104 movups %xmm0,(%rax)    # final round key
4105 movl %esi,80(%rax)    # lands at offset 240 of the schedule
4106 xorl %eax,%eax    # success
4107 jmp L$enc_key_ret
4108
4109.p2align 4
4110L$10rounds_alt:
4111 movdqa L$key_rotate(%rip),%xmm5    # pshufb control mask
4112 movl $8,%r10d    # eight iterations with a doubling round constant
4113 movdqa L$key_rcon1(%rip),%xmm4
4114 movdqa %xmm0,%xmm2
4115 movdqu %xmm0,(%rdx)
4116 jmp L$oop_key128
4117
4118.p2align 4
4119L$oop_key128:
4120.byte 102,15,56,0,197    # pshufb %xmm5,%xmm0
4121.byte 102,15,56,221,196    # aesenclast %xmm4,%xmm0
4122 pslld $1,%xmm4    # next round constant
4123 leaq 16(%rax),%rax
4124
# Running XOR of the previous key words: %xmm2 ^= itself shifted by 4, 8 and 12 bytes.
4125 movdqa %xmm2,%xmm3
4126 pslldq $4,%xmm2
4127 pxor %xmm2,%xmm3
4128 pslldq $4,%xmm2
4129 pxor %xmm2,%xmm3
4130 pslldq $4,%xmm2
4131 pxor %xmm3,%xmm2
4132
4133 pxor %xmm2,%xmm0
4134 movdqu %xmm0,-16(%rax)
4135 movdqa %xmm0,%xmm2
4136
4137 decl %r10d
4138 jnz L$oop_key128
4139
# Last two round keys use the constant loaded from L$key_rcon1b.
4140 movdqa L$key_rcon1b(%rip),%xmm4
4141
4142.byte 102,15,56,0,197    # pshufb %xmm5,%xmm0
4143.byte 102,15,56,221,196    # aesenclast %xmm4,%xmm0
4144 pslld $1,%xmm4
4145
4146 movdqa %xmm2,%xmm3
4147 pslldq $4,%xmm2
4148 pxor %xmm2,%xmm3
4149 pslldq $4,%xmm2
4150 pxor %xmm2,%xmm3
4151 pslldq $4,%xmm2
4152 pxor %xmm3,%xmm2
4153
4154 pxor %xmm2,%xmm0
4155 movdqu %xmm0,(%rax)
4156
4157 movdqa %xmm0,%xmm2
4158.byte 102,15,56,0,197    # pshufb %xmm5,%xmm0
4159.byte 102,15,56,221,196    # aesenclast %xmm4,%xmm0
4160
4161 movdqa %xmm2,%xmm3
4162 pslldq $4,%xmm2
4163 pxor %xmm2,%xmm3
4164 pslldq $4,%xmm2
4165 pxor %xmm2,%xmm3
4166 pslldq $4,%xmm2
4167 pxor %xmm3,%xmm2
4168
4169 pxor %xmm2,%xmm0
4170 movdqu %xmm0,16(%rax)
4171
4172 movl %esi,96(%rax)    # lands at offset 240 of the schedule
4173 xorl %eax,%eax
4174 jmp L$enc_key_ret
4175
4176.p2align 4
# ---- 192-bit key ----
4177L$12rounds:
4178 movq 16(%rdi),%xmm2    # remaining 8 key bytes
4179 movl $11,%esi
4180 cmpl $268435456,%r10d
4181 je L$12rounds_alt
4182
4183 movups %xmm0,(%rdx)
4184.byte 102,15,58,223,202,1    # aeskeygenassist $1,%xmm2,%xmm1
4185 call L$key_expansion_192a_cold
4186.byte 102,15,58,223,202,2
4187 call L$key_expansion_192b
4188.byte 102,15,58,223,202,4
4189 call L$key_expansion_192a
4190.byte 102,15,58,223,202,8
4191 call L$key_expansion_192b
4192.byte 102,15,58,223,202,16
4193 call L$key_expansion_192a
4194.byte 102,15,58,223,202,32
4195 call L$key_expansion_192b
4196.byte 102,15,58,223,202,64
4197 call L$key_expansion_192a
4198.byte 102,15,58,223,202,128
4199 call L$key_expansion_192b
4200 movups %xmm0,(%rax)
4201 movl %esi,48(%rax)    # lands at offset 240 of the schedule
4202 xorq %rax,%rax
4203 jmp L$enc_key_ret
4204
4205.p2align 4
4206L$12rounds_alt:
4207 movdqa L$key_rotate192(%rip),%xmm5
4208 movdqa L$key_rcon1(%rip),%xmm4
4209 movl $8,%r10d
4210 movdqu %xmm0,(%rdx)
4211 jmp L$oop_key192
4212
4213.p2align 4
# Each iteration emits 24 bytes of schedule (8 from %xmm2, then 16 from %xmm0).
4214L$oop_key192:
4215 movq %xmm2,0(%rax)
4216 movdqa %xmm2,%xmm1
4217.byte 102,15,56,0,213    # pshufb %xmm5,%xmm2
4218.byte 102,15,56,221,212    # aesenclast %xmm4,%xmm2
4219 pslld $1,%xmm4
4220 leaq 24(%rax),%rax
4221
4222 movdqa %xmm0,%xmm3
4223 pslldq $4,%xmm0
4224 pxor %xmm0,%xmm3
4225 pslldq $4,%xmm0
4226 pxor %xmm0,%xmm3
4227 pslldq $4,%xmm0
4228 pxor %xmm3,%xmm0
4229
4230 pshufd $0xff,%xmm0,%xmm3
4231 pxor %xmm1,%xmm3
4232 pslldq $4,%xmm1
4233 pxor %xmm1,%xmm3
4234
4235 pxor %xmm2,%xmm0
4236 pxor %xmm3,%xmm2
4237 movdqu %xmm0,-16(%rax)
4238
4239 decl %r10d
4240 jnz L$oop_key192
4241
4242 movl %esi,32(%rax)    # lands at offset 240 of the schedule
4243 xorl %eax,%eax
4244 jmp L$enc_key_ret
4245
4246.p2align 4
# ---- 256-bit key ----
4247L$14rounds:
4248 movups 16(%rdi),%xmm2    # second 16 key bytes
4249 movl $13,%esi
4250 leaq 16(%rax),%rax    # cursor starts after the two user-key halves
4251 cmpl $268435456,%r10d
4252 je L$14rounds_alt
4253
4254 movups %xmm0,(%rdx)
4255 movups %xmm2,16(%rdx)
# The a and b helpers alternate; the source register of aeskeygenassist alternates with them.
4256.byte 102,15,58,223,202,1    # aeskeygenassist $1,%xmm2,%xmm1
4257 call L$key_expansion_256a_cold
4258.byte 102,15,58,223,200,1    # aeskeygenassist $1,%xmm0,%xmm1
4259 call L$key_expansion_256b
4260.byte 102,15,58,223,202,2
4261 call L$key_expansion_256a
4262.byte 102,15,58,223,200,2
4263 call L$key_expansion_256b
4264.byte 102,15,58,223,202,4
4265 call L$key_expansion_256a
4266.byte 102,15,58,223,200,4
4267 call L$key_expansion_256b
4268.byte 102,15,58,223,202,8
4269 call L$key_expansion_256a
4270.byte 102,15,58,223,200,8
4271 call L$key_expansion_256b
4272.byte 102,15,58,223,202,16
4273 call L$key_expansion_256a
4274.byte 102,15,58,223,200,16
4275 call L$key_expansion_256b
4276.byte 102,15,58,223,202,32
4277 call L$key_expansion_256a
4278.byte 102,15,58,223,200,32
4279 call L$key_expansion_256b
4280.byte 102,15,58,223,202,64
4281 call L$key_expansion_256a
4282 movups %xmm0,(%rax)
4283 movl %esi,16(%rax)    # lands at offset 240 of the schedule
4284 xorq %rax,%rax
4285 jmp L$enc_key_ret
4286
4287.p2align 4
4288L$14rounds_alt:
4289 movdqa L$key_rotate(%rip),%xmm5
4290 movdqa L$key_rcon1(%rip),%xmm4
4291 movl $7,%r10d
4292 movdqu %xmm0,0(%rdx)
4293 movdqa %xmm2,%xmm1
4294 movdqu %xmm2,16(%rdx)
4295 jmp L$oop_key256
4296
4297.p2align 4
# Each full iteration emits two round keys; the seventh pass exits after the first one.
4298L$oop_key256:
4299.byte 102,15,56,0,213    # pshufb %xmm5,%xmm2
4300.byte 102,15,56,221,212    # aesenclast %xmm4,%xmm2
4301
4302 movdqa %xmm0,%xmm3
4303 pslldq $4,%xmm0
4304 pxor %xmm0,%xmm3
4305 pslldq $4,%xmm0
4306 pxor %xmm0,%xmm3
4307 pslldq $4,%xmm0
4308 pxor %xmm3,%xmm0
4309 pslld $1,%xmm4
4310
4311 pxor %xmm2,%xmm0
4312 movdqu %xmm0,(%rax)
4313
4314 decl %r10d
4315 jz L$done_key256
4316
4317 pshufd $0xff,%xmm0,%xmm2
4318 pxor %xmm3,%xmm3
4319.byte 102,15,56,221,211    # aesenclast %xmm3,%xmm2 with an all-zero %xmm3
4320
4321 movdqa %xmm1,%xmm3
4322 pslldq $4,%xmm1
4323 pxor %xmm1,%xmm3
4324 pslldq $4,%xmm1
4325 pxor %xmm1,%xmm3
4326 pslldq $4,%xmm1
4327 pxor %xmm3,%xmm1
4328
4329 pxor %xmm1,%xmm2
4330 movdqu %xmm2,16(%rax)
4331 leaq 32(%rax),%rax
4332 movdqa %xmm2,%xmm1
4333
4334 jmp L$oop_key256
4335
4336L$done_key256:
4337 movl %esi,16(%rax)    # lands at offset 240 of the schedule
4338 xorl %eax,%eax
4339 jmp L$enc_key_ret
4340
4341.p2align 4
4342L$bad_keybits:
4343 movq $-2,%rax    # unsupported key size
4344L$enc_key_ret:
# Scrub the registers that held key material, then undo the stack adjustment.
4345 pxor %xmm0,%xmm0
4346 pxor %xmm1,%xmm1
4347 pxor %xmm2,%xmm2
4348 pxor %xmm3,%xmm3
4349 pxor %xmm4,%xmm4
4350 pxor %xmm5,%xmm5
4351 addq $8,%rsp
4352
4353 .byte 0xf3,0xc3    # rep ret
4354
4355L$SEH_end_set_encrypt_key:
4356
4357.p2align 4
/*
 * L$key_expansion_128 -- local subroutine (x86-64, AT&T operand order).
 *
 * In:    xmm0 = 128-bit value to store and then transform
 *        xmm1 = value whose dword 3 is folded into the result
 *               (presumably AESKEYGENASSIST output prepared by the caller;
 *               the call sites are outside this chunk -- TODO confirm)
 *        xmm4 = scratch; the prefix-XOR result described below holds only
 *               when its low dword is zero on entry
 *               (NOTE(review): the zeroing is not visible here -- confirm)
 *        rax  = destination pointer for the store
 * Out:   xmm0 = transformed value; rax advanced by 16 (main entry only)
 * Clobb: xmm1, xmm4
 *
 * L$key_expansion_128_cold is a second entry point that skips the store
 * and the pointer advance.
 */
4358L$key_expansion_128:
4359 movups %xmm0,(%rax)  /* write xmm0 to the 16 bytes at rax */
4360 leaq 16(%rax),%rax  /* rax += 16 */
4361L$key_expansion_128_cold:
4362 shufps $16,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[0], xmm0[1], xmm0[0] } */
4363 xorps %xmm4,%xmm0
4364 shufps $140,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[3], xmm0[0], xmm0[2] } */
4365 xorps %xmm4,%xmm0  /* with xmm4[0] == 0: dword i of xmm0 = XOR of its original dwords 0..i */
4366 shufps $255,%xmm1,%xmm1  /* replicate dword 3 of xmm1 into all four dwords */
4367 xorps %xmm1,%xmm0
4368 .byte 0xf3,0xc3  /* bytes F3 C3 = "rep ret" */
4369
4370.p2align 4
/*
 * L$key_expansion_192a -- local subroutine with three entry points.
 *
 *   L$key_expansion_192a       store xmm0 at rax, rax += 16, then fall through
 *   L$key_expansion_192a_cold  skip the store; save a copy of xmm2 in xmm5
 *   L$key_expansion_192b_warm  shared transform body; L$key_expansion_192b
 *                              jumps here after doing its own stores
 *
 * In:    xmm0, xmm2 = values to transform
 *        xmm1 = value whose dword 1 is folded into xmm0
 *               (presumably AESKEYGENASSIST output from the caller; the call
 *               sites are outside this chunk -- TODO confirm)
 *        xmm4 = scratch; the prefix-XOR result holds only when its low
 *               dword is zero on entry (NOTE(review): confirm at call sites)
 *        rax  = destination pointer (used by the main entry only)
 * Out:   xmm0, xmm2 updated; xmm5 = entry value of xmm2 (192a/192a_cold only)
 * Clobb: xmm1, xmm3, xmm4
 */
4371L$key_expansion_192a:
4372 movups %xmm0,(%rax)  /* write xmm0 to the 16 bytes at rax */
4373 leaq 16(%rax),%rax  /* rax += 16 */
4374L$key_expansion_192a_cold:
4375 movaps %xmm2,%xmm5  /* keep the incoming xmm2; L$key_expansion_192b reads xmm5 */
4376L$key_expansion_192b_warm:
4377 shufps $16,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[0], xmm0[1], xmm0[0] } */
4378 movdqa %xmm2,%xmm3  /* xmm3 = copy of xmm2 for the byte shift below */
4379 xorps %xmm4,%xmm0
4380 shufps $140,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[3], xmm0[0], xmm0[2] } */
4381 pslldq $4,%xmm3  /* xmm3 = xmm2 shifted left by 4 bytes */
4382 xorps %xmm4,%xmm0  /* with xmm4[0] == 0: dword i of xmm0 = XOR of its original dwords 0..i */
4383 pshufd $85,%xmm1,%xmm1  /* replicate dword 1 of xmm1 into all four dwords */
4384 pxor %xmm3,%xmm2  /* xmm2 ^= (xmm2 << 4 bytes) */
4385 pxor %xmm1,%xmm0
4386 pshufd $255,%xmm0,%xmm3  /* xmm3 = dword 3 of the new xmm0, replicated */
4387 pxor %xmm3,%xmm2
4388 .byte 0xf3,0xc3  /* bytes F3 C3 = "rep ret" */
4389
4390.p2align 4
/*
 * L$key_expansion_192b -- local subroutine.
 *
 * Repacks xmm5 (low half), xmm0 and xmm2 (low half) into two 16-byte
 * blocks, writes them at rax and rax+16, advances rax by 32, and then
 * jumps to L$key_expansion_192b_warm, which performs the transform and
 * the return.
 *
 * In:    xmm0, xmm2 = current values; xmm5 = value whose low 8 bytes are
 *        emitted first (L$key_expansion_192a_cold stores a copy of xmm2
 *        there); rax = destination pointer
 * Out:   32 bytes written at the entry rax; rax += 32; register results as
 *        produced by L$key_expansion_192b_warm
 * Clobb: xmm3, xmm5, plus whatever the warm path clobbers
 */
4391L$key_expansion_192b:
4392 movaps %xmm0,%xmm3  /* xmm3 = copy of xmm0, source for the second block */
4393 shufps $68,%xmm0,%xmm5  /* xmm5 = { xmm5[0], xmm5[1], xmm0[0], xmm0[1] } */
4394 movups %xmm5,(%rax)  /* first 16-byte block */
4395 shufps $78,%xmm2,%xmm3  /* xmm3 = { xmm0[2], xmm0[3], xmm2[0], xmm2[1] } */
4396 movups %xmm3,16(%rax)  /* second 16-byte block */
4397 leaq 32(%rax),%rax  /* rax += 32 */
4398 jmp L$key_expansion_192b_warm  /* tail-jump: the warm path returns to our caller */
4399
4400.p2align 4
/*
 * L$key_expansion_256a -- local subroutine used by the L$14rounds path.
 *
 * In:    xmm2 = 128-bit value stored at rax by the main entry
 *        xmm0 = value to transform
 *        xmm1 = value whose dword 3 is folded into xmm0; at the L$14rounds
 *               call sites it is produced just before the call by the
 *               .byte-encoded instruction 66 0F 3A DF (AESKEYGENASSIST)
 *               with xmm2 as the source operand
 *        xmm4 = scratch; the prefix-XOR result holds only when its low
 *               dword is zero on entry (NOTE(review): the zeroing is not
 *               visible in this chunk -- confirm)
 *        rax  = destination pointer
 * Out:   xmm0 = transformed value; rax advanced by 16 (main entry only)
 * Clobb: xmm1, xmm4
 *
 * L$key_expansion_256a_cold is a second entry point that skips the store
 * and the pointer advance; L$14rounds uses it for its first call.
 */
4401L$key_expansion_256a:
4402 movups %xmm2,(%rax)  /* write xmm2 to the 16 bytes at rax */
4403 leaq 16(%rax),%rax  /* rax += 16 */
4404L$key_expansion_256a_cold:
4405 shufps $16,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[0], xmm0[1], xmm0[0] } */
4406 xorps %xmm4,%xmm0
4407 shufps $140,%xmm0,%xmm4  /* xmm4 = { xmm4[0], xmm4[3], xmm0[0], xmm0[2] } */
4408 xorps %xmm4,%xmm0  /* with xmm4[0] == 0: dword i of xmm0 = XOR of its original dwords 0..i */
4409 shufps $255,%xmm1,%xmm1  /* replicate dword 3 of xmm1 into all four dwords */
4410 xorps %xmm1,%xmm0
4411 .byte 0xf3,0xc3  /* bytes F3 C3 = "rep ret" */
4412
4413.p2align 4
/*
 * L$key_expansion_256b -- local subroutine used by the L$14rounds path.
 *
 * Counterpart of L$key_expansion_256a with the roles of xmm0 and xmm2
 * swapped: xmm0 is stored, xmm2 is transformed, and dword 2 (not dword 3)
 * of xmm1 is folded in.
 *
 * In:    xmm0 = 128-bit value stored at rax
 *        xmm2 = value to transform
 *        xmm1 = at the L$14rounds call sites, produced just before the call
 *               by the .byte-encoded instruction 66 0F 3A DF
 *               (AESKEYGENASSIST) with xmm0 as the source operand
 *        xmm4 = scratch; the prefix-XOR result holds only when its low
 *               dword is zero on entry (NOTE(review): the zeroing is not
 *               visible in this chunk -- confirm)
 *        rax  = destination pointer
 * Out:   xmm2 = transformed value; rax advanced by 16
 * Clobb: xmm1, xmm4
 */
4414L$key_expansion_256b:
4415 movups %xmm0,(%rax)  /* write xmm0 to the 16 bytes at rax */
4416 leaq 16(%rax),%rax  /* rax += 16 */
4417
4418 shufps $16,%xmm2,%xmm4  /* xmm4 = { xmm4[0], xmm4[0], xmm2[1], xmm2[0] } */
4419 xorps %xmm4,%xmm2
4420 shufps $140,%xmm2,%xmm4  /* xmm4 = { xmm4[0], xmm4[3], xmm2[0], xmm2[2] } */
4421 xorps %xmm4,%xmm2  /* with xmm4[0] == 0: dword i of xmm2 = XOR of its original dwords 0..i */
4422 shufps $170,%xmm1,%xmm1  /* replicate dword 2 of xmm1 into all four dwords */
4423 xorps %xmm1,%xmm2
4424 .byte 0xf3,0xc3  /* bytes F3 C3 = "rep ret" */
4425
4426
4427.p2align 6
/*
 * Constant pool, aligned to 64 bytes by the .p2align 6 above. Every entry
 * before the identification string is exactly 16 bytes, so each label sits
 * on a 16-byte boundary. Only L$key_rotate and L$key_rcon1 are referenced in
 * the code visible in this chunk (L$14rounds_alt, via movdqa); the purpose
 * of the other entries is inferred from their names and values and should
 * be confirmed against their users elsewhere in the file.
 */
/* Byte indices 15 down to 0; as a PSHUFB control this would reverse the byte order -- TODO confirm use. */
4428L$bswap_mask:
4429.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* Dwords 6,6,6,0 -- presumably a per-lane counter increment; verify against the user. */
4430L$increment32:
4431.long 6,6,6,0
/* Dwords 1,0,0,0, i.e. the value 1 in the low 64 bits. */
4432L$increment64:
4433.long 1,0,0,0
/* Dwords 0x87,0,1,0 -- presumably the XTS tweak-update constant; verify against the user. */
4434L$xts_magic:
4435.long 0x87,0,1,0
/* Fifteen zero bytes followed by 1 (a one in the highest-addressed byte). */
4436L$increment1:
4437.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
/*
 * Loaded into xmm5 by L$14rounds_alt and applied to xmm2 through the
 * .byte-encoded PSHUFB (66 0F 38 00). In memory each dword is the byte
 * sequence 0d,0e,0f,0c, so every destination dword receives source
 * bytes 13,14,15,12.
 */
4438L$key_rotate:
4439.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
/* Same layout with byte indices 05,06,07,04 in every dword; no user is visible in this chunk. */
4440L$key_rotate192:
4441.long 0x04070605,0x04070605,0x04070605,0x04070605
/* Dwords of 1; loaded into xmm4 by L$14rounds_alt, where L$oop_key256 doubles it each pass with pslld $1. */
4442L$key_rcon1:
4443.long 1,1,1,1
/* Dwords of 0x1b; no user is visible in this chunk. */
4444L$key_rcon1b:
4445.long 0x1b,0x1b,0x1b,0x1b
4446
/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
4447.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4448.p2align 6
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette