default rel %define XMMWORD %define YMMWORD %define ZMMWORD section .text code align=64 EXTERN OPENSSL_ia32cap_P global sha256_multi_block ALIGN 32 sha256_multi_block: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_sha256_multi_block: mov rdi,rcx mov rsi,rdx mov rdx,r8 mov rcx,QWORD[((OPENSSL_ia32cap_P+4))] bt rcx,61 jc NEAR _shaext_shortcut test ecx,268435456 jnz NEAR _avx_shortcut mov rax,rsp push rbx push rbp lea rsp,[((-168))+rsp] movaps XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[(-120)+rax],xmm10 movaps XMMWORD[(-104)+rax],xmm11 movaps XMMWORD[(-88)+rax],xmm12 movaps XMMWORD[(-72)+rax],xmm13 movaps XMMWORD[(-56)+rax],xmm14 movaps XMMWORD[(-40)+rax],xmm15 sub rsp,288 and rsp,-256 mov QWORD[272+rsp],rax $L$body: lea rbp,[((K256+128))] lea rbx,[256+rsp] lea rdi,[128+rdi] $L$oop_grande: mov DWORD[280+rsp],edx xor edx,edx mov r8,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r8,rbp mov r9,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r9,rbp mov r10,QWORD[32+rsi] mov ecx,DWORD[40+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[8+rbx],ecx cmovle r10,rbp mov r11,QWORD[48+rsi] mov ecx,DWORD[56+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[12+rbx],ecx cmovle r11,rbp test edx,edx jz NEAR $L$done movdqu xmm8,XMMWORD[((0-128))+rdi] lea rax,[128+rsp] movdqu xmm9,XMMWORD[((32-128))+rdi] movdqu xmm10,XMMWORD[((64-128))+rdi] movdqu xmm11,XMMWORD[((96-128))+rdi] movdqu xmm12,XMMWORD[((128-128))+rdi] movdqu xmm13,XMMWORD[((160-128))+rdi] movdqu xmm14,XMMWORD[((192-128))+rdi] movdqu xmm15,XMMWORD[((224-128))+rdi] movdqu xmm6,XMMWORD[$L$pbswap] jmp NEAR $L$oop ALIGN 32 $L$oop: movdqa xmm4,xmm10 pxor xmm4,xmm9 movd xmm5,DWORD[r8] movd xmm0,DWORD[r9] movd xmm1,DWORD[r10] movd xmm2,DWORD[r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 
movdqa xmm7,xmm12 DB 102,15,56,0,238 movdqa xmm2,xmm12 psrld xmm7,6 movdqa xmm1,xmm12 pslld xmm2,7 movdqa XMMWORD[(0-128)+rax],xmm5 paddd xmm5,xmm15 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-128))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm12 pxor xmm7,xmm2 movdqa xmm3,xmm12 pslld xmm2,26-21 pandn xmm0,xmm14 pand xmm3,xmm13 pxor xmm7,xmm1 movdqa xmm1,xmm8 pxor xmm7,xmm2 movdqa xmm2,xmm8 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm9 movdqa xmm7,xmm8 pslld xmm2,10 pxor xmm3,xmm8 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm15,xmm9 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm15,xmm4 paddd xmm11,xmm5 pxor xmm7,xmm2 paddd xmm15,xmm5 paddd xmm15,xmm7 movd xmm5,DWORD[4+r8] movd xmm0,DWORD[4+r9] movd xmm1,DWORD[4+r10] movd xmm2,DWORD[4+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm11 movdqa xmm2,xmm11 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm11 pslld xmm2,7 movdqa XMMWORD[(16-128)+rax],xmm5 paddd xmm5,xmm14 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-96))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm11 pxor xmm7,xmm2 movdqa xmm4,xmm11 pslld xmm2,26-21 pandn xmm0,xmm13 pand xmm4,xmm12 pxor xmm7,xmm1 movdqa xmm1,xmm15 pxor xmm7,xmm2 movdqa xmm2,xmm15 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm8 movdqa xmm7,xmm15 pslld xmm2,10 pxor xmm4,xmm15 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm14,xmm8 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm14,xmm3 paddd xmm10,xmm5 pxor xmm7,xmm2 paddd xmm14,xmm5 paddd xmm14,xmm7 movd xmm5,DWORD[8+r8] movd xmm0,DWORD[8+r9] movd xmm1,DWORD[8+r10] movd xmm2,DWORD[8+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm10 DB 102,15,56,0,238 movdqa xmm2,xmm10 psrld xmm7,6 movdqa xmm1,xmm10 pslld xmm2,7 movdqa 
XMMWORD[(32-128)+rax],xmm5 paddd xmm5,xmm13 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-64))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm10 pxor xmm7,xmm2 movdqa xmm3,xmm10 pslld xmm2,26-21 pandn xmm0,xmm12 pand xmm3,xmm11 pxor xmm7,xmm1 movdqa xmm1,xmm14 pxor xmm7,xmm2 movdqa xmm2,xmm14 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm15 movdqa xmm7,xmm14 pslld xmm2,10 pxor xmm3,xmm14 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm13,xmm15 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm13,xmm4 paddd xmm9,xmm5 pxor xmm7,xmm2 paddd xmm13,xmm5 paddd xmm13,xmm7 movd xmm5,DWORD[12+r8] movd xmm0,DWORD[12+r9] movd xmm1,DWORD[12+r10] movd xmm2,DWORD[12+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm9 movdqa xmm2,xmm9 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm9 pslld xmm2,7 movdqa XMMWORD[(48-128)+rax],xmm5 paddd xmm5,xmm12 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-32))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm9 pxor xmm7,xmm2 movdqa xmm4,xmm9 pslld xmm2,26-21 pandn xmm0,xmm11 pand xmm4,xmm10 pxor xmm7,xmm1 movdqa xmm1,xmm13 pxor xmm7,xmm2 movdqa xmm2,xmm13 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm14 movdqa xmm7,xmm13 pslld xmm2,10 pxor xmm4,xmm13 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm12,xmm14 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm12,xmm3 paddd xmm8,xmm5 pxor xmm7,xmm2 paddd xmm12,xmm5 paddd xmm12,xmm7 movd xmm5,DWORD[16+r8] movd xmm0,DWORD[16+r9] movd xmm1,DWORD[16+r10] movd xmm2,DWORD[16+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm8 DB 102,15,56,0,238 movdqa xmm2,xmm8 psrld xmm7,6 movdqa xmm1,xmm8 pslld xmm2,7 movdqa XMMWORD[(64-128)+rax],xmm5 paddd xmm5,xmm11 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[rbp] 
pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm8 pxor xmm7,xmm2 movdqa xmm3,xmm8 pslld xmm2,26-21 pandn xmm0,xmm10 pand xmm3,xmm9 pxor xmm7,xmm1 movdqa xmm1,xmm12 pxor xmm7,xmm2 movdqa xmm2,xmm12 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm13 movdqa xmm7,xmm12 pslld xmm2,10 pxor xmm3,xmm12 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm11,xmm13 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm11,xmm4 paddd xmm15,xmm5 pxor xmm7,xmm2 paddd xmm11,xmm5 paddd xmm11,xmm7 movd xmm5,DWORD[20+r8] movd xmm0,DWORD[20+r9] movd xmm1,DWORD[20+r10] movd xmm2,DWORD[20+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm15 movdqa xmm2,xmm15 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm15 pslld xmm2,7 movdqa XMMWORD[(80-128)+rax],xmm5 paddd xmm5,xmm10 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[32+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm15 pxor xmm7,xmm2 movdqa xmm4,xmm15 pslld xmm2,26-21 pandn xmm0,xmm9 pand xmm4,xmm8 pxor xmm7,xmm1 movdqa xmm1,xmm11 pxor xmm7,xmm2 movdqa xmm2,xmm11 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm12 movdqa xmm7,xmm11 pslld xmm2,10 pxor xmm4,xmm11 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm10,xmm12 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm10,xmm3 paddd xmm14,xmm5 pxor xmm7,xmm2 paddd xmm10,xmm5 paddd xmm10,xmm7 movd xmm5,DWORD[24+r8] movd xmm0,DWORD[24+r9] movd xmm1,DWORD[24+r10] movd xmm2,DWORD[24+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm14 DB 102,15,56,0,238 movdqa xmm2,xmm14 psrld xmm7,6 movdqa xmm1,xmm14 pslld xmm2,7 movdqa XMMWORD[(96-128)+rax],xmm5 paddd xmm5,xmm9 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[64+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm14 pxor xmm7,xmm2 movdqa xmm3,xmm14 pslld xmm2,26-21 pandn xmm0,xmm8 pand 
xmm3,xmm15 pxor xmm7,xmm1 movdqa xmm1,xmm10 pxor xmm7,xmm2 movdqa xmm2,xmm10 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm11 movdqa xmm7,xmm10 pslld xmm2,10 pxor xmm3,xmm10 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm9,xmm11 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm9,xmm4 paddd xmm13,xmm5 pxor xmm7,xmm2 paddd xmm9,xmm5 paddd xmm9,xmm7 movd xmm5,DWORD[28+r8] movd xmm0,DWORD[28+r9] movd xmm1,DWORD[28+r10] movd xmm2,DWORD[28+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm13 movdqa xmm2,xmm13 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm13 pslld xmm2,7 movdqa XMMWORD[(112-128)+rax],xmm5 paddd xmm5,xmm8 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[96+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm13 pxor xmm7,xmm2 movdqa xmm4,xmm13 pslld xmm2,26-21 pandn xmm0,xmm15 pand xmm4,xmm14 pxor xmm7,xmm1 movdqa xmm1,xmm9 pxor xmm7,xmm2 movdqa xmm2,xmm9 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm10 movdqa xmm7,xmm9 pslld xmm2,10 pxor xmm4,xmm9 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm8,xmm10 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm8,xmm3 paddd xmm12,xmm5 pxor xmm7,xmm2 paddd xmm8,xmm5 paddd xmm8,xmm7 lea rbp,[256+rbp] movd xmm5,DWORD[32+r8] movd xmm0,DWORD[32+r9] movd xmm1,DWORD[32+r10] movd xmm2,DWORD[32+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm12 DB 102,15,56,0,238 movdqa xmm2,xmm12 psrld xmm7,6 movdqa xmm1,xmm12 pslld xmm2,7 movdqa XMMWORD[(128-128)+rax],xmm5 paddd xmm5,xmm15 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-128))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm12 pxor xmm7,xmm2 movdqa xmm3,xmm12 pslld xmm2,26-21 pandn xmm0,xmm14 pand xmm3,xmm13 pxor xmm7,xmm1 movdqa xmm1,xmm8 pxor xmm7,xmm2 movdqa xmm2,xmm8 psrld xmm1,2 paddd 
xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm9 movdqa xmm7,xmm8 pslld xmm2,10 pxor xmm3,xmm8 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm15,xmm9 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm15,xmm4 paddd xmm11,xmm5 pxor xmm7,xmm2 paddd xmm15,xmm5 paddd xmm15,xmm7 movd xmm5,DWORD[36+r8] movd xmm0,DWORD[36+r9] movd xmm1,DWORD[36+r10] movd xmm2,DWORD[36+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm11 movdqa xmm2,xmm11 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm11 pslld xmm2,7 movdqa XMMWORD[(144-128)+rax],xmm5 paddd xmm5,xmm14 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-96))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm11 pxor xmm7,xmm2 movdqa xmm4,xmm11 pslld xmm2,26-21 pandn xmm0,xmm13 pand xmm4,xmm12 pxor xmm7,xmm1 movdqa xmm1,xmm15 pxor xmm7,xmm2 movdqa xmm2,xmm15 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm8 movdqa xmm7,xmm15 pslld xmm2,10 pxor xmm4,xmm15 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm14,xmm8 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm14,xmm3 paddd xmm10,xmm5 pxor xmm7,xmm2 paddd xmm14,xmm5 paddd xmm14,xmm7 movd xmm5,DWORD[40+r8] movd xmm0,DWORD[40+r9] movd xmm1,DWORD[40+r10] movd xmm2,DWORD[40+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm10 DB 102,15,56,0,238 movdqa xmm2,xmm10 psrld xmm7,6 movdqa xmm1,xmm10 pslld xmm2,7 movdqa XMMWORD[(160-128)+rax],xmm5 paddd xmm5,xmm13 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-64))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm10 pxor xmm7,xmm2 movdqa xmm3,xmm10 pslld xmm2,26-21 pandn xmm0,xmm12 pand xmm3,xmm11 pxor xmm7,xmm1 movdqa xmm1,xmm14 pxor xmm7,xmm2 movdqa xmm2,xmm14 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm15 movdqa xmm7,xmm14 pslld xmm2,10 pxor xmm3,xmm14 psrld xmm7,13 pxor 
xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm13,xmm15 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm13,xmm4 paddd xmm9,xmm5 pxor xmm7,xmm2 paddd xmm13,xmm5 paddd xmm13,xmm7 movd xmm5,DWORD[44+r8] movd xmm0,DWORD[44+r9] movd xmm1,DWORD[44+r10] movd xmm2,DWORD[44+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm9 movdqa xmm2,xmm9 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm9 pslld xmm2,7 movdqa XMMWORD[(176-128)+rax],xmm5 paddd xmm5,xmm12 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-32))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm9 pxor xmm7,xmm2 movdqa xmm4,xmm9 pslld xmm2,26-21 pandn xmm0,xmm11 pand xmm4,xmm10 pxor xmm7,xmm1 movdqa xmm1,xmm13 pxor xmm7,xmm2 movdqa xmm2,xmm13 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm14 movdqa xmm7,xmm13 pslld xmm2,10 pxor xmm4,xmm13 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm12,xmm14 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm12,xmm3 paddd xmm8,xmm5 pxor xmm7,xmm2 paddd xmm12,xmm5 paddd xmm12,xmm7 movd xmm5,DWORD[48+r8] movd xmm0,DWORD[48+r9] movd xmm1,DWORD[48+r10] movd xmm2,DWORD[48+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm8 DB 102,15,56,0,238 movdqa xmm2,xmm8 psrld xmm7,6 movdqa xmm1,xmm8 pslld xmm2,7 movdqa XMMWORD[(192-128)+rax],xmm5 paddd xmm5,xmm11 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm8 pxor xmm7,xmm2 movdqa xmm3,xmm8 pslld xmm2,26-21 pandn xmm0,xmm10 pand xmm3,xmm9 pxor xmm7,xmm1 movdqa xmm1,xmm12 pxor xmm7,xmm2 movdqa xmm2,xmm12 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm13 movdqa xmm7,xmm12 pslld xmm2,10 pxor xmm3,xmm12 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm11,xmm13 
pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm11,xmm4 paddd xmm15,xmm5 pxor xmm7,xmm2 paddd xmm11,xmm5 paddd xmm11,xmm7 movd xmm5,DWORD[52+r8] movd xmm0,DWORD[52+r9] movd xmm1,DWORD[52+r10] movd xmm2,DWORD[52+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm15 movdqa xmm2,xmm15 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm15 pslld xmm2,7 movdqa XMMWORD[(208-128)+rax],xmm5 paddd xmm5,xmm10 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[32+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm15 pxor xmm7,xmm2 movdqa xmm4,xmm15 pslld xmm2,26-21 pandn xmm0,xmm9 pand xmm4,xmm8 pxor xmm7,xmm1 movdqa xmm1,xmm11 pxor xmm7,xmm2 movdqa xmm2,xmm11 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm12 movdqa xmm7,xmm11 pslld xmm2,10 pxor xmm4,xmm11 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm10,xmm12 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm10,xmm3 paddd xmm14,xmm5 pxor xmm7,xmm2 paddd xmm10,xmm5 paddd xmm10,xmm7 movd xmm5,DWORD[56+r8] movd xmm0,DWORD[56+r9] movd xmm1,DWORD[56+r10] movd xmm2,DWORD[56+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm14 DB 102,15,56,0,238 movdqa xmm2,xmm14 psrld xmm7,6 movdqa xmm1,xmm14 pslld xmm2,7 movdqa XMMWORD[(224-128)+rax],xmm5 paddd xmm5,xmm9 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[64+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm14 pxor xmm7,xmm2 movdqa xmm3,xmm14 pslld xmm2,26-21 pandn xmm0,xmm8 pand xmm3,xmm15 pxor xmm7,xmm1 movdqa xmm1,xmm10 pxor xmm7,xmm2 movdqa xmm2,xmm10 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm11 movdqa xmm7,xmm10 pslld xmm2,10 pxor xmm3,xmm10 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm9,xmm11 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm9,xmm4 paddd xmm13,xmm5 pxor xmm7,xmm2 paddd xmm9,xmm5 paddd xmm9,xmm7 movd 
xmm5,DWORD[60+r8] lea r8,[64+r8] movd xmm0,DWORD[60+r9] lea r9,[64+r9] movd xmm1,DWORD[60+r10] lea r10,[64+r10] movd xmm2,DWORD[60+r11] lea r11,[64+r11] punpckldq xmm5,xmm1 punpckldq xmm0,xmm2 punpckldq xmm5,xmm0 movdqa xmm7,xmm13 movdqa xmm2,xmm13 DB 102,15,56,0,238 psrld xmm7,6 movdqa xmm1,xmm13 pslld xmm2,7 movdqa XMMWORD[(240-128)+rax],xmm5 paddd xmm5,xmm8 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[96+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm13 prefetcht0 [63+r8] pxor xmm7,xmm2 movdqa xmm4,xmm13 pslld xmm2,26-21 pandn xmm0,xmm15 pand xmm4,xmm14 pxor xmm7,xmm1 prefetcht0 [63+r9] movdqa xmm1,xmm9 pxor xmm7,xmm2 movdqa xmm2,xmm9 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm10 movdqa xmm7,xmm9 pslld xmm2,10 pxor xmm4,xmm9 prefetcht0 [63+r10] psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 prefetcht0 [63+r11] psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm8,xmm10 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm8,xmm3 paddd xmm12,xmm5 pxor xmm7,xmm2 paddd xmm8,xmm5 paddd xmm8,xmm7 lea rbp,[256+rbp] movdqu xmm5,XMMWORD[((0-128))+rax] mov ecx,3 jmp NEAR $L$oop_16_xx ALIGN 32 $L$oop_16_xx: movdqa xmm6,XMMWORD[((16-128))+rax] paddd xmm5,XMMWORD[((144-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((224-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm12 movdqa xmm2,xmm12 psrld xmm7,6 movdqa xmm1,xmm12 pslld xmm2,7 movdqa XMMWORD[(0-128)+rax],xmm5 paddd xmm5,xmm15 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-128))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm12 pxor xmm7,xmm2 movdqa xmm3,xmm12 pslld xmm2,26-21 pandn xmm0,xmm14 
pand xmm3,xmm13 pxor xmm7,xmm1 movdqa xmm1,xmm8 pxor xmm7,xmm2 movdqa xmm2,xmm8 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm9 movdqa xmm7,xmm8 pslld xmm2,10 pxor xmm3,xmm8 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm15,xmm9 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm15,xmm4 paddd xmm11,xmm5 pxor xmm7,xmm2 paddd xmm15,xmm5 paddd xmm15,xmm7 movdqa xmm5,XMMWORD[((32-128))+rax] paddd xmm6,XMMWORD[((160-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((240-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm11 movdqa xmm2,xmm11 psrld xmm7,6 movdqa xmm1,xmm11 pslld xmm2,7 movdqa XMMWORD[(16-128)+rax],xmm6 paddd xmm6,xmm14 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[((-96))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm11 pxor xmm7,xmm2 movdqa xmm4,xmm11 pslld xmm2,26-21 pandn xmm0,xmm13 pand xmm4,xmm12 pxor xmm7,xmm1 movdqa xmm1,xmm15 pxor xmm7,xmm2 movdqa xmm2,xmm15 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm8 movdqa xmm7,xmm15 pslld xmm2,10 pxor xmm4,xmm15 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm14,xmm8 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm14,xmm3 paddd xmm10,xmm6 pxor xmm7,xmm2 paddd xmm14,xmm6 paddd xmm14,xmm7 movdqa xmm6,XMMWORD[((48-128))+rax] paddd xmm5,XMMWORD[((176-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((0-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor 
xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm10 movdqa xmm2,xmm10 psrld xmm7,6 movdqa xmm1,xmm10 pslld xmm2,7 movdqa XMMWORD[(32-128)+rax],xmm5 paddd xmm5,xmm13 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-64))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm10 pxor xmm7,xmm2 movdqa xmm3,xmm10 pslld xmm2,26-21 pandn xmm0,xmm12 pand xmm3,xmm11 pxor xmm7,xmm1 movdqa xmm1,xmm14 pxor xmm7,xmm2 movdqa xmm2,xmm14 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm15 movdqa xmm7,xmm14 pslld xmm2,10 pxor xmm3,xmm14 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm13,xmm15 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm13,xmm4 paddd xmm9,xmm5 pxor xmm7,xmm2 paddd xmm13,xmm5 paddd xmm13,xmm7 movdqa xmm5,XMMWORD[((64-128))+rax] paddd xmm6,XMMWORD[((192-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((16-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm9 movdqa xmm2,xmm9 psrld xmm7,6 movdqa xmm1,xmm9 pslld xmm2,7 movdqa XMMWORD[(48-128)+rax],xmm6 paddd xmm6,xmm12 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[((-32))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm9 pxor xmm7,xmm2 movdqa xmm4,xmm9 pslld xmm2,26-21 pandn xmm0,xmm11 pand xmm4,xmm10 pxor xmm7,xmm1 movdqa xmm1,xmm13 pxor xmm7,xmm2 movdqa xmm2,xmm13 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm14 movdqa xmm7,xmm13 pslld xmm2,10 pxor xmm4,xmm13 psrld 
xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm12,xmm14 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm12,xmm3 paddd xmm8,xmm6 pxor xmm7,xmm2 paddd xmm12,xmm6 paddd xmm12,xmm7 movdqa xmm6,XMMWORD[((80-128))+rax] paddd xmm5,XMMWORD[((208-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((32-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm8 movdqa xmm2,xmm8 psrld xmm7,6 movdqa xmm1,xmm8 pslld xmm2,7 movdqa XMMWORD[(64-128)+rax],xmm5 paddd xmm5,xmm11 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm8 pxor xmm7,xmm2 movdqa xmm3,xmm8 pslld xmm2,26-21 pandn xmm0,xmm10 pand xmm3,xmm9 pxor xmm7,xmm1 movdqa xmm1,xmm12 pxor xmm7,xmm2 movdqa xmm2,xmm12 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm13 movdqa xmm7,xmm12 pslld xmm2,10 pxor xmm3,xmm12 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm11,xmm13 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm11,xmm4 paddd xmm15,xmm5 pxor xmm7,xmm2 paddd xmm11,xmm5 paddd xmm11,xmm7 movdqa xmm5,XMMWORD[((96-128))+rax] paddd xmm6,XMMWORD[((224-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((48-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 
movdqa xmm7,xmm15 movdqa xmm2,xmm15 psrld xmm7,6 movdqa xmm1,xmm15 pslld xmm2,7 movdqa XMMWORD[(80-128)+rax],xmm6 paddd xmm6,xmm10 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[32+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm15 pxor xmm7,xmm2 movdqa xmm4,xmm15 pslld xmm2,26-21 pandn xmm0,xmm9 pand xmm4,xmm8 pxor xmm7,xmm1 movdqa xmm1,xmm11 pxor xmm7,xmm2 movdqa xmm2,xmm11 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm12 movdqa xmm7,xmm11 pslld xmm2,10 pxor xmm4,xmm11 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm10,xmm12 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm10,xmm3 paddd xmm14,xmm6 pxor xmm7,xmm2 paddd xmm10,xmm6 paddd xmm10,xmm7 movdqa xmm6,XMMWORD[((112-128))+rax] paddd xmm5,XMMWORD[((240-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((64-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm14 movdqa xmm2,xmm14 psrld xmm7,6 movdqa xmm1,xmm14 pslld xmm2,7 movdqa XMMWORD[(96-128)+rax],xmm5 paddd xmm5,xmm9 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[64+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm14 pxor xmm7,xmm2 movdqa xmm3,xmm14 pslld xmm2,26-21 pandn xmm0,xmm8 pand xmm3,xmm15 pxor xmm7,xmm1 movdqa xmm1,xmm10 pxor xmm7,xmm2 movdqa xmm2,xmm10 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm11 movdqa xmm7,xmm10 pslld xmm2,10 pxor xmm3,xmm10 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm9,xmm11 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm9,xmm4 paddd xmm13,xmm5 pxor xmm7,xmm2 paddd 
xmm9,xmm5 paddd xmm9,xmm7 movdqa xmm5,XMMWORD[((128-128))+rax] paddd xmm6,XMMWORD[((0-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((80-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm13 movdqa xmm2,xmm13 psrld xmm7,6 movdqa xmm1,xmm13 pslld xmm2,7 movdqa XMMWORD[(112-128)+rax],xmm6 paddd xmm6,xmm8 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[96+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm13 pxor xmm7,xmm2 movdqa xmm4,xmm13 pslld xmm2,26-21 pandn xmm0,xmm15 pand xmm4,xmm14 pxor xmm7,xmm1 movdqa xmm1,xmm9 pxor xmm7,xmm2 movdqa xmm2,xmm9 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm10 movdqa xmm7,xmm9 pslld xmm2,10 pxor xmm4,xmm9 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm8,xmm10 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm8,xmm3 paddd xmm12,xmm6 pxor xmm7,xmm2 paddd xmm8,xmm6 paddd xmm8,xmm7 lea rbp,[256+rbp] movdqa xmm6,XMMWORD[((144-128))+rax] paddd xmm5,XMMWORD[((16-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((96-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm12 movdqa xmm2,xmm12 psrld xmm7,6 movdqa xmm1,xmm12 pslld xmm2,7 movdqa XMMWORD[(128-128)+rax],xmm5 paddd xmm5,xmm15 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd 
xmm5,XMMWORD[((-128))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm12 pxor xmm7,xmm2 movdqa xmm3,xmm12 pslld xmm2,26-21 pandn xmm0,xmm14 pand xmm3,xmm13 pxor xmm7,xmm1 movdqa xmm1,xmm8 pxor xmm7,xmm2 movdqa xmm2,xmm8 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm9 movdqa xmm7,xmm8 pslld xmm2,10 pxor xmm3,xmm8 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm15,xmm9 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm15,xmm4 paddd xmm11,xmm5 pxor xmm7,xmm2 paddd xmm15,xmm5 paddd xmm15,xmm7 movdqa xmm5,XMMWORD[((160-128))+rax] paddd xmm6,XMMWORD[((32-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((112-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm11 movdqa xmm2,xmm11 psrld xmm7,6 movdqa xmm1,xmm11 pslld xmm2,7 movdqa XMMWORD[(144-128)+rax],xmm6 paddd xmm6,xmm14 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[((-96))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm11 pxor xmm7,xmm2 movdqa xmm4,xmm11 pslld xmm2,26-21 pandn xmm0,xmm13 pand xmm4,xmm12 pxor xmm7,xmm1 movdqa xmm1,xmm15 pxor xmm7,xmm2 movdqa xmm2,xmm15 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm8 movdqa xmm7,xmm15 pslld xmm2,10 pxor xmm4,xmm15 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm14,xmm8 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm14,xmm3 paddd xmm10,xmm6 pxor xmm7,xmm2 paddd xmm14,xmm6 paddd xmm14,xmm7 movdqa xmm6,XMMWORD[((176-128))+rax] paddd xmm5,XMMWORD[((48-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld 
xmm1,7 movdqa xmm0,XMMWORD[((128-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm10 movdqa xmm2,xmm10 psrld xmm7,6 movdqa xmm1,xmm10 pslld xmm2,7 movdqa XMMWORD[(160-128)+rax],xmm5 paddd xmm5,xmm13 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[((-64))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm10 pxor xmm7,xmm2 movdqa xmm3,xmm10 pslld xmm2,26-21 pandn xmm0,xmm12 pand xmm3,xmm11 pxor xmm7,xmm1 movdqa xmm1,xmm14 pxor xmm7,xmm2 movdqa xmm2,xmm14 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm15 movdqa xmm7,xmm14 pslld xmm2,10 pxor xmm3,xmm14 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm13,xmm15 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm13,xmm4 paddd xmm9,xmm5 pxor xmm7,xmm2 paddd xmm13,xmm5 paddd xmm13,xmm7 movdqa xmm5,XMMWORD[((192-128))+rax] paddd xmm6,XMMWORD[((64-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((144-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm9 movdqa xmm2,xmm9 psrld xmm7,6 movdqa xmm1,xmm9 pslld xmm2,7 movdqa XMMWORD[(176-128)+rax],xmm6 paddd xmm6,xmm12 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[((-32))+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm9 pxor xmm7,xmm2 movdqa xmm4,xmm9 pslld xmm2,26-21 pandn xmm0,xmm11 pand xmm4,xmm10 pxor xmm7,xmm1 movdqa xmm1,xmm13 pxor 
xmm7,xmm2 movdqa xmm2,xmm13 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm14 movdqa xmm7,xmm13 pslld xmm2,10 pxor xmm4,xmm13 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm12,xmm14 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm12,xmm3 paddd xmm8,xmm6 pxor xmm7,xmm2 paddd xmm12,xmm6 paddd xmm12,xmm7 movdqa xmm6,XMMWORD[((208-128))+rax] paddd xmm5,XMMWORD[((80-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((160-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm8 movdqa xmm2,xmm8 psrld xmm7,6 movdqa xmm1,xmm8 pslld xmm2,7 movdqa XMMWORD[(192-128)+rax],xmm5 paddd xmm5,xmm11 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm8 pxor xmm7,xmm2 movdqa xmm3,xmm8 pslld xmm2,26-21 pandn xmm0,xmm10 pand xmm3,xmm9 pxor xmm7,xmm1 movdqa xmm1,xmm12 pxor xmm7,xmm2 movdqa xmm2,xmm12 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm13 movdqa xmm7,xmm12 pslld xmm2,10 pxor xmm3,xmm12 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm11,xmm13 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm11,xmm4 paddd xmm15,xmm5 pxor xmm7,xmm2 paddd xmm11,xmm5 paddd xmm11,xmm7 movdqa xmm5,XMMWORD[((224-128))+rax] paddd xmm6,XMMWORD[((96-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((176-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor 
xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm15 movdqa xmm2,xmm15 psrld xmm7,6 movdqa xmm1,xmm15 pslld xmm2,7 movdqa XMMWORD[(208-128)+rax],xmm6 paddd xmm6,xmm10 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[32+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm15 pxor xmm7,xmm2 movdqa xmm4,xmm15 pslld xmm2,26-21 pandn xmm0,xmm9 pand xmm4,xmm8 pxor xmm7,xmm1 movdqa xmm1,xmm11 pxor xmm7,xmm2 movdqa xmm2,xmm11 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm12 movdqa xmm7,xmm11 pslld xmm2,10 pxor xmm4,xmm11 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm10,xmm12 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm10,xmm3 paddd xmm14,xmm6 pxor xmm7,xmm2 paddd xmm10,xmm6 paddd xmm10,xmm7 movdqa xmm6,XMMWORD[((240-128))+rax] paddd xmm5,XMMWORD[((112-128))+rax] movdqa xmm7,xmm6 movdqa xmm1,xmm6 psrld xmm7,3 movdqa xmm2,xmm6 psrld xmm1,7 movdqa xmm0,XMMWORD[((192-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm3,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm3 psrld xmm3,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm5,xmm7 pxor xmm0,xmm3 psrld xmm3,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm3 pxor xmm0,xmm1 paddd xmm5,xmm0 movdqa xmm7,xmm14 movdqa xmm2,xmm14 psrld xmm7,6 movdqa xmm1,xmm14 pslld xmm2,7 movdqa XMMWORD[(224-128)+rax],xmm5 paddd xmm5,xmm9 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm5,XMMWORD[64+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm14 pxor xmm7,xmm2 movdqa xmm3,xmm14 pslld xmm2,26-21 pandn xmm0,xmm8 pand xmm3,xmm15 pxor xmm7,xmm1 movdqa xmm1,xmm10 pxor xmm7,xmm2 movdqa xmm2,xmm10 psrld xmm1,2 paddd xmm5,xmm7 pxor xmm0,xmm3 movdqa xmm3,xmm11 movdqa xmm7,xmm10 pslld xmm2,10 pxor xmm3,xmm10 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm5,xmm0 pslld xmm2,19-10 pand 
xmm4,xmm3 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm9,xmm11 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm9,xmm4 paddd xmm13,xmm5 pxor xmm7,xmm2 paddd xmm9,xmm5 paddd xmm9,xmm7 movdqa xmm5,XMMWORD[((0-128))+rax] paddd xmm6,XMMWORD[((128-128))+rax] movdqa xmm7,xmm5 movdqa xmm1,xmm5 psrld xmm7,3 movdqa xmm2,xmm5 psrld xmm1,7 movdqa xmm0,XMMWORD[((208-128))+rax] pslld xmm2,14 pxor xmm7,xmm1 psrld xmm1,18-7 movdqa xmm4,xmm0 pxor xmm7,xmm2 pslld xmm2,25-14 pxor xmm7,xmm1 psrld xmm0,10 movdqa xmm1,xmm4 psrld xmm4,17 pxor xmm7,xmm2 pslld xmm1,13 paddd xmm6,xmm7 pxor xmm0,xmm4 psrld xmm4,19-17 pxor xmm0,xmm1 pslld xmm1,15-13 pxor xmm0,xmm4 pxor xmm0,xmm1 paddd xmm6,xmm0 movdqa xmm7,xmm13 movdqa xmm2,xmm13 psrld xmm7,6 movdqa xmm1,xmm13 pslld xmm2,7 movdqa XMMWORD[(240-128)+rax],xmm6 paddd xmm6,xmm8 psrld xmm1,11 pxor xmm7,xmm2 pslld xmm2,21-7 paddd xmm6,XMMWORD[96+rbp] pxor xmm7,xmm1 psrld xmm1,25-11 movdqa xmm0,xmm13 pxor xmm7,xmm2 movdqa xmm4,xmm13 pslld xmm2,26-21 pandn xmm0,xmm15 pand xmm4,xmm14 pxor xmm7,xmm1 movdqa xmm1,xmm9 pxor xmm7,xmm2 movdqa xmm2,xmm9 psrld xmm1,2 paddd xmm6,xmm7 pxor xmm0,xmm4 movdqa xmm4,xmm10 movdqa xmm7,xmm9 pslld xmm2,10 pxor xmm4,xmm9 psrld xmm7,13 pxor xmm1,xmm2 paddd xmm6,xmm0 pslld xmm2,19-10 pand xmm3,xmm4 pxor xmm1,xmm7 psrld xmm7,22-13 pxor xmm1,xmm2 movdqa xmm8,xmm10 pslld xmm2,30-19 pxor xmm7,xmm1 pxor xmm8,xmm3 paddd xmm12,xmm6 pxor xmm7,xmm2 paddd xmm8,xmm6 paddd xmm8,xmm7 lea rbp,[256+rbp] dec ecx jnz NEAR $L$oop_16_xx mov ecx,1 lea rbp,[((K256+128))] movdqa xmm7,XMMWORD[rbx] cmp ecx,DWORD[rbx] pxor xmm0,xmm0 cmovge r8,rbp cmp ecx,DWORD[4+rbx] movdqa xmm6,xmm7 cmovge r9,rbp cmp ecx,DWORD[8+rbx] pcmpgtd xmm6,xmm0 cmovge r10,rbp cmp ecx,DWORD[12+rbx] paddd xmm7,xmm6 cmovge r11,rbp movdqu xmm0,XMMWORD[((0-128))+rdi] pand xmm8,xmm6 movdqu xmm1,XMMWORD[((32-128))+rdi] pand xmm9,xmm6 movdqu xmm2,XMMWORD[((64-128))+rdi] pand xmm10,xmm6 movdqu xmm5,XMMWORD[((96-128))+rdi] pand xmm11,xmm6 paddd xmm8,xmm0 movdqu 
xmm0,XMMWORD[((128-128))+rdi]	; operands of the movdqu whose mnemonic ends the preceding line

; ---------------------------------------------------------------------
; Tail of sha256_multi_block (the function starts earlier in the file).
; xmm6 is the per-lane mask built just before this point (pcmpgtd of the
; counters at [rbx] against zero).  Each working register xmm8..xmm15 is
; masked with xmm6, the value loaded from its rdi-relative slot is added,
; and the sum is written back to the same slot.
; ---------------------------------------------------------------------
	pand xmm12,xmm6
	paddd xmm9,xmm1
	movdqu xmm1,XMMWORD[((160-128))+rdi]
	pand xmm13,xmm6
	paddd xmm10,xmm2
	movdqu xmm2,XMMWORD[((192-128))+rdi]
	pand xmm14,xmm6
	paddd xmm11,xmm5
	movdqu xmm5,XMMWORD[((224-128))+rdi]
	pand xmm15,xmm6
	paddd xmm12,xmm0
	paddd xmm13,xmm1
	movdqu XMMWORD[(0-128)+rdi],xmm8
	paddd xmm14,xmm2
	movdqu XMMWORD[(32-128)+rdi],xmm9
	paddd xmm15,xmm5
	movdqu XMMWORD[(64-128)+rdi],xmm10
	movdqu XMMWORD[(96-128)+rdi],xmm11
	movdqu XMMWORD[(128-128)+rdi],xmm12
	movdqu XMMWORD[(160-128)+rdi],xmm13
	movdqu XMMWORD[(192-128)+rdi],xmm14
	movdqu XMMWORD[(224-128)+rdi],xmm15
	movdqa XMMWORD[rbx],xmm7	; store the updated per-lane counters
	movdqa xmm6,XMMWORD[$L$pbswap]	; xmm6 was reused as scratch; reload it from $L$pbswap for the next pass
	dec edx
	jnz NEAR $L$oop	; repeat $L$oop until edx reaches zero
	mov edx,DWORD[280+rsp]	; reload the outer counter saved at $L$oop_grande
	lea rdi,[16+rdi]	; advance rdi by 16 bytes
	lea rsi,[64+rsi]	; advance rsi by 64 bytes (four 16-byte entries)
	dec edx
	jnz NEAR $L$oop_grande
$L$done:
	mov rax,QWORD[272+rsp]	; rax = stack pointer value saved at [272+rsp] by the prologue
	; restore xmm6..xmm15, rbp and rbx from the offsets (relative to rax) where the prologue stored them
	movaps xmm6,XMMWORD[((-184))+rax]
	movaps xmm7,XMMWORD[((-168))+rax]
	movaps xmm8,XMMWORD[((-152))+rax]
	movaps xmm9,XMMWORD[((-136))+rax]
	movaps xmm10,XMMWORD[((-120))+rax]
	movaps xmm11,XMMWORD[((-104))+rax]
	movaps xmm12,XMMWORD[((-88))+rax]
	movaps xmm13,XMMWORD[((-72))+rax]
	movaps xmm14,XMMWORD[((-56))+rax]
	movaps xmm15,XMMWORD[((-40))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov rbx,QWORD[((-8))+rax]
	lea rsp,[rax]
$L$epilogue:
	mov rdi,QWORD[8+rsp] ;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block:

ALIGN 32
; ---------------------------------------------------------------------
; sha256_multi_block_shaext
;
; Variant of sha256_multi_block built on the SHA extension instructions;
; it works on two input streams per pass.  Besides being a label of its
; own, its body is reached through _shaext_shortcut, which
; sha256_multi_block jumps to when bit 61 of QWORD[OPENSSL_ia32cap_P+4]
; is set (`bt rcx,61` / `jc NEAR _shaext_shortcut` at the top of the file).
;
; Win64 entry: the three arguments arrive in rcx, rdx, r8 and are moved
; into rdi, rsi, rdx.
;   rdi  base of the state area.  Eight slots, 32 bytes apart; this code
;        reads/writes 8 bytes (two dwords, one per stream) of each slot.
;        Biased by +128 in the prologue, so slots are addressed as
;        [(k*32-128)+rdi].
;   rsi  array of 16-byte descriptors: QWORD pointer at +0, DWORD count
;        at +8.  Two descriptors are consumed per outer pass.
;   edx  pass count; doubled on entry because this path consumes two
;        descriptors per pass where the non-SHAEXT path consumes four.
;
; Register use inside the loops:
;   r8, r9        input pointers of stream 0 / stream 1
;   rbx           rsp+256: two DWORD per-stream counters
;   rbp           K256_shaext+128 (constants addressed as [(n-128)+rbp])
;   xmm0          implicit operand of sha256rnds2
;   xmm1, xmm2    constant+message sums for stream 0 / stream 1
;   xmm3          pshufb control loaded from K256_shaext-16; also used as
;                 the palignr temporary and reloaded at the end of each pass
;   xmm4..xmm7    message words of stream 0
;   xmm8..xmm11   message words of stream 1
;   xmm12, xmm13  state of stream 0;  xmm14, xmm15  state of stream 1
;
; Stack frame (after `and rsp,-256`):
;   [64..127+rsp]  copy of xmm12..xmm15 taken at the start of each block
;   [256+rsp]      per-stream counters (rbx)
;   [272+rsp]      stack pointer at entry
;   [280+rsp]      outer pass counter
;
; Several instructions are emitted as raw bytes:
;   DB 102,[68,]15,56,0,modrm      pshufb      xmm_reg,xmm_rm
;   DB [69,]15,56,203,modrm        sha256rnds2 xmm_reg,xmm_rm  (xmm0 implicit)
;   DB [69,]15,56,204,modrm        sha256msg1  xmm_reg,xmm_rm
;   DB [69,]15,56,205,modrm        sha256msg2  xmm_reg,xmm_rm
;   DB 102,[65,]15,58,15,modrm,4   palignr     xmm_reg,xmm_rm,4
; where 65/68/69 are REX prefixes that select xmm8..xmm15.
; ---------------------------------------------------------------------
sha256_multi_block_shaext:
	mov QWORD[8+rsp],rdi ;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_sha256_multi_block_shaext:
	mov rdi,rcx	; move the Win64 argument registers into rdi/rsi/rdx
	mov rsi,rdx
	mov rdx,r8
_shaext_shortcut:
	mov rax,rsp	; rax = rsp at this point; it is not written again before the epilogue
	push rbx
	push rbp
	lea rsp,[((-168))+rsp]	; room for xmm6..xmm15 (10 x 16 bytes) plus 8 bytes
	movaps XMMWORD[rsp],xmm6	; save xmm6..xmm15 at rax-184 .. rax-40
	movaps XMMWORD[16+rsp],xmm7
	movaps XMMWORD[32+rsp],xmm8
	movaps XMMWORD[48+rsp],xmm9
	movaps XMMWORD[(-120)+rax],xmm10
	movaps XMMWORD[(-104)+rax],xmm11
	movaps XMMWORD[(-88)+rax],xmm12
	movaps XMMWORD[(-72)+rax],xmm13
	movaps XMMWORD[(-56)+rax],xmm14
	movaps XMMWORD[(-40)+rax],xmm15
	sub rsp,288	; local frame
	shl edx,1	; two descriptors per pass, so twice as many passes
	and rsp,-256	; align the frame to 256 bytes
	lea rdi,[128+rdi]	; bias rdi by +128; slots are then addressed as [(k*32-128)+rdi]
	mov QWORD[272+rsp],rax	; record entry rsp; not read back in this function (NOTE(review): presumably for the SEH handler - confirm)
$L$body_shaext:
	lea rbx,[256+rsp]	; rbx -> per-stream counters
	lea rbp,[((K256_shaext+128))]	; rbp -> constants, biased by +128

; ---- outer loop: one pass per pair of descriptors ----
$L$oop_grande_shaext:
	mov DWORD[280+rsp],edx	; save the outer counter; edx is reused below
	xor edx,edx	; edx = running maximum of the two counts
	mov r8,QWORD[rsi]	; stream 0 pointer
	mov ecx,DWORD[8+rsi]	; stream 0 count
	cmp ecx,edx
	cmovg edx,ecx	; edx = max(edx, count), signed
	test ecx,ecx
	mov DWORD[rbx],ecx	; remember the count (mov leaves the flags from test intact)
	cmovle r8,rsp	; count <= 0: read from the stack instead of through the descriptor's pointer
	mov r9,QWORD[16+rsi]	; stream 1 pointer
	mov ecx,DWORD[24+rsi]	; stream 1 count
	cmp ecx,edx
	cmovg edx,ecx
	test ecx,ecx
	mov DWORD[4+rbx],ecx
	cmovle r9,rsp
	test edx,edx
	jz NEAR $L$done_shaext	; neither count is positive: leave

	; Load 8 bytes (one dword per stream) from each of the eight slots and
	; transpose them so that, after the pshufd ...,27 dword reversal,
	;   xmm12 = stream 0, slots {5,4,1,0}   xmm13 = stream 0, slots {7,6,3,2}
	;   xmm14 = stream 1, slots {5,4,1,0}   xmm15 = stream 1, slots {7,6,3,2}
	; (listed from dword 0 upwards).
	; NOTE(review): if the slots hold the state words A..H in order, this is
	; the ABEF/CDGH arrangement sha256rnds2 works on - confirm against the caller.
	movq xmm12,QWORD[((0-128))+rdi]
	movq xmm4,QWORD[((32-128))+rdi]
	movq xmm13,QWORD[((64-128))+rdi]
	movq xmm5,QWORD[((96-128))+rdi]
	movq xmm8,QWORD[((128-128))+rdi]
	movq xmm9,QWORD[((160-128))+rdi]
	movq xmm10,QWORD[((192-128))+rdi]
	movq xmm11,QWORD[((224-128))+rdi]
	punpckldq xmm12,xmm4
	punpckldq xmm13,xmm5
	punpckldq xmm8,xmm9
	punpckldq xmm10,xmm11
	movdqa xmm3,XMMWORD[((K256_shaext-16))]	; pshufb control (NOTE(review): presumably a byte-swap mask - confirm)
	movdqa xmm14,xmm12
	movdqa xmm15,xmm13
	punpcklqdq xmm12,xmm8
	punpcklqdq xmm13,xmm10
	punpckhqdq xmm14,xmm8
	punpckhqdq xmm15,xmm10
	pshufd xmm12,xmm12,27
	pshufd xmm13,xmm13,27
	pshufd xmm14,xmm14,27
	pshufd xmm15,xmm15,27
	jmp NEAR $L$oop_shaext

ALIGN 32
; ---- inner loop: one 64-byte block from each stream per iteration ----
; The two streams are interleaved instruction by instruction.
$L$oop_shaext:
	movdqu xmm4,XMMWORD[r8]	; stream 0: 64 bytes -> xmm4..xmm7
	movdqu xmm8,XMMWORD[r9]	; stream 1: 64 bytes -> xmm8..xmm11
	movdqu xmm5,XMMWORD[16+r8]
	movdqu xmm9,XMMWORD[16+r9]
	movdqu xmm6,XMMWORD[32+r8]
DB 102,15,56,0,227	; pshufb xmm4,xmm3
	movdqu xmm10,XMMWORD[32+r9]
DB 102,68,15,56,0,195	; pshufb xmm8,xmm3
	movdqu xmm7,XMMWORD[48+r8]
	lea r8,[64+r8]	; advance stream 0 by one block
	movdqu xmm11,XMMWORD[48+r9]
	lea r9,[64+r9]	; advance stream 1 by one block

	; constants at K256_shaext+0
	movdqa xmm0,XMMWORD[((0-128))+rbp]
DB 102,15,56,0,235	; pshufb xmm5,xmm3
	paddd xmm0,xmm4
	pxor xmm4,xmm12	; undone by the second `pxor xmm4,xmm12` below; purpose not evident from the code
	movdqa xmm1,xmm0
	movdqa xmm2,XMMWORD[((0-128))+rbp]
DB 102,68,15,56,0,203	; pshufb xmm9,xmm3
	paddd xmm2,xmm8
	movdqa XMMWORD[80+rsp],xmm13	; keep the starting xmm13 for the final add
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	pxor xmm8,xmm14	; undone by the second `pxor xmm8,xmm14` below
	movdqa xmm0,xmm2
	movdqa XMMWORD[112+rsp],xmm15	; keep the starting xmm15
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
	pshufd xmm0,xmm1,0x0e	; move the upper two dwords of the sum into the low half of xmm0
	pxor xmm4,xmm12
	movdqa XMMWORD[64+rsp],xmm12	; keep the starting xmm12
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e
	pxor xmm8,xmm14
	movdqa XMMWORD[96+rsp],xmm14	; keep the starting xmm14

	; constants at K256_shaext+16
	movdqa xmm1,XMMWORD[((16-128))+rbp]
	paddd xmm1,xmm5
DB 102,15,56,0,243	; pshufb xmm6,xmm3
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((16-128))+rbp]
	paddd xmm2,xmm9
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	prefetcht0 [127+r8]
DB 102,15,56,0,251	; pshufb xmm7,xmm3
DB 102,68,15,56,0,211	; pshufb xmm10,xmm3
	prefetcht0 [127+r9]
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
	pshufd xmm0,xmm1,0x0e
DB 102,68,15,56,0,219	; pshufb xmm11,xmm3
DB 15,56,204,229	; sha256msg1 xmm4,xmm5
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+32
	movdqa xmm1,XMMWORD[((32-128))+rbp]
	paddd xmm1,xmm6
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((32-128))+rbp]
	paddd xmm2,xmm10
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
DB 69,15,56,204,193	; sha256msg1 xmm8,xmm9
	movdqa xmm0,xmm2
	movdqa xmm3,xmm7	; from here on xmm3 serves as the palignr temporary
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
	pshufd xmm0,xmm1,0x0e
DB 102,15,58,15,222,4	; palignr xmm3,xmm6,4
	paddd xmm4,xmm3
	movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4	; palignr xmm3,xmm10,4
DB 15,56,204,238	; sha256msg1 xmm5,xmm6
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+48
	movdqa xmm1,XMMWORD[((48-128))+rbp]
	paddd xmm1,xmm7
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,202	; sha256msg1 xmm9,xmm10
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((48-128))+rbp]
	paddd xmm8,xmm3
	paddd xmm2,xmm11
DB 15,56,205,231	; sha256msg2 xmm4,xmm7
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm4
DB 102,15,58,15,223,4	; palignr xmm3,xmm7,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,195	; sha256msg2 xmm8,xmm11
	pshufd xmm0,xmm1,0x0e
	paddd xmm5,xmm3
	movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4	; palignr xmm3,xmm11,4
DB 15,56,204,247	; sha256msg1 xmm6,xmm7
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+64
	movdqa xmm1,XMMWORD[((64-128))+rbp]
	paddd xmm1,xmm4
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,211	; sha256msg1 xmm10,xmm11
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((64-128))+rbp]
	paddd xmm9,xmm3
	paddd xmm2,xmm8
DB 15,56,205,236	; sha256msg2 xmm5,xmm4
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm5
DB 102,15,58,15,220,4	; palignr xmm3,xmm4,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,200	; sha256msg2 xmm9,xmm8
	pshufd xmm0,xmm1,0x0e
	paddd xmm6,xmm3
	movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4	; palignr xmm3,xmm8,4
DB 15,56,204,252	; sha256msg1 xmm7,xmm4
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+80
	movdqa xmm1,XMMWORD[((80-128))+rbp]
	paddd xmm1,xmm5
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,216	; sha256msg1 xmm11,xmm8
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((80-128))+rbp]
	paddd xmm10,xmm3
	paddd xmm2,xmm9
DB 15,56,205,245	; sha256msg2 xmm6,xmm5
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm6
DB 102,15,58,15,221,4	; palignr xmm3,xmm5,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,209	; sha256msg2 xmm10,xmm9
	pshufd xmm0,xmm1,0x0e
	paddd xmm7,xmm3
	movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4	; palignr xmm3,xmm9,4
DB 15,56,204,229	; sha256msg1 xmm4,xmm5
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+96
	movdqa xmm1,XMMWORD[((96-128))+rbp]
	paddd xmm1,xmm6
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,193	; sha256msg1 xmm8,xmm9
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((96-128))+rbp]
	paddd xmm11,xmm3
	paddd xmm2,xmm10
DB 15,56,205,254	; sha256msg2 xmm7,xmm6
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm7
DB 102,15,58,15,222,4	; palignr xmm3,xmm6,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,218	; sha256msg2 xmm11,xmm10
	pshufd xmm0,xmm1,0x0e
	paddd xmm4,xmm3
	movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4	; palignr xmm3,xmm10,4
DB 15,56,204,238	; sha256msg1 xmm5,xmm6
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+112
	movdqa xmm1,XMMWORD[((112-128))+rbp]
	paddd xmm1,xmm7
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,202	; sha256msg1 xmm9,xmm10
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((112-128))+rbp]
	paddd xmm8,xmm3
	paddd xmm2,xmm11
DB 15,56,205,231	; sha256msg2 xmm4,xmm7
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm4
DB 102,15,58,15,223,4	; palignr xmm3,xmm7,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,195	; sha256msg2 xmm8,xmm11
	pshufd xmm0,xmm1,0x0e
	paddd xmm5,xmm3
	movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4	; palignr xmm3,xmm11,4
DB 15,56,204,247	; sha256msg1 xmm6,xmm7
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+128
	movdqa xmm1,XMMWORD[((128-128))+rbp]
	paddd xmm1,xmm4
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,211	; sha256msg1 xmm10,xmm11
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((128-128))+rbp]
	paddd xmm9,xmm3
	paddd xmm2,xmm8
DB 15,56,205,236	; sha256msg2 xmm5,xmm4
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm5
DB 102,15,58,15,220,4	; palignr xmm3,xmm4,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,200	; sha256msg2 xmm9,xmm8
	pshufd xmm0,xmm1,0x0e
	paddd xmm6,xmm3
	movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4	; palignr xmm3,xmm8,4
DB 15,56,204,252	; sha256msg1 xmm7,xmm4
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+144
	movdqa xmm1,XMMWORD[((144-128))+rbp]
	paddd xmm1,xmm5
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,216	; sha256msg1 xmm11,xmm8
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((144-128))+rbp]
	paddd xmm10,xmm3
	paddd xmm2,xmm9
DB 15,56,205,245	; sha256msg2 xmm6,xmm5
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm6
DB 102,15,58,15,221,4	; palignr xmm3,xmm5,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,209	; sha256msg2 xmm10,xmm9
	pshufd xmm0,xmm1,0x0e
	paddd xmm7,xmm3
	movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4	; palignr xmm3,xmm9,4
DB 15,56,204,229	; sha256msg1 xmm4,xmm5
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+160
	movdqa xmm1,XMMWORD[((160-128))+rbp]
	paddd xmm1,xmm6
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,193	; sha256msg1 xmm8,xmm9
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((160-128))+rbp]
	paddd xmm11,xmm3
	paddd xmm2,xmm10
DB 15,56,205,254	; sha256msg2 xmm7,xmm6
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm7
DB 102,15,58,15,222,4	; palignr xmm3,xmm6,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,218	; sha256msg2 xmm11,xmm10
	pshufd xmm0,xmm1,0x0e
	paddd xmm4,xmm3
	movdqa xmm3,xmm11
DB 102,65,15,58,15,218,4	; palignr xmm3,xmm10,4
DB 15,56,204,238	; sha256msg1 xmm5,xmm6
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+176
	movdqa xmm1,XMMWORD[((176-128))+rbp]
	paddd xmm1,xmm7
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,202	; sha256msg1 xmm9,xmm10
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((176-128))+rbp]
	paddd xmm8,xmm3
	paddd xmm2,xmm11
DB 15,56,205,231	; sha256msg2 xmm4,xmm7
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm4
DB 102,15,58,15,223,4	; palignr xmm3,xmm7,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,195	; sha256msg2 xmm8,xmm11
	pshufd xmm0,xmm1,0x0e
	paddd xmm5,xmm3
	movdqa xmm3,xmm8
DB 102,65,15,58,15,219,4	; palignr xmm3,xmm11,4
DB 15,56,204,247	; sha256msg1 xmm6,xmm7
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+192
	movdqa xmm1,XMMWORD[((192-128))+rbp]
	paddd xmm1,xmm4
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,211	; sha256msg1 xmm10,xmm11
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((192-128))+rbp]
	paddd xmm9,xmm3
	paddd xmm2,xmm8
DB 15,56,205,236	; sha256msg2 xmm5,xmm4
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm5
DB 102,15,58,15,220,4	; palignr xmm3,xmm4,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,200	; sha256msg2 xmm9,xmm8
	pshufd xmm0,xmm1,0x0e
	paddd xmm6,xmm3
	movdqa xmm3,xmm9
DB 102,65,15,58,15,216,4	; palignr xmm3,xmm8,4
DB 15,56,204,252	; sha256msg1 xmm7,xmm4
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+208
	movdqa xmm1,XMMWORD[((208-128))+rbp]
	paddd xmm1,xmm5
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
DB 69,15,56,204,216	; sha256msg1 xmm11,xmm8
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((208-128))+rbp]
	paddd xmm10,xmm3
	paddd xmm2,xmm9
DB 15,56,205,245	; sha256msg2 xmm6,xmm5
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	movdqa xmm3,xmm6
DB 102,15,58,15,221,4	; palignr xmm3,xmm5,4
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,209	; sha256msg2 xmm10,xmm9
	pshufd xmm0,xmm1,0x0e
	paddd xmm7,xmm3
	movdqa xmm3,xmm10
DB 102,65,15,58,15,217,4	; palignr xmm3,xmm9,4
	nop	; occupies the position where earlier steps issue sha256msg1
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e

	; constants at K256_shaext+224
	movdqa xmm1,XMMWORD[((224-128))+rbp]
	paddd xmm1,xmm6
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
	movdqa xmm0,xmm1
	movdqa xmm2,XMMWORD[((224-128))+rbp]
	paddd xmm11,xmm3
	paddd xmm2,xmm10
DB 15,56,205,254	; sha256msg2 xmm7,xmm6
	nop
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	mov ecx,1	; ecx = 1, compared against the per-stream counters below
	pxor xmm6,xmm6	; xmm6 = 0 (its message word is no longer needed)
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
DB 69,15,56,205,218	; sha256msg2 xmm11,xmm10
	pshufd xmm0,xmm1,0x0e

	; constants at K256_shaext+240 (last group used); counter bookkeeping is interleaved
	movdqa xmm1,XMMWORD[((240-128))+rbp]
	paddd xmm1,xmm7
	movq xmm7,QWORD[rbx]	; xmm7 = the two per-stream counters
	nop
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e
	movdqa xmm2,XMMWORD[((240-128))+rbp]
	paddd xmm2,xmm11
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
	movdqa xmm0,xmm1
	cmp ecx,DWORD[rbx]
	cmovge r8,rsp	; stream 0 counter <= 1: no further block, so redirect r8 to the stack
	cmp ecx,DWORD[4+rbx]
	cmovge r9,rsp	; same for stream 1
	pshufd xmm9,xmm7,0x00	; broadcast the stream 0 counter
DB 69,15,56,203,236	; sha256rnds2 xmm13,xmm12
	movdqa xmm0,xmm2
	pshufd xmm10,xmm7,0x55	; broadcast the stream 1 counter
	movdqa xmm11,xmm7
DB 69,15,56,203,254	; sha256rnds2 xmm15,xmm14
	pshufd xmm0,xmm1,0x0e
	pcmpgtd xmm9,xmm6	; xmm9 = all-ones if the stream 0 counter > 0, else zero
	pcmpgtd xmm10,xmm6	; xmm10 = same for stream 1
DB 69,15,56,203,229	; sha256rnds2 xmm12,xmm13
	pshufd xmm0,xmm2,0x0e
	pcmpgtd xmm11,xmm6	; per-counter mask: -1 where the counter > 0
	movdqa xmm3,XMMWORD[((K256_shaext-16))]	; restore the pshufb control for the next block
DB 69,15,56,203,247	; sha256rnds2 xmm14,xmm15
	pand xmm13,xmm9	; zero the new state of a stream whose counter is not positive ...
	pand xmm15,xmm10
	pand xmm12,xmm9
	pand xmm14,xmm10
	paddd xmm11,xmm7	; counters minus one where they were positive
	paddd xmm13,XMMWORD[80+rsp]	; ... and add the state saved at the top of the block
	paddd xmm15,XMMWORD[112+rsp]
	paddd xmm12,XMMWORD[64+rsp]
	paddd xmm14,XMMWORD[96+rsp]
	movq QWORD[rbx],xmm11	; store the updated counters
	dec edx
	jnz NEAR $L$oop_shaext	; edx was set to the larger of the two counts

	mov edx,DWORD[280+rsp]	; reload the outer counter

	; Undo the load-time transposition and write 8 bytes back to each slot.
	pshufd xmm12,xmm12,27
	pshufd xmm13,xmm13,27
	pshufd xmm14,xmm14,27
	pshufd xmm15,xmm15,27
	movdqa xmm5,xmm12
	movdqa xmm6,xmm13
	punpckldq xmm12,xmm14
	punpckhdq xmm5,xmm14
	punpckldq xmm13,xmm15
	punpckhdq xmm6,xmm15
	movq QWORD[(0-128)+rdi],xmm12
	psrldq xmm12,8
	movq QWORD[(128-128)+rdi],xmm5
	psrldq xmm5,8
	movq QWORD[(32-128)+rdi],xmm12
	movq QWORD[(160-128)+rdi],xmm5
	movq QWORD[(64-128)+rdi],xmm13
	psrldq xmm13,8
	movq QWORD[(192-128)+rdi],xmm6
	psrldq xmm6,8
	movq QWORD[(96-128)+rdi],xmm13
	movq QWORD[(224-128)+rdi],xmm6
	lea rdi,[8+rdi]	; advance by the two dwords just written in each slot
	lea rsi,[32+rsi]	; advance by two 16-byte descriptors
	dec edx
	jnz NEAR $L$oop_grande_shaext
$L$done_shaext:
	; rax still holds the entry rsp set at _shaext_shortcut, so it is not reloaded from [272+rsp] here
	movaps xmm6,XMMWORD[((-184))+rax]
	movaps xmm7,XMMWORD[((-168))+rax]
	movaps xmm8,XMMWORD[((-152))+rax]
	movaps xmm9,XMMWORD[((-136))+rax]
	movaps xmm10,XMMWORD[((-120))+rax]
	movaps xmm11,XMMWORD[((-104))+rax]
	movaps xmm12,XMMWORD[((-88))+rax]
	movaps xmm13,XMMWORD[((-72))+rax]
	movaps xmm14,XMMWORD[((-56))+rax]
	movaps xmm15,XMMWORD[((-40))+rax]
	mov rbp,QWORD[((-16))+rax]
	mov rbx,QWORD[((-8))+rax]
	lea rsp,[rax]
$L$epilogue_shaext:
	mov rdi,QWORD[8+rsp] ;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_sha256_multi_block_shaext:

ALIGN 32
; sha256_multi_block_avx: entry point; the remainder of the function follows on the next lines.
sha256_multi_block_avx:
	mov QWORD[8+rsp],rdi ;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_sha256_multi_block_avx: mov rdi,rcx mov rsi,rdx mov rdx,r8 _avx_shortcut: shr rcx,32 cmp edx,2 jb NEAR $L$avx test ecx,32 jnz NEAR _avx2_shortcut jmp NEAR $L$avx ALIGN 32 $L$avx: mov rax,rsp push rbx push rbp lea rsp,[((-168))+rsp] movaps XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[(-120)+rax],xmm10 movaps XMMWORD[(-104)+rax],xmm11 movaps XMMWORD[(-88)+rax],xmm12 movaps XMMWORD[(-72)+rax],xmm13 movaps XMMWORD[(-56)+rax],xmm14 movaps XMMWORD[(-40)+rax],xmm15 sub rsp,288 and rsp,-256 mov QWORD[272+rsp],rax $L$body_avx: lea rbp,[((K256+128))] lea rbx,[256+rsp] lea rdi,[128+rdi] $L$oop_grande_avx: mov DWORD[280+rsp],edx xor edx,edx mov r8,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r8,rbp mov r9,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r9,rbp mov r10,QWORD[32+rsi] mov ecx,DWORD[40+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[8+rbx],ecx cmovle r10,rbp mov r11,QWORD[48+rsi] mov ecx,DWORD[56+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[12+rbx],ecx cmovle r11,rbp test edx,edx jz NEAR $L$done_avx vmovdqu xmm8,XMMWORD[((0-128))+rdi] lea rax,[128+rsp] vmovdqu xmm9,XMMWORD[((32-128))+rdi] vmovdqu xmm10,XMMWORD[((64-128))+rdi] vmovdqu xmm11,XMMWORD[((96-128))+rdi] vmovdqu xmm12,XMMWORD[((128-128))+rdi] vmovdqu xmm13,XMMWORD[((160-128))+rdi] vmovdqu xmm14,XMMWORD[((192-128))+rdi] vmovdqu xmm15,XMMWORD[((224-128))+rdi] vmovdqu xmm6,XMMWORD[$L$pbswap] jmp NEAR $L$oop_avx ALIGN 32 $L$oop_avx: vpxor xmm4,xmm10,xmm9 vmovd xmm5,DWORD[r8] vmovd xmm0,DWORD[r9] vpinsrd xmm5,xmm5,DWORD[r10],1 vpinsrd xmm0,xmm0,DWORD[r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm12,6 vpslld xmm2,xmm12,26 vmovdqu XMMWORD[(0-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm15 vpsrld xmm1,xmm12,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,21 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] vpxor 
xmm7,xmm7,xmm1 vpsrld xmm1,xmm12,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,7 vpandn xmm0,xmm12,xmm14 vpand xmm3,xmm12,xmm13 vpxor xmm7,xmm7,xmm1 vpsrld xmm15,xmm8,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm8,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm9,xmm8 vpxor xmm15,xmm15,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm8,13 vpslld xmm2,xmm8,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm15,xmm1 vpsrld xmm1,xmm8,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,10 vpxor xmm15,xmm9,xmm4 vpaddd xmm11,xmm11,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm15,xmm15,xmm5 vpaddd xmm15,xmm15,xmm7 vmovd xmm5,DWORD[4+r8] vmovd xmm0,DWORD[4+r9] vpinsrd xmm5,xmm5,DWORD[4+r10],1 vpinsrd xmm0,xmm0,DWORD[4+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm11,6 vpslld xmm2,xmm11,26 vmovdqu XMMWORD[(16-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm14 vpsrld xmm1,xmm11,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,21 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm11,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,7 vpandn xmm0,xmm11,xmm13 vpand xmm4,xmm11,xmm12 vpxor xmm7,xmm7,xmm1 vpsrld xmm14,xmm15,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm15,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm8,xmm15 vpxor xmm14,xmm14,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm15,13 vpslld xmm2,xmm15,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm14,xmm1 vpsrld xmm1,xmm15,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,10 vpxor xmm14,xmm8,xmm3 vpaddd xmm10,xmm10,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm14,xmm14,xmm5 vpaddd xmm14,xmm14,xmm7 vmovd xmm5,DWORD[8+r8] vmovd xmm0,DWORD[8+r9] vpinsrd xmm5,xmm5,DWORD[8+r10],1 vpinsrd xmm0,xmm0,DWORD[8+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm10,6 vpslld xmm2,xmm10,26 vmovdqu XMMWORD[(32-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm13 vpsrld xmm1,xmm10,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,21 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm10,25 vpxor xmm7,xmm7,xmm2 vpslld 
xmm2,xmm10,7 vpandn xmm0,xmm10,xmm12 vpand xmm3,xmm10,xmm11 vpxor xmm7,xmm7,xmm1 vpsrld xmm13,xmm14,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm14,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm15,xmm14 vpxor xmm13,xmm13,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm14,13 vpslld xmm2,xmm14,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm13,xmm1 vpsrld xmm1,xmm14,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,10 vpxor xmm13,xmm15,xmm4 vpaddd xmm9,xmm9,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm13,xmm13,xmm5 vpaddd xmm13,xmm13,xmm7 vmovd xmm5,DWORD[12+r8] vmovd xmm0,DWORD[12+r9] vpinsrd xmm5,xmm5,DWORD[12+r10],1 vpinsrd xmm0,xmm0,DWORD[12+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm9,6 vpslld xmm2,xmm9,26 vmovdqu XMMWORD[(48-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm12 vpsrld xmm1,xmm9,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,21 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm9,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,7 vpandn xmm0,xmm9,xmm11 vpand xmm4,xmm9,xmm10 vpxor xmm7,xmm7,xmm1 vpsrld xmm12,xmm13,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm13,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm14,xmm13 vpxor xmm12,xmm12,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm13,13 vpslld xmm2,xmm13,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm12,xmm1 vpsrld xmm1,xmm13,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,10 vpxor xmm12,xmm14,xmm3 vpaddd xmm8,xmm8,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm12,xmm12,xmm5 vpaddd xmm12,xmm12,xmm7 vmovd xmm5,DWORD[16+r8] vmovd xmm0,DWORD[16+r9] vpinsrd xmm5,xmm5,DWORD[16+r10],1 vpinsrd xmm0,xmm0,DWORD[16+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm8,6 vpslld xmm2,xmm8,26 vmovdqu XMMWORD[(64-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm11 vpsrld xmm1,xmm8,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,21 vpaddd xmm5,xmm5,XMMWORD[rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm8,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,7 vpandn xmm0,xmm8,xmm10 vpand xmm3,xmm8,xmm9 vpxor 
xmm7,xmm7,xmm1 vpsrld xmm11,xmm12,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm12,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm13,xmm12 vpxor xmm11,xmm11,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm12,13 vpslld xmm2,xmm12,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm11,xmm1 vpsrld xmm1,xmm12,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,10 vpxor xmm11,xmm13,xmm4 vpaddd xmm15,xmm15,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm11,xmm11,xmm5 vpaddd xmm11,xmm11,xmm7 vmovd xmm5,DWORD[20+r8] vmovd xmm0,DWORD[20+r9] vpinsrd xmm5,xmm5,DWORD[20+r10],1 vpinsrd xmm0,xmm0,DWORD[20+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm15,6 vpslld xmm2,xmm15,26 vmovdqu XMMWORD[(80-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm10 vpsrld xmm1,xmm15,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,21 vpaddd xmm5,xmm5,XMMWORD[32+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm15,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,7 vpandn xmm0,xmm15,xmm9 vpand xmm4,xmm15,xmm8 vpxor xmm7,xmm7,xmm1 vpsrld xmm10,xmm11,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm11,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm12,xmm11 vpxor xmm10,xmm10,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm11,13 vpslld xmm2,xmm11,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm10,xmm1 vpsrld xmm1,xmm11,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,10 vpxor xmm10,xmm12,xmm3 vpaddd xmm14,xmm14,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm10,xmm10,xmm5 vpaddd xmm10,xmm10,xmm7 vmovd xmm5,DWORD[24+r8] vmovd xmm0,DWORD[24+r9] vpinsrd xmm5,xmm5,DWORD[24+r10],1 vpinsrd xmm0,xmm0,DWORD[24+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm14,6 vpslld xmm2,xmm14,26 vmovdqu XMMWORD[(96-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm9 vpsrld xmm1,xmm14,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,21 vpaddd xmm5,xmm5,XMMWORD[64+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm14,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,7 vpandn xmm0,xmm14,xmm8 vpand xmm3,xmm14,xmm15 vpxor xmm7,xmm7,xmm1 vpsrld xmm9,xmm10,2 vpxor xmm7,xmm7,xmm2 vpslld 
xmm1,xmm10,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm11,xmm10 vpxor xmm9,xmm9,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm10,13 vpslld xmm2,xmm10,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm9,xmm1 vpsrld xmm1,xmm10,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,10 vpxor xmm9,xmm11,xmm4 vpaddd xmm13,xmm13,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm9,xmm9,xmm5 vpaddd xmm9,xmm9,xmm7 vmovd xmm5,DWORD[28+r8] vmovd xmm0,DWORD[28+r9] vpinsrd xmm5,xmm5,DWORD[28+r10],1 vpinsrd xmm0,xmm0,DWORD[28+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm13,6 vpslld xmm2,xmm13,26 vmovdqu XMMWORD[(112-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm8 vpsrld xmm1,xmm13,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,21 vpaddd xmm5,xmm5,XMMWORD[96+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm13,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,7 vpandn xmm0,xmm13,xmm15 vpand xmm4,xmm13,xmm14 vpxor xmm7,xmm7,xmm1 vpsrld xmm8,xmm9,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm9,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm10,xmm9 vpxor xmm8,xmm8,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm9,13 vpslld xmm2,xmm9,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm8,xmm1 vpsrld xmm1,xmm9,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,10 vpxor xmm8,xmm10,xmm3 vpaddd xmm12,xmm12,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm8,xmm8,xmm5 vpaddd xmm8,xmm8,xmm7 add rbp,256 vmovd xmm5,DWORD[32+r8] vmovd xmm0,DWORD[32+r9] vpinsrd xmm5,xmm5,DWORD[32+r10],1 vpinsrd xmm0,xmm0,DWORD[32+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm12,6 vpslld xmm2,xmm12,26 vmovdqu XMMWORD[(128-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm15 vpsrld xmm1,xmm12,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,21 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm12,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,7 vpandn xmm0,xmm12,xmm14 vpand xmm3,xmm12,xmm13 vpxor xmm7,xmm7,xmm1 vpsrld xmm15,xmm8,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm8,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm9,xmm8 vpxor 
xmm15,xmm15,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm8,13 vpslld xmm2,xmm8,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm15,xmm1 vpsrld xmm1,xmm8,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,10 vpxor xmm15,xmm9,xmm4 vpaddd xmm11,xmm11,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm15,xmm15,xmm5 vpaddd xmm15,xmm15,xmm7 vmovd xmm5,DWORD[36+r8] vmovd xmm0,DWORD[36+r9] vpinsrd xmm5,xmm5,DWORD[36+r10],1 vpinsrd xmm0,xmm0,DWORD[36+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm11,6 vpslld xmm2,xmm11,26 vmovdqu XMMWORD[(144-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm14 vpsrld xmm1,xmm11,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,21 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm11,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,7 vpandn xmm0,xmm11,xmm13 vpand xmm4,xmm11,xmm12 vpxor xmm7,xmm7,xmm1 vpsrld xmm14,xmm15,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm15,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm8,xmm15 vpxor xmm14,xmm14,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm15,13 vpslld xmm2,xmm15,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm14,xmm1 vpsrld xmm1,xmm15,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,10 vpxor xmm14,xmm8,xmm3 vpaddd xmm10,xmm10,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm14,xmm14,xmm5 vpaddd xmm14,xmm14,xmm7 vmovd xmm5,DWORD[40+r8] vmovd xmm0,DWORD[40+r9] vpinsrd xmm5,xmm5,DWORD[40+r10],1 vpinsrd xmm0,xmm0,DWORD[40+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm10,6 vpslld xmm2,xmm10,26 vmovdqu XMMWORD[(160-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm13 vpsrld xmm1,xmm10,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,21 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm10,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,7 vpandn xmm0,xmm10,xmm12 vpand xmm3,xmm10,xmm11 vpxor xmm7,xmm7,xmm1 vpsrld xmm13,xmm14,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm14,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm15,xmm14 vpxor xmm13,xmm13,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld 
xmm1,xmm14,13 vpslld xmm2,xmm14,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm13,xmm1 vpsrld xmm1,xmm14,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,10 vpxor xmm13,xmm15,xmm4 vpaddd xmm9,xmm9,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm13,xmm13,xmm5 vpaddd xmm13,xmm13,xmm7 vmovd xmm5,DWORD[44+r8] vmovd xmm0,DWORD[44+r9] vpinsrd xmm5,xmm5,DWORD[44+r10],1 vpinsrd xmm0,xmm0,DWORD[44+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm9,6 vpslld xmm2,xmm9,26 vmovdqu XMMWORD[(176-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm12 vpsrld xmm1,xmm9,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,21 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm9,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,7 vpandn xmm0,xmm9,xmm11 vpand xmm4,xmm9,xmm10 vpxor xmm7,xmm7,xmm1 vpsrld xmm12,xmm13,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm13,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm14,xmm13 vpxor xmm12,xmm12,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm13,13 vpslld xmm2,xmm13,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm12,xmm1 vpsrld xmm1,xmm13,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,10 vpxor xmm12,xmm14,xmm3 vpaddd xmm8,xmm8,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm12,xmm12,xmm5 vpaddd xmm12,xmm12,xmm7 vmovd xmm5,DWORD[48+r8] vmovd xmm0,DWORD[48+r9] vpinsrd xmm5,xmm5,DWORD[48+r10],1 vpinsrd xmm0,xmm0,DWORD[48+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm8,6 vpslld xmm2,xmm8,26 vmovdqu XMMWORD[(192-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm11 vpsrld xmm1,xmm8,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,21 vpaddd xmm5,xmm5,XMMWORD[rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm8,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,7 vpandn xmm0,xmm8,xmm10 vpand xmm3,xmm8,xmm9 vpxor xmm7,xmm7,xmm1 vpsrld xmm11,xmm12,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm12,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm13,xmm12 vpxor xmm11,xmm11,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm12,13 vpslld xmm2,xmm12,19 vpaddd xmm5,xmm5,xmm0 vpand 
xmm4,xmm4,xmm3 vpxor xmm7,xmm11,xmm1 vpsrld xmm1,xmm12,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,10 vpxor xmm11,xmm13,xmm4 vpaddd xmm15,xmm15,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm11,xmm11,xmm5 vpaddd xmm11,xmm11,xmm7 vmovd xmm5,DWORD[52+r8] vmovd xmm0,DWORD[52+r9] vpinsrd xmm5,xmm5,DWORD[52+r10],1 vpinsrd xmm0,xmm0,DWORD[52+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm15,6 vpslld xmm2,xmm15,26 vmovdqu XMMWORD[(208-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm10 vpsrld xmm1,xmm15,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,21 vpaddd xmm5,xmm5,XMMWORD[32+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm15,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,7 vpandn xmm0,xmm15,xmm9 vpand xmm4,xmm15,xmm8 vpxor xmm7,xmm7,xmm1 vpsrld xmm10,xmm11,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm11,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm12,xmm11 vpxor xmm10,xmm10,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm11,13 vpslld xmm2,xmm11,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm10,xmm1 vpsrld xmm1,xmm11,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,10 vpxor xmm10,xmm12,xmm3 vpaddd xmm14,xmm14,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm10,xmm10,xmm5 vpaddd xmm10,xmm10,xmm7 vmovd xmm5,DWORD[56+r8] vmovd xmm0,DWORD[56+r9] vpinsrd xmm5,xmm5,DWORD[56+r10],1 vpinsrd xmm0,xmm0,DWORD[56+r11],1 vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm14,6 vpslld xmm2,xmm14,26 vmovdqu XMMWORD[(224-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm9 vpsrld xmm1,xmm14,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,21 vpaddd xmm5,xmm5,XMMWORD[64+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm14,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,7 vpandn xmm0,xmm14,xmm8 vpand xmm3,xmm14,xmm15 vpxor xmm7,xmm7,xmm1 vpsrld xmm9,xmm10,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm10,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm11,xmm10 vpxor xmm9,xmm9,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm10,13 vpslld xmm2,xmm10,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm9,xmm1 vpsrld xmm1,xmm10,22 vpxor 
xmm7,xmm7,xmm2 vpslld xmm2,xmm10,10 vpxor xmm9,xmm11,xmm4 vpaddd xmm13,xmm13,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm9,xmm9,xmm5 vpaddd xmm9,xmm9,xmm7 vmovd xmm5,DWORD[60+r8] lea r8,[64+r8] vmovd xmm0,DWORD[60+r9] lea r9,[64+r9] vpinsrd xmm5,xmm5,DWORD[60+r10],1 lea r10,[64+r10] vpinsrd xmm0,xmm0,DWORD[60+r11],1 lea r11,[64+r11] vpunpckldq xmm5,xmm5,xmm0 vpshufb xmm5,xmm5,xmm6 vpsrld xmm7,xmm13,6 vpslld xmm2,xmm13,26 vmovdqu XMMWORD[(240-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm8 vpsrld xmm1,xmm13,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,21 vpaddd xmm5,xmm5,XMMWORD[96+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm13,25 vpxor xmm7,xmm7,xmm2 prefetcht0 [63+r8] vpslld xmm2,xmm13,7 vpandn xmm0,xmm13,xmm15 vpand xmm4,xmm13,xmm14 prefetcht0 [63+r9] vpxor xmm7,xmm7,xmm1 vpsrld xmm8,xmm9,2 vpxor xmm7,xmm7,xmm2 prefetcht0 [63+r10] vpslld xmm1,xmm9,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm10,xmm9 prefetcht0 [63+r11] vpxor xmm8,xmm8,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm9,13 vpslld xmm2,xmm9,19 vpaddd xmm5,xmm5,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm8,xmm1 vpsrld xmm1,xmm9,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,10 vpxor xmm8,xmm10,xmm3 vpaddd xmm12,xmm12,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm8,xmm8,xmm5 vpaddd xmm8,xmm8,xmm7 add rbp,256 vmovdqu xmm5,XMMWORD[((0-128))+rax] mov ecx,3 jmp NEAR $L$oop_16_xx_avx ALIGN 32 $L$oop_16_xx_avx: vmovdqu xmm6,XMMWORD[((16-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((144-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((224-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm12,6 vpslld xmm2,xmm12,26 vmovdqu XMMWORD[(0-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm15 
vpsrld xmm1,xmm12,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,21 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm12,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,7 vpandn xmm0,xmm12,xmm14 vpand xmm3,xmm12,xmm13 vpxor xmm7,xmm7,xmm1 vpsrld xmm15,xmm8,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm8,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm9,xmm8 vpxor xmm15,xmm15,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm8,13 vpslld xmm2,xmm8,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm15,xmm1 vpsrld xmm1,xmm8,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,10 vpxor xmm15,xmm9,xmm4 vpaddd xmm11,xmm11,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm15,xmm15,xmm5 vpaddd xmm15,xmm15,xmm7 vmovdqu xmm5,XMMWORD[((32-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((160-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((240-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm11,6 vpslld xmm2,xmm11,26 vmovdqu XMMWORD[(16-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm14 vpsrld xmm1,xmm11,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,21 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm11,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,7 vpandn xmm0,xmm11,xmm13 vpand xmm4,xmm11,xmm12 vpxor xmm7,xmm7,xmm1 vpsrld xmm14,xmm15,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm15,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm8,xmm15 vpxor xmm14,xmm14,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm15,13 vpslld xmm2,xmm15,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm14,xmm1 vpsrld xmm1,xmm15,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,10 vpxor xmm14,xmm8,xmm3 vpaddd xmm10,xmm10,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd 
xmm14,xmm14,xmm6 vpaddd xmm14,xmm14,xmm7 vmovdqu xmm6,XMMWORD[((48-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((176-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((0-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm10,6 vpslld xmm2,xmm10,26 vmovdqu XMMWORD[(32-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm13 vpsrld xmm1,xmm10,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,21 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm10,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,7 vpandn xmm0,xmm10,xmm12 vpand xmm3,xmm10,xmm11 vpxor xmm7,xmm7,xmm1 vpsrld xmm13,xmm14,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm14,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm15,xmm14 vpxor xmm13,xmm13,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm14,13 vpslld xmm2,xmm14,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm13,xmm1 vpsrld xmm1,xmm14,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,10 vpxor xmm13,xmm15,xmm4 vpaddd xmm9,xmm9,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm13,xmm13,xmm5 vpaddd xmm13,xmm13,xmm7 vmovdqu xmm5,XMMWORD[((64-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((192-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((16-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm9,6 vpslld xmm2,xmm9,26 vmovdqu XMMWORD[(48-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm12 
vpsrld xmm1,xmm9,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,21 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm9,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,7 vpandn xmm0,xmm9,xmm11 vpand xmm4,xmm9,xmm10 vpxor xmm7,xmm7,xmm1 vpsrld xmm12,xmm13,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm13,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm14,xmm13 vpxor xmm12,xmm12,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm13,13 vpslld xmm2,xmm13,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm12,xmm1 vpsrld xmm1,xmm13,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,10 vpxor xmm12,xmm14,xmm3 vpaddd xmm8,xmm8,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm12,xmm12,xmm6 vpaddd xmm12,xmm12,xmm7 vmovdqu xmm6,XMMWORD[((80-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((208-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((32-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm8,6 vpslld xmm2,xmm8,26 vmovdqu XMMWORD[(64-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm11 vpsrld xmm1,xmm8,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,21 vpaddd xmm5,xmm5,XMMWORD[rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm8,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,7 vpandn xmm0,xmm8,xmm10 vpand xmm3,xmm8,xmm9 vpxor xmm7,xmm7,xmm1 vpsrld xmm11,xmm12,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm12,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm13,xmm12 vpxor xmm11,xmm11,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm12,13 vpslld xmm2,xmm12,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm11,xmm1 vpsrld xmm1,xmm12,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,10 vpxor xmm11,xmm13,xmm4 vpaddd xmm15,xmm15,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm11,xmm11,xmm5 
vpaddd xmm11,xmm11,xmm7 vmovdqu xmm5,XMMWORD[((96-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((224-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((48-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm15,6 vpslld xmm2,xmm15,26 vmovdqu XMMWORD[(80-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm10 vpsrld xmm1,xmm15,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,21 vpaddd xmm6,xmm6,XMMWORD[32+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm15,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,7 vpandn xmm0,xmm15,xmm9 vpand xmm4,xmm15,xmm8 vpxor xmm7,xmm7,xmm1 vpsrld xmm10,xmm11,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm11,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm12,xmm11 vpxor xmm10,xmm10,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm11,13 vpslld xmm2,xmm11,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm10,xmm1 vpsrld xmm1,xmm11,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,10 vpxor xmm10,xmm12,xmm3 vpaddd xmm14,xmm14,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm10,xmm10,xmm6 vpaddd xmm10,xmm10,xmm7 vmovdqu xmm6,XMMWORD[((112-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((240-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((64-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm14,6 vpslld xmm2,xmm14,26 vmovdqu XMMWORD[(96-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm9 vpsrld xmm1,xmm14,11 
vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,21 vpaddd xmm5,xmm5,XMMWORD[64+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm14,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,7 vpandn xmm0,xmm14,xmm8 vpand xmm3,xmm14,xmm15 vpxor xmm7,xmm7,xmm1 vpsrld xmm9,xmm10,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm10,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm11,xmm10 vpxor xmm9,xmm9,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm10,13 vpslld xmm2,xmm10,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm9,xmm1 vpsrld xmm1,xmm10,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,10 vpxor xmm9,xmm11,xmm4 vpaddd xmm13,xmm13,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm9,xmm9,xmm5 vpaddd xmm9,xmm9,xmm7 vmovdqu xmm5,XMMWORD[((128-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((0-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((80-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm13,6 vpslld xmm2,xmm13,26 vmovdqu XMMWORD[(112-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm8 vpsrld xmm1,xmm13,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,21 vpaddd xmm6,xmm6,XMMWORD[96+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm13,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,7 vpandn xmm0,xmm13,xmm15 vpand xmm4,xmm13,xmm14 vpxor xmm7,xmm7,xmm1 vpsrld xmm8,xmm9,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm9,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm10,xmm9 vpxor xmm8,xmm8,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm9,13 vpslld xmm2,xmm9,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm8,xmm1 vpsrld xmm1,xmm9,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,10 vpxor xmm8,xmm10,xmm3 vpaddd xmm12,xmm12,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm8,xmm8,xmm6 vpaddd xmm8,xmm8,xmm7 add 
rbp,256 vmovdqu xmm6,XMMWORD[((144-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((16-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((96-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm12,6 vpslld xmm2,xmm12,26 vmovdqu XMMWORD[(128-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm15 vpsrld xmm1,xmm12,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,21 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm12,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,7 vpandn xmm0,xmm12,xmm14 vpand xmm3,xmm12,xmm13 vpxor xmm7,xmm7,xmm1 vpsrld xmm15,xmm8,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm8,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm9,xmm8 vpxor xmm15,xmm15,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm8,13 vpslld xmm2,xmm8,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm15,xmm1 vpsrld xmm1,xmm8,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,10 vpxor xmm15,xmm9,xmm4 vpaddd xmm11,xmm11,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm15,xmm15,xmm5 vpaddd xmm15,xmm15,xmm7 vmovdqu xmm5,XMMWORD[((160-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((32-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((112-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm11,6 vpslld xmm2,xmm11,26 vmovdqu XMMWORD[(144-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm14 vpsrld xmm1,xmm11,11 vpxor 
xmm7,xmm7,xmm2 vpslld xmm2,xmm11,21 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm11,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,7 vpandn xmm0,xmm11,xmm13 vpand xmm4,xmm11,xmm12 vpxor xmm7,xmm7,xmm1 vpsrld xmm14,xmm15,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm15,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm8,xmm15 vpxor xmm14,xmm14,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm15,13 vpslld xmm2,xmm15,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm14,xmm1 vpsrld xmm1,xmm15,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,10 vpxor xmm14,xmm8,xmm3 vpaddd xmm10,xmm10,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm14,xmm14,xmm6 vpaddd xmm14,xmm14,xmm7 vmovdqu xmm6,XMMWORD[((176-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((48-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((128-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm10,6 vpslld xmm2,xmm10,26 vmovdqu XMMWORD[(160-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm13 vpsrld xmm1,xmm10,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,21 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm10,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,7 vpandn xmm0,xmm10,xmm12 vpand xmm3,xmm10,xmm11 vpxor xmm7,xmm7,xmm1 vpsrld xmm13,xmm14,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm14,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm15,xmm14 vpxor xmm13,xmm13,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm14,13 vpslld xmm2,xmm14,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm13,xmm1 vpsrld xmm1,xmm14,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,10 vpxor xmm13,xmm15,xmm4 vpaddd xmm9,xmm9,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm13,xmm13,xmm5 
vpaddd xmm13,xmm13,xmm7 vmovdqu xmm5,XMMWORD[((192-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((64-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((144-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm9,6 vpslld xmm2,xmm9,26 vmovdqu XMMWORD[(176-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm12 vpsrld xmm1,xmm9,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,21 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm9,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,7 vpandn xmm0,xmm9,xmm11 vpand xmm4,xmm9,xmm10 vpxor xmm7,xmm7,xmm1 vpsrld xmm12,xmm13,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm13,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm14,xmm13 vpxor xmm12,xmm12,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm13,13 vpslld xmm2,xmm13,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm12,xmm1 vpsrld xmm1,xmm13,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,10 vpxor xmm12,xmm14,xmm3 vpaddd xmm8,xmm8,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm12,xmm12,xmm6 vpaddd xmm12,xmm12,xmm7 vmovdqu xmm6,XMMWORD[((208-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((80-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((160-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm8,6 vpslld xmm2,xmm8,26 vmovdqu XMMWORD[(192-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm11 vpsrld xmm1,xmm8,11 
vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,21 vpaddd xmm5,xmm5,XMMWORD[rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm8,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm8,7 vpandn xmm0,xmm8,xmm10 vpand xmm3,xmm8,xmm9 vpxor xmm7,xmm7,xmm1 vpsrld xmm11,xmm12,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm12,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm13,xmm12 vpxor xmm11,xmm11,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm12,13 vpslld xmm2,xmm12,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm11,xmm1 vpsrld xmm1,xmm12,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm12,10 vpxor xmm11,xmm13,xmm4 vpaddd xmm15,xmm15,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm11,xmm11,xmm5 vpaddd xmm11,xmm11,xmm7 vmovdqu xmm5,XMMWORD[((224-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((96-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((176-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm15,6 vpslld xmm2,xmm15,26 vmovdqu XMMWORD[(208-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm10 vpsrld xmm1,xmm15,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,21 vpaddd xmm6,xmm6,XMMWORD[32+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm15,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm15,7 vpandn xmm0,xmm15,xmm9 vpand xmm4,xmm15,xmm8 vpxor xmm7,xmm7,xmm1 vpsrld xmm10,xmm11,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm11,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm12,xmm11 vpxor xmm10,xmm10,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm11,13 vpslld xmm2,xmm11,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm10,xmm1 vpsrld xmm1,xmm11,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm11,10 vpxor xmm10,xmm12,xmm3 vpaddd xmm14,xmm14,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm10,xmm10,xmm6 vpaddd 
xmm10,xmm10,xmm7 vmovdqu xmm6,XMMWORD[((240-128))+rax] vpaddd xmm5,xmm5,XMMWORD[((112-128))+rax] vpsrld xmm7,xmm6,3 vpsrld xmm1,xmm6,7 vpslld xmm2,xmm6,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm6,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm6,14 vmovdqu xmm0,XMMWORD[((192-128))+rax] vpsrld xmm3,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm5,xmm5,xmm7 vpxor xmm7,xmm3,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm5,xmm5,xmm7 vpsrld xmm7,xmm14,6 vpslld xmm2,xmm14,26 vmovdqu XMMWORD[(224-128)+rax],xmm5 vpaddd xmm5,xmm5,xmm9 vpsrld xmm1,xmm14,11 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,21 vpaddd xmm5,xmm5,XMMWORD[64+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm14,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm14,7 vpandn xmm0,xmm14,xmm8 vpand xmm3,xmm14,xmm15 vpxor xmm7,xmm7,xmm1 vpsrld xmm9,xmm10,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm10,30 vpxor xmm0,xmm0,xmm3 vpxor xmm3,xmm11,xmm10 vpxor xmm9,xmm9,xmm1 vpaddd xmm5,xmm5,xmm7 vpsrld xmm1,xmm10,13 vpslld xmm2,xmm10,19 vpaddd xmm5,xmm5,xmm0 vpand xmm4,xmm4,xmm3 vpxor xmm7,xmm9,xmm1 vpsrld xmm1,xmm10,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm10,10 vpxor xmm9,xmm11,xmm4 vpaddd xmm13,xmm13,xmm5 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm9,xmm9,xmm5 vpaddd xmm9,xmm9,xmm7 vmovdqu xmm5,XMMWORD[((0-128))+rax] vpaddd xmm6,xmm6,XMMWORD[((128-128))+rax] vpsrld xmm7,xmm5,3 vpsrld xmm1,xmm5,7 vpslld xmm2,xmm5,25 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm5,18 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm5,14 vmovdqu xmm0,XMMWORD[((208-128))+rax] vpsrld xmm4,xmm0,10 vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm0,17 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,15 vpaddd xmm6,xmm6,xmm7 vpxor xmm7,xmm4,xmm1 vpsrld xmm1,xmm0,19 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm0,13 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm6,xmm6,xmm7 vpsrld xmm7,xmm13,6 vpslld xmm2,xmm13,26 vmovdqu XMMWORD[(240-128)+rax],xmm6 vpaddd xmm6,xmm6,xmm8 vpsrld xmm1,xmm13,11 vpxor 
xmm7,xmm7,xmm2 vpslld xmm2,xmm13,21 vpaddd xmm6,xmm6,XMMWORD[96+rbp] vpxor xmm7,xmm7,xmm1 vpsrld xmm1,xmm13,25 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm13,7 vpandn xmm0,xmm13,xmm15 vpand xmm4,xmm13,xmm14 vpxor xmm7,xmm7,xmm1 vpsrld xmm8,xmm9,2 vpxor xmm7,xmm7,xmm2 vpslld xmm1,xmm9,30 vpxor xmm0,xmm0,xmm4 vpxor xmm4,xmm10,xmm9 vpxor xmm8,xmm8,xmm1 vpaddd xmm6,xmm6,xmm7 vpsrld xmm1,xmm9,13 vpslld xmm2,xmm9,19 vpaddd xmm6,xmm6,xmm0 vpand xmm3,xmm3,xmm4 vpxor xmm7,xmm8,xmm1 vpsrld xmm1,xmm9,22 vpxor xmm7,xmm7,xmm2 vpslld xmm2,xmm9,10 vpxor xmm8,xmm10,xmm3 vpaddd xmm12,xmm12,xmm6 vpxor xmm7,xmm7,xmm1 vpxor xmm7,xmm7,xmm2 vpaddd xmm8,xmm8,xmm6 vpaddd xmm8,xmm8,xmm7 add rbp,256 dec ecx jnz NEAR $L$oop_16_xx_avx mov ecx,1 lea rbp,[((K256+128))] cmp ecx,DWORD[rbx] cmovge r8,rbp cmp ecx,DWORD[4+rbx] cmovge r9,rbp cmp ecx,DWORD[8+rbx] cmovge r10,rbp cmp ecx,DWORD[12+rbx] cmovge r11,rbp vmovdqa xmm7,XMMWORD[rbx] vpxor xmm0,xmm0,xmm0 vmovdqa xmm6,xmm7 vpcmpgtd xmm6,xmm6,xmm0 vpaddd xmm7,xmm7,xmm6 vmovdqu xmm0,XMMWORD[((0-128))+rdi] vpand xmm8,xmm8,xmm6 vmovdqu xmm1,XMMWORD[((32-128))+rdi] vpand xmm9,xmm9,xmm6 vmovdqu xmm2,XMMWORD[((64-128))+rdi] vpand xmm10,xmm10,xmm6 vmovdqu xmm5,XMMWORD[((96-128))+rdi] vpand xmm11,xmm11,xmm6 vpaddd xmm8,xmm8,xmm0 vmovdqu xmm0,XMMWORD[((128-128))+rdi] vpand xmm12,xmm12,xmm6 vpaddd xmm9,xmm9,xmm1 vmovdqu xmm1,XMMWORD[((160-128))+rdi] vpand xmm13,xmm13,xmm6 vpaddd xmm10,xmm10,xmm2 vmovdqu xmm2,XMMWORD[((192-128))+rdi] vpand xmm14,xmm14,xmm6 vpaddd xmm11,xmm11,xmm5 vmovdqu xmm5,XMMWORD[((224-128))+rdi] vpand xmm15,xmm15,xmm6 vpaddd xmm12,xmm12,xmm0 vpaddd xmm13,xmm13,xmm1 vmovdqu XMMWORD[(0-128)+rdi],xmm8 vpaddd xmm14,xmm14,xmm2 vmovdqu XMMWORD[(32-128)+rdi],xmm9 vpaddd xmm15,xmm15,xmm5 vmovdqu XMMWORD[(64-128)+rdi],xmm10 vmovdqu XMMWORD[(96-128)+rdi],xmm11 vmovdqu XMMWORD[(128-128)+rdi],xmm12 vmovdqu XMMWORD[(160-128)+rdi],xmm13 vmovdqu XMMWORD[(192-128)+rdi],xmm14 vmovdqu XMMWORD[(224-128)+rdi],xmm15 vmovdqu XMMWORD[rbx],xmm7 vmovdqu 
xmm6,XMMWORD[$L$pbswap] dec edx jnz NEAR $L$oop_avx mov edx,DWORD[280+rsp] lea rdi,[16+rdi] lea rsi,[64+rsi] dec edx jnz NEAR $L$oop_grande_avx $L$done_avx: mov rax,QWORD[272+rsp] vzeroupper movaps xmm6,XMMWORD[((-184))+rax] movaps xmm7,XMMWORD[((-168))+rax] movaps xmm8,XMMWORD[((-152))+rax] movaps xmm9,XMMWORD[((-136))+rax] movaps xmm10,XMMWORD[((-120))+rax] movaps xmm11,XMMWORD[((-104))+rax] movaps xmm12,XMMWORD[((-88))+rax] movaps xmm13,XMMWORD[((-72))+rax] movaps xmm14,XMMWORD[((-56))+rax] movaps xmm15,XMMWORD[((-40))+rax] mov rbp,QWORD[((-16))+rax] mov rbx,QWORD[((-8))+rax] lea rsp,[rax] $L$epilogue_avx: mov rdi,QWORD[8+rsp] ;WIN64 epilogue mov rsi,QWORD[16+rsp] DB 0F3h,0C3h ;repret $L$SEH_end_sha256_multi_block_avx: ALIGN 32 sha256_multi_block_avx2: mov QWORD[8+rsp],rdi ;WIN64 prologue mov QWORD[16+rsp],rsi mov rax,rsp $L$SEH_begin_sha256_multi_block_avx2: mov rdi,rcx mov rsi,rdx mov rdx,r8 _avx2_shortcut: mov rax,rsp push rbx push rbp push r12 push r13 push r14 push r15 lea rsp,[((-168))+rsp] movaps XMMWORD[rsp],xmm6 movaps XMMWORD[16+rsp],xmm7 movaps XMMWORD[32+rsp],xmm8 movaps XMMWORD[48+rsp],xmm9 movaps XMMWORD[64+rsp],xmm10 movaps XMMWORD[80+rsp],xmm11 movaps XMMWORD[(-120)+rax],xmm12 movaps XMMWORD[(-104)+rax],xmm13 movaps XMMWORD[(-88)+rax],xmm14 movaps XMMWORD[(-72)+rax],xmm15 sub rsp,576 and rsp,-256 mov QWORD[544+rsp],rax $L$body_avx2: lea rbp,[((K256+128))] lea rdi,[128+rdi] $L$oop_grande_avx2: mov DWORD[552+rsp],edx xor edx,edx lea rbx,[512+rsp] mov r12,QWORD[rsi] mov ecx,DWORD[8+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[rbx],ecx cmovle r12,rbp mov r13,QWORD[16+rsi] mov ecx,DWORD[24+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[4+rbx],ecx cmovle r13,rbp mov r14,QWORD[32+rsi] mov ecx,DWORD[40+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[8+rbx],ecx cmovle r14,rbp mov r15,QWORD[48+rsi] mov ecx,DWORD[56+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[12+rbx],ecx cmovle r15,rbp mov r8,QWORD[64+rsi] mov 
ecx,DWORD[72+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[16+rbx],ecx cmovle r8,rbp mov r9,QWORD[80+rsi] mov ecx,DWORD[88+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[20+rbx],ecx cmovle r9,rbp mov r10,QWORD[96+rsi] mov ecx,DWORD[104+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[24+rbx],ecx cmovle r10,rbp mov r11,QWORD[112+rsi] mov ecx,DWORD[120+rsi] cmp ecx,edx cmovg edx,ecx test ecx,ecx mov DWORD[28+rbx],ecx cmovle r11,rbp vmovdqu ymm8,YMMWORD[((0-128))+rdi] lea rax,[128+rsp] vmovdqu ymm9,YMMWORD[((32-128))+rdi] lea rbx,[((256+128))+rsp] vmovdqu ymm10,YMMWORD[((64-128))+rdi] vmovdqu ymm11,YMMWORD[((96-128))+rdi] vmovdqu ymm12,YMMWORD[((128-128))+rdi] vmovdqu ymm13,YMMWORD[((160-128))+rdi] vmovdqu ymm14,YMMWORD[((192-128))+rdi] vmovdqu ymm15,YMMWORD[((224-128))+rdi] vmovdqu ymm6,YMMWORD[$L$pbswap] jmp NEAR $L$oop_avx2 ALIGN 32 $L$oop_avx2: vpxor ymm4,ymm10,ymm9 vmovd xmm5,DWORD[r12] vmovd xmm0,DWORD[r8] vmovd xmm1,DWORD[r13] vmovd xmm2,DWORD[r9] vpinsrd xmm5,xmm5,DWORD[r14],1 vpinsrd xmm0,xmm0,DWORD[r10],1 vpinsrd xmm1,xmm1,DWORD[r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm12,6 vpslld ymm2,ymm12,26 vmovdqu YMMWORD[(0-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm15 vpsrld ymm1,ymm12,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,21 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm12,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,7 vpandn ymm0,ymm12,ymm14 vpand ymm3,ymm12,ymm13 vpxor ymm7,ymm7,ymm1 vpsrld ymm15,ymm8,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm8,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm9,ymm8 vpxor ymm15,ymm15,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm8,13 vpslld ymm2,ymm8,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm15,ymm1 vpsrld ymm1,ymm8,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,10 vpxor ymm15,ymm9,ymm4 vpaddd ymm11,ymm11,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm15,ymm15,ymm5 vpaddd 
ymm15,ymm15,ymm7 vmovd xmm5,DWORD[4+r12] vmovd xmm0,DWORD[4+r8] vmovd xmm1,DWORD[4+r13] vmovd xmm2,DWORD[4+r9] vpinsrd xmm5,xmm5,DWORD[4+r14],1 vpinsrd xmm0,xmm0,DWORD[4+r10],1 vpinsrd xmm1,xmm1,DWORD[4+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[4+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm11,6 vpslld ymm2,ymm11,26 vmovdqu YMMWORD[(32-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm14 vpsrld ymm1,ymm11,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,21 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm11,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,7 vpandn ymm0,ymm11,ymm13 vpand ymm4,ymm11,ymm12 vpxor ymm7,ymm7,ymm1 vpsrld ymm14,ymm15,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm15,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm8,ymm15 vpxor ymm14,ymm14,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm15,13 vpslld ymm2,ymm15,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm14,ymm1 vpsrld ymm1,ymm15,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,10 vpxor ymm14,ymm8,ymm3 vpaddd ymm10,ymm10,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm14,ymm14,ymm5 vpaddd ymm14,ymm14,ymm7 vmovd xmm5,DWORD[8+r12] vmovd xmm0,DWORD[8+r8] vmovd xmm1,DWORD[8+r13] vmovd xmm2,DWORD[8+r9] vpinsrd xmm5,xmm5,DWORD[8+r14],1 vpinsrd xmm0,xmm0,DWORD[8+r10],1 vpinsrd xmm1,xmm1,DWORD[8+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[8+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm10,6 vpslld ymm2,ymm10,26 vmovdqu YMMWORD[(64-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm13 vpsrld ymm1,ymm10,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,21 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm10,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,7 vpandn ymm0,ymm10,ymm12 vpand ymm3,ymm10,ymm11 vpxor ymm7,ymm7,ymm1 vpsrld ymm13,ymm14,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm14,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm15,ymm14 vpxor ymm13,ymm13,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld 
ymm1,ymm14,13 vpslld ymm2,ymm14,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm13,ymm1 vpsrld ymm1,ymm14,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,10 vpxor ymm13,ymm15,ymm4 vpaddd ymm9,ymm9,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm13,ymm13,ymm5 vpaddd ymm13,ymm13,ymm7 vmovd xmm5,DWORD[12+r12] vmovd xmm0,DWORD[12+r8] vmovd xmm1,DWORD[12+r13] vmovd xmm2,DWORD[12+r9] vpinsrd xmm5,xmm5,DWORD[12+r14],1 vpinsrd xmm0,xmm0,DWORD[12+r10],1 vpinsrd xmm1,xmm1,DWORD[12+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[12+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm9,6 vpslld ymm2,ymm9,26 vmovdqu YMMWORD[(96-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm12 vpsrld ymm1,ymm9,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,21 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm9,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,7 vpandn ymm0,ymm9,ymm11 vpand ymm4,ymm9,ymm10 vpxor ymm7,ymm7,ymm1 vpsrld ymm12,ymm13,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm13,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm14,ymm13 vpxor ymm12,ymm12,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm13,13 vpslld ymm2,ymm13,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm12,ymm1 vpsrld ymm1,ymm13,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,10 vpxor ymm12,ymm14,ymm3 vpaddd ymm8,ymm8,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm12,ymm12,ymm5 vpaddd ymm12,ymm12,ymm7 vmovd xmm5,DWORD[16+r12] vmovd xmm0,DWORD[16+r8] vmovd xmm1,DWORD[16+r13] vmovd xmm2,DWORD[16+r9] vpinsrd xmm5,xmm5,DWORD[16+r14],1 vpinsrd xmm0,xmm0,DWORD[16+r10],1 vpinsrd xmm1,xmm1,DWORD[16+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[16+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm8,6 vpslld ymm2,ymm8,26 vmovdqu YMMWORD[(128-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm11 vpsrld ymm1,ymm8,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,21 vpaddd ymm5,ymm5,YMMWORD[rbp] vpxor ymm7,ymm7,ymm1 vpsrld 
ymm1,ymm8,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,7 vpandn ymm0,ymm8,ymm10 vpand ymm3,ymm8,ymm9 vpxor ymm7,ymm7,ymm1 vpsrld ymm11,ymm12,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm12,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm13,ymm12 vpxor ymm11,ymm11,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm12,13 vpslld ymm2,ymm12,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm11,ymm1 vpsrld ymm1,ymm12,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,10 vpxor ymm11,ymm13,ymm4 vpaddd ymm15,ymm15,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm11,ymm11,ymm5 vpaddd ymm11,ymm11,ymm7 vmovd xmm5,DWORD[20+r12] vmovd xmm0,DWORD[20+r8] vmovd xmm1,DWORD[20+r13] vmovd xmm2,DWORD[20+r9] vpinsrd xmm5,xmm5,DWORD[20+r14],1 vpinsrd xmm0,xmm0,DWORD[20+r10],1 vpinsrd xmm1,xmm1,DWORD[20+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[20+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm15,6 vpslld ymm2,ymm15,26 vmovdqu YMMWORD[(160-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm10 vpsrld ymm1,ymm15,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,21 vpaddd ymm5,ymm5,YMMWORD[32+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm15,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,7 vpandn ymm0,ymm15,ymm9 vpand ymm4,ymm15,ymm8 vpxor ymm7,ymm7,ymm1 vpsrld ymm10,ymm11,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm11,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm12,ymm11 vpxor ymm10,ymm10,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm11,13 vpslld ymm2,ymm11,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm10,ymm1 vpsrld ymm1,ymm11,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,10 vpxor ymm10,ymm12,ymm3 vpaddd ymm14,ymm14,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm10,ymm10,ymm5 vpaddd ymm10,ymm10,ymm7 vmovd xmm5,DWORD[24+r12] vmovd xmm0,DWORD[24+r8] vmovd xmm1,DWORD[24+r13] vmovd xmm2,DWORD[24+r9] vpinsrd xmm5,xmm5,DWORD[24+r14],1 vpinsrd xmm0,xmm0,DWORD[24+r10],1 vpinsrd xmm1,xmm1,DWORD[24+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[24+r11],1 vpunpckldq 
ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm14,6 vpslld ymm2,ymm14,26 vmovdqu YMMWORD[(192-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm9 vpsrld ymm1,ymm14,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,21 vpaddd ymm5,ymm5,YMMWORD[64+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm14,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,7 vpandn ymm0,ymm14,ymm8 vpand ymm3,ymm14,ymm15 vpxor ymm7,ymm7,ymm1 vpsrld ymm9,ymm10,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm10,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm11,ymm10 vpxor ymm9,ymm9,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm10,13 vpslld ymm2,ymm10,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm9,ymm1 vpsrld ymm1,ymm10,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,10 vpxor ymm9,ymm11,ymm4 vpaddd ymm13,ymm13,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm9,ymm9,ymm5 vpaddd ymm9,ymm9,ymm7 vmovd xmm5,DWORD[28+r12] vmovd xmm0,DWORD[28+r8] vmovd xmm1,DWORD[28+r13] vmovd xmm2,DWORD[28+r9] vpinsrd xmm5,xmm5,DWORD[28+r14],1 vpinsrd xmm0,xmm0,DWORD[28+r10],1 vpinsrd xmm1,xmm1,DWORD[28+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[28+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm13,6 vpslld ymm2,ymm13,26 vmovdqu YMMWORD[(224-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm8 vpsrld ymm1,ymm13,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,21 vpaddd ymm5,ymm5,YMMWORD[96+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm13,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,7 vpandn ymm0,ymm13,ymm15 vpand ymm4,ymm13,ymm14 vpxor ymm7,ymm7,ymm1 vpsrld ymm8,ymm9,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm9,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm10,ymm9 vpxor ymm8,ymm8,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm9,13 vpslld ymm2,ymm9,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm8,ymm1 vpsrld ymm1,ymm9,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,10 vpxor ymm8,ymm10,ymm3 vpaddd ymm12,ymm12,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm8,ymm8,ymm5 vpaddd ymm8,ymm8,ymm7 add 
rbp,256 vmovd xmm5,DWORD[32+r12] vmovd xmm0,DWORD[32+r8] vmovd xmm1,DWORD[32+r13] vmovd xmm2,DWORD[32+r9] vpinsrd xmm5,xmm5,DWORD[32+r14],1 vpinsrd xmm0,xmm0,DWORD[32+r10],1 vpinsrd xmm1,xmm1,DWORD[32+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[32+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm12,6 vpslld ymm2,ymm12,26 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm15 vpsrld ymm1,ymm12,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,21 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm12,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,7 vpandn ymm0,ymm12,ymm14 vpand ymm3,ymm12,ymm13 vpxor ymm7,ymm7,ymm1 vpsrld ymm15,ymm8,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm8,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm9,ymm8 vpxor ymm15,ymm15,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm8,13 vpslld ymm2,ymm8,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm15,ymm1 vpsrld ymm1,ymm8,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,10 vpxor ymm15,ymm9,ymm4 vpaddd ymm11,ymm11,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm15,ymm15,ymm5 vpaddd ymm15,ymm15,ymm7 vmovd xmm5,DWORD[36+r12] vmovd xmm0,DWORD[36+r8] vmovd xmm1,DWORD[36+r13] vmovd xmm2,DWORD[36+r9] vpinsrd xmm5,xmm5,DWORD[36+r14],1 vpinsrd xmm0,xmm0,DWORD[36+r10],1 vpinsrd xmm1,xmm1,DWORD[36+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[36+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm11,6 vpslld ymm2,ymm11,26 vmovdqu YMMWORD[(288-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm14 vpsrld ymm1,ymm11,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,21 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm11,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,7 vpandn ymm0,ymm11,ymm13 vpand ymm4,ymm11,ymm12 vpxor ymm7,ymm7,ymm1 vpsrld ymm14,ymm15,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm15,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm8,ymm15 vpxor ymm14,ymm14,ymm1 vpaddd ymm5,ymm5,ymm7 
vpsrld ymm1,ymm15,13 vpslld ymm2,ymm15,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm14,ymm1 vpsrld ymm1,ymm15,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,10 vpxor ymm14,ymm8,ymm3 vpaddd ymm10,ymm10,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm14,ymm14,ymm5 vpaddd ymm14,ymm14,ymm7 vmovd xmm5,DWORD[40+r12] vmovd xmm0,DWORD[40+r8] vmovd xmm1,DWORD[40+r13] vmovd xmm2,DWORD[40+r9] vpinsrd xmm5,xmm5,DWORD[40+r14],1 vpinsrd xmm0,xmm0,DWORD[40+r10],1 vpinsrd xmm1,xmm1,DWORD[40+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[40+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm10,6 vpslld ymm2,ymm10,26 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm13 vpsrld ymm1,ymm10,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,21 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm10,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,7 vpandn ymm0,ymm10,ymm12 vpand ymm3,ymm10,ymm11 vpxor ymm7,ymm7,ymm1 vpsrld ymm13,ymm14,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm14,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm15,ymm14 vpxor ymm13,ymm13,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm14,13 vpslld ymm2,ymm14,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm13,ymm1 vpsrld ymm1,ymm14,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,10 vpxor ymm13,ymm15,ymm4 vpaddd ymm9,ymm9,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm13,ymm13,ymm5 vpaddd ymm13,ymm13,ymm7 vmovd xmm5,DWORD[44+r12] vmovd xmm0,DWORD[44+r8] vmovd xmm1,DWORD[44+r13] vmovd xmm2,DWORD[44+r9] vpinsrd xmm5,xmm5,DWORD[44+r14],1 vpinsrd xmm0,xmm0,DWORD[44+r10],1 vpinsrd xmm1,xmm1,DWORD[44+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[44+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm9,6 vpslld ymm2,ymm9,26 vmovdqu YMMWORD[(352-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm12 vpsrld ymm1,ymm9,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,21 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] 
vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm9,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,7 vpandn ymm0,ymm9,ymm11 vpand ymm4,ymm9,ymm10 vpxor ymm7,ymm7,ymm1 vpsrld ymm12,ymm13,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm13,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm14,ymm13 vpxor ymm12,ymm12,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm13,13 vpslld ymm2,ymm13,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm12,ymm1 vpsrld ymm1,ymm13,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,10 vpxor ymm12,ymm14,ymm3 vpaddd ymm8,ymm8,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm12,ymm12,ymm5 vpaddd ymm12,ymm12,ymm7 vmovd xmm5,DWORD[48+r12] vmovd xmm0,DWORD[48+r8] vmovd xmm1,DWORD[48+r13] vmovd xmm2,DWORD[48+r9] vpinsrd xmm5,xmm5,DWORD[48+r14],1 vpinsrd xmm0,xmm0,DWORD[48+r10],1 vpinsrd xmm1,xmm1,DWORD[48+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[48+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm8,6 vpslld ymm2,ymm8,26 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm11 vpsrld ymm1,ymm8,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,21 vpaddd ymm5,ymm5,YMMWORD[rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm8,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,7 vpandn ymm0,ymm8,ymm10 vpand ymm3,ymm8,ymm9 vpxor ymm7,ymm7,ymm1 vpsrld ymm11,ymm12,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm12,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm13,ymm12 vpxor ymm11,ymm11,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm12,13 vpslld ymm2,ymm12,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm11,ymm1 vpsrld ymm1,ymm12,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,10 vpxor ymm11,ymm13,ymm4 vpaddd ymm15,ymm15,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm11,ymm11,ymm5 vpaddd ymm11,ymm11,ymm7 vmovd xmm5,DWORD[52+r12] vmovd xmm0,DWORD[52+r8] vmovd xmm1,DWORD[52+r13] vmovd xmm2,DWORD[52+r9] vpinsrd xmm5,xmm5,DWORD[52+r14],1 vpinsrd xmm0,xmm0,DWORD[52+r10],1 vpinsrd xmm1,xmm1,DWORD[52+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[52+r11],1 
vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm15,6 vpslld ymm2,ymm15,26 vmovdqu YMMWORD[(416-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm10 vpsrld ymm1,ymm15,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,21 vpaddd ymm5,ymm5,YMMWORD[32+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm15,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,7 vpandn ymm0,ymm15,ymm9 vpand ymm4,ymm15,ymm8 vpxor ymm7,ymm7,ymm1 vpsrld ymm10,ymm11,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm11,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm12,ymm11 vpxor ymm10,ymm10,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm11,13 vpslld ymm2,ymm11,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm10,ymm1 vpsrld ymm1,ymm11,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,10 vpxor ymm10,ymm12,ymm3 vpaddd ymm14,ymm14,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm10,ymm10,ymm5 vpaddd ymm10,ymm10,ymm7 vmovd xmm5,DWORD[56+r12] vmovd xmm0,DWORD[56+r8] vmovd xmm1,DWORD[56+r13] vmovd xmm2,DWORD[56+r9] vpinsrd xmm5,xmm5,DWORD[56+r14],1 vpinsrd xmm0,xmm0,DWORD[56+r10],1 vpinsrd xmm1,xmm1,DWORD[56+r15],1 vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[56+r11],1 vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm14,6 vpslld ymm2,ymm14,26 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm9 vpsrld ymm1,ymm14,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,21 vpaddd ymm5,ymm5,YMMWORD[64+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm14,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,7 vpandn ymm0,ymm14,ymm8 vpand ymm3,ymm14,ymm15 vpxor ymm7,ymm7,ymm1 vpsrld ymm9,ymm10,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm10,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm11,ymm10 vpxor ymm9,ymm9,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm10,13 vpslld ymm2,ymm10,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm9,ymm1 vpsrld ymm1,ymm10,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,10 vpxor ymm9,ymm11,ymm4 vpaddd ymm13,ymm13,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd 
ymm9,ymm9,ymm5 vpaddd ymm9,ymm9,ymm7 vmovd xmm5,DWORD[60+r12] lea r12,[64+r12] vmovd xmm0,DWORD[60+r8] lea r8,[64+r8] vmovd xmm1,DWORD[60+r13] lea r13,[64+r13] vmovd xmm2,DWORD[60+r9] lea r9,[64+r9] vpinsrd xmm5,xmm5,DWORD[60+r14],1 lea r14,[64+r14] vpinsrd xmm0,xmm0,DWORD[60+r10],1 lea r10,[64+r10] vpinsrd xmm1,xmm1,DWORD[60+r15],1 lea r15,[64+r15] vpunpckldq ymm5,ymm5,ymm1 vpinsrd xmm2,xmm2,DWORD[60+r11],1 lea r11,[64+r11] vpunpckldq ymm0,ymm0,ymm2 vinserti128 ymm5,ymm5,xmm0,1 vpshufb ymm5,ymm5,ymm6 vpsrld ymm7,ymm13,6 vpslld ymm2,ymm13,26 vmovdqu YMMWORD[(480-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm8 vpsrld ymm1,ymm13,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,21 vpaddd ymm5,ymm5,YMMWORD[96+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm13,25 vpxor ymm7,ymm7,ymm2 prefetcht0 [63+r12] vpslld ymm2,ymm13,7 vpandn ymm0,ymm13,ymm15 vpand ymm4,ymm13,ymm14 prefetcht0 [63+r13] vpxor ymm7,ymm7,ymm1 vpsrld ymm8,ymm9,2 vpxor ymm7,ymm7,ymm2 prefetcht0 [63+r14] vpslld ymm1,ymm9,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm10,ymm9 prefetcht0 [63+r15] vpxor ymm8,ymm8,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm9,13 prefetcht0 [63+r8] vpslld ymm2,ymm9,19 vpaddd ymm5,ymm5,ymm0 vpand ymm3,ymm3,ymm4 prefetcht0 [63+r9] vpxor ymm7,ymm8,ymm1 vpsrld ymm1,ymm9,22 vpxor ymm7,ymm7,ymm2 prefetcht0 [63+r10] vpslld ymm2,ymm9,10 vpxor ymm8,ymm10,ymm3 vpaddd ymm12,ymm12,ymm5 prefetcht0 [63+r11] vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm8,ymm8,ymm5 vpaddd ymm8,ymm8,ymm7 add rbp,256 vmovdqu ymm5,YMMWORD[((0-128))+rax] mov ecx,3 jmp NEAR $L$oop_16_xx_avx2 ALIGN 32 $L$oop_16_xx_avx2: vmovdqu ymm6,YMMWORD[((32-128))+rax] vpaddd ymm5,ymm5,YMMWORD[((288-256-128))+rbx] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((448-256-128))+rbx] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld 
ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm12,6 vpslld ymm2,ymm12,26 vmovdqu YMMWORD[(0-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm15 vpsrld ymm1,ymm12,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,21 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm12,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,7 vpandn ymm0,ymm12,ymm14 vpand ymm3,ymm12,ymm13 vpxor ymm7,ymm7,ymm1 vpsrld ymm15,ymm8,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm8,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm9,ymm8 vpxor ymm15,ymm15,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm8,13 vpslld ymm2,ymm8,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm15,ymm1 vpsrld ymm1,ymm8,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,10 vpxor ymm15,ymm9,ymm4 vpaddd ymm11,ymm11,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm15,ymm15,ymm5 vpaddd ymm15,ymm15,ymm7 vmovdqu ymm5,YMMWORD[((64-128))+rax] vpaddd ymm6,ymm6,YMMWORD[((320-256-128))+rbx] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((480-256-128))+rbx] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm11,6 vpslld ymm2,ymm11,26 vmovdqu YMMWORD[(32-128)+rax],ymm6 vpaddd ymm6,ymm6,ymm14 vpsrld ymm1,ymm11,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,21 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm11,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,7 vpandn ymm0,ymm11,ymm13 vpand ymm4,ymm11,ymm12 vpxor ymm7,ymm7,ymm1 vpsrld ymm14,ymm15,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm15,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm8,ymm15 vpxor ymm14,ymm14,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm15,13 vpslld ymm2,ymm15,19 
vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm14,ymm1 vpsrld ymm1,ymm15,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,10 vpxor ymm14,ymm8,ymm3 vpaddd ymm10,ymm10,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm14,ymm14,ymm6 vpaddd ymm14,ymm14,ymm7 vmovdqu ymm6,YMMWORD[((96-128))+rax] vpaddd ymm5,ymm5,YMMWORD[((352-256-128))+rbx] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((0-128))+rax] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm10,6 vpslld ymm2,ymm10,26 vmovdqu YMMWORD[(64-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm13 vpsrld ymm1,ymm10,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,21 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm10,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,7 vpandn ymm0,ymm10,ymm12 vpand ymm3,ymm10,ymm11 vpxor ymm7,ymm7,ymm1 vpsrld ymm13,ymm14,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm14,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm15,ymm14 vpxor ymm13,ymm13,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm14,13 vpslld ymm2,ymm14,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm13,ymm1 vpsrld ymm1,ymm14,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,10 vpxor ymm13,ymm15,ymm4 vpaddd ymm9,ymm9,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm13,ymm13,ymm5 vpaddd ymm13,ymm13,ymm7 vmovdqu ymm5,YMMWORD[((128-128))+rax] vpaddd ymm6,ymm6,YMMWORD[((384-256-128))+rbx] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((32-128))+rax] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor 
ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm9,6 vpslld ymm2,ymm9,26 vmovdqu YMMWORD[(96-128)+rax],ymm6 vpaddd ymm6,ymm6,ymm12 vpsrld ymm1,ymm9,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,21 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm9,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,7 vpandn ymm0,ymm9,ymm11 vpand ymm4,ymm9,ymm10 vpxor ymm7,ymm7,ymm1 vpsrld ymm12,ymm13,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm13,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm14,ymm13 vpxor ymm12,ymm12,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm13,13 vpslld ymm2,ymm13,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm12,ymm1 vpsrld ymm1,ymm13,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,10 vpxor ymm12,ymm14,ymm3 vpaddd ymm8,ymm8,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm12,ymm12,ymm6 vpaddd ymm12,ymm12,ymm7 vmovdqu ymm6,YMMWORD[((160-128))+rax] vpaddd ymm5,ymm5,YMMWORD[((416-256-128))+rbx] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((64-128))+rax] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm8,6 vpslld ymm2,ymm8,26 vmovdqu YMMWORD[(128-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm11 vpsrld ymm1,ymm8,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,21 vpaddd ymm5,ymm5,YMMWORD[rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm8,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,7 vpandn ymm0,ymm8,ymm10 vpand ymm3,ymm8,ymm9 vpxor ymm7,ymm7,ymm1 vpsrld ymm11,ymm12,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm12,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm13,ymm12 vpxor ymm11,ymm11,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm12,13 vpslld ymm2,ymm12,19 
vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm11,ymm1 vpsrld ymm1,ymm12,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,10 vpxor ymm11,ymm13,ymm4 vpaddd ymm15,ymm15,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm11,ymm11,ymm5 vpaddd ymm11,ymm11,ymm7 vmovdqu ymm5,YMMWORD[((192-128))+rax] vpaddd ymm6,ymm6,YMMWORD[((448-256-128))+rbx] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((96-128))+rax] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm15,6 vpslld ymm2,ymm15,26 vmovdqu YMMWORD[(160-128)+rax],ymm6 vpaddd ymm6,ymm6,ymm10 vpsrld ymm1,ymm15,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,21 vpaddd ymm6,ymm6,YMMWORD[32+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm15,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,7 vpandn ymm0,ymm15,ymm9 vpand ymm4,ymm15,ymm8 vpxor ymm7,ymm7,ymm1 vpsrld ymm10,ymm11,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm11,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm12,ymm11 vpxor ymm10,ymm10,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm11,13 vpslld ymm2,ymm11,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm10,ymm1 vpsrld ymm1,ymm11,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,10 vpxor ymm10,ymm12,ymm3 vpaddd ymm14,ymm14,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm10,ymm10,ymm6 vpaddd ymm10,ymm10,ymm7 vmovdqu ymm6,YMMWORD[((224-128))+rax] vpaddd ymm5,ymm5,YMMWORD[((480-256-128))+rbx] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((128-128))+rax] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor 
ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm14,6 vpslld ymm2,ymm14,26 vmovdqu YMMWORD[(192-128)+rax],ymm5 vpaddd ymm5,ymm5,ymm9 vpsrld ymm1,ymm14,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,21 vpaddd ymm5,ymm5,YMMWORD[64+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm14,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,7 vpandn ymm0,ymm14,ymm8 vpand ymm3,ymm14,ymm15 vpxor ymm7,ymm7,ymm1 vpsrld ymm9,ymm10,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm10,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm11,ymm10 vpxor ymm9,ymm9,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm10,13 vpslld ymm2,ymm10,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm9,ymm1 vpsrld ymm1,ymm10,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,10 vpxor ymm9,ymm11,ymm4 vpaddd ymm13,ymm13,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm9,ymm9,ymm5 vpaddd ymm9,ymm9,ymm7 vmovdqu ymm5,YMMWORD[((256-256-128))+rbx] vpaddd ymm6,ymm6,YMMWORD[((0-128))+rax] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((160-128))+rax] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm13,6 vpslld ymm2,ymm13,26 vmovdqu YMMWORD[(224-128)+rax],ymm6 vpaddd ymm6,ymm6,ymm8 vpsrld ymm1,ymm13,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,21 vpaddd ymm6,ymm6,YMMWORD[96+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm13,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,7 vpandn ymm0,ymm13,ymm15 vpand ymm4,ymm13,ymm14 vpxor ymm7,ymm7,ymm1 vpsrld ymm8,ymm9,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm9,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm10,ymm9 vpxor ymm8,ymm8,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm9,13 vpslld ymm2,ymm9,19 
vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm8,ymm1 vpsrld ymm1,ymm9,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,10 vpxor ymm8,ymm10,ymm3 vpaddd ymm12,ymm12,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm8,ymm8,ymm6 vpaddd ymm8,ymm8,ymm7 add rbp,256 vmovdqu ymm6,YMMWORD[((288-256-128))+rbx] vpaddd ymm5,ymm5,YMMWORD[((32-128))+rax] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((192-128))+rax] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm12,6 vpslld ymm2,ymm12,26 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm15 vpsrld ymm1,ymm12,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,21 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm12,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,7 vpandn ymm0,ymm12,ymm14 vpand ymm3,ymm12,ymm13 vpxor ymm7,ymm7,ymm1 vpsrld ymm15,ymm8,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm8,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm9,ymm8 vpxor ymm15,ymm15,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm8,13 vpslld ymm2,ymm8,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm15,ymm1 vpsrld ymm1,ymm8,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,10 vpxor ymm15,ymm9,ymm4 vpaddd ymm11,ymm11,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm15,ymm15,ymm5 vpaddd ymm15,ymm15,ymm7 vmovdqu ymm5,YMMWORD[((320-256-128))+rbx] vpaddd ymm6,ymm6,YMMWORD[((64-128))+rax] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((224-128))+rax] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor 
ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm11,6 vpslld ymm2,ymm11,26 vmovdqu YMMWORD[(288-256-128)+rbx],ymm6 vpaddd ymm6,ymm6,ymm14 vpsrld ymm1,ymm11,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,21 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm11,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,7 vpandn ymm0,ymm11,ymm13 vpand ymm4,ymm11,ymm12 vpxor ymm7,ymm7,ymm1 vpsrld ymm14,ymm15,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm15,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm8,ymm15 vpxor ymm14,ymm14,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm15,13 vpslld ymm2,ymm15,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm14,ymm1 vpsrld ymm1,ymm15,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,10 vpxor ymm14,ymm8,ymm3 vpaddd ymm10,ymm10,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm14,ymm14,ymm6 vpaddd ymm14,ymm14,ymm7 vmovdqu ymm6,YMMWORD[((352-256-128))+rbx] vpaddd ymm5,ymm5,YMMWORD[((96-128))+rax] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((256-256-128))+rbx] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm10,6 vpslld ymm2,ymm10,26 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm13 vpsrld ymm1,ymm10,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,21 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm10,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,7 vpandn ymm0,ymm10,ymm12 vpand ymm3,ymm10,ymm11 vpxor ymm7,ymm7,ymm1 vpsrld ymm13,ymm14,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm14,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm15,ymm14 vpxor ymm13,ymm13,ymm1 vpaddd ymm5,ymm5,ymm7 
vpsrld ymm1,ymm14,13 vpslld ymm2,ymm14,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm13,ymm1 vpsrld ymm1,ymm14,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,10 vpxor ymm13,ymm15,ymm4 vpaddd ymm9,ymm9,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm13,ymm13,ymm5 vpaddd ymm13,ymm13,ymm7 vmovdqu ymm5,YMMWORD[((384-256-128))+rbx] vpaddd ymm6,ymm6,YMMWORD[((128-128))+rax] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((288-256-128))+rbx] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm9,6 vpslld ymm2,ymm9,26 vmovdqu YMMWORD[(352-256-128)+rbx],ymm6 vpaddd ymm6,ymm6,ymm12 vpsrld ymm1,ymm9,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,21 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm9,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,7 vpandn ymm0,ymm9,ymm11 vpand ymm4,ymm9,ymm10 vpxor ymm7,ymm7,ymm1 vpsrld ymm12,ymm13,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm13,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm14,ymm13 vpxor ymm12,ymm12,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm13,13 vpslld ymm2,ymm13,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm12,ymm1 vpsrld ymm1,ymm13,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,10 vpxor ymm12,ymm14,ymm3 vpaddd ymm8,ymm8,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm12,ymm12,ymm6 vpaddd ymm12,ymm12,ymm7 vmovdqu ymm6,YMMWORD[((416-256-128))+rbx] vpaddd ymm5,ymm5,YMMWORD[((160-128))+rax] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((320-256-128))+rbx] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 
vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm8,6 vpslld ymm2,ymm8,26 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm11 vpsrld ymm1,ymm8,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,21 vpaddd ymm5,ymm5,YMMWORD[rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm8,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm8,7 vpandn ymm0,ymm8,ymm10 vpand ymm3,ymm8,ymm9 vpxor ymm7,ymm7,ymm1 vpsrld ymm11,ymm12,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm12,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm13,ymm12 vpxor ymm11,ymm11,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm12,13 vpslld ymm2,ymm12,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm11,ymm1 vpsrld ymm1,ymm12,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm12,10 vpxor ymm11,ymm13,ymm4 vpaddd ymm15,ymm15,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm11,ymm11,ymm5 vpaddd ymm11,ymm11,ymm7 vmovdqu ymm5,YMMWORD[((448-256-128))+rbx] vpaddd ymm6,ymm6,YMMWORD[((192-128))+rax] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((352-256-128))+rbx] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm15,6 vpslld ymm2,ymm15,26 vmovdqu YMMWORD[(416-256-128)+rbx],ymm6 vpaddd ymm6,ymm6,ymm10 vpsrld ymm1,ymm15,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,21 vpaddd ymm6,ymm6,YMMWORD[32+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm15,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm15,7 vpandn ymm0,ymm15,ymm9 vpand ymm4,ymm15,ymm8 vpxor ymm7,ymm7,ymm1 vpsrld ymm10,ymm11,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm11,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm12,ymm11 vpxor 
ymm10,ymm10,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm11,13 vpslld ymm2,ymm11,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm10,ymm1 vpsrld ymm1,ymm11,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm11,10 vpxor ymm10,ymm12,ymm3 vpaddd ymm14,ymm14,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm10,ymm10,ymm6 vpaddd ymm10,ymm10,ymm7 vmovdqu ymm6,YMMWORD[((480-256-128))+rbx] vpaddd ymm5,ymm5,YMMWORD[((224-128))+rax] vpsrld ymm7,ymm6,3 vpsrld ymm1,ymm6,7 vpslld ymm2,ymm6,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm6,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm6,14 vmovdqu ymm0,YMMWORD[((384-256-128))+rbx] vpsrld ymm3,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm5,ymm5,ymm7 vpxor ymm7,ymm3,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm5,ymm5,ymm7 vpsrld ymm7,ymm14,6 vpslld ymm2,ymm14,26 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 vpaddd ymm5,ymm5,ymm9 vpsrld ymm1,ymm14,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,21 vpaddd ymm5,ymm5,YMMWORD[64+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm14,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm14,7 vpandn ymm0,ymm14,ymm8 vpand ymm3,ymm14,ymm15 vpxor ymm7,ymm7,ymm1 vpsrld ymm9,ymm10,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm10,30 vpxor ymm0,ymm0,ymm3 vpxor ymm3,ymm11,ymm10 vpxor ymm9,ymm9,ymm1 vpaddd ymm5,ymm5,ymm7 vpsrld ymm1,ymm10,13 vpslld ymm2,ymm10,19 vpaddd ymm5,ymm5,ymm0 vpand ymm4,ymm4,ymm3 vpxor ymm7,ymm9,ymm1 vpsrld ymm1,ymm10,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm10,10 vpxor ymm9,ymm11,ymm4 vpaddd ymm13,ymm13,ymm5 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm9,ymm9,ymm5 vpaddd ymm9,ymm9,ymm7 vmovdqu ymm5,YMMWORD[((0-128))+rax] vpaddd ymm6,ymm6,YMMWORD[((256-256-128))+rbx] vpsrld ymm7,ymm5,3 vpsrld ymm1,ymm5,7 vpslld ymm2,ymm5,25 vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm5,18 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm5,14 vmovdqu ymm0,YMMWORD[((416-256-128))+rbx] vpsrld ymm4,ymm0,10 vpxor ymm7,ymm7,ymm1 vpsrld 
ymm1,ymm0,17 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,15 vpaddd ymm6,ymm6,ymm7 vpxor ymm7,ymm4,ymm1 vpsrld ymm1,ymm0,19 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm0,13 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm6,ymm6,ymm7 vpsrld ymm7,ymm13,6 vpslld ymm2,ymm13,26 vmovdqu YMMWORD[(480-256-128)+rbx],ymm6 vpaddd ymm6,ymm6,ymm8 vpsrld ymm1,ymm13,11 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,21 vpaddd ymm6,ymm6,YMMWORD[96+rbp] vpxor ymm7,ymm7,ymm1 vpsrld ymm1,ymm13,25 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm13,7 vpandn ymm0,ymm13,ymm15 vpand ymm4,ymm13,ymm14 vpxor ymm7,ymm7,ymm1 vpsrld ymm8,ymm9,2 vpxor ymm7,ymm7,ymm2 vpslld ymm1,ymm9,30 vpxor ymm0,ymm0,ymm4 vpxor ymm4,ymm10,ymm9 vpxor ymm8,ymm8,ymm1 vpaddd ymm6,ymm6,ymm7 vpsrld ymm1,ymm9,13 vpslld ymm2,ymm9,19 vpaddd ymm6,ymm6,ymm0 vpand ymm3,ymm3,ymm4 vpxor ymm7,ymm8,ymm1 vpsrld ymm1,ymm9,22 vpxor ymm7,ymm7,ymm2 vpslld ymm2,ymm9,10 vpxor ymm8,ymm10,ymm3 vpaddd ymm12,ymm12,ymm6 vpxor ymm7,ymm7,ymm1 vpxor ymm7,ymm7,ymm2 vpaddd ymm8,ymm8,ymm6 vpaddd ymm8,ymm8,ymm7 add rbp,256 dec ecx jnz NEAR $L$oop_16_xx_avx2 mov ecx,1 lea rbx,[512+rsp] lea rbp,[((K256+128))] cmp ecx,DWORD[rbx] cmovge r12,rbp cmp ecx,DWORD[4+rbx] cmovge r13,rbp cmp ecx,DWORD[8+rbx] cmovge r14,rbp cmp ecx,DWORD[12+rbx] cmovge r15,rbp cmp ecx,DWORD[16+rbx] cmovge r8,rbp cmp ecx,DWORD[20+rbx] cmovge r9,rbp cmp ecx,DWORD[24+rbx] cmovge r10,rbp cmp ecx,DWORD[28+rbx] cmovge r11,rbp vmovdqa ymm7,YMMWORD[rbx] vpxor ymm0,ymm0,ymm0 vmovdqa ymm6,ymm7 vpcmpgtd ymm6,ymm6,ymm0 vpaddd ymm7,ymm7,ymm6 vmovdqu ymm0,YMMWORD[((0-128))+rdi] vpand ymm8,ymm8,ymm6 vmovdqu ymm1,YMMWORD[((32-128))+rdi] vpand ymm9,ymm9,ymm6 vmovdqu ymm2,YMMWORD[((64-128))+rdi] vpand ymm10,ymm10,ymm6 vmovdqu ymm5,YMMWORD[((96-128))+rdi] vpand ymm11,ymm11,ymm6 vpaddd ymm8,ymm8,ymm0 vmovdqu ymm0,YMMWORD[((128-128))+rdi] vpand ymm12,ymm12,ymm6 vpaddd ymm9,ymm9,ymm1 vmovdqu ymm1,YMMWORD[((160-128))+rdi] vpand ymm13,ymm13,ymm6 vpaddd ymm10,ymm10,ymm2 vmovdqu ymm2,YMMWORD[((192-128))+rdi] vpand ymm14,ymm14,ymm6 
; ---------------------------------------------------------------------
; Tail of the AVX2 code path (the routine begins above this point).
; The visible part finishes updating the eight 32-byte state rows at
; rdi-128 .. rdi+96, stores them back, and loops while edx != 0.
; ---------------------------------------------------------------------
        vpaddd  ymm11,ymm11,ymm5
        vmovdqu ymm5,YMMWORD[((224-128))+rdi]
        vpand   ymm15,ymm15,ymm6
        vpaddd  ymm12,ymm12,ymm0
        vpaddd  ymm13,ymm13,ymm1
        vmovdqu YMMWORD[(0-128)+rdi],ymm8
        vpaddd  ymm14,ymm14,ymm2
        vmovdqu YMMWORD[(32-128)+rdi],ymm9
        vpaddd  ymm15,ymm15,ymm5
        vmovdqu YMMWORD[(64-128)+rdi],ymm10
        vmovdqu YMMWORD[(96-128)+rdi],ymm11
        vmovdqu YMMWORD[(128-128)+rdi],ymm12
        vmovdqu YMMWORD[(160-128)+rdi],ymm13
        vmovdqu YMMWORD[(192-128)+rdi],ymm14
        vmovdqu YMMWORD[(224-128)+rdi],ymm15
        vmovdqu YMMWORD[rbx],ymm7
        lea     rbx,[((256+128))+rsp]
        ; ymm6 was used as an AND mask above, so the shuffle mask is
        ; loaded into it again before the next pass.
        vmovdqu ymm6,YMMWORD[$L$pbswap]
        dec     edx
        jnz     NEAR $L$oop_avx2

$L$done_avx2:
        ; rax = stack pointer value stashed in the frame at [544+rsp].
        mov     rax,QWORD[544+rsp]
        vzeroupper
        ; Reload xmm6-xmm15 (callee-saved under the Win64 ABI) from the
        ; save area located below the stashed stack pointer.
        movaps  xmm6,XMMWORD[((-216))+rax]
        movaps  xmm7,XMMWORD[((-200))+rax]
        movaps  xmm8,XMMWORD[((-184))+rax]
        movaps  xmm9,XMMWORD[((-168))+rax]
        movaps  xmm10,XMMWORD[((-152))+rax]
        movaps  xmm11,XMMWORD[((-136))+rax]
        movaps  xmm12,XMMWORD[((-120))+rax]
        movaps  xmm13,XMMWORD[((-104))+rax]
        movaps  xmm14,XMMWORD[((-88))+rax]
        movaps  xmm15,XMMWORD[((-72))+rax]
        ; Reload the general-purpose registers stored just below rax.
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$epilogue_avx2:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret

$L$SEH_end_sha256_multi_block_avx2:

; ---------------------------------------------------------------------
; K256: 64 dword constants, each one replicated eight times (32 bytes
; per constant) so that a single 128-bit or 256-bit load yields the
; same value in every lane. The table is 256-byte aligned.
; ---------------------------------------------------------------------
ALIGN   256
K256:
        ; constants 0-7
        TIMES 8 DD 1116352408
        TIMES 8 DD 1899447441
        TIMES 8 DD 3049323471
        TIMES 8 DD 3921009573
        TIMES 8 DD 961987163
        TIMES 8 DD 1508970993
        TIMES 8 DD 2453635748
        TIMES 8 DD 2870763221
        ; constants 8-15
        TIMES 8 DD 3624381080
        TIMES 8 DD 310598401
        TIMES 8 DD 607225278
        TIMES 8 DD 1426881987
        TIMES 8 DD 1925078388
        TIMES 8 DD 2162078206
        TIMES 8 DD 2614888103
        TIMES 8 DD 3248222580
        ; constants 16-23
        TIMES 8 DD 3835390401
        TIMES 8 DD 4022224774
        TIMES 8 DD 264347078
        TIMES 8 DD 604807628
        TIMES 8 DD 770255983
        TIMES 8 DD 1249150122
        TIMES 8 DD 1555081692
        TIMES 8 DD 1996064986
        ; constants 24-31
        TIMES 8 DD 2554220882
        TIMES 8 DD 2821834349
        TIMES 8 DD 2952996808
        TIMES 8 DD 3210313671
        TIMES 8 DD 3336571891
        TIMES 8 DD 3584528711
        TIMES 8 DD 113926993
        TIMES 8 DD 338241895
        ; constants 32-39
        TIMES 8 DD 666307205
        TIMES 8 DD 773529912
        TIMES 8 DD 1294757372
        TIMES 8 DD 1396182291
        TIMES 8 DD 1695183700
        TIMES 8 DD 1986661051
        TIMES 8 DD 2177026350
        TIMES 8 DD 2456956037
        ; constants 40-47
        TIMES 8 DD 2730485921
        TIMES 8 DD 2820302411
        TIMES 8 DD 3259730800
        TIMES 8 DD 3345764771
        TIMES 8 DD 3516065817
        TIMES 8 DD 3600352804
        TIMES 8 DD 4094571909
        TIMES 8 DD 275423344
        ; constants 48-55
        TIMES 8 DD 430227734
        TIMES 8 DD 506948616
        TIMES 8 DD 659060556
        TIMES 8 DD 883997877
        TIMES 8 DD 958139571
        TIMES 8 DD 1322822218
        TIMES 8 DD 1537002063
        TIMES 8 DD 1747873779
        ; constants 56-63
        TIMES 8 DD 1955562222
        TIMES 8 DD 2024104815
        TIMES 8 DD 2227730452
        TIMES 8 DD 2361852424
        TIMES 8 DD 2428436474
        TIMES 8 DD 2756734187
        TIMES 8 DD 3204031479
        TIMES 8 DD 3329325298

; Byte-shuffle mask (two identical 16-byte halves) that reverses the
; byte order inside every dword when used as a pshufb/vpshufb control.
$L$pbswap:
        TIMES 2 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f

; The same 64 constants in hexadecimal, stored once each (four per row).
K256_shaext:
        DD      0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
        DD      0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
        DD      0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
        DD      0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
        DD      0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
        DD      0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
        DD      0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
        DD      0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
        DD      0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
        DD      0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
        DD      0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
        DD      0xd192e819,0xd6990624,0xf40e3585,0x106aa070
        DD      0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
; NOTE: the operands of the DD below are on the following source line.
DD
0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
        DD      0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
        DD      0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

; NUL-terminated ASCII banner:
; "SHA256 multi-block transform for x86_64, CRYPTOGAMS by" followed by
; the author's e-mail address in angle brackets.
        DB      83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111
        DB      99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114
        DB      32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
        DB      65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112
        DB      101,110,115,115,108,46,111,114,103,62,0

EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by
;               the .xdata records of the non-AVX2 entry points.
; ABI:  Microsoft x64
; In:   r8 = CONTEXT record of the faulting frame
;       r9 = DISPATCHER_CONTEXT; HandlerData (offset 56) points at two
;            image-relative dwords: the "body" and "epilogue" labels
; Out:  eax = 1 (ExceptionContinueSearch)
; When the faulting Rip lies in [body, epilogue) the original stack
; pointer is fetched from the frame slot at [272+Rsp] and rbx, rbp and
; xmm6-xmm15 are recovered from the save area below it. Otherwise only
; Rsp/Rsi/Rdi are fixed up. The context is then copied to
; disp->ContextRecord and RtlVirtualUnwind is called.
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]       ; context->Rax
        mov     rbx,QWORD[248+r8]       ; context->Rip

        mov     rsi,QWORD[8+r9]         ; disp->ImageBase
        mov     r11,QWORD[56+r9]        ; disp->HandlerData

        mov     r10d,DWORD[r11]         ; HandlerData[0]
        lea     r10,[r10*1+rsi]         ; absolute address of body label
        cmp     rbx,r10
        jb      NEAR $L$in_prologue     ; Rip < body: frame not set up yet

        mov     rax,QWORD[152+r8]       ; context->Rsp

        mov     r10d,DWORD[4+r11]       ; HandlerData[1]
        lea     r10,[r10*1+rsi]         ; absolute address of epilogue label
        cmp     rbx,r10
        jae     NEAR $L$in_prologue     ; Rip >= epilogue: frame torn down

        mov     rax,QWORD[272+rax]      ; saved original stack pointer

        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp

        lea     rsi,[((-24-160))+rax]   ; saved xmm6..xmm15 in the frame
        lea     rdi,[512+r8]            ; &context->Xmm6
        mov     ecx,20                  ; 10 registers * 2 qwords
        DD      0xa548f3fc              ; cld; rep movsq

$L$in_prologue:
        ; rax = stack pointer at function entry; rdi/rsi were stored in
        ; the caller-provided home slots at [8+rax] and [16+rax].
        mov     rdi,QWORD[8+rax]
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax       ; context->Rsp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi

        mov     rdi,QWORD[40+r9]        ; disp->ContextRecord
        mov     rsi,r8                  ; source = context
        mov     ecx,154                 ; 154 qwords = sizeof(CONTEXT)
        DD      0xa548f3fc              ; cld; rep movsq

        ; RtlVirtualUnwind(UNW_FLAG_NHANDLER, ImageBase, ControlPc,
        ;                  FunctionEntry, ContextRecord, &HandlerData,
        ;                  &EstablisherFrame, NULL)
        mov     rsi,r9
        xor     rcx,rcx                 ; arg1: UNW_FLAG_NHANDLER
        mov     rdx,QWORD[8+rsi]        ; arg2: disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3: disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4: disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5
        mov     QWORD[40+rsp],r11       ; arg6
        mov     QWORD[48+rsp],r12       ; arg7
        mov     QWORD[56+rsp],rcx       ; arg8 = NULL
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret

;-----------------------------------------------------------------------
; avx2_handler -- Win64 language-specific exception handler for the
;                 AVX2 entry point.
; ABI:  Microsoft x64
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (as for se_handler)
; Out:  eax = 1 (via the shared tail at $L$in_prologue)
; Same scheme as se_handler, but the AVX2 frame keeps the original
; stack pointer at [544+Rsp] and additionally saves r12-r15.
;-----------------------------------------------------------------------
ALIGN   16
avx2_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]       ; context->Rax
        mov     rbx,QWORD[248+r8]       ; context->Rip

        mov     rsi,QWORD[8+r9]         ; disp->ImageBase
        mov     r11,QWORD[56+r9]        ; disp->HandlerData

        mov     r10d,DWORD[r11]         ; HandlerData[0]
        lea     r10,[r10*1+rsi]         ; absolute address of body label
        cmp     rbx,r10
        jb      NEAR $L$in_prologue     ; Rip < body: frame not set up yet

        mov     rax,QWORD[152+r8]       ; context->Rsp

        mov     r10d,DWORD[4+r11]       ; HandlerData[1]
        lea     r10,[r10*1+rsi]         ; absolute address of epilogue label
        cmp     rbx,r10
        jae     NEAR $L$in_prologue     ; Rip >= epilogue: frame torn down

        ; FIX: the saved stack pointer lives in the faulting frame at
        ; [544+Rsp] (the AVX2 epilogue reloads it from [544+rsp]), so it
        ; must be read relative to rax (= context->Rsp). Indexing off r8
        ; read offset 544 of the CONTEXT record itself, which is inside
        ; the XMM register save area, and produced a bogus pointer.
        mov     rax,QWORD[544+rax]      ; saved original stack pointer

        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14
        mov     QWORD[240+r8],r15       ; context->R15

        lea     rsi,[((-56-160))+rax]   ; saved xmm6..xmm15 in the frame
        lea     rdi,[512+r8]            ; &context->Xmm6
        mov     ecx,20                  ; 10 registers * 2 qwords
        DD      0xa548f3fc              ; cld; rep movsq

        jmp     NEAR $L$in_prologue     ; shared tail inside se_handler

; ---------------------------------------------------------------------
; .pdata: one RUNTIME_FUNCTION triple (begin, end, unwind info), all
; image-relative, for each of the four code paths.
; ---------------------------------------------------------------------
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_sha256_multi_block wrt ..imagebase
        DD      $L$SEH_end_sha256_multi_block wrt ..imagebase
        DD      $L$SEH_info_sha256_multi_block wrt ..imagebase

        DD      $L$SEH_begin_sha256_multi_block_shaext wrt ..imagebase
        DD      $L$SEH_end_sha256_multi_block_shaext wrt ..imagebase
        DD      $L$SEH_info_sha256_multi_block_shaext wrt ..imagebase

        DD      $L$SEH_begin_sha256_multi_block_avx wrt ..imagebase
        DD      $L$SEH_end_sha256_multi_block_avx wrt ..imagebase
        DD      $L$SEH_info_sha256_multi_block_avx wrt ..imagebase

        DD      $L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase
        DD      $L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase
        DD      $L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase

; ---------------------------------------------------------------------
; .xdata: per-function unwind info. Each record is a 4-byte header
; (9,0,0,0), the image-relative handler address, and the two
; HandlerData dwords (body label, epilogue label) the handlers compare
; the faulting Rip against.
; ---------------------------------------------------------------------
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_sha256_multi_block:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase
$L$SEH_info_sha256_multi_block_shaext:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
$L$SEH_info_sha256_multi_block_avx:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
$L$SEH_info_sha256_multi_block_avx2:
        DB      9,0,0,0
        DD      avx2_handler wrt ..imagebase
        DD      $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase