VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/aesni-x86_64.S@ 99507

最後變更 在這個檔案從99507是 94083,由 vboxsync 提交於 3 年 前

libs/openssl-3.0.1: Recreate asm files, bugref:10128

檔案大小: 95.7 KB
 
; NASM x86-64 source. "default rel" makes memory operands that name a symbol
; RIP-relative unless stated otherwise.
1default rel
; XMMWORD/YMMWORD/ZMMWORD are defined as empty, so "XMMWORD[...]" is just a
; plain memory operand; the operand size comes from the register operand.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
; Defined outside this file; the CTR code below reads the dword at
; OPENSSL_ia32cap_P+4.
7EXTERN OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; aesni_encrypt: encrypt a single 16-byte block.
; In:    rcx = input block, rdx = output block, r8 = key schedule
;        (round keys start at offset 0; the loop count is the dword at
;        offset 240)
; Out:   16 bytes stored at [rdx]
; Clobb: eax, r8, flags; xmm0-xmm2 are zeroed before returning
;-----------------------------------------------------------------------
8global aesni_encrypt
9
10ALIGN 16
11aesni_encrypt:
12
13DB 243,15,30,250 ; endbr64
14 movups xmm2,XMMWORD[rcx] ; xmm2 = input block
15 mov eax,DWORD[240+r8] ; eax = loop count read from key+240
16 movups xmm0,XMMWORD[r8] ; xmm0 = round key 0
17 movups xmm1,XMMWORD[16+r8] ; xmm1 = round key 1
18 lea r8,[32+r8] ; r8 -> round key 2
19 xorps xmm2,xmm0 ; block ^= round key 0
20$L$oop_enc1_1:
21DB 102,15,56,220,209 ; aesenc xmm2,xmm1
22 dec eax ; sets ZF for the jnz below; movups/lea do not touch flags
23 movups xmm1,XMMWORD[r8] ; fetch the next round key
24 lea r8,[16+r8]
25 jnz NEAR $L$oop_enc1_1
26DB 102,15,56,221,209 ; aesenclast xmm2,xmm1
27 pxor xmm0,xmm0 ; zero the round-key registers
28 pxor xmm1,xmm1
29 movups XMMWORD[rdx],xmm2 ; store the result
30 pxor xmm2,xmm2 ; zero the data register after storing it
31 DB 0F3h,0C3h ;repret
32
33
34
;-----------------------------------------------------------------------
; aesni_decrypt: decrypt a single 16-byte block.
; In:    rcx = input block, rdx = output block, r8 = key schedule
;        (round keys start at offset 0; the loop count is the dword at
;        offset 240)
; Out:   16 bytes stored at [rdx]
; Clobb: eax, r8, flags; xmm0-xmm2 are zeroed before returning
;-----------------------------------------------------------------------
35global aesni_decrypt
36
37ALIGN 16
38aesni_decrypt:
39
40DB 243,15,30,250 ; endbr64
41 movups xmm2,XMMWORD[rcx] ; xmm2 = input block
42 mov eax,DWORD[240+r8] ; eax = loop count read from key+240
43 movups xmm0,XMMWORD[r8] ; xmm0 = round key 0
44 movups xmm1,XMMWORD[16+r8] ; xmm1 = round key 1
45 lea r8,[32+r8] ; r8 -> round key 2
46 xorps xmm2,xmm0 ; block ^= round key 0
47$L$oop_dec1_2:
48DB 102,15,56,222,209 ; aesdec xmm2,xmm1
49 dec eax ; sets ZF for the jnz below; movups/lea do not touch flags
50 movups xmm1,XMMWORD[r8] ; fetch the next round key
51 lea r8,[16+r8]
52 jnz NEAR $L$oop_dec1_2
53DB 102,15,56,223,209 ; aesdeclast xmm2,xmm1
54 pxor xmm0,xmm0 ; zero the round-key registers
55 pxor xmm1,xmm1
56 movups XMMWORD[rdx],xmm2 ; store the result
57 pxor xmm2,xmm2 ; zero the data register after storing it
58 DB 0F3h,0C3h ;repret
59
60
61
;-----------------------------------------------------------------------
; _aesni_encrypt2: internal helper, encrypts two blocks in parallel.
; In:    xmm2, xmm3 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2, xmm3 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
62ALIGN 16
63_aesni_encrypt2:
64
65 movups xmm0,XMMWORD[rcx] ; round key 0
66 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
67 movups xmm1,XMMWORD[16+rcx] ; round key 1
68 xorps xmm2,xmm0 ; whiten both blocks with round key 0
69 xorps xmm3,xmm0
70 movups xmm0,XMMWORD[32+rcx] ; round key 2
71 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
72 neg rax
73 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
74
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
75$L$enc_loop2:
76DB 102,15,56,220,209 ; aesenc xmm2,xmm1
77DB 102,15,56,220,217 ; aesenc xmm3,xmm1
78 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
79 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
80DB 102,15,56,220,208 ; aesenc xmm2,xmm0
81DB 102,15,56,220,216 ; aesenc xmm3,xmm0
82 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
83 jnz NEAR $L$enc_loop2
84
85DB 102,15,56,220,209 ; aesenc xmm2,xmm1
86DB 102,15,56,220,217 ; aesenc xmm3,xmm1
87DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
88DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
89 DB 0F3h,0C3h ;repret
90
91
92
;-----------------------------------------------------------------------
; _aesni_decrypt2: internal helper, decrypts two blocks in parallel.
; In:    xmm2, xmm3 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2, xmm3 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
93ALIGN 16
94_aesni_decrypt2:
95
96 movups xmm0,XMMWORD[rcx] ; round key 0
97 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
98 movups xmm1,XMMWORD[16+rcx] ; round key 1
99 xorps xmm2,xmm0 ; whiten both blocks with round key 0
100 xorps xmm3,xmm0
101 movups xmm0,XMMWORD[32+rcx] ; round key 2
102 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
103 neg rax
104 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
105
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
106$L$dec_loop2:
107DB 102,15,56,222,209 ; aesdec xmm2,xmm1
108DB 102,15,56,222,217 ; aesdec xmm3,xmm1
109 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
110 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
111DB 102,15,56,222,208 ; aesdec xmm2,xmm0
112DB 102,15,56,222,216 ; aesdec xmm3,xmm0
113 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
114 jnz NEAR $L$dec_loop2
115
116DB 102,15,56,222,209 ; aesdec xmm2,xmm1
117DB 102,15,56,222,217 ; aesdec xmm3,xmm1
118DB 102,15,56,223,208 ; aesdeclast xmm2,xmm0
119DB 102,15,56,223,216 ; aesdeclast xmm3,xmm0
120 DB 0F3h,0C3h ;repret
121
122
123
;-----------------------------------------------------------------------
; _aesni_encrypt3: internal helper, encrypts three blocks in parallel.
; In:    xmm2-xmm4 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm4 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
124ALIGN 16
125_aesni_encrypt3:
126
127 movups xmm0,XMMWORD[rcx] ; round key 0
128 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
129 movups xmm1,XMMWORD[16+rcx] ; round key 1
130 xorps xmm2,xmm0 ; whiten all three blocks with round key 0
131 xorps xmm3,xmm0
132 xorps xmm4,xmm0
133 movups xmm0,XMMWORD[32+rcx] ; round key 2
134 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
135 neg rax
136 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
137
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
138$L$enc_loop3:
139DB 102,15,56,220,209 ; aesenc xmm2,xmm1
140DB 102,15,56,220,217 ; aesenc xmm3,xmm1
141DB 102,15,56,220,225 ; aesenc xmm4,xmm1
142 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
143 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
144DB 102,15,56,220,208 ; aesenc xmm2,xmm0
145DB 102,15,56,220,216 ; aesenc xmm3,xmm0
146DB 102,15,56,220,224 ; aesenc xmm4,xmm0
147 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
148 jnz NEAR $L$enc_loop3
149
150DB 102,15,56,220,209 ; aesenc xmm2,xmm1
151DB 102,15,56,220,217 ; aesenc xmm3,xmm1
152DB 102,15,56,220,225 ; aesenc xmm4,xmm1
153DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
154DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
155DB 102,15,56,221,224 ; aesenclast xmm4,xmm0
156 DB 0F3h,0C3h ;repret
157
158
159
;-----------------------------------------------------------------------
; _aesni_decrypt3: internal helper, decrypts three blocks in parallel.
; In:    xmm2-xmm4 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm4 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
160ALIGN 16
161_aesni_decrypt3:
162
163 movups xmm0,XMMWORD[rcx] ; round key 0
164 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
165 movups xmm1,XMMWORD[16+rcx] ; round key 1
166 xorps xmm2,xmm0 ; whiten all three blocks with round key 0
167 xorps xmm3,xmm0
168 xorps xmm4,xmm0
169 movups xmm0,XMMWORD[32+rcx] ; round key 2
170 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
171 neg rax
172 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
173
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
174$L$dec_loop3:
175DB 102,15,56,222,209 ; aesdec xmm2,xmm1
176DB 102,15,56,222,217 ; aesdec xmm3,xmm1
177DB 102,15,56,222,225 ; aesdec xmm4,xmm1
178 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
179 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
180DB 102,15,56,222,208 ; aesdec xmm2,xmm0
181DB 102,15,56,222,216 ; aesdec xmm3,xmm0
182DB 102,15,56,222,224 ; aesdec xmm4,xmm0
183 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
184 jnz NEAR $L$dec_loop3
185
186DB 102,15,56,222,209 ; aesdec xmm2,xmm1
187DB 102,15,56,222,217 ; aesdec xmm3,xmm1
188DB 102,15,56,222,225 ; aesdec xmm4,xmm1
189DB 102,15,56,223,208 ; aesdeclast xmm2,xmm0
190DB 102,15,56,223,216 ; aesdeclast xmm3,xmm0
191DB 102,15,56,223,224 ; aesdeclast xmm4,xmm0
192 DB 0F3h,0C3h ;repret
193
194
195
;-----------------------------------------------------------------------
; _aesni_encrypt4: internal helper, encrypts four blocks in parallel.
; In:    xmm2-xmm5 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm5 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
196ALIGN 16
197_aesni_encrypt4:
198
199 movups xmm0,XMMWORD[rcx] ; round key 0
200 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
201 movups xmm1,XMMWORD[16+rcx] ; round key 1
202 xorps xmm2,xmm0 ; whiten all four blocks with round key 0
203 xorps xmm3,xmm0
204 xorps xmm4,xmm0
205 xorps xmm5,xmm0
206 movups xmm0,XMMWORD[32+rcx] ; round key 2
207 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
208 neg rax
209DB 0x0f,0x1f,0x00 ; 3-byte nop (nop dword [rax])
210 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
211
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
212$L$enc_loop4:
213DB 102,15,56,220,209 ; aesenc xmm2,xmm1
214DB 102,15,56,220,217 ; aesenc xmm3,xmm1
215DB 102,15,56,220,225 ; aesenc xmm4,xmm1
216DB 102,15,56,220,233 ; aesenc xmm5,xmm1
217 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
218 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
219DB 102,15,56,220,208 ; aesenc xmm2,xmm0
220DB 102,15,56,220,216 ; aesenc xmm3,xmm0
221DB 102,15,56,220,224 ; aesenc xmm4,xmm0
222DB 102,15,56,220,232 ; aesenc xmm5,xmm0
223 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
224 jnz NEAR $L$enc_loop4
225
226DB 102,15,56,220,209 ; aesenc xmm2,xmm1
227DB 102,15,56,220,217 ; aesenc xmm3,xmm1
228DB 102,15,56,220,225 ; aesenc xmm4,xmm1
229DB 102,15,56,220,233 ; aesenc xmm5,xmm1
230DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
231DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
232DB 102,15,56,221,224 ; aesenclast xmm4,xmm0
233DB 102,15,56,221,232 ; aesenclast xmm5,xmm0
234 DB 0F3h,0C3h ;repret
235
236
237
;-----------------------------------------------------------------------
; _aesni_decrypt4: internal helper, decrypts four blocks in parallel.
; In:    xmm2-xmm5 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm5 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
238ALIGN 16
239_aesni_decrypt4:
240
241 movups xmm0,XMMWORD[rcx] ; round key 0
242 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
243 movups xmm1,XMMWORD[16+rcx] ; round key 1
244 xorps xmm2,xmm0 ; whiten all four blocks with round key 0
245 xorps xmm3,xmm0
246 xorps xmm4,xmm0
247 xorps xmm5,xmm0
248 movups xmm0,XMMWORD[32+rcx] ; round key 2
249 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
250 neg rax
251DB 0x0f,0x1f,0x00 ; 3-byte nop (nop dword [rax])
252 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
253
; Two rounds per iteration, alternating xmm1/xmm0 as the round-key register.
254$L$dec_loop4:
255DB 102,15,56,222,209 ; aesdec xmm2,xmm1
256DB 102,15,56,222,217 ; aesdec xmm3,xmm1
257DB 102,15,56,222,225 ; aesdec xmm4,xmm1
258DB 102,15,56,222,233 ; aesdec xmm5,xmm1
259 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
260 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
261DB 102,15,56,222,208 ; aesdec xmm2,xmm0
262DB 102,15,56,222,216 ; aesdec xmm3,xmm0
263DB 102,15,56,222,224 ; aesdec xmm4,xmm0
264DB 102,15,56,222,232 ; aesdec xmm5,xmm0
265 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
266 jnz NEAR $L$dec_loop4
267
268DB 102,15,56,222,209 ; aesdec xmm2,xmm1
269DB 102,15,56,222,217 ; aesdec xmm3,xmm1
270DB 102,15,56,222,225 ; aesdec xmm4,xmm1
271DB 102,15,56,222,233 ; aesdec xmm5,xmm1
272DB 102,15,56,223,208 ; aesdeclast xmm2,xmm0
273DB 102,15,56,223,216 ; aesdeclast xmm3,xmm0
274DB 102,15,56,223,224 ; aesdeclast xmm4,xmm0
275DB 102,15,56,223,232 ; aesdeclast xmm5,xmm0
276 DB 0F3h,0C3h ;repret
277
278
279
;-----------------------------------------------------------------------
; _aesni_encrypt6: internal helper, encrypts six blocks in parallel.
; In:    xmm2-xmm7 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm7 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  $L$enc_loop6 is also entered directly by "call $L$enc_loop6" from
;        the CTR code later in this file, so it must stay a valid entry
;        point that ends in a return.
;-----------------------------------------------------------------------
280ALIGN 16
281_aesni_encrypt6:
282
283 movups xmm0,XMMWORD[rcx] ; round key 0
284 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
285 movups xmm1,XMMWORD[16+rcx] ; round key 1
; Whitening of the later blocks is interleaved with the first round of
; the earlier ones.
286 xorps xmm2,xmm0
287 pxor xmm3,xmm0
288 pxor xmm4,xmm0
289DB 102,15,56,220,209 ; aesenc xmm2,xmm1
290 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
291 neg rax ; rax = -16*count
292DB 102,15,56,220,217 ; aesenc xmm3,xmm1
293 pxor xmm5,xmm0
294 pxor xmm6,xmm0
295DB 102,15,56,220,225 ; aesenc xmm4,xmm1
296 pxor xmm7,xmm0
297 movups xmm0,XMMWORD[rax*1+rcx] ; [key+32] = round key 2
298 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
299 jmp NEAR $L$enc_loop6_enter ; xmm2-xmm4 already had their xmm1 round
300ALIGN 16
301$L$enc_loop6:
302DB 102,15,56,220,209 ; aesenc xmm2,xmm1
303DB 102,15,56,220,217 ; aesenc xmm3,xmm1
304DB 102,15,56,220,225 ; aesenc xmm4,xmm1
305$L$enc_loop6_enter:
306DB 102,15,56,220,233 ; aesenc xmm5,xmm1
307DB 102,15,56,220,241 ; aesenc xmm6,xmm1
308DB 102,15,56,220,249 ; aesenc xmm7,xmm1
309 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
310 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
311DB 102,15,56,220,208 ; aesenc xmm2,xmm0
312DB 102,15,56,220,216 ; aesenc xmm3,xmm0
313DB 102,15,56,220,224 ; aesenc xmm4,xmm0
314DB 102,15,56,220,232 ; aesenc xmm5,xmm0
315DB 102,15,56,220,240 ; aesenc xmm6,xmm0
316DB 102,15,56,220,248 ; aesenc xmm7,xmm0
317 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
318 jnz NEAR $L$enc_loop6
319
320DB 102,15,56,220,209 ; aesenc xmm2,xmm1
321DB 102,15,56,220,217 ; aesenc xmm3,xmm1
322DB 102,15,56,220,225 ; aesenc xmm4,xmm1
323DB 102,15,56,220,233 ; aesenc xmm5,xmm1
324DB 102,15,56,220,241 ; aesenc xmm6,xmm1
325DB 102,15,56,220,249 ; aesenc xmm7,xmm1
326DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
327DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
328DB 102,15,56,221,224 ; aesenclast xmm4,xmm0
329DB 102,15,56,221,232 ; aesenclast xmm5,xmm0
330DB 102,15,56,221,240 ; aesenclast xmm6,xmm0
331DB 102,15,56,221,248 ; aesenclast xmm7,xmm0
332 DB 0F3h,0C3h ;repret
333
334
335
;-----------------------------------------------------------------------
; _aesni_decrypt6: internal helper, decrypts six blocks in parallel.
; In:    xmm2-xmm7 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm7 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
336ALIGN 16
337_aesni_decrypt6:
338
339 movups xmm0,XMMWORD[rcx] ; round key 0
340 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
341 movups xmm1,XMMWORD[16+rcx] ; round key 1
; Whitening of the later blocks is interleaved with the first round of
; the earlier ones.
342 xorps xmm2,xmm0
343 pxor xmm3,xmm0
344 pxor xmm4,xmm0
345DB 102,15,56,222,209 ; aesdec xmm2,xmm1
346 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
347 neg rax ; rax = -16*count
348DB 102,15,56,222,217 ; aesdec xmm3,xmm1
349 pxor xmm5,xmm0
350 pxor xmm6,xmm0
351DB 102,15,56,222,225 ; aesdec xmm4,xmm1
352 pxor xmm7,xmm0
353 movups xmm0,XMMWORD[rax*1+rcx] ; [key+32] = round key 2
354 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
355 jmp NEAR $L$dec_loop6_enter ; xmm2-xmm4 already had their xmm1 round
356ALIGN 16
357$L$dec_loop6:
358DB 102,15,56,222,209 ; aesdec xmm2,xmm1
359DB 102,15,56,222,217 ; aesdec xmm3,xmm1
360DB 102,15,56,222,225 ; aesdec xmm4,xmm1
361$L$dec_loop6_enter:
362DB 102,15,56,222,233 ; aesdec xmm5,xmm1
363DB 102,15,56,222,241 ; aesdec xmm6,xmm1
364DB 102,15,56,222,249 ; aesdec xmm7,xmm1
365 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
366 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
367DB 102,15,56,222,208 ; aesdec xmm2,xmm0
368DB 102,15,56,222,216 ; aesdec xmm3,xmm0
369DB 102,15,56,222,224 ; aesdec xmm4,xmm0
370DB 102,15,56,222,232 ; aesdec xmm5,xmm0
371DB 102,15,56,222,240 ; aesdec xmm6,xmm0
372DB 102,15,56,222,248 ; aesdec xmm7,xmm0
373 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
374 jnz NEAR $L$dec_loop6
375
376DB 102,15,56,222,209 ; aesdec xmm2,xmm1
377DB 102,15,56,222,217 ; aesdec xmm3,xmm1
378DB 102,15,56,222,225 ; aesdec xmm4,xmm1
379DB 102,15,56,222,233 ; aesdec xmm5,xmm1
380DB 102,15,56,222,241 ; aesdec xmm6,xmm1
381DB 102,15,56,222,249 ; aesdec xmm7,xmm1
382DB 102,15,56,223,208 ; aesdeclast xmm2,xmm0
383DB 102,15,56,223,216 ; aesdeclast xmm3,xmm0
384DB 102,15,56,223,224 ; aesdeclast xmm4,xmm0
385DB 102,15,56,223,232 ; aesdeclast xmm5,xmm0
386DB 102,15,56,223,240 ; aesdeclast xmm6,xmm0
387DB 102,15,56,223,248 ; aesdeclast xmm7,xmm0
388 DB 0F3h,0C3h ;repret
389
390
391
;-----------------------------------------------------------------------
; _aesni_encrypt8: internal helper, encrypts eight blocks in parallel.
; In:    xmm2-xmm9 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm9 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
392ALIGN 16
393_aesni_encrypt8:
394
395 movups xmm0,XMMWORD[rcx] ; round key 0
396 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
397 movups xmm1,XMMWORD[16+rcx] ; round key 1
; Whitening of the later blocks is interleaved with the first round of
; xmm2 and xmm3.
398 xorps xmm2,xmm0
399 xorps xmm3,xmm0
400 pxor xmm4,xmm0
401 pxor xmm5,xmm0
402 pxor xmm6,xmm0
403 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
404 neg rax ; rax = -16*count
405DB 102,15,56,220,209 ; aesenc xmm2,xmm1
406 pxor xmm7,xmm0
407 pxor xmm8,xmm0
408DB 102,15,56,220,217 ; aesenc xmm3,xmm1
409 pxor xmm9,xmm0
410 movups xmm0,XMMWORD[rax*1+rcx] ; [key+32] = round key 2
411 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
412 jmp NEAR $L$enc_loop8_inner ; xmm2/xmm3 already had their xmm1 round
413ALIGN 16
414$L$enc_loop8:
415DB 102,15,56,220,209 ; aesenc xmm2,xmm1
416DB 102,15,56,220,217 ; aesenc xmm3,xmm1
417$L$enc_loop8_inner:
418DB 102,15,56,220,225 ; aesenc xmm4,xmm1
419DB 102,15,56,220,233 ; aesenc xmm5,xmm1
420DB 102,15,56,220,241 ; aesenc xmm6,xmm1
421DB 102,15,56,220,249 ; aesenc xmm7,xmm1
422DB 102,68,15,56,220,193 ; aesenc xmm8,xmm1
423DB 102,68,15,56,220,201 ; aesenc xmm9,xmm1
; NOTE(review): no jump to the label below is visible in this part of the file.
424$L$enc_loop8_enter:
425 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
426 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
427DB 102,15,56,220,208 ; aesenc xmm2,xmm0
428DB 102,15,56,220,216 ; aesenc xmm3,xmm0
429DB 102,15,56,220,224 ; aesenc xmm4,xmm0
430DB 102,15,56,220,232 ; aesenc xmm5,xmm0
431DB 102,15,56,220,240 ; aesenc xmm6,xmm0
432DB 102,15,56,220,248 ; aesenc xmm7,xmm0
433DB 102,68,15,56,220,192 ; aesenc xmm8,xmm0
434DB 102,68,15,56,220,200 ; aesenc xmm9,xmm0
435 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
436 jnz NEAR $L$enc_loop8
437
438DB 102,15,56,220,209 ; aesenc xmm2,xmm1
439DB 102,15,56,220,217 ; aesenc xmm3,xmm1
440DB 102,15,56,220,225 ; aesenc xmm4,xmm1
441DB 102,15,56,220,233 ; aesenc xmm5,xmm1
442DB 102,15,56,220,241 ; aesenc xmm6,xmm1
443DB 102,15,56,220,249 ; aesenc xmm7,xmm1
444DB 102,68,15,56,220,193 ; aesenc xmm8,xmm1
445DB 102,68,15,56,220,201 ; aesenc xmm9,xmm1
446DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
447DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
448DB 102,15,56,221,224 ; aesenclast xmm4,xmm0
449DB 102,15,56,221,232 ; aesenclast xmm5,xmm0
450DB 102,15,56,221,240 ; aesenclast xmm6,xmm0
451DB 102,15,56,221,248 ; aesenclast xmm7,xmm0
452DB 102,68,15,56,221,192 ; aesenclast xmm8,xmm0
453DB 102,68,15,56,221,200 ; aesenclast xmm9,xmm0
454 DB 0F3h,0C3h ;repret
455
456
457
;-----------------------------------------------------------------------
; _aesni_decrypt8: internal helper, decrypts eight blocks in parallel.
; In:    xmm2-xmm9 = blocks; rcx = key schedule; eax = loop count
;        (callers in this file load it from offset 240 of the key schedule)
; Out:   xmm2-xmm9 = results
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
458ALIGN 16
459_aesni_decrypt8:
460
461 movups xmm0,XMMWORD[rcx] ; round key 0
462 shl eax,4 ; eax = 16*count (32-bit write also clears the upper half of rax)
463 movups xmm1,XMMWORD[16+rcx] ; round key 1
; Whitening of the later blocks is interleaved with the first round of
; xmm2 and xmm3.
464 xorps xmm2,xmm0
465 xorps xmm3,xmm0
466 pxor xmm4,xmm0
467 pxor xmm5,xmm0
468 pxor xmm6,xmm0
469 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
470 neg rax ; rax = -16*count
471DB 102,15,56,222,209 ; aesdec xmm2,xmm1
472 pxor xmm7,xmm0
473 pxor xmm8,xmm0
474DB 102,15,56,222,217 ; aesdec xmm3,xmm1
475 pxor xmm9,xmm0
476 movups xmm0,XMMWORD[rax*1+rcx] ; [key+32] = round key 2
477 add rax,16 ; rax = 16 - 16*count: negative offset that counts up to zero
478 jmp NEAR $L$dec_loop8_inner ; xmm2/xmm3 already had their xmm1 round
479ALIGN 16
480$L$dec_loop8:
481DB 102,15,56,222,209 ; aesdec xmm2,xmm1
482DB 102,15,56,222,217 ; aesdec xmm3,xmm1
483$L$dec_loop8_inner:
484DB 102,15,56,222,225 ; aesdec xmm4,xmm1
485DB 102,15,56,222,233 ; aesdec xmm5,xmm1
486DB 102,15,56,222,241 ; aesdec xmm6,xmm1
487DB 102,15,56,222,249 ; aesdec xmm7,xmm1
488DB 102,68,15,56,222,193 ; aesdec xmm8,xmm1
489DB 102,68,15,56,222,201 ; aesdec xmm9,xmm1
; NOTE(review): no jump to the label below is visible in this part of the file.
490$L$dec_loop8_enter:
491 movups xmm1,XMMWORD[rax*1+rcx] ; next round key for the xmm1 slot
492 add rax,32 ; ZF from this add drives the jnz; nothing below changes flags
493DB 102,15,56,222,208 ; aesdec xmm2,xmm0
494DB 102,15,56,222,216 ; aesdec xmm3,xmm0
495DB 102,15,56,222,224 ; aesdec xmm4,xmm0
496DB 102,15,56,222,232 ; aesdec xmm5,xmm0
497DB 102,15,56,222,240 ; aesdec xmm6,xmm0
498DB 102,15,56,222,248 ; aesdec xmm7,xmm0
499DB 102,68,15,56,222,192 ; aesdec xmm8,xmm0
500DB 102,68,15,56,222,200 ; aesdec xmm9,xmm0
501 movups xmm0,XMMWORD[((-16))+rax*1+rcx] ; next round key for the xmm0 slot
502 jnz NEAR $L$dec_loop8
503
504DB 102,15,56,222,209 ; aesdec xmm2,xmm1
505DB 102,15,56,222,217 ; aesdec xmm3,xmm1
506DB 102,15,56,222,225 ; aesdec xmm4,xmm1
507DB 102,15,56,222,233 ; aesdec xmm5,xmm1
508DB 102,15,56,222,241 ; aesdec xmm6,xmm1
509DB 102,15,56,222,249 ; aesdec xmm7,xmm1
510DB 102,68,15,56,222,193 ; aesdec xmm8,xmm1
511DB 102,68,15,56,222,201 ; aesdec xmm9,xmm1
512DB 102,15,56,223,208 ; aesdeclast xmm2,xmm0
513DB 102,15,56,223,216 ; aesdeclast xmm3,xmm0
514DB 102,15,56,223,224 ; aesdeclast xmm4,xmm0
515DB 102,15,56,223,232 ; aesdeclast xmm5,xmm0
516DB 102,15,56,223,240 ; aesdeclast xmm6,xmm0
517DB 102,15,56,223,248 ; aesdeclast xmm7,xmm0
518DB 102,68,15,56,223,192 ; aesdeclast xmm8,xmm0
519DB 102,68,15,56,223,200 ; aesdeclast xmm9,xmm0
520 DB 0F3h,0C3h ;repret
521
522
;-----------------------------------------------------------------------
; aesni_ecb_encrypt: ECB-encrypt or ECB-decrypt a buffer, 16 bytes at a time.
; ABI:   Microsoft x64 entry; arguments are moved into rdi/rsi/rdx/rcx/r8
;        before the body runs.
; In (Win64):  rcx = input, rdx = output, r8 = length in bytes,
;              r9 = key schedule, [40+rsp] = direction flag
; In (body):   rdi = input, rsi = output, rdx = length in bytes (rounded
;              down to a multiple of 16), rcx = key schedule,
;              r8d = direction (non-zero: encrypt, zero: decrypt)
; Body regs:   r11 = saved key pointer, r10d = saved loop count; both are
;              used to restore rcx/eax, which the helpers overwrite.
; Stack: 88 bytes; xmm6-xmm9 are saved at [rsp]..[48+rsp] and restored on
;        exit, and the save slots are overwritten with zeros.
; Clobb: rax, rcx, rdx, r8, r10, r11, xmm0-xmm5, flags
;        (rdi/rsi are restored from the caller's stack slots on return)
;-----------------------------------------------------------------------
523global aesni_ecb_encrypt
524
525ALIGN 16
526aesni_ecb_encrypt:
527 mov QWORD[8+rsp],rdi ;WIN64 prologue
528 mov QWORD[16+rsp],rsi
529 mov rax,rsp
; NOTE(review): the $L$SEH_* / *_body / *_ret labels are presumably referenced
; by unwind data outside this view - keep their names and positions.
530$L$SEH_begin_aesni_ecb_encrypt:
531 mov rdi,rcx ; remap the Win64 register arguments
532 mov rsi,rdx
533 mov rdx,r8
534 mov rcx,r9
535 mov r8,QWORD[40+rsp] ; fifth argument comes from the stack
536
537
538
539DB 243,15,30,250 ; endbr64
540 lea rsp,[((-88))+rsp] ; frame for the xmm6-xmm9 save area
541 movaps XMMWORD[rsp],xmm6
542 movaps XMMWORD[16+rsp],xmm7
543 movaps XMMWORD[32+rsp],xmm8
544 movaps XMMWORD[48+rsp],xmm9
545$L$ecb_enc_body:
546 and rdx,-16 ; drop any partial trailing block
547 jz NEAR $L$ecb_ret ; nothing to do for fewer than 16 bytes
548
549 mov eax,DWORD[240+rcx] ; eax = loop count read from key+240
550 movups xmm0,XMMWORD[rcx]
551 mov r11,rcx ; keep copies: the helpers overwrite rcx and rax
552 mov r10d,eax
553 test r8d,r8d
554 jz NEAR $L$ecb_decrypt ; direction flag == 0 -> decrypt
555
; ---- encrypt: batches of eight blocks (0x80 bytes) while enough remain ----
556 cmp rdx,0x80
557 jb NEAR $L$ecb_enc_tail
558
559 movdqu xmm2,XMMWORD[rdi]
560 movdqu xmm3,XMMWORD[16+rdi]
561 movdqu xmm4,XMMWORD[32+rdi]
562 movdqu xmm5,XMMWORD[48+rdi]
563 movdqu xmm6,XMMWORD[64+rdi]
564 movdqu xmm7,XMMWORD[80+rdi]
565 movdqu xmm8,XMMWORD[96+rdi]
566 movdqu xmm9,XMMWORD[112+rdi]
567 lea rdi,[128+rdi]
568 sub rdx,0x80 ; account for the batch just loaded
569 jmp NEAR $L$ecb_enc_loop8_enter
570ALIGN 16
; Store the previous batch's results while loading the next batch's inputs.
571$L$ecb_enc_loop8:
572 movups XMMWORD[rsi],xmm2
573 mov rcx,r11 ; restore key pointer for the helper
574 movdqu xmm2,XMMWORD[rdi]
575 mov eax,r10d ; restore loop count for the helper
576 movups XMMWORD[16+rsi],xmm3
577 movdqu xmm3,XMMWORD[16+rdi]
578 movups XMMWORD[32+rsi],xmm4
579 movdqu xmm4,XMMWORD[32+rdi]
580 movups XMMWORD[48+rsi],xmm5
581 movdqu xmm5,XMMWORD[48+rdi]
582 movups XMMWORD[64+rsi],xmm6
583 movdqu xmm6,XMMWORD[64+rdi]
584 movups XMMWORD[80+rsi],xmm7
585 movdqu xmm7,XMMWORD[80+rdi]
586 movups XMMWORD[96+rsi],xmm8
587 movdqu xmm8,XMMWORD[96+rdi]
588 movups XMMWORD[112+rsi],xmm9
589 lea rsi,[128+rsi]
590 movdqu xmm9,XMMWORD[112+rdi]
591 lea rdi,[128+rdi]
592$L$ecb_enc_loop8_enter:
593
594 call _aesni_encrypt8
595
596 sub rdx,0x80 ; no borrow -> another full batch is available
597 jnc NEAR $L$ecb_enc_loop8
598
599 movups XMMWORD[rsi],xmm2 ; flush the last full batch
600 mov rcx,r11
601 movups XMMWORD[16+rsi],xmm3
602 mov eax,r10d
603 movups XMMWORD[32+rsi],xmm4
604 movups XMMWORD[48+rsi],xmm5
605 movups XMMWORD[64+rsi],xmm6
606 movups XMMWORD[80+rsi],xmm7
607 movups XMMWORD[96+rsi],xmm8
608 movups XMMWORD[112+rsi],xmm9
609 lea rsi,[128+rsi]
610 add rdx,0x80 ; undo the last sub: rdx = bytes still to process
611 jz NEAR $L$ecb_ret
612
; ---- encrypt tail: 1..7 blocks remain. Each cmp feeds both a jb and a je;
; the movups loads in between do not modify flags. ----
613$L$ecb_enc_tail:
614 movups xmm2,XMMWORD[rdi]
615 cmp rdx,0x20
616 jb NEAR $L$ecb_enc_one
617 movups xmm3,XMMWORD[16+rdi]
618 je NEAR $L$ecb_enc_two
619 movups xmm4,XMMWORD[32+rdi]
620 cmp rdx,0x40
621 jb NEAR $L$ecb_enc_three
622 movups xmm5,XMMWORD[48+rdi]
623 je NEAR $L$ecb_enc_four
624 movups xmm6,XMMWORD[64+rdi]
625 cmp rdx,0x60
626 jb NEAR $L$ecb_enc_five
627 movups xmm7,XMMWORD[80+rdi]
628 je NEAR $L$ecb_enc_six
; Seven blocks: use the 8-way helper with a zeroed eighth lane whose result
; is not stored.
629 movdqu xmm8,XMMWORD[96+rdi]
630 xorps xmm9,xmm9
631 call _aesni_encrypt8
632 movups XMMWORD[rsi],xmm2
633 movups XMMWORD[16+rsi],xmm3
634 movups XMMWORD[32+rsi],xmm4
635 movups XMMWORD[48+rsi],xmm5
636 movups XMMWORD[64+rsi],xmm6
637 movups XMMWORD[80+rsi],xmm7
638 movups XMMWORD[96+rsi],xmm8
639 jmp NEAR $L$ecb_ret
640ALIGN 16
; One block: inline single-block round loop (eax = loop count).
641$L$ecb_enc_one:
642 movups xmm0,XMMWORD[rcx]
643 movups xmm1,XMMWORD[16+rcx]
644 lea rcx,[32+rcx]
645 xorps xmm2,xmm0
646$L$oop_enc1_3:
647DB 102,15,56,220,209 ; aesenc xmm2,xmm1
648 dec eax ; sets ZF for the jnz; movups/lea do not touch flags
649 movups xmm1,XMMWORD[rcx]
650 lea rcx,[16+rcx]
651 jnz NEAR $L$oop_enc1_3
652DB 102,15,56,221,209 ; aesenclast xmm2,xmm1
653 movups XMMWORD[rsi],xmm2
654 jmp NEAR $L$ecb_ret
655ALIGN 16
656$L$ecb_enc_two:
657 call _aesni_encrypt2
658 movups XMMWORD[rsi],xmm2
659 movups XMMWORD[16+rsi],xmm3
660 jmp NEAR $L$ecb_ret
661ALIGN 16
662$L$ecb_enc_three:
663 call _aesni_encrypt3
664 movups XMMWORD[rsi],xmm2
665 movups XMMWORD[16+rsi],xmm3
666 movups XMMWORD[32+rsi],xmm4
667 jmp NEAR $L$ecb_ret
668ALIGN 16
669$L$ecb_enc_four:
670 call _aesni_encrypt4
671 movups XMMWORD[rsi],xmm2
672 movups XMMWORD[16+rsi],xmm3
673 movups XMMWORD[32+rsi],xmm4
674 movups XMMWORD[48+rsi],xmm5
675 jmp NEAR $L$ecb_ret
676ALIGN 16
; Five blocks: use the 6-way helper with a zeroed sixth lane whose result is
; not stored.
677$L$ecb_enc_five:
678 xorps xmm7,xmm7
679 call _aesni_encrypt6
680 movups XMMWORD[rsi],xmm2
681 movups XMMWORD[16+rsi],xmm3
682 movups XMMWORD[32+rsi],xmm4
683 movups XMMWORD[48+rsi],xmm5
684 movups XMMWORD[64+rsi],xmm6
685 jmp NEAR $L$ecb_ret
686ALIGN 16
687$L$ecb_enc_six:
688 call _aesni_encrypt6
689 movups XMMWORD[rsi],xmm2
690 movups XMMWORD[16+rsi],xmm3
691 movups XMMWORD[32+rsi],xmm4
692 movups XMMWORD[48+rsi],xmm5
693 movups XMMWORD[64+rsi],xmm6
694 movups XMMWORD[80+rsi],xmm7
695 jmp NEAR $L$ecb_ret
696
; ---- decrypt: same structure as the encrypt side. On the final-batch and
; tail paths each result register is zeroed right after it is stored. ----
697ALIGN 16
698$L$ecb_decrypt:
699 cmp rdx,0x80
700 jb NEAR $L$ecb_dec_tail
701
702 movdqu xmm2,XMMWORD[rdi]
703 movdqu xmm3,XMMWORD[16+rdi]
704 movdqu xmm4,XMMWORD[32+rdi]
705 movdqu xmm5,XMMWORD[48+rdi]
706 movdqu xmm6,XMMWORD[64+rdi]
707 movdqu xmm7,XMMWORD[80+rdi]
708 movdqu xmm8,XMMWORD[96+rdi]
709 movdqu xmm9,XMMWORD[112+rdi]
710 lea rdi,[128+rdi]
711 sub rdx,0x80 ; account for the batch just loaded
712 jmp NEAR $L$ecb_dec_loop8_enter
713ALIGN 16
; Store the previous batch's results while loading the next batch's inputs.
714$L$ecb_dec_loop8:
715 movups XMMWORD[rsi],xmm2
716 mov rcx,r11 ; restore key pointer for the helper
717 movdqu xmm2,XMMWORD[rdi]
718 mov eax,r10d ; restore loop count for the helper
719 movups XMMWORD[16+rsi],xmm3
720 movdqu xmm3,XMMWORD[16+rdi]
721 movups XMMWORD[32+rsi],xmm4
722 movdqu xmm4,XMMWORD[32+rdi]
723 movups XMMWORD[48+rsi],xmm5
724 movdqu xmm5,XMMWORD[48+rdi]
725 movups XMMWORD[64+rsi],xmm6
726 movdqu xmm6,XMMWORD[64+rdi]
727 movups XMMWORD[80+rsi],xmm7
728 movdqu xmm7,XMMWORD[80+rdi]
729 movups XMMWORD[96+rsi],xmm8
730 movdqu xmm8,XMMWORD[96+rdi]
731 movups XMMWORD[112+rsi],xmm9
732 lea rsi,[128+rsi]
733 movdqu xmm9,XMMWORD[112+rdi]
734 lea rdi,[128+rdi]
735$L$ecb_dec_loop8_enter:
736
737 call _aesni_decrypt8
738
739 movups xmm0,XMMWORD[r11] ; xmm0 = round key 0 again (movups leaves flags alone)
740 sub rdx,0x80 ; no borrow -> another full batch is available
741 jnc NEAR $L$ecb_dec_loop8
742
743 movups XMMWORD[rsi],xmm2 ; flush the last full batch, zeroing as we go
744 pxor xmm2,xmm2
745 mov rcx,r11
746 movups XMMWORD[16+rsi],xmm3
747 pxor xmm3,xmm3
748 mov eax,r10d
749 movups XMMWORD[32+rsi],xmm4
750 pxor xmm4,xmm4
751 movups XMMWORD[48+rsi],xmm5
752 pxor xmm5,xmm5
753 movups XMMWORD[64+rsi],xmm6
754 pxor xmm6,xmm6
755 movups XMMWORD[80+rsi],xmm7
756 pxor xmm7,xmm7
757 movups XMMWORD[96+rsi],xmm8
758 pxor xmm8,xmm8
759 movups XMMWORD[112+rsi],xmm9
760 pxor xmm9,xmm9
761 lea rsi,[128+rsi]
762 add rdx,0x80 ; undo the last sub: rdx = bytes still to process
763 jz NEAR $L$ecb_ret
764
; ---- decrypt tail: 1..7 blocks remain; same cmp / jb / je ladder ----
765$L$ecb_dec_tail:
766 movups xmm2,XMMWORD[rdi]
767 cmp rdx,0x20
768 jb NEAR $L$ecb_dec_one
769 movups xmm3,XMMWORD[16+rdi]
770 je NEAR $L$ecb_dec_two
771 movups xmm4,XMMWORD[32+rdi]
772 cmp rdx,0x40
773 jb NEAR $L$ecb_dec_three
774 movups xmm5,XMMWORD[48+rdi]
775 je NEAR $L$ecb_dec_four
776 movups xmm6,XMMWORD[64+rdi]
777 cmp rdx,0x60
778 jb NEAR $L$ecb_dec_five
779 movups xmm7,XMMWORD[80+rdi]
780 je NEAR $L$ecb_dec_six
; Seven blocks: 8-way helper with a zeroed eighth lane whose result is not
; stored.
781 movups xmm8,XMMWORD[96+rdi]
782 movups xmm0,XMMWORD[rcx] ; round key 0 (the helper loads it again itself)
783 xorps xmm9,xmm9
784 call _aesni_decrypt8
785 movups XMMWORD[rsi],xmm2
786 pxor xmm2,xmm2
787 movups XMMWORD[16+rsi],xmm3
788 pxor xmm3,xmm3
789 movups XMMWORD[32+rsi],xmm4
790 pxor xmm4,xmm4
791 movups XMMWORD[48+rsi],xmm5
792 pxor xmm5,xmm5
793 movups XMMWORD[64+rsi],xmm6
794 pxor xmm6,xmm6
795 movups XMMWORD[80+rsi],xmm7
796 pxor xmm7,xmm7
797 movups XMMWORD[96+rsi],xmm8
798 pxor xmm8,xmm8
799 pxor xmm9,xmm9
800 jmp NEAR $L$ecb_ret
801ALIGN 16
; One block: inline single-block round loop (eax = loop count).
802$L$ecb_dec_one:
803 movups xmm0,XMMWORD[rcx]
804 movups xmm1,XMMWORD[16+rcx]
805 lea rcx,[32+rcx]
806 xorps xmm2,xmm0
807$L$oop_dec1_4:
808DB 102,15,56,222,209 ; aesdec xmm2,xmm1
809 dec eax ; sets ZF for the jnz; movups/lea do not touch flags
810 movups xmm1,XMMWORD[rcx]
811 lea rcx,[16+rcx]
812 jnz NEAR $L$oop_dec1_4
813DB 102,15,56,223,209 ; aesdeclast xmm2,xmm1
814 movups XMMWORD[rsi],xmm2
815 pxor xmm2,xmm2
816 jmp NEAR $L$ecb_ret
817ALIGN 16
818$L$ecb_dec_two:
819 call _aesni_decrypt2
820 movups XMMWORD[rsi],xmm2
821 pxor xmm2,xmm2
822 movups XMMWORD[16+rsi],xmm3
823 pxor xmm3,xmm3
824 jmp NEAR $L$ecb_ret
825ALIGN 16
826$L$ecb_dec_three:
827 call _aesni_decrypt3
828 movups XMMWORD[rsi],xmm2
829 pxor xmm2,xmm2
830 movups XMMWORD[16+rsi],xmm3
831 pxor xmm3,xmm3
832 movups XMMWORD[32+rsi],xmm4
833 pxor xmm4,xmm4
834 jmp NEAR $L$ecb_ret
835ALIGN 16
836$L$ecb_dec_four:
837 call _aesni_decrypt4
838 movups XMMWORD[rsi],xmm2
839 pxor xmm2,xmm2
840 movups XMMWORD[16+rsi],xmm3
841 pxor xmm3,xmm3
842 movups XMMWORD[32+rsi],xmm4
843 pxor xmm4,xmm4
844 movups XMMWORD[48+rsi],xmm5
845 pxor xmm5,xmm5
846 jmp NEAR $L$ecb_ret
847ALIGN 16
; Five blocks: 6-way helper with a zeroed sixth lane whose result is not
; stored.
848$L$ecb_dec_five:
849 xorps xmm7,xmm7
850 call _aesni_decrypt6
851 movups XMMWORD[rsi],xmm2
852 pxor xmm2,xmm2
853 movups XMMWORD[16+rsi],xmm3
854 pxor xmm3,xmm3
855 movups XMMWORD[32+rsi],xmm4
856 pxor xmm4,xmm4
857 movups XMMWORD[48+rsi],xmm5
858 pxor xmm5,xmm5
859 movups XMMWORD[64+rsi],xmm6
860 pxor xmm6,xmm6
861 pxor xmm7,xmm7
862 jmp NEAR $L$ecb_ret
863ALIGN 16
864$L$ecb_dec_six:
865 call _aesni_decrypt6
866 movups XMMWORD[rsi],xmm2
867 pxor xmm2,xmm2
868 movups XMMWORD[16+rsi],xmm3
869 pxor xmm3,xmm3
870 movups XMMWORD[32+rsi],xmm4
871 pxor xmm4,xmm4
872 movups XMMWORD[48+rsi],xmm5
873 pxor xmm5,xmm5
874 movups XMMWORD[64+rsi],xmm6
875 pxor xmm6,xmm6
876 movups XMMWORD[80+rsi],xmm7
877 pxor xmm7,xmm7
878
; Common exit: zero xmm0/xmm1, restore xmm6-xmm9 and overwrite each stack
; save slot with the zeroed xmm0.
879$L$ecb_ret:
880 xorps xmm0,xmm0
881 pxor xmm1,xmm1
882 movaps xmm6,XMMWORD[rsp]
883 movaps XMMWORD[rsp],xmm0
884 movaps xmm7,XMMWORD[16+rsp]
885 movaps XMMWORD[16+rsp],xmm0
886 movaps xmm8,XMMWORD[32+rsp]
887 movaps XMMWORD[32+rsp],xmm0
888 movaps xmm9,XMMWORD[48+rsp]
889 movaps XMMWORD[48+rsp],xmm0
890 lea rsp,[88+rsp]
891$L$ecb_enc_ret:
892 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
893 mov rsi,QWORD[16+rsp]
894 DB 0F3h,0C3h ;repret
895
896$L$SEH_end_aesni_ecb_encrypt:
;-----------------------------------------------------------------------
; aesni_ccm64_encrypt_blocks: per 16-byte block, encrypt a counter block
; and a running 16-byte accumulator together (two AES lanes), XOR the input
; with the encrypted counter and store it, then bump the counter.
; (Presumably the CTR + CBC-MAC halves of CCM, going by the name - confirm.)
; ABI:   Microsoft x64 entry; arguments are moved into rdi/rsi/rdx/rcx/r8/r9
;        before the body runs.
; In (Win64):  rcx = input, rdx = output, r8 = block count, r9 = key
;              schedule, [40+rsp] = counter block, [48+rsp] = accumulator
; In (body):   rdi = input, rsi = output, rdx = number of 16-byte blocks
;              (must be non-zero: the loop body runs before the count is
;              tested), rcx = key schedule, r8 -> 16-byte counter block
;              (read only), r9 -> 16-byte accumulator (read, then
;              overwritten with the final value)
; Body regs:   xmm6 = counter in shuffled byte order, xmm2 = counter block
;              being encrypted, xmm3 = accumulator, xmm8 = current input
;              block, xmm9 = $L$increment64, xmm7 = $L$bswap_mask (both
;              constants are defined outside this view),
;              r11 = key schedule, r10 = 16 - 16*count (loop start offset)
; Stack: 88 bytes; xmm6-xmm9 saved/restored, save slots zeroed on exit.
;-----------------------------------------------------------------------
897global aesni_ccm64_encrypt_blocks
898
899ALIGN 16
900aesni_ccm64_encrypt_blocks:
901 mov QWORD[8+rsp],rdi ;WIN64 prologue
902 mov QWORD[16+rsp],rsi
903 mov rax,rsp
; NOTE(review): the $L$SEH_* / *_body / *_ret labels are presumably referenced
; by unwind data outside this view - keep their names and positions.
904$L$SEH_begin_aesni_ccm64_encrypt_blocks:
905 mov rdi,rcx ; remap the Win64 register arguments
906 mov rsi,rdx
907 mov rdx,r8
908 mov rcx,r9
909 mov r8,QWORD[40+rsp] ; fifth and sixth arguments come from the stack
910 mov r9,QWORD[48+rsp]
911
912
913
914DB 243,15,30,250 ; endbr64
915 lea rsp,[((-88))+rsp] ; frame for the xmm6-xmm9 save area
916 movaps XMMWORD[rsp],xmm6
917 movaps XMMWORD[16+rsp],xmm7
918 movaps XMMWORD[32+rsp],xmm8
919 movaps XMMWORD[48+rsp],xmm9
920$L$ccm64_enc_body:
921 mov eax,DWORD[240+rcx] ; eax = loop count read from key+240
922 movdqu xmm6,XMMWORD[r8] ; counter block as stored in memory
923 movdqa xmm9,XMMWORD[$L$increment64]
924 movdqa xmm7,XMMWORD[$L$bswap_mask]
925
926 shl eax,4 ; eax = 16*count
927 mov r10d,16
928 lea r11,[rcx] ; r11 = key schedule (rcx is repointed below)
929 movdqu xmm3,XMMWORD[r9] ; xmm3 = accumulator
930 movdqa xmm2,xmm6 ; first counter block to encrypt (memory byte order)
931 lea rcx,[32+rax*1+rcx] ; rcx = key + 32 + 16*count
932DB 102,15,56,0,247 ; pshufb xmm6,xmm7: counter into shuffled order for paddq
933 sub r10,rax ; r10 = 16 - 16*count
934 jmp NEAR $L$ccm64_enc_outer
935ALIGN 16
; One iteration per input block.
936$L$ccm64_enc_outer:
937 movups xmm0,XMMWORD[r11] ; round key 0
938 mov rax,r10 ; reset the round-key offset
939 movups xmm8,XMMWORD[rdi] ; xmm8 = input block
940
941 xorps xmm2,xmm0 ; counter ^= round key 0
942 movups xmm1,XMMWORD[16+r11] ; round key 1
943 xorps xmm0,xmm8
944 xorps xmm3,xmm0 ; accumulator ^= input ^ round key 0
945 movups xmm0,XMMWORD[32+r11] ; round key 2
946
; Two rounds per iteration on both lanes; ZF comes from "add rax,32".
947$L$ccm64_enc2_loop:
948DB 102,15,56,220,209 ; aesenc xmm2,xmm1
949DB 102,15,56,220,217 ; aesenc xmm3,xmm1
950 movups xmm1,XMMWORD[rax*1+rcx]
951 add rax,32
952DB 102,15,56,220,208 ; aesenc xmm2,xmm0
953DB 102,15,56,220,216 ; aesenc xmm3,xmm0
954 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
955 jnz NEAR $L$ccm64_enc2_loop
956DB 102,15,56,220,209 ; aesenc xmm2,xmm1
957DB 102,15,56,220,217 ; aesenc xmm3,xmm1
958 paddq xmm6,xmm9 ; advance the counter
959 dec rdx ; one block done; ZF is consumed by the jnz at the loop end
960DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
961DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
962
; Nothing between "dec rdx" and the jnz below modifies flags.
963 lea rdi,[16+rdi]
964 xorps xmm8,xmm2 ; output = input ^ encrypted counter
965 movdqa xmm2,xmm6 ; next counter ...
966 movups XMMWORD[rsi],xmm8
967DB 102,15,56,0,215 ; pshufb xmm2,xmm7: ... back into memory byte order
968 lea rsi,[16+rsi]
969 jnz NEAR $L$ccm64_enc_outer
970
; Done: write the accumulator back, zero the working registers, restore
; xmm6-xmm9 and overwrite the stack save slots with the zeroed xmm0.
971 pxor xmm0,xmm0
972 pxor xmm1,xmm1
973 pxor xmm2,xmm2
974 movups XMMWORD[r9],xmm3
975 pxor xmm3,xmm3
976 pxor xmm8,xmm8
977 pxor xmm6,xmm6
978 movaps xmm6,XMMWORD[rsp]
979 movaps XMMWORD[rsp],xmm0
980 movaps xmm7,XMMWORD[16+rsp]
981 movaps XMMWORD[16+rsp],xmm0
982 movaps xmm8,XMMWORD[32+rsp]
983 movaps XMMWORD[32+rsp],xmm0
984 movaps xmm9,XMMWORD[48+rsp]
985 movaps XMMWORD[48+rsp],xmm0
986 lea rsp,[88+rsp]
987$L$ccm64_enc_ret:
988 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
989 mov rsi,QWORD[16+rsp]
990 DB 0F3h,0C3h ;repret
991
992$L$SEH_end_aesni_ccm64_encrypt_blocks:
;-----------------------------------------------------------------------
; aesni_ccm64_decrypt_blocks: counterpart of aesni_ccm64_encrypt_blocks.
; The first counter block is encrypted on its own; afterwards each
; iteration XORs the input with the encrypted counter, stores the result,
; and runs two AES lanes together: the next counter block and the running
; accumulator (which first absorbs the block just produced).
; (Presumably the CTR + CBC-MAC halves of CCM, going by the name - confirm.)
; ABI:   Microsoft x64 entry; arguments are moved into rdi/rsi/rdx/rcx/r8/r9
;        before the body runs.
; In (Win64):  rcx = input, rdx = output, r8 = block count, r9 = key
;              schedule, [40+rsp] = counter block, [48+rsp] = accumulator
; In (body):   rdi = input, rsi = output, rdx = number of 16-byte blocks
;              (must be non-zero: the first block is processed before the
;              count is tested), rcx = key schedule, r8 -> 16-byte counter
;              block (read only), r9 -> 16-byte accumulator (read, then
;              overwritten with the final value)
; Body regs:   xmm6 = counter in shuffled byte order, xmm2 = counter block
;              being encrypted / encrypted counter, xmm3 = accumulator,
;              xmm8 = current input, then output, block,
;              xmm9 = $L$increment64, xmm7 = $L$bswap_mask (both constants
;              are defined outside this view),
;              r11 = key schedule, r10 = 16 - 16*count (loop start offset)
; Stack: 88 bytes; xmm6-xmm9 saved/restored, save slots zeroed on exit.
;-----------------------------------------------------------------------
993global aesni_ccm64_decrypt_blocks
994
995ALIGN 16
996aesni_ccm64_decrypt_blocks:
997 mov QWORD[8+rsp],rdi ;WIN64 prologue
998 mov QWORD[16+rsp],rsi
999 mov rax,rsp
; NOTE(review): the $L$SEH_* / *_body / *_ret labels are presumably referenced
; by unwind data outside this view - keep their names and positions.
1000$L$SEH_begin_aesni_ccm64_decrypt_blocks:
1001 mov rdi,rcx ; remap the Win64 register arguments
1002 mov rsi,rdx
1003 mov rdx,r8
1004 mov rcx,r9
1005 mov r8,QWORD[40+rsp] ; fifth and sixth arguments come from the stack
1006 mov r9,QWORD[48+rsp]
1007
1008
1009
1010DB 243,15,30,250 ; endbr64
1011 lea rsp,[((-88))+rsp] ; frame for the xmm6-xmm9 save area
1012 movaps XMMWORD[rsp],xmm6
1013 movaps XMMWORD[16+rsp],xmm7
1014 movaps XMMWORD[32+rsp],xmm8
1015 movaps XMMWORD[48+rsp],xmm9
1016$L$ccm64_dec_body:
1017 mov eax,DWORD[240+rcx] ; eax = loop count read from key+240
1018 movups xmm6,XMMWORD[r8] ; counter block as stored in memory
1019 movdqu xmm3,XMMWORD[r9] ; xmm3 = accumulator
1020 movdqa xmm9,XMMWORD[$L$increment64]
1021 movdqa xmm7,XMMWORD[$L$bswap_mask]
1022
1023 movaps xmm2,xmm6 ; first counter block to encrypt (memory byte order)
1024 mov r10d,eax ; keep the loop count; eax is consumed by the loop below
1025 mov r11,rcx ; r11 = key schedule (rcx is advanced below)
1026DB 102,15,56,0,247 ; pshufb xmm6,xmm7: counter into shuffled order for paddq
; Encrypt the first counter block on its own (single-lane round loop).
1027 movups xmm0,XMMWORD[rcx]
1028 movups xmm1,XMMWORD[16+rcx]
1029 lea rcx,[32+rcx]
1030 xorps xmm2,xmm0
1031$L$oop_enc1_5:
1032DB 102,15,56,220,209 ; aesenc xmm2,xmm1
1033 dec eax ; sets ZF for the jnz; movups/lea do not touch flags
1034 movups xmm1,XMMWORD[rcx]
1035 lea rcx,[16+rcx]
1036 jnz NEAR $L$oop_enc1_5
1037DB 102,15,56,221,209 ; aesenclast xmm2,xmm1
1038 shl r10d,4 ; r10 = 16*count
1039 mov eax,16
1040 movups xmm8,XMMWORD[rdi] ; first input block
1041 paddq xmm6,xmm9 ; advance the counter
1042 lea rdi,[16+rdi]
1043 sub rax,r10 ; rax = 16 - 16*count
1044 lea rcx,[32+r10*1+r11] ; rcx = key + 32 + 16*count
1045 mov r10,rax ; r10 = loop start offset for the two-lane loop
1046 jmp NEAR $L$ccm64_dec_outer
1047ALIGN 16
; One iteration per block: xmm2 holds the encrypted counter on entry.
1048$L$ccm64_dec_outer:
1049 xorps xmm8,xmm2 ; output = input ^ encrypted counter
1050 movdqa xmm2,xmm6 ; next counter ...
1051 movups XMMWORD[rsi],xmm8
1052 lea rsi,[16+rsi]
1053DB 102,15,56,0,215 ; pshufb xmm2,xmm7: ... back into memory byte order
1054
1055 sub rdx,1
1056 jz NEAR $L$ccm64_dec_break ; last block: only the accumulator is left
1057
1058 movups xmm0,XMMWORD[r11] ; round key 0
1059 mov rax,r10 ; reset the round-key offset
1060 movups xmm1,XMMWORD[16+r11] ; round key 1
1061 xorps xmm8,xmm0
1062 xorps xmm2,xmm0 ; counter ^= round key 0
1063 xorps xmm3,xmm8 ; accumulator ^= output ^ round key 0
1064 movups xmm0,XMMWORD[32+r11] ; round key 2
1065 jmp NEAR $L$ccm64_dec2_loop
1066ALIGN 16
; Two rounds per iteration on both lanes; ZF comes from "add rax,32".
1067$L$ccm64_dec2_loop:
1068DB 102,15,56,220,209 ; aesenc xmm2,xmm1
1069DB 102,15,56,220,217 ; aesenc xmm3,xmm1
1070 movups xmm1,XMMWORD[rax*1+rcx]
1071 add rax,32
1072DB 102,15,56,220,208 ; aesenc xmm2,xmm0
1073DB 102,15,56,220,216 ; aesenc xmm3,xmm0
1074 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
1075 jnz NEAR $L$ccm64_dec2_loop
1076 movups xmm8,XMMWORD[rdi] ; fetch the next input block
1077 paddq xmm6,xmm9 ; advance the counter
1078DB 102,15,56,220,209 ; aesenc xmm2,xmm1
1079DB 102,15,56,220,217 ; aesenc xmm3,xmm1
1080DB 102,15,56,221,208 ; aesenclast xmm2,xmm0
1081DB 102,15,56,221,216 ; aesenclast xmm3,xmm0
1082 lea rdi,[16+rdi]
1083 jmp NEAR $L$ccm64_dec_outer
1084
1085ALIGN 16
; Final accumulator update: absorb the last output block and run one more
; single-lane encryption on xmm3. r11 is advanced here, so it no longer
; points at the start of the key schedule afterwards.
1086$L$ccm64_dec_break:
1087
1088 mov eax,DWORD[240+r11] ; reload the loop count
1089 movups xmm0,XMMWORD[r11]
1090 movups xmm1,XMMWORD[16+r11]
1091 xorps xmm8,xmm0
1092 lea r11,[32+r11]
1093 xorps xmm3,xmm8 ; accumulator ^= output ^ round key 0
1094$L$oop_enc1_6:
1095DB 102,15,56,220,217 ; aesenc xmm3,xmm1
1096 dec eax ; sets ZF for the jnz; movups/lea do not touch flags
1097 movups xmm1,XMMWORD[r11]
1098 lea r11,[16+r11]
1099 jnz NEAR $L$oop_enc1_6
1100DB 102,15,56,221,217 ; aesenclast xmm3,xmm1
; Write the accumulator back, zero the working registers, restore xmm6-xmm9
; and overwrite the stack save slots with the zeroed xmm0.
1101 pxor xmm0,xmm0
1102 pxor xmm1,xmm1
1103 pxor xmm2,xmm2
1104 movups XMMWORD[r9],xmm3
1105 pxor xmm3,xmm3
1106 pxor xmm8,xmm8
1107 pxor xmm6,xmm6
1108 movaps xmm6,XMMWORD[rsp]
1109 movaps XMMWORD[rsp],xmm0
1110 movaps xmm7,XMMWORD[16+rsp]
1111 movaps XMMWORD[16+rsp],xmm0
1112 movaps xmm8,XMMWORD[32+rsp]
1113 movaps XMMWORD[32+rsp],xmm0
1114 movaps xmm9,XMMWORD[48+rsp]
1115 movaps XMMWORD[48+rsp],xmm0
1116 lea rsp,[88+rsp]
1117$L$ccm64_dec_ret:
1118 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1119 mov rsi,QWORD[16+rsp]
1120 DB 0F3h,0C3h ;repret
1121
1122$L$SEH_end_aesni_ccm64_decrypt_blocks:
1123global aesni_ctr32_encrypt_blocks
1124
1125ALIGN 16
1126aesni_ctr32_encrypt_blocks:
1127 mov QWORD[8+rsp],rdi ;WIN64 prologue
1128 mov QWORD[16+rsp],rsi
1129 mov rax,rsp
1130$L$SEH_begin_aesni_ctr32_encrypt_blocks:
1131 mov rdi,rcx
1132 mov rsi,rdx
1133 mov rdx,r8
1134 mov rcx,r9
1135 mov r8,QWORD[40+rsp]
1136
1137
1138
1139DB 243,15,30,250
1140 cmp rdx,1
1141 jne NEAR $L$ctr32_bulk
1142
1143
1144
1145 movups xmm2,XMMWORD[r8]
1146 movups xmm3,XMMWORD[rdi]
1147 mov edx,DWORD[240+rcx]
1148 movups xmm0,XMMWORD[rcx]
1149 movups xmm1,XMMWORD[16+rcx]
1150 lea rcx,[32+rcx]
1151 xorps xmm2,xmm0
1152$L$oop_enc1_7:
1153DB 102,15,56,220,209
1154 dec edx
1155 movups xmm1,XMMWORD[rcx]
1156 lea rcx,[16+rcx]
1157 jnz NEAR $L$oop_enc1_7
1158DB 102,15,56,221,209
1159 pxor xmm0,xmm0
1160 pxor xmm1,xmm1
1161 xorps xmm2,xmm3
1162 pxor xmm3,xmm3
1163 movups XMMWORD[rsi],xmm2
1164 xorps xmm2,xmm2
1165 jmp NEAR $L$ctr32_epilogue
1166
1167ALIGN 16
1168$L$ctr32_bulk:
1169 lea r11,[rsp]
1170
1171 push rbp
1172
1173 sub rsp,288
1174 and rsp,-16
1175 movaps XMMWORD[(-168)+r11],xmm6
1176 movaps XMMWORD[(-152)+r11],xmm7
1177 movaps XMMWORD[(-136)+r11],xmm8
1178 movaps XMMWORD[(-120)+r11],xmm9
1179 movaps XMMWORD[(-104)+r11],xmm10
1180 movaps XMMWORD[(-88)+r11],xmm11
1181 movaps XMMWORD[(-72)+r11],xmm12
1182 movaps XMMWORD[(-56)+r11],xmm13
1183 movaps XMMWORD[(-40)+r11],xmm14
1184 movaps XMMWORD[(-24)+r11],xmm15
1185$L$ctr32_body:
1186
1187
1188
1189
1190 movdqu xmm2,XMMWORD[r8]
1191 movdqu xmm0,XMMWORD[rcx]
1192 mov r8d,DWORD[12+r8]
1193 pxor xmm2,xmm0
1194 mov ebp,DWORD[12+rcx]
1195 movdqa XMMWORD[rsp],xmm2
1196 bswap r8d
1197 movdqa xmm3,xmm2
1198 movdqa xmm4,xmm2
1199 movdqa xmm5,xmm2
1200 movdqa XMMWORD[64+rsp],xmm2
1201 movdqa XMMWORD[80+rsp],xmm2
1202 movdqa XMMWORD[96+rsp],xmm2
1203 mov r10,rdx
1204 movdqa XMMWORD[112+rsp],xmm2
1205
1206 lea rax,[1+r8]
1207 lea rdx,[2+r8]
1208 bswap eax
1209 bswap edx
1210 xor eax,ebp
1211 xor edx,ebp
1212DB 102,15,58,34,216,3
1213 lea rax,[3+r8]
1214 movdqa XMMWORD[16+rsp],xmm3
1215DB 102,15,58,34,226,3
1216 bswap eax
1217 mov rdx,r10
1218 lea r10,[4+r8]
1219 movdqa XMMWORD[32+rsp],xmm4
1220 xor eax,ebp
1221 bswap r10d
1222DB 102,15,58,34,232,3
1223 xor r10d,ebp
1224 movdqa XMMWORD[48+rsp],xmm5
1225 lea r9,[5+r8]
1226 mov DWORD[((64+12))+rsp],r10d
1227 bswap r9d
1228 lea r10,[6+r8]
1229 mov eax,DWORD[240+rcx]
1230 xor r9d,ebp
1231 bswap r10d
1232 mov DWORD[((80+12))+rsp],r9d
1233 xor r10d,ebp
1234 lea r9,[7+r8]
1235 mov DWORD[((96+12))+rsp],r10d
1236 bswap r9d
1237 mov r10d,DWORD[((OPENSSL_ia32cap_P+4))]
1238 xor r9d,ebp
1239 and r10d,71303168
1240 mov DWORD[((112+12))+rsp],r9d
1241
1242 movups xmm1,XMMWORD[16+rcx]
1243
1244 movdqa xmm6,XMMWORD[64+rsp]
1245 movdqa xmm7,XMMWORD[80+rsp]
1246
1247 cmp rdx,8
1248 jb NEAR $L$ctr32_tail
1249
1250 sub rdx,6
1251 cmp r10d,4194304
1252 je NEAR $L$ctr32_6x
1253
1254 lea rcx,[128+rcx]
1255 sub rdx,2
1256 jmp NEAR $L$ctr32_loop8
1257
1258ALIGN 16
1259$L$ctr32_6x:
1260 shl eax,4
1261 mov r10d,48
1262 bswap ebp
1263 lea rcx,[32+rax*1+rcx]
1264 sub r10,rax
1265 jmp NEAR $L$ctr32_loop6
1266
1267ALIGN 16
1268$L$ctr32_loop6:
1269 add r8d,6
1270 movups xmm0,XMMWORD[((-48))+r10*1+rcx]
1271DB 102,15,56,220,209
1272 mov eax,r8d
1273 xor eax,ebp
1274DB 102,15,56,220,217
1275DB 0x0f,0x38,0xf1,0x44,0x24,12
1276 lea eax,[1+r8]
1277DB 102,15,56,220,225
1278 xor eax,ebp
1279DB 0x0f,0x38,0xf1,0x44,0x24,28
1280DB 102,15,56,220,233
1281 lea eax,[2+r8]
1282 xor eax,ebp
1283DB 102,15,56,220,241
1284DB 0x0f,0x38,0xf1,0x44,0x24,44
1285 lea eax,[3+r8]
1286DB 102,15,56,220,249
1287 movups xmm1,XMMWORD[((-32))+r10*1+rcx]
1288 xor eax,ebp
1289
1290DB 102,15,56,220,208
1291DB 0x0f,0x38,0xf1,0x44,0x24,60
1292 lea eax,[4+r8]
1293DB 102,15,56,220,216
1294 xor eax,ebp
1295DB 0x0f,0x38,0xf1,0x44,0x24,76
1296DB 102,15,56,220,224
1297 lea eax,[5+r8]
1298 xor eax,ebp
1299DB 102,15,56,220,232
1300DB 0x0f,0x38,0xf1,0x44,0x24,92
1301 mov rax,r10
1302DB 102,15,56,220,240
1303DB 102,15,56,220,248
1304 movups xmm0,XMMWORD[((-16))+r10*1+rcx]
1305
1306 call $L$enc_loop6
1307
1308 movdqu xmm8,XMMWORD[rdi]
1309 movdqu xmm9,XMMWORD[16+rdi]
1310 movdqu xmm10,XMMWORD[32+rdi]
1311 movdqu xmm11,XMMWORD[48+rdi]
1312 movdqu xmm12,XMMWORD[64+rdi]
1313 movdqu xmm13,XMMWORD[80+rdi]
1314 lea rdi,[96+rdi]
1315 movups xmm1,XMMWORD[((-64))+r10*1+rcx]
1316 pxor xmm8,xmm2
1317 movaps xmm2,XMMWORD[rsp]
1318 pxor xmm9,xmm3
1319 movaps xmm3,XMMWORD[16+rsp]
1320 pxor xmm10,xmm4
1321 movaps xmm4,XMMWORD[32+rsp]
1322 pxor xmm11,xmm5
1323 movaps xmm5,XMMWORD[48+rsp]
1324 pxor xmm12,xmm6
1325 movaps xmm6,XMMWORD[64+rsp]
1326 pxor xmm13,xmm7
1327 movaps xmm7,XMMWORD[80+rsp]
1328 movdqu XMMWORD[rsi],xmm8
1329 movdqu XMMWORD[16+rsi],xmm9
1330 movdqu XMMWORD[32+rsi],xmm10
1331 movdqu XMMWORD[48+rsi],xmm11
1332 movdqu XMMWORD[64+rsi],xmm12
1333 movdqu XMMWORD[80+rsi],xmm13
1334 lea rsi,[96+rsi]
1335
1336 sub rdx,6
1337 jnc NEAR $L$ctr32_loop6
1338
1339 add rdx,6
1340 jz NEAR $L$ctr32_done
1341
1342 lea eax,[((-48))+r10]
1343 lea rcx,[((-80))+r10*1+rcx]
1344 neg eax
1345 shr eax,4
1346 jmp NEAR $L$ctr32_tail
1347
1348ALIGN 32
1349$L$ctr32_loop8:
1350 add r8d,8
1351 movdqa xmm8,XMMWORD[96+rsp]
1352DB 102,15,56,220,209
1353 mov r9d,r8d
1354 movdqa xmm9,XMMWORD[112+rsp]
1355DB 102,15,56,220,217
1356 bswap r9d
1357 movups xmm0,XMMWORD[((32-128))+rcx]
1358DB 102,15,56,220,225
1359 xor r9d,ebp
1360 nop
1361DB 102,15,56,220,233
1362 mov DWORD[((0+12))+rsp],r9d
1363 lea r9,[1+r8]
1364DB 102,15,56,220,241
1365DB 102,15,56,220,249
1366DB 102,68,15,56,220,193
1367DB 102,68,15,56,220,201
1368 movups xmm1,XMMWORD[((48-128))+rcx]
1369 bswap r9d
1370DB 102,15,56,220,208
1371DB 102,15,56,220,216
1372 xor r9d,ebp
1373DB 0x66,0x90
1374DB 102,15,56,220,224
1375DB 102,15,56,220,232
1376 mov DWORD[((16+12))+rsp],r9d
1377 lea r9,[2+r8]
1378DB 102,15,56,220,240
1379DB 102,15,56,220,248
1380DB 102,68,15,56,220,192
1381DB 102,68,15,56,220,200
1382 movups xmm0,XMMWORD[((64-128))+rcx]
1383 bswap r9d
1384DB 102,15,56,220,209
1385DB 102,15,56,220,217
1386 xor r9d,ebp
1387DB 0x66,0x90
1388DB 102,15,56,220,225
1389DB 102,15,56,220,233
1390 mov DWORD[((32+12))+rsp],r9d
1391 lea r9,[3+r8]
1392DB 102,15,56,220,241
1393DB 102,15,56,220,249
1394DB 102,68,15,56,220,193
1395DB 102,68,15,56,220,201
1396 movups xmm1,XMMWORD[((80-128))+rcx]
1397 bswap r9d
1398DB 102,15,56,220,208
1399DB 102,15,56,220,216
1400 xor r9d,ebp
1401DB 0x66,0x90
1402DB 102,15,56,220,224
1403DB 102,15,56,220,232
1404 mov DWORD[((48+12))+rsp],r9d
1405 lea r9,[4+r8]
1406DB 102,15,56,220,240
1407DB 102,15,56,220,248
1408DB 102,68,15,56,220,192
1409DB 102,68,15,56,220,200
1410 movups xmm0,XMMWORD[((96-128))+rcx]
1411 bswap r9d
1412DB 102,15,56,220,209
1413DB 102,15,56,220,217
1414 xor r9d,ebp
1415DB 0x66,0x90
1416DB 102,15,56,220,225
1417DB 102,15,56,220,233
1418 mov DWORD[((64+12))+rsp],r9d
1419 lea r9,[5+r8]
1420DB 102,15,56,220,241
1421DB 102,15,56,220,249
1422DB 102,68,15,56,220,193
1423DB 102,68,15,56,220,201
1424 movups xmm1,XMMWORD[((112-128))+rcx]
1425 bswap r9d
1426DB 102,15,56,220,208
1427DB 102,15,56,220,216
1428 xor r9d,ebp
1429DB 0x66,0x90
1430DB 102,15,56,220,224
1431DB 102,15,56,220,232
1432 mov DWORD[((80+12))+rsp],r9d
1433 lea r9,[6+r8]
1434DB 102,15,56,220,240
1435DB 102,15,56,220,248
1436DB 102,68,15,56,220,192
1437DB 102,68,15,56,220,200
1438 movups xmm0,XMMWORD[((128-128))+rcx]
1439 bswap r9d
1440DB 102,15,56,220,209
1441DB 102,15,56,220,217
1442 xor r9d,ebp
1443DB 0x66,0x90
1444DB 102,15,56,220,225
1445DB 102,15,56,220,233
1446 mov DWORD[((96+12))+rsp],r9d
1447 lea r9,[7+r8]
1448DB 102,15,56,220,241
1449DB 102,15,56,220,249
1450DB 102,68,15,56,220,193
1451DB 102,68,15,56,220,201
1452 movups xmm1,XMMWORD[((144-128))+rcx]
1453 bswap r9d
1454DB 102,15,56,220,208
1455DB 102,15,56,220,216
1456DB 102,15,56,220,224
1457 xor r9d,ebp
1458 movdqu xmm10,XMMWORD[rdi]
1459DB 102,15,56,220,232
1460 mov DWORD[((112+12))+rsp],r9d
1461 cmp eax,11
1462DB 102,15,56,220,240
1463DB 102,15,56,220,248
1464DB 102,68,15,56,220,192
1465DB 102,68,15,56,220,200
1466 movups xmm0,XMMWORD[((160-128))+rcx]
1467
1468 jb NEAR $L$ctr32_enc_done
1469
1470DB 102,15,56,220,209
1471DB 102,15,56,220,217
1472DB 102,15,56,220,225
1473DB 102,15,56,220,233
1474DB 102,15,56,220,241
1475DB 102,15,56,220,249
1476DB 102,68,15,56,220,193
1477DB 102,68,15,56,220,201
1478 movups xmm1,XMMWORD[((176-128))+rcx]
1479
1480DB 102,15,56,220,208
1481DB 102,15,56,220,216
1482DB 102,15,56,220,224
1483DB 102,15,56,220,232
1484DB 102,15,56,220,240
1485DB 102,15,56,220,248
1486DB 102,68,15,56,220,192
1487DB 102,68,15,56,220,200
1488 movups xmm0,XMMWORD[((192-128))+rcx]
1489 je NEAR $L$ctr32_enc_done
1490
1491DB 102,15,56,220,209
1492DB 102,15,56,220,217
1493DB 102,15,56,220,225
1494DB 102,15,56,220,233
1495DB 102,15,56,220,241
1496DB 102,15,56,220,249
1497DB 102,68,15,56,220,193
1498DB 102,68,15,56,220,201
1499 movups xmm1,XMMWORD[((208-128))+rcx]
1500
1501DB 102,15,56,220,208
1502DB 102,15,56,220,216
1503DB 102,15,56,220,224
1504DB 102,15,56,220,232
1505DB 102,15,56,220,240
1506DB 102,15,56,220,248
1507DB 102,68,15,56,220,192
1508DB 102,68,15,56,220,200
1509 movups xmm0,XMMWORD[((224-128))+rcx]
1510 jmp NEAR $L$ctr32_enc_done
1511
1512ALIGN 16
1513$L$ctr32_enc_done:
1514 movdqu xmm11,XMMWORD[16+rdi]
1515 pxor xmm10,xmm0
1516 movdqu xmm12,XMMWORD[32+rdi]
1517 pxor xmm11,xmm0
1518 movdqu xmm13,XMMWORD[48+rdi]
1519 pxor xmm12,xmm0
1520 movdqu xmm14,XMMWORD[64+rdi]
1521 pxor xmm13,xmm0
1522 movdqu xmm15,XMMWORD[80+rdi]
1523 pxor xmm14,xmm0
1524 pxor xmm15,xmm0
1525DB 102,15,56,220,209
1526DB 102,15,56,220,217
1527DB 102,15,56,220,225
1528DB 102,15,56,220,233
1529DB 102,15,56,220,241
1530DB 102,15,56,220,249
1531DB 102,68,15,56,220,193
1532DB 102,68,15,56,220,201
1533 movdqu xmm1,XMMWORD[96+rdi]
1534 lea rdi,[128+rdi]
1535
1536DB 102,65,15,56,221,210
1537 pxor xmm1,xmm0
1538 movdqu xmm10,XMMWORD[((112-128))+rdi]
1539DB 102,65,15,56,221,219
1540 pxor xmm10,xmm0
1541 movdqa xmm11,XMMWORD[rsp]
1542DB 102,65,15,56,221,228
1543DB 102,65,15,56,221,237
1544 movdqa xmm12,XMMWORD[16+rsp]
1545 movdqa xmm13,XMMWORD[32+rsp]
1546DB 102,65,15,56,221,246
1547DB 102,65,15,56,221,255
1548 movdqa xmm14,XMMWORD[48+rsp]
1549 movdqa xmm15,XMMWORD[64+rsp]
1550DB 102,68,15,56,221,193
1551 movdqa xmm0,XMMWORD[80+rsp]
1552 movups xmm1,XMMWORD[((16-128))+rcx]
1553DB 102,69,15,56,221,202
1554
1555 movups XMMWORD[rsi],xmm2
1556 movdqa xmm2,xmm11
1557 movups XMMWORD[16+rsi],xmm3
1558 movdqa xmm3,xmm12
1559 movups XMMWORD[32+rsi],xmm4
1560 movdqa xmm4,xmm13
1561 movups XMMWORD[48+rsi],xmm5
1562 movdqa xmm5,xmm14
1563 movups XMMWORD[64+rsi],xmm6
1564 movdqa xmm6,xmm15
1565 movups XMMWORD[80+rsi],xmm7
1566 movdqa xmm7,xmm0
1567 movups XMMWORD[96+rsi],xmm8
1568 movups XMMWORD[112+rsi],xmm9
1569 lea rsi,[128+rsi]
1570
1571 sub rdx,8
1572 jnc NEAR $L$ctr32_loop8
1573
1574 add rdx,8
1575 jz NEAR $L$ctr32_done
1576 lea rcx,[((-128))+rcx]
1577
1578$L$ctr32_tail:
1579
1580
1581 lea rcx,[16+rcx]
1582 cmp rdx,4
1583 jb NEAR $L$ctr32_loop3
1584 je NEAR $L$ctr32_loop4
1585
1586
1587 shl eax,4
1588 movdqa xmm8,XMMWORD[96+rsp]
1589 pxor xmm9,xmm9
1590
1591 movups xmm0,XMMWORD[16+rcx]
1592DB 102,15,56,220,209
1593DB 102,15,56,220,217
1594 lea rcx,[((32-16))+rax*1+rcx]
1595 neg rax
1596DB 102,15,56,220,225
1597 add rax,16
1598 movups xmm10,XMMWORD[rdi]
1599DB 102,15,56,220,233
1600DB 102,15,56,220,241
1601 movups xmm11,XMMWORD[16+rdi]
1602 movups xmm12,XMMWORD[32+rdi]
1603DB 102,15,56,220,249
1604DB 102,68,15,56,220,193
1605
1606 call $L$enc_loop8_enter
1607
1608 movdqu xmm13,XMMWORD[48+rdi]
1609 pxor xmm2,xmm10
1610 movdqu xmm10,XMMWORD[64+rdi]
1611 pxor xmm3,xmm11
1612 movdqu XMMWORD[rsi],xmm2
1613 pxor xmm4,xmm12
1614 movdqu XMMWORD[16+rsi],xmm3
1615 pxor xmm5,xmm13
1616 movdqu XMMWORD[32+rsi],xmm4
1617 pxor xmm6,xmm10
1618 movdqu XMMWORD[48+rsi],xmm5
1619 movdqu XMMWORD[64+rsi],xmm6
1620 cmp rdx,6
1621 jb NEAR $L$ctr32_done
1622
1623 movups xmm11,XMMWORD[80+rdi]
1624 xorps xmm7,xmm11
1625 movups XMMWORD[80+rsi],xmm7
1626 je NEAR $L$ctr32_done
1627
1628 movups xmm12,XMMWORD[96+rdi]
1629 xorps xmm8,xmm12
1630 movups XMMWORD[96+rsi],xmm8
1631 jmp NEAR $L$ctr32_done
1632
1633ALIGN 32
1634$L$ctr32_loop4:
1635DB 102,15,56,220,209
1636 lea rcx,[16+rcx]
1637 dec eax
1638DB 102,15,56,220,217
1639DB 102,15,56,220,225
1640DB 102,15,56,220,233
1641 movups xmm1,XMMWORD[rcx]
1642 jnz NEAR $L$ctr32_loop4
1643DB 102,15,56,221,209
1644DB 102,15,56,221,217
1645 movups xmm10,XMMWORD[rdi]
1646 movups xmm11,XMMWORD[16+rdi]
1647DB 102,15,56,221,225
1648DB 102,15,56,221,233
1649 movups xmm12,XMMWORD[32+rdi]
1650 movups xmm13,XMMWORD[48+rdi]
1651
1652 xorps xmm2,xmm10
1653 movups XMMWORD[rsi],xmm2
1654 xorps xmm3,xmm11
1655 movups XMMWORD[16+rsi],xmm3
1656 pxor xmm4,xmm12
1657 movdqu XMMWORD[32+rsi],xmm4
1658 pxor xmm5,xmm13
1659 movdqu XMMWORD[48+rsi],xmm5
1660 jmp NEAR $L$ctr32_done
1661
1662ALIGN 32
1663$L$ctr32_loop3:
1664DB 102,15,56,220,209
1665 lea rcx,[16+rcx]
1666 dec eax
1667DB 102,15,56,220,217
1668DB 102,15,56,220,225
1669 movups xmm1,XMMWORD[rcx]
1670 jnz NEAR $L$ctr32_loop3
1671DB 102,15,56,221,209
1672DB 102,15,56,221,217
1673DB 102,15,56,221,225
1674
1675 movups xmm10,XMMWORD[rdi]
1676 xorps xmm2,xmm10
1677 movups XMMWORD[rsi],xmm2
1678 cmp rdx,2
1679 jb NEAR $L$ctr32_done
1680
1681 movups xmm11,XMMWORD[16+rdi]
1682 xorps xmm3,xmm11
1683 movups XMMWORD[16+rsi],xmm3
1684 je NEAR $L$ctr32_done
1685
1686 movups xmm12,XMMWORD[32+rdi]
1687 xorps xmm4,xmm12
1688 movups XMMWORD[32+rsi],xmm4
1689
1690$L$ctr32_done:
1691 xorps xmm0,xmm0
1692 xor ebp,ebp
1693 pxor xmm1,xmm1
1694 pxor xmm2,xmm2
1695 pxor xmm3,xmm3
1696 pxor xmm4,xmm4
1697 pxor xmm5,xmm5
1698 movaps xmm6,XMMWORD[((-168))+r11]
1699 movaps XMMWORD[(-168)+r11],xmm0
1700 movaps xmm7,XMMWORD[((-152))+r11]
1701 movaps XMMWORD[(-152)+r11],xmm0
1702 movaps xmm8,XMMWORD[((-136))+r11]
1703 movaps XMMWORD[(-136)+r11],xmm0
1704 movaps xmm9,XMMWORD[((-120))+r11]
1705 movaps XMMWORD[(-120)+r11],xmm0
1706 movaps xmm10,XMMWORD[((-104))+r11]
1707 movaps XMMWORD[(-104)+r11],xmm0
1708 movaps xmm11,XMMWORD[((-88))+r11]
1709 movaps XMMWORD[(-88)+r11],xmm0
1710 movaps xmm12,XMMWORD[((-72))+r11]
1711 movaps XMMWORD[(-72)+r11],xmm0
1712 movaps xmm13,XMMWORD[((-56))+r11]
1713 movaps XMMWORD[(-56)+r11],xmm0
1714 movaps xmm14,XMMWORD[((-40))+r11]
1715 movaps XMMWORD[(-40)+r11],xmm0
1716 movaps xmm15,XMMWORD[((-24))+r11]
1717 movaps XMMWORD[(-24)+r11],xmm0
1718 movaps XMMWORD[rsp],xmm0
1719 movaps XMMWORD[16+rsp],xmm0
1720 movaps XMMWORD[32+rsp],xmm0
1721 movaps XMMWORD[48+rsp],xmm0
1722 movaps XMMWORD[64+rsp],xmm0
1723 movaps XMMWORD[80+rsp],xmm0
1724 movaps XMMWORD[96+rsp],xmm0
1725 movaps XMMWORD[112+rsp],xmm0
1726 mov rbp,QWORD[((-8))+r11]
1727
1728 lea rsp,[r11]
1729
1730$L$ctr32_epilogue:
1731 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1732 mov rsi,QWORD[16+rsp]
1733 DB 0F3h,0C3h ;repret
1734
1735$L$SEH_end_aesni_ctr32_encrypt_blocks:
;-----------------------------------------------------------------------
; aesni_xts_encrypt: tweaked (XTS-style) AES encryption using AES-NI.
; ABI:  Microsoft x64 on entry. The prologue copies the Win64 arguments
;       (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx,
;       r8, r9 and the body works on those registers.
; Roles after the copy, as used by the body below:
;       rdi = input pointer          rsi = output pointer
;       rdx = length in bytes        rcx = data key schedule
;       r8  = tweak key schedule     r9  = pointer to the 16-byte tweak/IV
;       Both key schedules carry their round count as a dword at offset 240.
; NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv)
;       -- confirm against the declaration used by the callers.
; Preserves rdi, rsi, rbp and xmm6-xmm15. Before returning it zeroes
; xmm0-xmm5, the xmm save slots and the stack scratch area.
; Instruction encodings emitted as raw bytes in this routine:
;       DB 243,15,30,250         = F3 0F 1E FA        (endbr64)
;       DB 102,15,56,220,<modrm> = 66 0F 38 DC /r     (aesenc)
;       DB 102,15,56,221,<modrm> = 66 0F 38 DD /r     (aesenclast)
;       modrm 209,217,225,233,241,249 -> xmm2..xmm7 with round key xmm1
;       modrm 208,216,224,232,240,248 -> xmm2..xmm7 with round key xmm0
;-----------------------------------------------------------------------
1736global aesni_xts_encrypt
1737
1738ALIGN 16
1739aesni_xts_encrypt:
; rdi/rsi are callee-saved on Win64: park them in the caller-provided
; home space, then move the Win64 arguments into the registers the body uses.
1740 mov QWORD[8+rsp],rdi ;WIN64 prologue
1741 mov QWORD[16+rsp],rsi
1742 mov rax,rsp
1743$L$SEH_begin_aesni_xts_encrypt:
1744 mov rdi,rcx
1745 mov rsi,rdx
1746 mov rdx,r8
1747 mov rcx,r9
1748 mov r8,QWORD[40+rsp]
1749 mov r9,QWORD[48+rsp]
1750
1751
1752
1753DB 243,15,30,250
; r11 keeps the entry rsp: rbp lands at [-8+r11], xmm6-xmm15 are saved at
; [-168+r11]..[-24+r11], and rsp is realigned to 16 bytes for the scratch area.
1754 lea r11,[rsp]
1755
1756 push rbp
1757
1758 sub rsp,272
1759 and rsp,-16
1760 movaps XMMWORD[(-168)+r11],xmm6
1761 movaps XMMWORD[(-152)+r11],xmm7
1762 movaps XMMWORD[(-136)+r11],xmm8
1763 movaps XMMWORD[(-120)+r11],xmm9
1764 movaps XMMWORD[(-104)+r11],xmm10
1765 movaps XMMWORD[(-88)+r11],xmm11
1766 movaps XMMWORD[(-72)+r11],xmm12
1767 movaps XMMWORD[(-56)+r11],xmm13
1768 movaps XMMWORD[(-40)+r11],xmm14
1769 movaps XMMWORD[(-24)+r11],xmm15
1770$L$xts_enc_body:
; Encrypt the 16 bytes at [r9] with the key schedule at r8; the result in
; xmm2 is the initial tweak. r10d picks up the data key's round count.
1771 movups xmm2,XMMWORD[r9]
1772 mov eax,DWORD[240+r8]
1773 mov r10d,DWORD[240+rcx]
1774 movups xmm0,XMMWORD[r8]
1775 movups xmm1,XMMWORD[16+r8]
1776 lea r8,[32+r8]
1777 xorps xmm2,xmm0
1778$L$oop_enc1_8:
1779DB 102,15,56,220,209
1780 dec eax
1781 movups xmm1,XMMWORD[r8]
1782 lea r8,[16+r8]
1783 jnz NEAR $L$oop_enc1_8
1784DB 102,15,56,221,209
; xmm0 = round key 0 of the data key, rbp = data key base, eax = rounds,
; r10d = rounds*16. r9 keeps the unrounded length (its low 4 bits drive the
; stealing tail); rdx is rounded down to whole 16-byte blocks.
1785 movups xmm0,XMMWORD[rcx]
1786 mov rbp,rcx
1787 mov eax,r10d
1788 shl r10d,4
1789 mov r9,rdx
1790 and rdx,-16
1791
; xmm1 = key-schedule entry at 16*(rounds+1), i.e. the one the single-block
; loops below feed to aesenclast.
1792 movups xmm1,XMMWORD[16+r10*1+rcx]
1793
; Derive the tweak sequence. Each step copies the current tweak (xmm15) into
; xmm10..xmm14 pre-XORed with round key 0, then doubles xmm15 (paddq on both
; 64-bit halves) and XORs in carry bits selected via psrad/pand with
; $L$xts_magic. xmm9 holds dword copies of the tweak used to build the masks.
; NOTE(review): $L$xts_magic is defined outside this chunk; this looks like
; the usual XTS multiply-by-x in GF(2^128) -- confirm against the constant.
; xmm1 becomes (final round key XOR round key 0) and is kept at [96+rsp].
1794 movdqa xmm8,XMMWORD[$L$xts_magic]
1795 movdqa xmm15,xmm2
1796 pshufd xmm9,xmm2,0x5f
1797 pxor xmm1,xmm0
1798 movdqa xmm14,xmm9
1799 paddd xmm9,xmm9
1800 movdqa xmm10,xmm15
1801 psrad xmm14,31
1802 paddq xmm15,xmm15
1803 pand xmm14,xmm8
1804 pxor xmm10,xmm0
1805 pxor xmm15,xmm14
1806 movdqa xmm14,xmm9
1807 paddd xmm9,xmm9
1808 movdqa xmm11,xmm15
1809 psrad xmm14,31
1810 paddq xmm15,xmm15
1811 pand xmm14,xmm8
1812 pxor xmm11,xmm0
1813 pxor xmm15,xmm14
1814 movdqa xmm14,xmm9
1815 paddd xmm9,xmm9
1816 movdqa xmm12,xmm15
1817 psrad xmm14,31
1818 paddq xmm15,xmm15
1819 pand xmm14,xmm8
1820 pxor xmm12,xmm0
1821 pxor xmm15,xmm14
1822 movdqa xmm14,xmm9
1823 paddd xmm9,xmm9
1824 movdqa xmm13,xmm15
1825 psrad xmm14,31
1826 paddq xmm15,xmm15
1827 pand xmm14,xmm8
1828 pxor xmm13,xmm0
1829 pxor xmm15,xmm14
1830 movdqa xmm14,xmm15
1831 psrad xmm9,31
1832 paddq xmm15,xmm15
1833 pand xmm9,xmm8
1834 pxor xmm14,xmm0
1835 pxor xmm15,xmm9
1836 movaps XMMWORD[96+rsp],xmm1
1837
; Fewer than six whole blocks left -> handle them in the short path.
1838 sub rdx,16*6
1839 jc NEAR $L$xts_enc_short
1840
; Set up the six-block loop: rcx = key base + 32 + rounds*16, and
; rax = r10 = 112 - rounds*16 is the (negative) round-key index that
; $L$xts_enc_loop6 counts up to zero. r8 now points at $L$xts_magic.
1841 mov eax,16+96
1842 lea rcx,[32+r10*1+rbp]
1843 sub rax,r10
1844 movups xmm1,XMMWORD[16+rbp]
1845 mov r10,rax
1846 lea r8,[$L$xts_magic]
1847 jmp NEAR $L$xts_enc_grandloop
1848
1849ALIGN 32
; Main loop, six blocks per iteration. Input blocks are XORed with
; (tweak XOR round key 0), which applies the tweak and the initial key
; whitening in one step. Each xmm10..xmm14 (and xmm8 for the sixth block) is
; then XORed with [96+rsp] to give (tweak XOR final round key) and spilled to
; [rsp]..[80+rsp], so the closing aesenclast also applies the output tweak.
; The first AES rounds are interleaved with this bookkeeping.
1850$L$xts_enc_grandloop:
1851 movdqu xmm2,XMMWORD[rdi]
1852 movdqa xmm8,xmm0
1853 movdqu xmm3,XMMWORD[16+rdi]
1854 pxor xmm2,xmm10
1855 movdqu xmm4,XMMWORD[32+rdi]
1856 pxor xmm3,xmm11
1857DB 102,15,56,220,209
1858 movdqu xmm5,XMMWORD[48+rdi]
1859 pxor xmm4,xmm12
1860DB 102,15,56,220,217
1861 movdqu xmm6,XMMWORD[64+rdi]
1862 pxor xmm5,xmm13
1863DB 102,15,56,220,225
1864 movdqu xmm7,XMMWORD[80+rdi]
1865 pxor xmm8,xmm15
1866 movdqa xmm9,XMMWORD[96+rsp]
1867 pxor xmm6,xmm14
1868DB 102,15,56,220,233
1869 movups xmm0,XMMWORD[32+rbp]
1870 lea rdi,[96+rdi]
1871 pxor xmm7,xmm8
1872
1873 pxor xmm10,xmm9
1874DB 102,15,56,220,241
1875 pxor xmm11,xmm9
1876 movdqa XMMWORD[rsp],xmm10
1877DB 102,15,56,220,249
1878 movups xmm1,XMMWORD[48+rbp]
1879 pxor xmm12,xmm9
1880
1881DB 102,15,56,220,208
1882 pxor xmm13,xmm9
1883 movdqa XMMWORD[16+rsp],xmm11
1884DB 102,15,56,220,216
1885 pxor xmm14,xmm9
1886 movdqa XMMWORD[32+rsp],xmm12
1887DB 102,15,56,220,224
1888DB 102,15,56,220,232
1889 pxor xmm8,xmm9
1890 movdqa XMMWORD[64+rsp],xmm14
1891DB 102,15,56,220,240
1892DB 102,15,56,220,248
1893 movups xmm0,XMMWORD[64+rbp]
1894 movdqa XMMWORD[80+rsp],xmm8
1895 pshufd xmm9,xmm15,0x5f
1896 jmp NEAR $L$xts_enc_loop6
1897ALIGN 32
; Middle rounds: two aesenc rounds on all six blocks per iteration,
; alternating round keys in xmm1/xmm0; rax steps by 32 until it reaches zero.
1898$L$xts_enc_loop6:
1899DB 102,15,56,220,209
1900DB 102,15,56,220,217
1901DB 102,15,56,220,225
1902DB 102,15,56,220,233
1903DB 102,15,56,220,241
1904DB 102,15,56,220,249
1905 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
1906 add rax,32
1907
1908DB 102,15,56,220,208
1909DB 102,15,56,220,216
1910DB 102,15,56,220,224
1911DB 102,15,56,220,232
1912DB 102,15,56,220,240
1913DB 102,15,56,220,248
1914 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
1915 jnz NEAR $L$xts_enc_loop6
1916
; Remaining aesenc rounds, interleaved with producing the next iteration's
; tweaks: xmm10..xmm14 are rebuilt as (round key 0 XOR next tweak) while
; xmm15 keeps being doubled. xmm13 from the loop head is spilled to
; [48+rsp] here, before it is overwritten.
1917 movdqa xmm8,XMMWORD[r8]
1918 movdqa xmm14,xmm9
1919 paddd xmm9,xmm9
1920DB 102,15,56,220,209
1921 paddq xmm15,xmm15
1922 psrad xmm14,31
1923DB 102,15,56,220,217
1924 pand xmm14,xmm8
1925 movups xmm10,XMMWORD[rbp]
1926DB 102,15,56,220,225
1927DB 102,15,56,220,233
1928DB 102,15,56,220,241
1929 pxor xmm15,xmm14
1930 movaps xmm11,xmm10
1931DB 102,15,56,220,249
1932 movups xmm1,XMMWORD[((-64))+rcx]
1933
1934 movdqa xmm14,xmm9
1935DB 102,15,56,220,208
1936 paddd xmm9,xmm9
1937 pxor xmm10,xmm15
1938DB 102,15,56,220,216
1939 psrad xmm14,31
1940 paddq xmm15,xmm15
1941DB 102,15,56,220,224
1942DB 102,15,56,220,232
1943 pand xmm14,xmm8
1944 movaps xmm12,xmm11
1945DB 102,15,56,220,240
1946 pxor xmm15,xmm14
1947 movdqa xmm14,xmm9
1948DB 102,15,56,220,248
1949 movups xmm0,XMMWORD[((-48))+rcx]
1950
1951 paddd xmm9,xmm9
1952DB 102,15,56,220,209
1953 pxor xmm11,xmm15
1954 psrad xmm14,31
1955DB 102,15,56,220,217
1956 paddq xmm15,xmm15
1957 pand xmm14,xmm8
1958DB 102,15,56,220,225
1959DB 102,15,56,220,233
1960 movdqa XMMWORD[48+rsp],xmm13
1961 pxor xmm15,xmm14
1962DB 102,15,56,220,241
1963 movaps xmm13,xmm12
1964 movdqa xmm14,xmm9
1965DB 102,15,56,220,249
1966 movups xmm1,XMMWORD[((-32))+rcx]
1967
1968 paddd xmm9,xmm9
1969DB 102,15,56,220,208
1970 pxor xmm12,xmm15
1971 psrad xmm14,31
1972DB 102,15,56,220,216
1973 paddq xmm15,xmm15
1974 pand xmm14,xmm8
1975DB 102,15,56,220,224
1976DB 102,15,56,220,232
1977DB 102,15,56,220,240
1978 pxor xmm15,xmm14
1979 movaps xmm14,xmm13
1980DB 102,15,56,220,248
1981
1982 movdqa xmm0,xmm9
1983 paddd xmm9,xmm9
1984DB 102,15,56,220,209
1985 pxor xmm13,xmm15
1986 psrad xmm0,31
1987DB 102,15,56,220,217
1988 paddq xmm15,xmm15
1989 pand xmm0,xmm8
1990DB 102,15,56,220,225
1991DB 102,15,56,220,233
1992 pxor xmm15,xmm0
1993 movups xmm0,XMMWORD[rbp]
1994DB 102,15,56,220,241
1995DB 102,15,56,220,249
1996 movups xmm1,XMMWORD[16+rbp]
1997
; Final round with a memory operand: the seven-byte DB sequences encode
; aesenclast xmmN,[rsp+disp8] for xmm2..xmm7 with disp 0,16,32,48,64,80,
; i.e. the (tweak XOR final round key) values spilled above.
; rax is reloaded from r10 for the next pass through $L$xts_enc_loop6.
1998 pxor xmm14,xmm15
1999DB 102,15,56,221,84,36,0
2000 psrad xmm9,31
2001 paddq xmm15,xmm15
2002DB 102,15,56,221,92,36,16
2003DB 102,15,56,221,100,36,32
2004 pand xmm9,xmm8
2005 mov rax,r10
2006DB 102,15,56,221,108,36,48
2007DB 102,15,56,221,116,36,64
2008DB 102,15,56,221,124,36,80
2009 pxor xmm15,xmm9
2010
; Store the six output blocks and loop while at least 96 bytes remain.
2011 lea rsi,[96+rsi]
2012 movups XMMWORD[(-96)+rsi],xmm2
2013 movups XMMWORD[(-80)+rsi],xmm3
2014 movups XMMWORD[(-64)+rsi],xmm4
2015 movups XMMWORD[(-48)+rsi],xmm5
2016 movups XMMWORD[(-32)+rsi],xmm6
2017 movups XMMWORD[(-16)+rsi],xmm7
2018 sub rdx,16*6
2019 jnc NEAR $L$xts_enc_grandloop
2020
; Leaving the loop: recover the round count (eax = (112 - r10d) >> 4) and
; point rcx back at the start of the data key schedule.
2021 mov eax,16+96
2022 sub eax,r10d
2023 mov rcx,rbp
2024 shr eax,4
2025
; Short path: 0..5 whole blocks remain. r10d keeps the round count for later.
; xmm10..xmm14 still carry round key 0 (xmm0), so it is XORed back out of
; each tweak lazily as the dispatch below determines it is needed.
; rdx (after adding 96 back) is the number of remaining whole-block bytes.
2026$L$xts_enc_short:
2027
2028 mov r10d,eax
2029 pxor xmm10,xmm0
2030 add rdx,16*6
2031 jz NEAR $L$xts_enc_done
2032
2033 pxor xmm11,xmm0
2034 cmp rdx,0x20
2035 jb NEAR $L$xts_enc_one
2036 pxor xmm12,xmm0
2037 je NEAR $L$xts_enc_two
2038
2039 pxor xmm13,xmm0
2040 cmp rdx,0x40
2041 jb NEAR $L$xts_enc_three
2042 pxor xmm14,xmm0
2043 je NEAR $L$xts_enc_four
2044
; Five blocks: xmm7 is zeroed as a dummy sixth input for _aesni_encrypt6.
; NOTE(review): _aesni_encrypt3/4/6 are defined outside this chunk;
; presumably they take rcx = key schedule and eax = rounds like the
; _aesni_encrypt2 helper near the top of the file -- confirm.
2045 movdqu xmm2,XMMWORD[rdi]
2046 movdqu xmm3,XMMWORD[16+rdi]
2047 movdqu xmm4,XMMWORD[32+rdi]
2048 pxor xmm2,xmm10
2049 movdqu xmm5,XMMWORD[48+rdi]
2050 pxor xmm3,xmm11
2051 movdqu xmm6,XMMWORD[64+rdi]
2052 lea rdi,[80+rdi]
2053 pxor xmm4,xmm12
2054 pxor xmm5,xmm13
2055 pxor xmm6,xmm14
2056 pxor xmm7,xmm7
2057
2058 call _aesni_encrypt6
2059
; XOR the tweaks back in on the output side; xmm10 is left holding the next
; unused tweak (xmm15) for the stealing step at $L$xts_enc_done.
2060 xorps xmm2,xmm10
2061 movdqa xmm10,xmm15
2062 xorps xmm3,xmm11
2063 xorps xmm4,xmm12
2064 movdqu XMMWORD[rsi],xmm2
2065 xorps xmm5,xmm13
2066 movdqu XMMWORD[16+rsi],xmm3
2067 xorps xmm6,xmm14
2068 movdqu XMMWORD[32+rsi],xmm4
2069 movdqu XMMWORD[48+rsi],xmm5
2070 movdqu XMMWORD[64+rsi],xmm6
2071 lea rsi,[80+rsi]
2072 jmp NEAR $L$xts_enc_done
2073
2074ALIGN 16
; One block: inline single-block AES (eax rounds, key at rcx) bracketed by
; XORs with the tweak xmm10; xmm10 then takes the next tweak (xmm11).
2075$L$xts_enc_one:
2076 movups xmm2,XMMWORD[rdi]
2077 lea rdi,[16+rdi]
2078 xorps xmm2,xmm10
2079 movups xmm0,XMMWORD[rcx]
2080 movups xmm1,XMMWORD[16+rcx]
2081 lea rcx,[32+rcx]
2082 xorps xmm2,xmm0
2083$L$oop_enc1_9:
2084DB 102,15,56,220,209
2085 dec eax
2086 movups xmm1,XMMWORD[rcx]
2087 lea rcx,[16+rcx]
2088 jnz NEAR $L$oop_enc1_9
2089DB 102,15,56,221,209
2090 xorps xmm2,xmm10
2091 movdqa xmm10,xmm11
2092 movups XMMWORD[rsi],xmm2
2093 lea rsi,[16+rsi]
2094 jmp NEAR $L$xts_enc_done
2095
2096ALIGN 16
; Two blocks via _aesni_encrypt2; xmm10 then takes the next tweak (xmm12).
2097$L$xts_enc_two:
2098 movups xmm2,XMMWORD[rdi]
2099 movups xmm3,XMMWORD[16+rdi]
2100 lea rdi,[32+rdi]
2101 xorps xmm2,xmm10
2102 xorps xmm3,xmm11
2103
2104 call _aesni_encrypt2
2105
2106 xorps xmm2,xmm10
2107 movdqa xmm10,xmm12
2108 xorps xmm3,xmm11
2109 movups XMMWORD[rsi],xmm2
2110 movups XMMWORD[16+rsi],xmm3
2111 lea rsi,[32+rsi]
2112 jmp NEAR $L$xts_enc_done
2113
2114ALIGN 16
; Three blocks via _aesni_encrypt3; xmm10 then takes the next tweak (xmm13).
2115$L$xts_enc_three:
2116 movups xmm2,XMMWORD[rdi]
2117 movups xmm3,XMMWORD[16+rdi]
2118 movups xmm4,XMMWORD[32+rdi]
2119 lea rdi,[48+rdi]
2120 xorps xmm2,xmm10
2121 xorps xmm3,xmm11
2122 xorps xmm4,xmm12
2123
2124 call _aesni_encrypt3
2125
2126 xorps xmm2,xmm10
2127 movdqa xmm10,xmm13
2128 xorps xmm3,xmm11
2129 xorps xmm4,xmm12
2130 movups XMMWORD[rsi],xmm2
2131 movups XMMWORD[16+rsi],xmm3
2132 movups XMMWORD[32+rsi],xmm4
2133 lea rsi,[48+rsi]
2134 jmp NEAR $L$xts_enc_done
2135
2136ALIGN 16
; Four blocks via _aesni_encrypt4; xmm10 then takes the next tweak (xmm14).
2137$L$xts_enc_four:
2138 movups xmm2,XMMWORD[rdi]
2139 movups xmm3,XMMWORD[16+rdi]
2140 movups xmm4,XMMWORD[32+rdi]
2141 xorps xmm2,xmm10
2142 movups xmm5,XMMWORD[48+rdi]
2143 lea rdi,[64+rdi]
2144 xorps xmm3,xmm11
2145 xorps xmm4,xmm12
2146 xorps xmm5,xmm13
2147
2148 call _aesni_encrypt4
2149
2150 pxor xmm2,xmm10
2151 movdqa xmm10,xmm14
2152 pxor xmm3,xmm11
2153 pxor xmm4,xmm12
2154 movdqu XMMWORD[rsi],xmm2
2155 pxor xmm5,xmm13
2156 movdqu XMMWORD[16+rsi],xmm3
2157 movdqu XMMWORD[32+rsi],xmm4
2158 movdqu XMMWORD[48+rsi],xmm5
2159 lea rsi,[64+rsi]
2160 jmp NEAR $L$xts_enc_done
2161
2162ALIGN 16
; All whole blocks are written. r9 & 15 is the number of trailing bytes;
; zero means nothing is left to do.
2163$L$xts_enc_done:
2164 and r9,15
2165 jz NEAR $L$xts_enc_ret
2166 mov rdx,r9
2167
; Stealing loop, one byte per iteration: the trailing input byte replaces the
; corresponding byte of the last output block ([-16+rsi]), and the byte it
; displaced is written out as the partial final block ([rsi]).
2168$L$xts_enc_steal:
2169 movzx eax,BYTE[rdi]
2170 movzx ecx,BYTE[((-16))+rsi]
2171 lea rdi,[1+rdi]
2172 mov BYTE[((-16))+rsi],al
2173 mov BYTE[rsi],cl
2174 lea rsi,[1+rsi]
2175 sub rdx,1
2176 jnz NEAR $L$xts_enc_steal
2177
; Rewind rsi, restore key pointer and round count, and encrypt the patched
; block at [-16+rsi] in place using the tweak in xmm10.
2178 sub rsi,r9
2179 mov rcx,rbp
2180 mov eax,r10d
2181
2182 movups xmm2,XMMWORD[((-16))+rsi]
2183 xorps xmm2,xmm10
2184 movups xmm0,XMMWORD[rcx]
2185 movups xmm1,XMMWORD[16+rcx]
2186 lea rcx,[32+rcx]
2187 xorps xmm2,xmm0
2188$L$oop_enc1_10:
2189DB 102,15,56,220,209
2190 dec eax
2191 movups xmm1,XMMWORD[rcx]
2192 lea rcx,[16+rcx]
2193 jnz NEAR $L$oop_enc1_10
2194DB 102,15,56,221,209
2195 xorps xmm2,xmm10
2196 movups XMMWORD[(-16)+rsi],xmm2
2197
; Exit: zero xmm0-xmm5, restore xmm6-xmm15 from their save slots while
; overwriting each slot with zero, zero the scratch area at [rsp]..[96+rsp],
; then restore rbp and the entry rsp from r11.
2198$L$xts_enc_ret:
2199 xorps xmm0,xmm0
2200 pxor xmm1,xmm1
2201 pxor xmm2,xmm2
2202 pxor xmm3,xmm3
2203 pxor xmm4,xmm4
2204 pxor xmm5,xmm5
2205 movaps xmm6,XMMWORD[((-168))+r11]
2206 movaps XMMWORD[(-168)+r11],xmm0
2207 movaps xmm7,XMMWORD[((-152))+r11]
2208 movaps XMMWORD[(-152)+r11],xmm0
2209 movaps xmm8,XMMWORD[((-136))+r11]
2210 movaps XMMWORD[(-136)+r11],xmm0
2211 movaps xmm9,XMMWORD[((-120))+r11]
2212 movaps XMMWORD[(-120)+r11],xmm0
2213 movaps xmm10,XMMWORD[((-104))+r11]
2214 movaps XMMWORD[(-104)+r11],xmm0
2215 movaps xmm11,XMMWORD[((-88))+r11]
2216 movaps XMMWORD[(-88)+r11],xmm0
2217 movaps xmm12,XMMWORD[((-72))+r11]
2218 movaps XMMWORD[(-72)+r11],xmm0
2219 movaps xmm13,XMMWORD[((-56))+r11]
2220 movaps XMMWORD[(-56)+r11],xmm0
2221 movaps xmm14,XMMWORD[((-40))+r11]
2222 movaps XMMWORD[(-40)+r11],xmm0
2223 movaps xmm15,XMMWORD[((-24))+r11]
2224 movaps XMMWORD[(-24)+r11],xmm0
2225 movaps XMMWORD[rsp],xmm0
2226 movaps XMMWORD[16+rsp],xmm0
2227 movaps XMMWORD[32+rsp],xmm0
2228 movaps XMMWORD[48+rsp],xmm0
2229 movaps XMMWORD[64+rsp],xmm0
2230 movaps XMMWORD[80+rsp],xmm0
2231 movaps XMMWORD[96+rsp],xmm0
2232 mov rbp,QWORD[((-8))+r11]
2233
2234 lea rsp,[r11]
2235
; Reload the caller's rdi/rsi from the home space written in the prologue.
2236$L$xts_enc_epilogue:
2237 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2238 mov rsi,QWORD[16+rsp]
2239 DB 0F3h,0C3h ;repret
2240
2241$L$SEH_end_aesni_xts_encrypt:
;-----------------------------------------------------------------------
; aesni_xts_decrypt: tweaked (XTS-style) AES decryption using AES-NI.
; ABI:  Microsoft x64 on entry. The prologue copies the Win64 arguments
;       (rcx, rdx, r8, r9, [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx,
;       r8, r9 and the body works on those registers.
; Roles after the copy, as used by the body below:
;       rdi = input pointer          rsi = output pointer
;       rdx = length in bytes        rcx = data key schedule
;       r8  = tweak key schedule     r9  = pointer to the 16-byte tweak/IV
;       Both key schedules carry their round count as a dword at offset 240.
; NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv)
;       -- confirm against the declaration used by the callers.
; Preserves rdi, rsi, rbp and xmm6-xmm15. Before returning it zeroes
; xmm0-xmm5, the xmm save slots and the stack scratch area.
; Instruction encodings emitted as raw bytes in this routine:
;       DB 243,15,30,250         = F3 0F 1E FA        (endbr64)
;       DB 102,15,56,220,<modrm> = 66 0F 38 DC /r     (aesenc, tweak only)
;       DB 102,15,56,221,<modrm> = 66 0F 38 DD /r     (aesenclast, tweak only)
;       DB 102,15,56,222,<modrm> = 66 0F 38 DE /r     (aesdec)
;       DB 102,15,56,223,<modrm> = 66 0F 38 DF /r     (aesdeclast)
;       modrm 209,217,225,233,241,249 -> xmm2..xmm7 with round key xmm1
;       modrm 208,216,224,232,240,248 -> xmm2..xmm7 with round key xmm0
;-----------------------------------------------------------------------
2242global aesni_xts_decrypt
2243
2244ALIGN 16
2245aesni_xts_decrypt:
; rdi/rsi are callee-saved on Win64: park them in the caller-provided
; home space, then move the Win64 arguments into the registers the body uses.
2246 mov QWORD[8+rsp],rdi ;WIN64 prologue
2247 mov QWORD[16+rsp],rsi
2248 mov rax,rsp
2249$L$SEH_begin_aesni_xts_decrypt:
2250 mov rdi,rcx
2251 mov rsi,rdx
2252 mov rdx,r8
2253 mov rcx,r9
2254 mov r8,QWORD[40+rsp]
2255 mov r9,QWORD[48+rsp]
2256
2257
2258
2259DB 243,15,30,250
; r11 keeps the entry rsp: rbp lands at [-8+r11], xmm6-xmm15 are saved at
; [-168+r11]..[-24+r11], and rsp is realigned to 16 bytes for the scratch area.
2260 lea r11,[rsp]
2261
2262 push rbp
2263
2264 sub rsp,272
2265 and rsp,-16
2266 movaps XMMWORD[(-168)+r11],xmm6
2267 movaps XMMWORD[(-152)+r11],xmm7
2268 movaps XMMWORD[(-136)+r11],xmm8
2269 movaps XMMWORD[(-120)+r11],xmm9
2270 movaps XMMWORD[(-104)+r11],xmm10
2271 movaps XMMWORD[(-88)+r11],xmm11
2272 movaps XMMWORD[(-72)+r11],xmm12
2273 movaps XMMWORD[(-56)+r11],xmm13
2274 movaps XMMWORD[(-40)+r11],xmm14
2275 movaps XMMWORD[(-24)+r11],xmm15
2276$L$xts_dec_body:
; The initial tweak is produced with the *encrypt* instructions even though
; this is the decrypt routine: encrypt the 16 bytes at [r9] with the key
; schedule at r8, result in xmm2. r10d picks up the data key's round count.
2277 movups xmm2,XMMWORD[r9]
2278 mov eax,DWORD[240+r8]
2279 mov r10d,DWORD[240+rcx]
2280 movups xmm0,XMMWORD[r8]
2281 movups xmm1,XMMWORD[16+r8]
2282 lea r8,[32+r8]
2283 xorps xmm2,xmm0
2284$L$oop_enc1_11:
2285DB 102,15,56,220,209
2286 dec eax
2287 movups xmm1,XMMWORD[r8]
2288 lea r8,[16+r8]
2289 jnz NEAR $L$oop_enc1_11
2290DB 102,15,56,221,209
; If the length is not a multiple of 16, take 16 bytes off rdx so that the
; last whole block is left for the stealing code instead of the bulk paths.
2291 xor eax,eax
2292 test rdx,15
2293 setnz al
2294 shl rax,4
2295 sub rdx,rax
2296
; xmm0 = round key 0 of the data key, rbp = data key base, eax = rounds,
; r10d = rounds*16. r9 keeps the adjusted length (its low 4 bits drive the
; stealing tail); rdx is rounded down to whole 16-byte blocks.
2297 movups xmm0,XMMWORD[rcx]
2298 mov rbp,rcx
2299 mov eax,r10d
2300 shl r10d,4
2301 mov r9,rdx
2302 and rdx,-16
2303
; xmm1 = key-schedule entry at 16*(rounds+1), i.e. the one the single-block
; loops below feed to aesdeclast.
2304 movups xmm1,XMMWORD[16+r10*1+rcx]
2305
; Derive the tweak sequence. Each step copies the current tweak (xmm15) into
; xmm10..xmm14 pre-XORed with round key 0, then doubles xmm15 (paddq on both
; 64-bit halves) and XORs in carry bits selected via psrad/pand with
; $L$xts_magic. xmm9 holds dword copies of the tweak used to build the masks.
; NOTE(review): $L$xts_magic is defined outside this chunk; this looks like
; the usual XTS multiply-by-x in GF(2^128) -- confirm against the constant.
; xmm1 becomes (final round key XOR round key 0) and is kept at [96+rsp].
2306 movdqa xmm8,XMMWORD[$L$xts_magic]
2307 movdqa xmm15,xmm2
2308 pshufd xmm9,xmm2,0x5f
2309 pxor xmm1,xmm0
2310 movdqa xmm14,xmm9
2311 paddd xmm9,xmm9
2312 movdqa xmm10,xmm15
2313 psrad xmm14,31
2314 paddq xmm15,xmm15
2315 pand xmm14,xmm8
2316 pxor xmm10,xmm0
2317 pxor xmm15,xmm14
2318 movdqa xmm14,xmm9
2319 paddd xmm9,xmm9
2320 movdqa xmm11,xmm15
2321 psrad xmm14,31
2322 paddq xmm15,xmm15
2323 pand xmm14,xmm8
2324 pxor xmm11,xmm0
2325 pxor xmm15,xmm14
2326 movdqa xmm14,xmm9
2327 paddd xmm9,xmm9
2328 movdqa xmm12,xmm15
2329 psrad xmm14,31
2330 paddq xmm15,xmm15
2331 pand xmm14,xmm8
2332 pxor xmm12,xmm0
2333 pxor xmm15,xmm14
2334 movdqa xmm14,xmm9
2335 paddd xmm9,xmm9
2336 movdqa xmm13,xmm15
2337 psrad xmm14,31
2338 paddq xmm15,xmm15
2339 pand xmm14,xmm8
2340 pxor xmm13,xmm0
2341 pxor xmm15,xmm14
2342 movdqa xmm14,xmm15
2343 psrad xmm9,31
2344 paddq xmm15,xmm15
2345 pand xmm9,xmm8
2346 pxor xmm14,xmm0
2347 pxor xmm15,xmm9
2348 movaps XMMWORD[96+rsp],xmm1
2349
; Fewer than six whole blocks left -> handle them in the short path.
2350 sub rdx,16*6
2351 jc NEAR $L$xts_dec_short
2352
; Set up the six-block loop: rcx = key base + 32 + rounds*16, and
; rax = r10 = 112 - rounds*16 is the (negative) round-key index that
; $L$xts_dec_loop6 counts up to zero. r8 now points at $L$xts_magic.
2353 mov eax,16+96
2354 lea rcx,[32+r10*1+rbp]
2355 sub rax,r10
2356 movups xmm1,XMMWORD[16+rbp]
2357 mov r10,rax
2358 lea r8,[$L$xts_magic]
2359 jmp NEAR $L$xts_dec_grandloop
2360
2361ALIGN 32
; Main loop, six blocks per iteration. Input blocks are XORed with
; (tweak XOR round key 0), which applies the tweak and the initial key
; whitening in one step. Each xmm10..xmm14 (and xmm8 for the sixth block) is
; then XORed with [96+rsp] to give (tweak XOR final round key) and spilled to
; [rsp]..[80+rsp], so the closing aesdeclast also applies the output tweak.
; The first AES rounds are interleaved with this bookkeeping.
2362$L$xts_dec_grandloop:
2363 movdqu xmm2,XMMWORD[rdi]
2364 movdqa xmm8,xmm0
2365 movdqu xmm3,XMMWORD[16+rdi]
2366 pxor xmm2,xmm10
2367 movdqu xmm4,XMMWORD[32+rdi]
2368 pxor xmm3,xmm11
2369DB 102,15,56,222,209
2370 movdqu xmm5,XMMWORD[48+rdi]
2371 pxor xmm4,xmm12
2372DB 102,15,56,222,217
2373 movdqu xmm6,XMMWORD[64+rdi]
2374 pxor xmm5,xmm13
2375DB 102,15,56,222,225
2376 movdqu xmm7,XMMWORD[80+rdi]
2377 pxor xmm8,xmm15
2378 movdqa xmm9,XMMWORD[96+rsp]
2379 pxor xmm6,xmm14
2380DB 102,15,56,222,233
2381 movups xmm0,XMMWORD[32+rbp]
2382 lea rdi,[96+rdi]
2383 pxor xmm7,xmm8
2384
2385 pxor xmm10,xmm9
2386DB 102,15,56,222,241
2387 pxor xmm11,xmm9
2388 movdqa XMMWORD[rsp],xmm10
2389DB 102,15,56,222,249
2390 movups xmm1,XMMWORD[48+rbp]
2391 pxor xmm12,xmm9
2392
2393DB 102,15,56,222,208
2394 pxor xmm13,xmm9
2395 movdqa XMMWORD[16+rsp],xmm11
2396DB 102,15,56,222,216
2397 pxor xmm14,xmm9
2398 movdqa XMMWORD[32+rsp],xmm12
2399DB 102,15,56,222,224
2400DB 102,15,56,222,232
2401 pxor xmm8,xmm9
2402 movdqa XMMWORD[64+rsp],xmm14
2403DB 102,15,56,222,240
2404DB 102,15,56,222,248
2405 movups xmm0,XMMWORD[64+rbp]
2406 movdqa XMMWORD[80+rsp],xmm8
2407 pshufd xmm9,xmm15,0x5f
2408 jmp NEAR $L$xts_dec_loop6
2409ALIGN 32
; Middle rounds: two aesdec rounds on all six blocks per iteration,
; alternating round keys in xmm1/xmm0; rax steps by 32 until it reaches zero.
2410$L$xts_dec_loop6:
2411DB 102,15,56,222,209
2412DB 102,15,56,222,217
2413DB 102,15,56,222,225
2414DB 102,15,56,222,233
2415DB 102,15,56,222,241
2416DB 102,15,56,222,249
2417 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
2418 add rax,32
2419
2420DB 102,15,56,222,208
2421DB 102,15,56,222,216
2422DB 102,15,56,222,224
2423DB 102,15,56,222,232
2424DB 102,15,56,222,240
2425DB 102,15,56,222,248
2426 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
2427 jnz NEAR $L$xts_dec_loop6
2428
; Remaining aesdec rounds, interleaved with producing the next iteration's
; tweaks: xmm10..xmm14 are rebuilt as (round key 0 XOR next tweak) while
; xmm15 keeps being doubled. xmm13 from the loop head is spilled to
; [48+rsp] here, before it is overwritten.
2429 movdqa xmm8,XMMWORD[r8]
2430 movdqa xmm14,xmm9
2431 paddd xmm9,xmm9
2432DB 102,15,56,222,209
2433 paddq xmm15,xmm15
2434 psrad xmm14,31
2435DB 102,15,56,222,217
2436 pand xmm14,xmm8
2437 movups xmm10,XMMWORD[rbp]
2438DB 102,15,56,222,225
2439DB 102,15,56,222,233
2440DB 102,15,56,222,241
2441 pxor xmm15,xmm14
2442 movaps xmm11,xmm10
2443DB 102,15,56,222,249
2444 movups xmm1,XMMWORD[((-64))+rcx]
2445
2446 movdqa xmm14,xmm9
2447DB 102,15,56,222,208
2448 paddd xmm9,xmm9
2449 pxor xmm10,xmm15
2450DB 102,15,56,222,216
2451 psrad xmm14,31
2452 paddq xmm15,xmm15
2453DB 102,15,56,222,224
2454DB 102,15,56,222,232
2455 pand xmm14,xmm8
2456 movaps xmm12,xmm11
2457DB 102,15,56,222,240
2458 pxor xmm15,xmm14
2459 movdqa xmm14,xmm9
2460DB 102,15,56,222,248
2461 movups xmm0,XMMWORD[((-48))+rcx]
2462
2463 paddd xmm9,xmm9
2464DB 102,15,56,222,209
2465 pxor xmm11,xmm15
2466 psrad xmm14,31
2467DB 102,15,56,222,217
2468 paddq xmm15,xmm15
2469 pand xmm14,xmm8
2470DB 102,15,56,222,225
2471DB 102,15,56,222,233
2472 movdqa XMMWORD[48+rsp],xmm13
2473 pxor xmm15,xmm14
2474DB 102,15,56,222,241
2475 movaps xmm13,xmm12
2476 movdqa xmm14,xmm9
2477DB 102,15,56,222,249
2478 movups xmm1,XMMWORD[((-32))+rcx]
2479
2480 paddd xmm9,xmm9
2481DB 102,15,56,222,208
2482 pxor xmm12,xmm15
2483 psrad xmm14,31
2484DB 102,15,56,222,216
2485 paddq xmm15,xmm15
2486 pand xmm14,xmm8
2487DB 102,15,56,222,224
2488DB 102,15,56,222,232
2489DB 102,15,56,222,240
2490 pxor xmm15,xmm14
2491 movaps xmm14,xmm13
2492DB 102,15,56,222,248
2493
2494 movdqa xmm0,xmm9
2495 paddd xmm9,xmm9
2496DB 102,15,56,222,209
2497 pxor xmm13,xmm15
2498 psrad xmm0,31
2499DB 102,15,56,222,217
2500 paddq xmm15,xmm15
2501 pand xmm0,xmm8
2502DB 102,15,56,222,225
2503DB 102,15,56,222,233
2504 pxor xmm15,xmm0
2505 movups xmm0,XMMWORD[rbp]
2506DB 102,15,56,222,241
2507DB 102,15,56,222,249
2508 movups xmm1,XMMWORD[16+rbp]
2509
; Final round with a memory operand: the seven-byte DB sequences encode
; aesdeclast xmmN,[rsp+disp8] for xmm2..xmm7 with disp 0,16,32,48,64,80,
; i.e. the (tweak XOR final round key) values spilled above.
; rax is reloaded from r10 for the next pass through $L$xts_dec_loop6.
2510 pxor xmm14,xmm15
2511DB 102,15,56,223,84,36,0
2512 psrad xmm9,31
2513 paddq xmm15,xmm15
2514DB 102,15,56,223,92,36,16
2515DB 102,15,56,223,100,36,32
2516 pand xmm9,xmm8
2517 mov rax,r10
2518DB 102,15,56,223,108,36,48
2519DB 102,15,56,223,116,36,64
2520DB 102,15,56,223,124,36,80
2521 pxor xmm15,xmm9
2522
; Store the six output blocks and loop while at least 96 bytes remain.
2523 lea rsi,[96+rsi]
2524 movups XMMWORD[(-96)+rsi],xmm2
2525 movups XMMWORD[(-80)+rsi],xmm3
2526 movups XMMWORD[(-64)+rsi],xmm4
2527 movups XMMWORD[(-48)+rsi],xmm5
2528 movups XMMWORD[(-32)+rsi],xmm6
2529 movups XMMWORD[(-16)+rsi],xmm7
2530 sub rdx,16*6
2531 jnc NEAR $L$xts_dec_grandloop
2532
; Leaving the loop: recover the round count (eax = (112 - r10d) >> 4) and
; point rcx back at the start of the data key schedule.
2533 mov eax,16+96
2534 sub eax,r10d
2535 mov rcx,rbp
2536 shr eax,4
2537
; Short path: 0..5 whole blocks remain. r10d keeps the round count for later.
; xmm10..xmm14 still carry round key 0 (xmm0), so it is XORed back out.
; Unlike the encrypt routine, one tweak more than the block count is
; unmasked at each step, because the stealing code at $L$xts_dec_done2
; consumes two tweaks (xmm10 and xmm11).
2538$L$xts_dec_short:
2539
2540 mov r10d,eax
2541 pxor xmm10,xmm0
2542 pxor xmm11,xmm0
2543 add rdx,16*6
2544 jz NEAR $L$xts_dec_done
2545
2546 pxor xmm12,xmm0
2547 cmp rdx,0x20
2548 jb NEAR $L$xts_dec_one
2549 pxor xmm13,xmm0
2550 je NEAR $L$xts_dec_two
2551
2552 pxor xmm14,xmm0
2553 cmp rdx,0x40
2554 jb NEAR $L$xts_dec_three
2555 je NEAR $L$xts_dec_four
2556
; Five blocks via _aesni_decrypt6. xmm7 is not initialised on this path.
; NOTE(review): _aesni_decrypt3/4/6 are defined outside this chunk;
; presumably they take rcx = key schedule and eax = rounds like the
; two-block helpers near the top of the file -- confirm.
2557 movdqu xmm2,XMMWORD[rdi]
2558 movdqu xmm3,XMMWORD[16+rdi]
2559 movdqu xmm4,XMMWORD[32+rdi]
2560 pxor xmm2,xmm10
2561 movdqu xmm5,XMMWORD[48+rdi]
2562 pxor xmm3,xmm11
2563 movdqu xmm6,XMMWORD[64+rdi]
2564 lea rdi,[80+rdi]
2565 pxor xmm4,xmm12
2566 pxor xmm5,xmm13
2567 pxor xmm6,xmm14
2568
2569 call _aesni_decrypt6
2570
; XOR the tweaks back in and store. Meanwhile pcmpgtd against zero builds
; per-dword sign masks of xmm15, which pshufd 0x13 rearranges into xmm11 as
; the raw material for one more tweak doubling, needed only when a partial
; block follows.
2571 xorps xmm2,xmm10
2572 xorps xmm3,xmm11
2573 xorps xmm4,xmm12
2574 movdqu XMMWORD[rsi],xmm2
2575 xorps xmm5,xmm13
2576 movdqu XMMWORD[16+rsi],xmm3
2577 xorps xmm6,xmm14
2578 movdqu XMMWORD[32+rsi],xmm4
2579 pxor xmm14,xmm14
2580 movdqu XMMWORD[48+rsi],xmm5
2581 pcmpgtd xmm14,xmm15
2582 movdqu XMMWORD[64+rsi],xmm6
2583 lea rsi,[80+rsi]
2584 pshufd xmm11,xmm14,0x13
2585 and r9,15
2586 jz NEAR $L$xts_dec_ret
2587
; Partial block follows: xmm10 = current tweak (xmm15), xmm11 = the doubled
; tweak after it.
; NOTE(review): relies on xmm8 still holding $L$xts_magic after the call to
; _aesni_decrypt6 -- confirm the helper does not clobber xmm8.
2588 movdqa xmm10,xmm15
2589 paddq xmm15,xmm15
2590 pand xmm11,xmm8
2591 pxor xmm11,xmm15
2592 jmp NEAR $L$xts_dec_done2
2593
2594ALIGN 16
; One block: inline single-block AES decryption (eax rounds, key at rcx)
; bracketed by XORs with the tweak xmm10; afterwards xmm10/xmm11 take the
; next two tweaks (xmm11/xmm12).
2595$L$xts_dec_one:
2596 movups xmm2,XMMWORD[rdi]
2597 lea rdi,[16+rdi]
2598 xorps xmm2,xmm10
2599 movups xmm0,XMMWORD[rcx]
2600 movups xmm1,XMMWORD[16+rcx]
2601 lea rcx,[32+rcx]
2602 xorps xmm2,xmm0
2603$L$oop_dec1_12:
2604DB 102,15,56,222,209
2605 dec eax
2606 movups xmm1,XMMWORD[rcx]
2607 lea rcx,[16+rcx]
2608 jnz NEAR $L$oop_dec1_12
2609DB 102,15,56,223,209
2610 xorps xmm2,xmm10
2611 movdqa xmm10,xmm11
2612 movups XMMWORD[rsi],xmm2
2613 movdqa xmm11,xmm12
2614 lea rsi,[16+rsi]
2615 jmp NEAR $L$xts_dec_done
2616
2617ALIGN 16
; Two blocks via _aesni_decrypt2; xmm10/xmm11 then take tweaks xmm12/xmm13.
2618$L$xts_dec_two:
2619 movups xmm2,XMMWORD[rdi]
2620 movups xmm3,XMMWORD[16+rdi]
2621 lea rdi,[32+rdi]
2622 xorps xmm2,xmm10
2623 xorps xmm3,xmm11
2624
2625 call _aesni_decrypt2
2626
2627 xorps xmm2,xmm10
2628 movdqa xmm10,xmm12
2629 xorps xmm3,xmm11
2630 movdqa xmm11,xmm13
2631 movups XMMWORD[rsi],xmm2
2632 movups XMMWORD[16+rsi],xmm3
2633 lea rsi,[32+rsi]
2634 jmp NEAR $L$xts_dec_done
2635
2636ALIGN 16
; Three blocks via _aesni_decrypt3; xmm10/xmm11 then take tweaks xmm13/xmm14.
2637$L$xts_dec_three:
2638 movups xmm2,XMMWORD[rdi]
2639 movups xmm3,XMMWORD[16+rdi]
2640 movups xmm4,XMMWORD[32+rdi]
2641 lea rdi,[48+rdi]
2642 xorps xmm2,xmm10
2643 xorps xmm3,xmm11
2644 xorps xmm4,xmm12
2645
2646 call _aesni_decrypt3
2647
2648 xorps xmm2,xmm10
2649 movdqa xmm10,xmm13
2650 xorps xmm3,xmm11
2651 movdqa xmm11,xmm14
2652 xorps xmm4,xmm12
2653 movups XMMWORD[rsi],xmm2
2654 movups XMMWORD[16+rsi],xmm3
2655 movups XMMWORD[32+rsi],xmm4
2656 lea rsi,[48+rsi]
2657 jmp NEAR $L$xts_dec_done
2658
2659ALIGN 16
; Four blocks via _aesni_decrypt4; xmm10/xmm11 then take tweaks xmm14/xmm15.
2660$L$xts_dec_four:
2661 movups xmm2,XMMWORD[rdi]
2662 movups xmm3,XMMWORD[16+rdi]
2663 movups xmm4,XMMWORD[32+rdi]
2664 xorps xmm2,xmm10
2665 movups xmm5,XMMWORD[48+rdi]
2666 lea rdi,[64+rdi]
2667 xorps xmm3,xmm11
2668 xorps xmm4,xmm12
2669 xorps xmm5,xmm13
2670
2671 call _aesni_decrypt4
2672
2673 pxor xmm2,xmm10
2674 movdqa xmm10,xmm14
2675 pxor xmm3,xmm11
2676 movdqa xmm11,xmm15
2677 pxor xmm4,xmm12
2678 movdqu XMMWORD[rsi],xmm2
2679 pxor xmm5,xmm13
2680 movdqu XMMWORD[16+rsi],xmm3
2681 movdqu XMMWORD[32+rsi],xmm4
2682 movdqu XMMWORD[48+rsi],xmm5
2683 lea rsi,[64+rsi]
2684 jmp NEAR $L$xts_dec_done
2685
2686ALIGN 16
; All bulk blocks are written. r9 & 15 is the number of trailing bytes;
; zero means nothing is left to do.
2687$L$xts_dec_done:
2688 and r9,15
2689 jz NEAR $L$xts_dec_ret
; Stealing, step 1: decrypt the last whole input block at [rdi] using the
; *later* tweak (xmm11) and store the result at [rsi].
2690$L$xts_dec_done2:
2691 mov rdx,r9
2692 mov rcx,rbp
2693 mov eax,r10d
2694
2695 movups xmm2,XMMWORD[rdi]
2696 xorps xmm2,xmm11
2697 movups xmm0,XMMWORD[rcx]
2698 movups xmm1,XMMWORD[16+rcx]
2699 lea rcx,[32+rcx]
2700 xorps xmm2,xmm0
2701$L$oop_dec1_13:
2702DB 102,15,56,222,209
2703 dec eax
2704 movups xmm1,XMMWORD[rcx]
2705 lea rcx,[16+rcx]
2706 jnz NEAR $L$oop_dec1_13
2707DB 102,15,56,223,209
2708 xorps xmm2,xmm11
2709 movups XMMWORD[rsi],xmm2
2710
; Stealing, step 2, one byte per iteration: the trailing input byte at
; [16+rdi] replaces the corresponding byte of the block just written at
; [rsi], and the byte it displaced is written out at [16+rsi].
2711$L$xts_dec_steal:
2712 movzx eax,BYTE[16+rdi]
2713 movzx ecx,BYTE[rsi]
2714 lea rdi,[1+rdi]
2715 mov BYTE[rsi],al
2716 mov BYTE[16+rsi],cl
2717 lea rsi,[1+rsi]
2718 sub rdx,1
2719 jnz NEAR $L$xts_dec_steal
2720
; Stealing, step 3: rewind rsi, restore key pointer and round count, and
; decrypt the patched block at [rsi] in place using the earlier tweak (xmm10).
2721 sub rsi,r9
2722 mov rcx,rbp
2723 mov eax,r10d
2724
2725 movups xmm2,XMMWORD[rsi]
2726 xorps xmm2,xmm10
2727 movups xmm0,XMMWORD[rcx]
2728 movups xmm1,XMMWORD[16+rcx]
2729 lea rcx,[32+rcx]
2730 xorps xmm2,xmm0
2731$L$oop_dec1_14:
2732DB 102,15,56,222,209
2733 dec eax
2734 movups xmm1,XMMWORD[rcx]
2735 lea rcx,[16+rcx]
2736 jnz NEAR $L$oop_dec1_14
2737DB 102,15,56,223,209
2738 xorps xmm2,xmm10
2739 movups XMMWORD[rsi],xmm2
2740
; Exit: zero xmm0-xmm5, restore xmm6-xmm15 from their save slots while
; overwriting each slot with zero, zero the scratch area at [rsp]..[96+rsp],
; then restore rbp and the entry rsp from r11.
2741$L$xts_dec_ret:
2742 xorps xmm0,xmm0
2743 pxor xmm1,xmm1
2744 pxor xmm2,xmm2
2745 pxor xmm3,xmm3
2746 pxor xmm4,xmm4
2747 pxor xmm5,xmm5
2748 movaps xmm6,XMMWORD[((-168))+r11]
2749 movaps XMMWORD[(-168)+r11],xmm0
2750 movaps xmm7,XMMWORD[((-152))+r11]
2751 movaps XMMWORD[(-152)+r11],xmm0
2752 movaps xmm8,XMMWORD[((-136))+r11]
2753 movaps XMMWORD[(-136)+r11],xmm0
2754 movaps xmm9,XMMWORD[((-120))+r11]
2755 movaps XMMWORD[(-120)+r11],xmm0
2756 movaps xmm10,XMMWORD[((-104))+r11]
2757 movaps XMMWORD[(-104)+r11],xmm0
2758 movaps xmm11,XMMWORD[((-88))+r11]
2759 movaps XMMWORD[(-88)+r11],xmm0
2760 movaps xmm12,XMMWORD[((-72))+r11]
2761 movaps XMMWORD[(-72)+r11],xmm0
2762 movaps xmm13,XMMWORD[((-56))+r11]
2763 movaps XMMWORD[(-56)+r11],xmm0
2764 movaps xmm14,XMMWORD[((-40))+r11]
2765 movaps XMMWORD[(-40)+r11],xmm0
2766 movaps xmm15,XMMWORD[((-24))+r11]
2767 movaps XMMWORD[(-24)+r11],xmm0
2768 movaps XMMWORD[rsp],xmm0
2769 movaps XMMWORD[16+rsp],xmm0
2770 movaps XMMWORD[32+rsp],xmm0
2771 movaps XMMWORD[48+rsp],xmm0
2772 movaps XMMWORD[64+rsp],xmm0
2773 movaps XMMWORD[80+rsp],xmm0
2774 movaps XMMWORD[96+rsp],xmm0
2775 mov rbp,QWORD[((-8))+r11]
2776
2777 lea rsp,[r11]
2778
; Reload the caller's rdi/rsi from the home space written in the prologue.
2779$L$xts_dec_epilogue:
2780 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2781 mov rsi,QWORD[16+rsp]
2782 DB 0F3h,0C3h ;repret
2783
2784$L$SEH_end_aesni_xts_decrypt:
;-----------------------------------------------------------------------
; aesni_ocb_encrypt -- AES-NI encryption of whole 16-byte blocks for OCB.
;
; Entered with the Microsoft x64 convention; the entry code remaps the
; arguments into the registers the body works with:
;   rdi = input pointer                       (arg 1, arrives in rcx)
;   rsi = output pointer                      (arg 2, arrives in rdx)
;   rdx = number of 16-byte blocks            (arg 3, arrives in r8)
;   rcx = key schedule, round count at +240   (arg 4, arrives in r9)
;   r8  = number of the first block           (arg 5, stack [40+rsp])
;   r9  = pointer to the 16-byte running offset, read on entry and
;         written back on exit                (arg 6, stack [48+rsp])
;   rbx = table of 16-byte entries indexed by bsf(block number)*16
;                                             (arg 7, stack [56+rsp])
;   rbp = pointer to the 16-byte XOR accumulator of the input blocks,
;         read on entry and written back on exit
;                                             (arg 8, stack [64+rsp])
; (In OCB terms these are presumably Offset, the L_i table and Checksum.)
;
; Invariants kept across the helper calls:
;   xmm9  = round key 0 XOR last round key
;   xmm15 = current offset XOR last round key
;   xmm8  = running XOR of all input blocks
;   xmm10 = table entry 0 ([rbx])
;   xmm1  = round key at key+16
;   r11   = key schedule base, rcx = key+32+16*rounds,
;   rax = r10 = 48-16*rounds (negative index; [rax+rcx] is key+80)
;
; NOTE(review): when r8 is even one block is processed before rdx is
; examined, so the routine appears to assume rdx >= 1 -- confirm callers.
;-----------------------------------------------------------------------
2785global aesni_ocb_encrypt
2786
2787ALIGN 32
2788aesni_ocb_encrypt:
; rdi/rsi are callee-saved on Win64: park them in the caller-provided
; home slots just above the return address.
2789 mov QWORD[8+rsp],rdi ;WIN64 prologue
2790 mov QWORD[16+rsp],rsi
2791 mov rax,rsp
2792$L$SEH_begin_aesni_ocb_encrypt:
; Remap the six leading arguments (see header).
2793 mov rdi,rcx
2794 mov rsi,rdx
2795 mov rdx,r8
2796 mov rcx,r9
2797 mov r8,QWORD[40+rsp]
2798 mov r9,QWORD[48+rsp]
2799
2800
2801
; F3 0F 1E FA = endbr64.
; NOTE(review): the marker follows the prologue moves instead of sitting
; at the entry label -- confirm this is acceptable for the target.
2802DB 243,15,30,250
; rax = rsp at entry; used below to reach stack arguments 7 and 8.
2803 lea rax,[rsp]
; Save the callee-saved integer registers this routine uses (5 x 8 bytes).
2804 push rbx
2805
2806 push rbp
2807
2808 push r12
2809
2810 push r13
2811
2812 push r14
2813
; Reserve 160 bytes and save callee-saved xmm6..xmm15 (10 x 16 bytes).
2814 lea rsp,[((-160))+rsp]
2815 movaps XMMWORD[rsp],xmm6
2816 movaps XMMWORD[16+rsp],xmm7
2817 movaps XMMWORD[32+rsp],xmm8
2818 movaps XMMWORD[48+rsp],xmm9
2819 movaps XMMWORD[64+rsp],xmm10
2820 movaps XMMWORD[80+rsp],xmm11
2821 movaps XMMWORD[96+rsp],xmm12
2822 movaps XMMWORD[112+rsp],xmm13
2823 movaps XMMWORD[128+rsp],xmm14
2824 movaps XMMWORD[144+rsp],xmm15
2825$L$ocb_enc_body:
; Stack arguments 7 and 8: table pointer and accumulator pointer.
2826 mov rbx,QWORD[56+rax]
2827 mov rbp,QWORD[((56+8))+rax]
2828
; r10 = 16*rounds; xmm9 = round key 0; xmm1 = last round key.
2829 mov r10d,DWORD[240+rcx]
2830 mov r11,rcx
2831 shl r10d,4
2832 movups xmm9,XMMWORD[rcx]
2833 movups xmm1,XMMWORD[16+r10*1+rcx]
2834
; Fold the last round key into both the round-0 key and the offset so
; the helpers can apply the offset inside aesenclast.
2835 movdqu xmm15,XMMWORD[r9]
2836 pxor xmm9,xmm1
2837 pxor xmm15,xmm1
2838
; rcx = key+32+16*rounds, rax = r10 = 48-16*rounds: the helpers index
; round keys as [rax+rcx] and stop when rax reaches zero.
2839 mov eax,16+32
2840 lea rcx,[32+r10*1+r11]
2841 movups xmm1,XMMWORD[16+r11]
2842 sub rax,r10
2843 mov r10,rax
2844
; xmm10 = table entry 0, xmm8 = incoming accumulator value.
2845 movdqu xmm10,XMMWORD[rbx]
2846 movdqu xmm8,XMMWORD[rbp]
2847
; If the first block number is even, handle one block on its own so the
; six-block path always starts on an odd block number.
2848 test r8,1
2849 jnz NEAR $L$ocb_enc_odd
2850
; xmm7 = table[bsf(r8)], xmm2 = input block; advance block number/input.
2851 bsf r12,r8
2852 add r8,1
2853 shl r12,4
2854 movdqu xmm7,XMMWORD[r12*1+rbx]
2855 movdqu xmm2,XMMWORD[rdi]
2856 lea rdi,[16+rdi]
2857
2858 call __ocb_encrypt1
2859
; xmm7 now holds the new offset XOR last round key; make it current.
2860 movdqa xmm15,xmm7
2861 movups XMMWORD[rsi],xmm2
2862 lea rsi,[16+rsi]
2863 sub rdx,1
2864 jz NEAR $L$ocb_enc_done
2865
2866$L$ocb_enc_odd:
; r8 is odd here. Precompute table byte offsets for the even-numbered
; blocks of the next six (r8+1, r8+3, r8+5); the odd-numbered ones use
; table entry 0 (xmm10). r8 advances by six.
2867 lea r12,[1+r8]
2868 lea r13,[3+r8]
2869 lea r14,[5+r8]
2870 lea r8,[6+r8]
2871 bsf r12,r12
2872 bsf r13,r13
2873 bsf r14,r14
2874 shl r12,4
2875 shl r13,4
2876 shl r14,4
2877
; Fewer than six blocks left -> tail handling.
2878 sub rdx,6
2879 jc NEAR $L$ocb_enc_short
2880 jmp NEAR $L$ocb_enc_grandloop
2881
2882ALIGN 32
2883$L$ocb_enc_grandloop:
; Main loop: six blocks per iteration.
2884 movdqu xmm2,XMMWORD[rdi]
2885 movdqu xmm3,XMMWORD[16+rdi]
2886 movdqu xmm4,XMMWORD[32+rdi]
2887 movdqu xmm5,XMMWORD[48+rdi]
2888 movdqu xmm6,XMMWORD[64+rdi]
2889 movdqu xmm7,XMMWORD[80+rdi]
2890 lea rdi,[96+rdi]
2891
2892 call __ocb_encrypt6
2893
2894 movups XMMWORD[rsi],xmm2
2895 movups XMMWORD[16+rsi],xmm3
2896 movups XMMWORD[32+rsi],xmm4
2897 movups XMMWORD[48+rsi],xmm5
2898 movups XMMWORD[64+rsi],xmm6
2899 movups XMMWORD[80+rsi],xmm7
2900 lea rsi,[96+rsi]
2901 sub rdx,6
2902 jnc NEAR $L$ocb_enc_grandloop
2903
2904$L$ocb_enc_short:
; Undo the speculative subtraction: rdx = 0..5 blocks remaining.
2905 add rdx,6
2906 jz NEAR $L$ocb_enc_done
2907
; Dispatch on the remaining count, loading inputs as we go. The movdqu
; between each cmp and the following je does not disturb the flags.
2908 movdqu xmm2,XMMWORD[rdi]
2909 cmp rdx,2
2910 jb NEAR $L$ocb_enc_one
2911 movdqu xmm3,XMMWORD[16+rdi]
2912 je NEAR $L$ocb_enc_two
2913
2914 movdqu xmm4,XMMWORD[32+rdi]
2915 cmp rdx,4
2916 jb NEAR $L$ocb_enc_three
2917 movdqu xmm5,XMMWORD[48+rdi]
2918 je NEAR $L$ocb_enc_four
2919
; Five blocks: run the six-block helper with a zero sixth block (which
; leaves the accumulator unchanged) and discard the sixth result.
2920 movdqu xmm6,XMMWORD[64+rdi]
2921 pxor xmm7,xmm7
2922
2923 call __ocb_encrypt6
2924
; xmm14 = offset of the fifth block XOR last round key.
2925 movdqa xmm15,xmm14
2926 movups XMMWORD[rsi],xmm2
2927 movups XMMWORD[16+rsi],xmm3
2928 movups XMMWORD[32+rsi],xmm4
2929 movups XMMWORD[48+rsi],xmm5
2930 movups XMMWORD[64+rsi],xmm6
2931
2932 jmp NEAR $L$ocb_enc_done
2933
2934ALIGN 16
2935$L$ocb_enc_one:
; One block left; its number is odd, so it uses table entry 0 (xmm10).
2936 movdqa xmm7,xmm10
2937
2938 call __ocb_encrypt1
2939
2940 movdqa xmm15,xmm7
2941 movups XMMWORD[rsi],xmm2
2942 jmp NEAR $L$ocb_enc_done
2943
2944ALIGN 16
2945$L$ocb_enc_two:
; Two blocks: four-block helper with zeroed third and fourth inputs.
2946 pxor xmm4,xmm4
2947 pxor xmm5,xmm5
2948
2949 call __ocb_encrypt4
2950
; xmm11 = offset of the second block XOR last round key.
2951 movdqa xmm15,xmm11
2952 movups XMMWORD[rsi],xmm2
2953 movups XMMWORD[16+rsi],xmm3
2954
2955 jmp NEAR $L$ocb_enc_done
2956
2957ALIGN 16
2958$L$ocb_enc_three:
; Three blocks: four-block helper with a zeroed fourth input.
2959 pxor xmm5,xmm5
2960
2961 call __ocb_encrypt4
2962
; xmm12 = offset of the third block XOR last round key.
2963 movdqa xmm15,xmm12
2964 movups XMMWORD[rsi],xmm2
2965 movups XMMWORD[16+rsi],xmm3
2966 movups XMMWORD[32+rsi],xmm4
2967
2968 jmp NEAR $L$ocb_enc_done
2969
2970ALIGN 16
2971$L$ocb_enc_four:
2972 call __ocb_encrypt4
2973
; xmm13 = offset of the fourth block XOR last round key.
2974 movdqa xmm15,xmm13
2975 movups XMMWORD[rsi],xmm2
2976 movups XMMWORD[16+rsi],xmm3
2977 movups XMMWORD[32+rsi],xmm4
2978 movups XMMWORD[48+rsi],xmm5
2979
2980$L$ocb_enc_done:
; Every helper returns with xmm0 = last round key, so this strips the
; key from xmm15; then write back the accumulator and the offset.
2981 pxor xmm15,xmm0
2982 movdqu XMMWORD[rbp],xmm8
2983 movdqu XMMWORD[r9],xmm15
2984
; Wipe the volatile XMM registers that held key or data material.
2985 xorps xmm0,xmm0
2986 pxor xmm1,xmm1
2987 pxor xmm2,xmm2
2988 pxor xmm3,xmm3
2989 pxor xmm4,xmm4
2990 pxor xmm5,xmm5
; Restore xmm6..xmm15 and overwrite each save slot with zero (xmm0).
2991 movaps xmm6,XMMWORD[rsp]
2992 movaps XMMWORD[rsp],xmm0
2993 movaps xmm7,XMMWORD[16+rsp]
2994 movaps XMMWORD[16+rsp],xmm0
2995 movaps xmm8,XMMWORD[32+rsp]
2996 movaps XMMWORD[32+rsp],xmm0
2997 movaps xmm9,XMMWORD[48+rsp]
2998 movaps XMMWORD[48+rsp],xmm0
2999 movaps xmm10,XMMWORD[64+rsp]
3000 movaps XMMWORD[64+rsp],xmm0
3001 movaps xmm11,XMMWORD[80+rsp]
3002 movaps XMMWORD[80+rsp],xmm0
3003 movaps xmm12,XMMWORD[96+rsp]
3004 movaps XMMWORD[96+rsp],xmm0
3005 movaps xmm13,XMMWORD[112+rsp]
3006 movaps XMMWORD[112+rsp],xmm0
3007 movaps xmm14,XMMWORD[128+rsp]
3008 movaps XMMWORD[128+rsp],xmm0
3009 movaps xmm15,XMMWORD[144+rsp]
3010 movaps XMMWORD[144+rsp],xmm0
; rax = rsp value at entry (160 bytes of XMM saves + 5 pushes of 8 bytes).
3011 lea rax,[((160+40))+rsp]
3012$L$ocb_enc_pop:
; Reload the pushed integer registers relative to rax, then drop the frame.
3013 mov r14,QWORD[((-40))+rax]
3014
3015 mov r13,QWORD[((-32))+rax]
3016
3017 mov r12,QWORD[((-24))+rax]
3018
3019 mov rbp,QWORD[((-16))+rax]
3020
3021 mov rbx,QWORD[((-8))+rax]
3022
3023 lea rsp,[rax]
3024
3025$L$ocb_enc_epilogue:
; Restore the caller's rdi/rsi from the home slots and return
; (F3 C3 = rep ret).
3026 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3027 mov rsi,QWORD[16+rsp]
3028 DB 0F3h,0C3h ;repret
3029
3030$L$SEH_end_aesni_ocb_encrypt:
3031
3032
;-----------------------------------------------------------------------
; __ocb_encrypt6 -- internal helper: encrypt six blocks with chained
; per-block offsets. Register contract as used by aesni_ocb_encrypt:
;   In:  xmm2..xmm7 = six input blocks
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm10 = table entry 0 ([rbx]); xmm1 = round key at key+16
;        r12,r13,r14 = byte offsets into the rbx table for blocks 2,4,6
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;        xmm8 = running XOR of input blocks; r8 = next block number
;   Out: xmm2..xmm7 = output blocks
;        xmm11..xmm15 = offsets of blocks 2..6, each XOR last round key
;        xmm10 = table entry 0 (reloaded); xmm8 updated with the inputs
;        xmm0 = last round key; xmm1 = round key at key+16; rax = r10
;        r8 += 6; r12,r13,r14 recomputed (and scaled by 16) for the
;        following six blocks
; DB sequences are AES-NI opcodes: 66 0F 38 DC = aesenc,
; 66 0F 38 DD = aesenclast (a 65 byte is REX.B, selecting xmm8..xmm15).
;-----------------------------------------------------------------------
3033ALIGN 32
3034__ocb_encrypt6:
3035
; xmm15 = previous offset XOR round key 0. Build the six offsets (each
; still carrying round key 0) in xmm10..xmm15 by chaining table entries;
; interleaved: fold each input into xmm8, then whiten it with its offset.
3036 pxor xmm15,xmm9
3037 movdqu xmm11,XMMWORD[r12*1+rbx]
3038 movdqa xmm12,xmm10
3039 movdqu xmm13,XMMWORD[r13*1+rbx]
3040 movdqa xmm14,xmm10
3041 pxor xmm10,xmm15
3042 movdqu xmm15,XMMWORD[r14*1+rbx]
3043 pxor xmm11,xmm10
3044 pxor xmm8,xmm2
3045 pxor xmm2,xmm10
3046 pxor xmm12,xmm11
3047 pxor xmm8,xmm3
3048 pxor xmm3,xmm11
3049 pxor xmm13,xmm12
3050 pxor xmm8,xmm4
3051 pxor xmm4,xmm12
3052 pxor xmm14,xmm13
3053 pxor xmm8,xmm5
3054 pxor xmm5,xmm13
3055 pxor xmm15,xmm14
3056 pxor xmm8,xmm6
3057 pxor xmm6,xmm14
3058 pxor xmm8,xmm7
3059 pxor xmm7,xmm15
3060 movups xmm0,XMMWORD[32+r11]
3061
; Start computing table indices for the next group of six while the AES
; rounds run; r8 advances by six.
3062 lea r12,[1+r8]
3063 lea r13,[3+r8]
3064 lea r14,[5+r8]
3065 add r8,6
; Convert each offset from "XOR round key 0" to "XOR last round key"
; (xmm9 holds key0 XOR last key) for use as the aesenclast operand.
3066 pxor xmm10,xmm9
3067 bsf r12,r12
3068 bsf r13,r13
3069 bsf r14,r14
3070
; aesenc xmm2..xmm7 with xmm1 (round key at key+16), interleaved with
; the remaining offset conversions.
3071DB 102,15,56,220,209
3072DB 102,15,56,220,217
3073DB 102,15,56,220,225
3074DB 102,15,56,220,233
3075 pxor xmm11,xmm9
3076 pxor xmm12,xmm9
3077DB 102,15,56,220,241
3078 pxor xmm13,xmm9
3079 pxor xmm14,xmm9
3080DB 102,15,56,220,249
3081 movups xmm1,XMMWORD[48+r11]
3082 pxor xmm15,xmm9
3083
; aesenc xmm2..xmm7 with xmm0 (round key at key+32).
3084DB 102,15,56,220,208
3085DB 102,15,56,220,216
3086DB 102,15,56,220,224
3087DB 102,15,56,220,232
3088DB 102,15,56,220,240
3089DB 102,15,56,220,248
3090 movups xmm0,XMMWORD[64+r11]
3091 shl r12,4
3092 shl r13,4
3093 jmp NEAR $L$ocb_enc_loop6
3094
3095ALIGN 32
3096$L$ocb_enc_loop6:
; Two rounds per iteration. rax is a negative index that climbs by 32;
; the jnz consumes the flags of "add rax,32" (movups leaves them alone).
; aesenc xmm2..xmm7, xmm1
3097DB 102,15,56,220,209
3098DB 102,15,56,220,217
3099DB 102,15,56,220,225
3100DB 102,15,56,220,233
3101DB 102,15,56,220,241
3102DB 102,15,56,220,249
3103 movups xmm1,XMMWORD[rax*1+rcx]
3104 add rax,32
3105
; aesenc xmm2..xmm7, xmm0
3106DB 102,15,56,220,208
3107DB 102,15,56,220,216
3108DB 102,15,56,220,224
3109DB 102,15,56,220,232
3110DB 102,15,56,220,240
3111DB 102,15,56,220,248
3112 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3113 jnz NEAR $L$ocb_enc_loop6
3114
; Last aesenc round with xmm1; on loop exit xmm0 = [rcx-16], i.e. the
; last round key, which the caller relies on.
3115DB 102,15,56,220,209
3116DB 102,15,56,220,217
3117DB 102,15,56,220,225
3118DB 102,15,56,220,233
3119DB 102,15,56,220,241
3120DB 102,15,56,220,249
; Re-arm xmm1 / r14 / xmm10 / rax for the next call.
3121 movups xmm1,XMMWORD[16+r11]
3122 shl r14,4
3123
; aesenclast xmm2,xmm10 ... xmm7,xmm15: the final round key and the
; block's offset are applied in one step.
3124DB 102,65,15,56,221,210
3125 movdqu xmm10,XMMWORD[rbx]
3126 mov rax,r10
3127DB 102,65,15,56,221,219
3128DB 102,65,15,56,221,228
3129DB 102,65,15,56,221,237
3130DB 102,65,15,56,221,246
3131DB 102,65,15,56,221,255
3132 DB 0F3h,0C3h ;repret
3133
3134
3135
3136
;-----------------------------------------------------------------------
; __ocb_encrypt4 -- internal helper: encrypt four blocks with chained
; per-block offsets. Used only for the 2/3/4-block tails of
; aesni_ocb_encrypt, so it does not advance r8 or recompute r12/r13.
;   In:  xmm2..xmm5 = input blocks (unused ones zeroed by the caller)
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm10 = table entry 0; xmm1 = round key at key+16
;        r12,r13 = byte offsets into the rbx table for blocks 2 and 4
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;   Out: xmm2..xmm5 = output blocks
;        xmm10..xmm13 = offsets of blocks 1..4, each XOR last round key
;        xmm8 ^= the four inputs; xmm0 = last round key
;        xmm1 = round key at key+16; rax = r10
; DB sequences: 66 0F 38 DC = aesenc, 66 (41) 0F 38 DD = aesenclast.
;-----------------------------------------------------------------------
3137ALIGN 32
3138__ocb_encrypt4:
3139
; Chain the four offsets (carrying round key 0) into xmm10..xmm13, fold
; each input into xmm8 and whiten it with its offset.
3140 pxor xmm15,xmm9
3141 movdqu xmm11,XMMWORD[r12*1+rbx]
3142 movdqa xmm12,xmm10
3143 movdqu xmm13,XMMWORD[r13*1+rbx]
3144 pxor xmm10,xmm15
3145 pxor xmm11,xmm10
3146 pxor xmm8,xmm2
3147 pxor xmm2,xmm10
3148 pxor xmm12,xmm11
3149 pxor xmm8,xmm3
3150 pxor xmm3,xmm11
3151 pxor xmm13,xmm12
3152 pxor xmm8,xmm4
3153 pxor xmm4,xmm12
3154 pxor xmm8,xmm5
3155 pxor xmm5,xmm13
3156 movups xmm0,XMMWORD[32+r11]
3157
; Swap round key 0 for the last round key inside each offset.
3158 pxor xmm10,xmm9
3159 pxor xmm11,xmm9
3160 pxor xmm12,xmm9
3161 pxor xmm13,xmm9
3162
; aesenc xmm2..xmm5, xmm1 (round key at key+16)
3163DB 102,15,56,220,209
3164DB 102,15,56,220,217
3165DB 102,15,56,220,225
3166DB 102,15,56,220,233
3167 movups xmm1,XMMWORD[48+r11]
3168
; aesenc xmm2..xmm5, xmm0 (round key at key+32)
3169DB 102,15,56,220,208
3170DB 102,15,56,220,216
3171DB 102,15,56,220,224
3172DB 102,15,56,220,232
3173 movups xmm0,XMMWORD[64+r11]
3174 jmp NEAR $L$ocb_enc_loop4
3175
3176ALIGN 32
3177$L$ocb_enc_loop4:
; Two rounds per iteration; loop ends when the negative index rax hits 0
; (jnz tests the flags of "add rax,32").
; aesenc xmm2..xmm5, xmm1
3178DB 102,15,56,220,209
3179DB 102,15,56,220,217
3180DB 102,15,56,220,225
3181DB 102,15,56,220,233
3182 movups xmm1,XMMWORD[rax*1+rcx]
3183 add rax,32
3184
; aesenc xmm2..xmm5, xmm0
3185DB 102,15,56,220,208
3186DB 102,15,56,220,216
3187DB 102,15,56,220,224
3188DB 102,15,56,220,232
3189 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3190 jnz NEAR $L$ocb_enc_loop4
3191
; Last aesenc round; xmm0 now holds the last round key ([rcx-16]).
3192DB 102,15,56,220,209
3193DB 102,15,56,220,217
3194DB 102,15,56,220,225
3195DB 102,15,56,220,233
; Restore xmm1 and rax to their between-calls values.
3196 movups xmm1,XMMWORD[16+r11]
3197 mov rax,r10
3198
; aesenclast xmm2,xmm10 / xmm3,xmm11 / xmm4,xmm12 / xmm5,xmm13
3199DB 102,65,15,56,221,210
3200DB 102,65,15,56,221,219
3201DB 102,65,15,56,221,228
3202DB 102,65,15,56,221,237
3203 DB 0F3h,0C3h ;repret
3204
3205
3206
3207
;-----------------------------------------------------------------------
; __ocb_encrypt1 -- internal helper: encrypt a single block.
;   In:  xmm2  = input block
;        xmm7  = table entry for this block
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm1  = round key at key+16
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;   Out: xmm2 = output block
;        xmm7 = new offset XOR last round key (caller copies it to xmm15)
;        xmm8 ^= input block; xmm0 = last round key
;        xmm1 = round key at key+16; rax = r10
; DB sequences: 66 0F 38 DC = aesenc, 66 0F 38 DD = aesenclast.
;-----------------------------------------------------------------------
3208ALIGN 32
3209__ocb_encrypt1:
3210
; xmm7 = table entry XOR previous offset XOR round key 0, i.e. the new
; offset with the round-0 key folded in; fold input into xmm8, whiten.
3211 pxor xmm7,xmm15
3212 pxor xmm7,xmm9
3213 pxor xmm8,xmm2
3214 pxor xmm2,xmm7
3215 movups xmm0,XMMWORD[32+r11]
3216
; aesenc xmm2,xmm1
3217DB 102,15,56,220,209
3218 movups xmm1,XMMWORD[48+r11]
; xmm7 = new offset XOR last round key (operand for aesenclast).
3219 pxor xmm7,xmm9
3220
; aesenc xmm2,xmm0
3221DB 102,15,56,220,208
3222 movups xmm0,XMMWORD[64+r11]
3223 jmp NEAR $L$ocb_enc_loop1
3224
3225ALIGN 32
3226$L$ocb_enc_loop1:
; Two rounds per iteration; jnz tests the flags of "add rax,32".
; aesenc xmm2,xmm1
3227DB 102,15,56,220,209
3228 movups xmm1,XMMWORD[rax*1+rcx]
3229 add rax,32
3230
; aesenc xmm2,xmm0
3231DB 102,15,56,220,208
3232 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3233 jnz NEAR $L$ocb_enc_loop1
3234
; Last aesenc round, then restore xmm1 and rax for the next call.
3235DB 102,15,56,220,209
3236 movups xmm1,XMMWORD[16+r11]
3237 mov rax,r10
3238
; aesenclast xmm2,xmm7
3239DB 102,15,56,221,215
3240 DB 0F3h,0C3h ;repret
3241
3242
3243
;-----------------------------------------------------------------------
; aesni_ocb_decrypt -- AES-NI decryption of whole 16-byte blocks for OCB.
;
; Entered with the Microsoft x64 convention; the entry code remaps the
; arguments into the registers the body works with:
;   rdi = input pointer                       (arg 1, arrives in rcx)
;   rsi = output pointer                      (arg 2, arrives in rdx)
;   rdx = number of 16-byte blocks            (arg 3, arrives in r8)
;   rcx = key schedule, round count at +240   (arg 4, arrives in r9)
;   r8  = number of the first block           (arg 5, stack [40+rsp])
;   r9  = pointer to the 16-byte running offset, read on entry and
;         written back on exit                (arg 6, stack [48+rsp])
;   rbx = table of 16-byte entries indexed by bsf(block number)*16
;                                             (arg 7, stack [56+rsp])
;   rbp = pointer to the 16-byte XOR accumulator, read on entry and
;         written back on exit                (arg 8, stack [64+rsp])
; (In OCB terms these are presumably Offset, the L_i table and Checksum.)
;
; Unlike the encrypt routine, the accumulator (xmm8) is updated here in
; the caller, from the *output* blocks, after each helper returns.
;
; Invariants kept across the helper calls:
;   xmm9  = round key 0 XOR last round key
;   xmm15 = current offset XOR last round key
;   xmm10 = table entry 0 ([rbx]); xmm1 = round key at key+16
;   r11   = key schedule base, rcx = key+32+16*rounds,
;   rax = r10 = 48-16*rounds (negative index; [rax+rcx] is key+80)
;
; NOTE(review): when r8 is even one block is processed before rdx is
; examined, so the routine appears to assume rdx >= 1 -- confirm callers.
;-----------------------------------------------------------------------
3244global aesni_ocb_decrypt
3245
3246ALIGN 32
3247aesni_ocb_decrypt:
; rdi/rsi are callee-saved on Win64: park them in the caller-provided
; home slots just above the return address.
3248 mov QWORD[8+rsp],rdi ;WIN64 prologue
3249 mov QWORD[16+rsp],rsi
3250 mov rax,rsp
3251$L$SEH_begin_aesni_ocb_decrypt:
; Remap the six leading arguments (see header).
3252 mov rdi,rcx
3253 mov rsi,rdx
3254 mov rdx,r8
3255 mov rcx,r9
3256 mov r8,QWORD[40+rsp]
3257 mov r9,QWORD[48+rsp]
3258
3259
3260
; F3 0F 1E FA = endbr64.
; NOTE(review): the marker follows the prologue moves instead of sitting
; at the entry label -- confirm this is acceptable for the target.
3261DB 243,15,30,250
; rax = rsp at entry; used below to reach stack arguments 7 and 8.
3262 lea rax,[rsp]
; Save the callee-saved integer registers this routine uses (5 x 8 bytes).
3263 push rbx
3264
3265 push rbp
3266
3267 push r12
3268
3269 push r13
3270
3271 push r14
3272
; Reserve 160 bytes and save callee-saved xmm6..xmm15 (10 x 16 bytes).
3273 lea rsp,[((-160))+rsp]
3274 movaps XMMWORD[rsp],xmm6
3275 movaps XMMWORD[16+rsp],xmm7
3276 movaps XMMWORD[32+rsp],xmm8
3277 movaps XMMWORD[48+rsp],xmm9
3278 movaps XMMWORD[64+rsp],xmm10
3279 movaps XMMWORD[80+rsp],xmm11
3280 movaps XMMWORD[96+rsp],xmm12
3281 movaps XMMWORD[112+rsp],xmm13
3282 movaps XMMWORD[128+rsp],xmm14
3283 movaps XMMWORD[144+rsp],xmm15
3284$L$ocb_dec_body:
; Stack arguments 7 and 8: table pointer and accumulator pointer.
3285 mov rbx,QWORD[56+rax]
3286 mov rbp,QWORD[((56+8))+rax]
3287
; r10 = 16*rounds; xmm9 = round key 0; xmm1 = last round key.
3288 mov r10d,DWORD[240+rcx]
3289 mov r11,rcx
3290 shl r10d,4
3291 movups xmm9,XMMWORD[rcx]
3292 movups xmm1,XMMWORD[16+r10*1+rcx]
3293
; Fold the last round key into both the round-0 key and the offset so
; the helpers can apply the offset inside aesdeclast.
3294 movdqu xmm15,XMMWORD[r9]
3295 pxor xmm9,xmm1
3296 pxor xmm15,xmm1
3297
; rcx = key+32+16*rounds, rax = r10 = 48-16*rounds: the helpers index
; round keys as [rax+rcx] and stop when rax reaches zero.
3298 mov eax,16+32
3299 lea rcx,[32+r10*1+r11]
3300 movups xmm1,XMMWORD[16+r11]
3301 sub rax,r10
3302 mov r10,rax
3303
; xmm10 = table entry 0, xmm8 = incoming accumulator value.
3304 movdqu xmm10,XMMWORD[rbx]
3305 movdqu xmm8,XMMWORD[rbp]
3306
; If the first block number is even, handle one block on its own so the
; six-block path always starts on an odd block number.
3307 test r8,1
3308 jnz NEAR $L$ocb_dec_odd
3309
; xmm7 = table[bsf(r8)], xmm2 = input block; advance block number/input.
3310 bsf r12,r8
3311 add r8,1
3312 shl r12,4
3313 movdqu xmm7,XMMWORD[r12*1+rbx]
3314 movdqu xmm2,XMMWORD[rdi]
3315 lea rdi,[16+rdi]
3316
3317 call __ocb_decrypt1
3318
; xmm7 = new offset XOR last round key; store the output block and fold
; it into the accumulator.
3319 movdqa xmm15,xmm7
3320 movups XMMWORD[rsi],xmm2
3321 xorps xmm8,xmm2
3322 lea rsi,[16+rsi]
3323 sub rdx,1
3324 jz NEAR $L$ocb_dec_done
3325
3326$L$ocb_dec_odd:
; r8 is odd here. Precompute table byte offsets for the even-numbered
; blocks of the next six (r8+1, r8+3, r8+5); the odd-numbered ones use
; table entry 0 (xmm10). r8 advances by six.
3327 lea r12,[1+r8]
3328 lea r13,[3+r8]
3329 lea r14,[5+r8]
3330 lea r8,[6+r8]
3331 bsf r12,r12
3332 bsf r13,r13
3333 bsf r14,r14
3334 shl r12,4
3335 shl r13,4
3336 shl r14,4
3337
; Fewer than six blocks left -> tail handling.
3338 sub rdx,6
3339 jc NEAR $L$ocb_dec_short
3340 jmp NEAR $L$ocb_dec_grandloop
3341
3342ALIGN 32
3343$L$ocb_dec_grandloop:
; Main loop: six blocks per iteration.
3344 movdqu xmm2,XMMWORD[rdi]
3345 movdqu xmm3,XMMWORD[16+rdi]
3346 movdqu xmm4,XMMWORD[32+rdi]
3347 movdqu xmm5,XMMWORD[48+rdi]
3348 movdqu xmm6,XMMWORD[64+rdi]
3349 movdqu xmm7,XMMWORD[80+rdi]
3350 lea rdi,[96+rdi]
3351
3352 call __ocb_decrypt6
3353
; Store each output block and fold it into the accumulator.
3354 movups XMMWORD[rsi],xmm2
3355 pxor xmm8,xmm2
3356 movups XMMWORD[16+rsi],xmm3
3357 pxor xmm8,xmm3
3358 movups XMMWORD[32+rsi],xmm4
3359 pxor xmm8,xmm4
3360 movups XMMWORD[48+rsi],xmm5
3361 pxor xmm8,xmm5
3362 movups XMMWORD[64+rsi],xmm6
3363 pxor xmm8,xmm6
3364 movups XMMWORD[80+rsi],xmm7
3365 pxor xmm8,xmm7
3366 lea rsi,[96+rsi]
3367 sub rdx,6
3368 jnc NEAR $L$ocb_dec_grandloop
3369
3370$L$ocb_dec_short:
; Undo the speculative subtraction: rdx = 0..5 blocks remaining.
3371 add rdx,6
3372 jz NEAR $L$ocb_dec_done
3373
; Dispatch on the remaining count, loading inputs as we go. The movdqu
; between each cmp and the following je does not disturb the flags.
3374 movdqu xmm2,XMMWORD[rdi]
3375 cmp rdx,2
3376 jb NEAR $L$ocb_dec_one
3377 movdqu xmm3,XMMWORD[16+rdi]
3378 je NEAR $L$ocb_dec_two
3379
3380 movdqu xmm4,XMMWORD[32+rdi]
3381 cmp rdx,4
3382 jb NEAR $L$ocb_dec_three
3383 movdqu xmm5,XMMWORD[48+rdi]
3384 je NEAR $L$ocb_dec_four
3385
; Five blocks: run the six-block helper with a zero sixth input; the
; sixth result (xmm7) is neither stored nor folded into xmm8.
3386 movdqu xmm6,XMMWORD[64+rdi]
3387 pxor xmm7,xmm7
3388
3389 call __ocb_decrypt6
3390
; xmm14 = offset of the fifth block XOR last round key.
3391 movdqa xmm15,xmm14
3392 movups XMMWORD[rsi],xmm2
3393 pxor xmm8,xmm2
3394 movups XMMWORD[16+rsi],xmm3
3395 pxor xmm8,xmm3
3396 movups XMMWORD[32+rsi],xmm4
3397 pxor xmm8,xmm4
3398 movups XMMWORD[48+rsi],xmm5
3399 pxor xmm8,xmm5
3400 movups XMMWORD[64+rsi],xmm6
3401 pxor xmm8,xmm6
3402
3403 jmp NEAR $L$ocb_dec_done
3404
3405ALIGN 16
3406$L$ocb_dec_one:
; One block left; its number is odd, so it uses table entry 0 (xmm10).
3407 movdqa xmm7,xmm10
3408
3409 call __ocb_decrypt1
3410
3411 movdqa xmm15,xmm7
3412 movups XMMWORD[rsi],xmm2
3413 xorps xmm8,xmm2
3414 jmp NEAR $L$ocb_dec_done
3415
3416ALIGN 16
3417$L$ocb_dec_two:
; Two blocks: four-block helper with zeroed third and fourth inputs;
; only the first two results are stored and accumulated.
3418 pxor xmm4,xmm4
3419 pxor xmm5,xmm5
3420
3421 call __ocb_decrypt4
3422
; xmm11 = offset of the second block XOR last round key.
3423 movdqa xmm15,xmm11
3424 movups XMMWORD[rsi],xmm2
3425 xorps xmm8,xmm2
3426 movups XMMWORD[16+rsi],xmm3
3427 xorps xmm8,xmm3
3428
3429 jmp NEAR $L$ocb_dec_done
3430
3431ALIGN 16
3432$L$ocb_dec_three:
; Three blocks: four-block helper with a zeroed fourth input.
3433 pxor xmm5,xmm5
3434
3435 call __ocb_decrypt4
3436
; xmm12 = offset of the third block XOR last round key.
3437 movdqa xmm15,xmm12
3438 movups XMMWORD[rsi],xmm2
3439 xorps xmm8,xmm2
3440 movups XMMWORD[16+rsi],xmm3
3441 xorps xmm8,xmm3
3442 movups XMMWORD[32+rsi],xmm4
3443 xorps xmm8,xmm4
3444
3445 jmp NEAR $L$ocb_dec_done
3446
3447ALIGN 16
3448$L$ocb_dec_four:
3449 call __ocb_decrypt4
3450
; xmm13 = offset of the fourth block XOR last round key.
3451 movdqa xmm15,xmm13
3452 movups XMMWORD[rsi],xmm2
3453 pxor xmm8,xmm2
3454 movups XMMWORD[16+rsi],xmm3
3455 pxor xmm8,xmm3
3456 movups XMMWORD[32+rsi],xmm4
3457 pxor xmm8,xmm4
3458 movups XMMWORD[48+rsi],xmm5
3459 pxor xmm8,xmm5
3460
3461$L$ocb_dec_done:
; Every helper returns with xmm0 = last round key, so this strips the
; key from xmm15; then write back the accumulator and the offset.
3462 pxor xmm15,xmm0
3463 movdqu XMMWORD[rbp],xmm8
3464 movdqu XMMWORD[r9],xmm15
3465
; Wipe the volatile XMM registers that held key or data material.
3466 xorps xmm0,xmm0
3467 pxor xmm1,xmm1
3468 pxor xmm2,xmm2
3469 pxor xmm3,xmm3
3470 pxor xmm4,xmm4
3471 pxor xmm5,xmm5
; Restore xmm6..xmm15 and overwrite each save slot with zero (xmm0).
3472 movaps xmm6,XMMWORD[rsp]
3473 movaps XMMWORD[rsp],xmm0
3474 movaps xmm7,XMMWORD[16+rsp]
3475 movaps XMMWORD[16+rsp],xmm0
3476 movaps xmm8,XMMWORD[32+rsp]
3477 movaps XMMWORD[32+rsp],xmm0
3478 movaps xmm9,XMMWORD[48+rsp]
3479 movaps XMMWORD[48+rsp],xmm0
3480 movaps xmm10,XMMWORD[64+rsp]
3481 movaps XMMWORD[64+rsp],xmm0
3482 movaps xmm11,XMMWORD[80+rsp]
3483 movaps XMMWORD[80+rsp],xmm0
3484 movaps xmm12,XMMWORD[96+rsp]
3485 movaps XMMWORD[96+rsp],xmm0
3486 movaps xmm13,XMMWORD[112+rsp]
3487 movaps XMMWORD[112+rsp],xmm0
3488 movaps xmm14,XMMWORD[128+rsp]
3489 movaps XMMWORD[128+rsp],xmm0
3490 movaps xmm15,XMMWORD[144+rsp]
3491 movaps XMMWORD[144+rsp],xmm0
; rax = rsp value at entry (160 bytes of XMM saves + 5 pushes of 8 bytes).
3492 lea rax,[((160+40))+rsp]
3493$L$ocb_dec_pop:
; Reload the pushed integer registers relative to rax, then drop the frame.
3494 mov r14,QWORD[((-40))+rax]
3495
3496 mov r13,QWORD[((-32))+rax]
3497
3498 mov r12,QWORD[((-24))+rax]
3499
3500 mov rbp,QWORD[((-16))+rax]
3501
3502 mov rbx,QWORD[((-8))+rax]
3503
3504 lea rsp,[rax]
3505
3506$L$ocb_dec_epilogue:
; Restore the caller's rdi/rsi from the home slots and return
; (F3 C3 = rep ret).
3507 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3508 mov rsi,QWORD[16+rsp]
3509 DB 0F3h,0C3h ;repret
3510
3511$L$SEH_end_aesni_ocb_decrypt:
3512
3513
;-----------------------------------------------------------------------
; __ocb_decrypt6 -- internal helper: decrypt six blocks with chained
; per-block offsets. Register contract as used by aesni_ocb_decrypt:
;   In:  xmm2..xmm7 = six input blocks
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm10 = table entry 0 ([rbx]); xmm1 = round key at key+16
;        r12,r13,r14 = byte offsets into the rbx table for blocks 2,4,6
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;        r8 = next block number
;   Out: xmm2..xmm7 = output blocks
;        xmm11..xmm15 = offsets of blocks 2..6, each XOR last round key
;        xmm10 = table entry 0 (reloaded)
;        xmm0 = last round key; xmm1 = round key at key+16; rax = r10
;        r8 += 6; r12,r13,r14 recomputed (and scaled by 16) for the
;        following six blocks
; xmm8 is not touched here; the caller accumulates the outputs.
; DB sequences are AES-NI opcodes: 66 0F 38 DE = aesdec,
; 66 0F 38 DF = aesdeclast (a 65 byte is REX.B, selecting xmm8..xmm15).
;-----------------------------------------------------------------------
3514ALIGN 32
3515__ocb_decrypt6:
3516
; xmm15 = previous offset XOR round key 0. Build the six offsets (each
; still carrying round key 0) in xmm10..xmm15 by chaining table entries,
; whitening each input with its offset as soon as it is available.
3517 pxor xmm15,xmm9
3518 movdqu xmm11,XMMWORD[r12*1+rbx]
3519 movdqa xmm12,xmm10
3520 movdqu xmm13,XMMWORD[r13*1+rbx]
3521 movdqa xmm14,xmm10
3522 pxor xmm10,xmm15
3523 movdqu xmm15,XMMWORD[r14*1+rbx]
3524 pxor xmm11,xmm10
3525 pxor xmm2,xmm10
3526 pxor xmm12,xmm11
3527 pxor xmm3,xmm11
3528 pxor xmm13,xmm12
3529 pxor xmm4,xmm12
3530 pxor xmm14,xmm13
3531 pxor xmm5,xmm13
3532 pxor xmm15,xmm14
3533 pxor xmm6,xmm14
3534 pxor xmm7,xmm15
3535 movups xmm0,XMMWORD[32+r11]
3536
; Start computing table indices for the next group of six while the AES
; rounds run; r8 advances by six.
3537 lea r12,[1+r8]
3538 lea r13,[3+r8]
3539 lea r14,[5+r8]
3540 add r8,6
; Convert each offset from "XOR round key 0" to "XOR last round key"
; (xmm9 holds key0 XOR last key) for use as the aesdeclast operand.
3541 pxor xmm10,xmm9
3542 bsf r12,r12
3543 bsf r13,r13
3544 bsf r14,r14
3545
; aesdec xmm2..xmm7 with xmm1 (round key at key+16), interleaved with
; the remaining offset conversions.
3546DB 102,15,56,222,209
3547DB 102,15,56,222,217
3548DB 102,15,56,222,225
3549DB 102,15,56,222,233
3550 pxor xmm11,xmm9
3551 pxor xmm12,xmm9
3552DB 102,15,56,222,241
3553 pxor xmm13,xmm9
3554 pxor xmm14,xmm9
3555DB 102,15,56,222,249
3556 movups xmm1,XMMWORD[48+r11]
3557 pxor xmm15,xmm9
3558
; aesdec xmm2..xmm7 with xmm0 (round key at key+32).
3559DB 102,15,56,222,208
3560DB 102,15,56,222,216
3561DB 102,15,56,222,224
3562DB 102,15,56,222,232
3563DB 102,15,56,222,240
3564DB 102,15,56,222,248
3565 movups xmm0,XMMWORD[64+r11]
3566 shl r12,4
3567 shl r13,4
3568 jmp NEAR $L$ocb_dec_loop6
3569
3570ALIGN 32
3571$L$ocb_dec_loop6:
; Two rounds per iteration. rax is a negative index that climbs by 32;
; the jnz consumes the flags of "add rax,32" (movups leaves them alone).
; aesdec xmm2..xmm7, xmm1
3572DB 102,15,56,222,209
3573DB 102,15,56,222,217
3574DB 102,15,56,222,225
3575DB 102,15,56,222,233
3576DB 102,15,56,222,241
3577DB 102,15,56,222,249
3578 movups xmm1,XMMWORD[rax*1+rcx]
3579 add rax,32
3580
; aesdec xmm2..xmm7, xmm0
3581DB 102,15,56,222,208
3582DB 102,15,56,222,216
3583DB 102,15,56,222,224
3584DB 102,15,56,222,232
3585DB 102,15,56,222,240
3586DB 102,15,56,222,248
3587 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3588 jnz NEAR $L$ocb_dec_loop6
3589
; Last aesdec round with xmm1; on loop exit xmm0 = [rcx-16], i.e. the
; last round key, which the caller relies on.
3590DB 102,15,56,222,209
3591DB 102,15,56,222,217
3592DB 102,15,56,222,225
3593DB 102,15,56,222,233
3594DB 102,15,56,222,241
3595DB 102,15,56,222,249
; Re-arm xmm1 / r14 / xmm10 / rax for the next call.
3596 movups xmm1,XMMWORD[16+r11]
3597 shl r14,4
3598
; aesdeclast xmm2,xmm10 ... xmm7,xmm15: the final round key and the
; block's offset are applied in one step.
3599DB 102,65,15,56,223,210
3600 movdqu xmm10,XMMWORD[rbx]
3601 mov rax,r10
3602DB 102,65,15,56,223,219
3603DB 102,65,15,56,223,228
3604DB 102,65,15,56,223,237
3605DB 102,65,15,56,223,246
3606DB 102,65,15,56,223,255
3607 DB 0F3h,0C3h ;repret
3608
3609
3610
3611
;-----------------------------------------------------------------------
; __ocb_decrypt4 -- internal helper: decrypt four blocks with chained
; per-block offsets. Used only for the 2/3/4-block tails of
; aesni_ocb_decrypt, so it does not advance r8 or recompute r12/r13.
;   In:  xmm2..xmm5 = input blocks (unused ones zeroed by the caller)
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm10 = table entry 0; xmm1 = round key at key+16
;        r12,r13 = byte offsets into the rbx table for blocks 2 and 4
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;   Out: xmm2..xmm5 = output blocks
;        xmm10..xmm13 = offsets of blocks 1..4, each XOR last round key
;        xmm0 = last round key; xmm1 = round key at key+16; rax = r10
; xmm8 is not touched here; the caller accumulates the outputs.
; DB sequences: 66 0F 38 DE = aesdec, 66 (41) 0F 38 DF = aesdeclast.
;-----------------------------------------------------------------------
3612ALIGN 32
3613__ocb_decrypt4:
3614
; Chain the four offsets (carrying round key 0) into xmm10..xmm13 and
; whiten each input with its offset.
3615 pxor xmm15,xmm9
3616 movdqu xmm11,XMMWORD[r12*1+rbx]
3617 movdqa xmm12,xmm10
3618 movdqu xmm13,XMMWORD[r13*1+rbx]
3619 pxor xmm10,xmm15
3620 pxor xmm11,xmm10
3621 pxor xmm2,xmm10
3622 pxor xmm12,xmm11
3623 pxor xmm3,xmm11
3624 pxor xmm13,xmm12
3625 pxor xmm4,xmm12
3626 pxor xmm5,xmm13
3627 movups xmm0,XMMWORD[32+r11]
3628
; Swap round key 0 for the last round key inside each offset.
3629 pxor xmm10,xmm9
3630 pxor xmm11,xmm9
3631 pxor xmm12,xmm9
3632 pxor xmm13,xmm9
3633
; aesdec xmm2..xmm5, xmm1 (round key at key+16)
3634DB 102,15,56,222,209
3635DB 102,15,56,222,217
3636DB 102,15,56,222,225
3637DB 102,15,56,222,233
3638 movups xmm1,XMMWORD[48+r11]
3639
; aesdec xmm2..xmm5, xmm0 (round key at key+32)
3640DB 102,15,56,222,208
3641DB 102,15,56,222,216
3642DB 102,15,56,222,224
3643DB 102,15,56,222,232
3644 movups xmm0,XMMWORD[64+r11]
3645 jmp NEAR $L$ocb_dec_loop4
3646
3647ALIGN 32
3648$L$ocb_dec_loop4:
; Two rounds per iteration; loop ends when the negative index rax hits 0
; (jnz tests the flags of "add rax,32").
; aesdec xmm2..xmm5, xmm1
3649DB 102,15,56,222,209
3650DB 102,15,56,222,217
3651DB 102,15,56,222,225
3652DB 102,15,56,222,233
3653 movups xmm1,XMMWORD[rax*1+rcx]
3654 add rax,32
3655
; aesdec xmm2..xmm5, xmm0
3656DB 102,15,56,222,208
3657DB 102,15,56,222,216
3658DB 102,15,56,222,224
3659DB 102,15,56,222,232
3660 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3661 jnz NEAR $L$ocb_dec_loop4
3662
; Last aesdec round; xmm0 now holds the last round key ([rcx-16]).
3663DB 102,15,56,222,209
3664DB 102,15,56,222,217
3665DB 102,15,56,222,225
3666DB 102,15,56,222,233
; Restore xmm1 and rax to their between-calls values.
3667 movups xmm1,XMMWORD[16+r11]
3668 mov rax,r10
3669
; aesdeclast xmm2,xmm10 / xmm3,xmm11 / xmm4,xmm12 / xmm5,xmm13
3670DB 102,65,15,56,223,210
3671DB 102,65,15,56,223,219
3672DB 102,65,15,56,223,228
3673DB 102,65,15,56,223,237
3674 DB 0F3h,0C3h ;repret
3675
3676
3677
3678
;-----------------------------------------------------------------------
; __ocb_decrypt1 -- internal helper: decrypt a single block.
;   In:  xmm2  = input block
;        xmm7  = table entry for this block
;        xmm15 = previous offset XOR last round key
;        xmm9  = round key 0 XOR last round key
;        xmm1  = round key at key+16
;        r11 = key base, rcx = key+32+16*rounds, rax = 48-16*rounds
;   Out: xmm2 = output block
;        xmm7 = new offset XOR last round key (caller copies it to xmm15)
;        xmm0 = last round key; xmm1 = round key at key+16; rax = r10
; xmm8 is not touched here; the caller accumulates the output.
; DB sequences: 66 0F 38 DE = aesdec, 66 0F 38 DF = aesdeclast.
;-----------------------------------------------------------------------
3679ALIGN 32
3680__ocb_decrypt1:
3681
; xmm7 = table entry XOR previous offset XOR round key 0, i.e. the new
; offset with the round-0 key folded in; whiten the input with it.
3682 pxor xmm7,xmm15
3683 pxor xmm7,xmm9
3684 pxor xmm2,xmm7
3685 movups xmm0,XMMWORD[32+r11]
3686
; aesdec xmm2,xmm1
3687DB 102,15,56,222,209
3688 movups xmm1,XMMWORD[48+r11]
; xmm7 = new offset XOR last round key (operand for aesdeclast).
3689 pxor xmm7,xmm9
3690
; aesdec xmm2,xmm0
3691DB 102,15,56,222,208
3692 movups xmm0,XMMWORD[64+r11]
3693 jmp NEAR $L$ocb_dec_loop1
3694
3695ALIGN 32
3696$L$ocb_dec_loop1:
; Two rounds per iteration; jnz tests the flags of "add rax,32".
; aesdec xmm2,xmm1
3697DB 102,15,56,222,209
3698 movups xmm1,XMMWORD[rax*1+rcx]
3699 add rax,32
3700
; aesdec xmm2,xmm0
3701DB 102,15,56,222,208
3702 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3703 jnz NEAR $L$ocb_dec_loop1
3704
; Last aesdec round, then restore xmm1 and rax for the next call.
3705DB 102,15,56,222,209
3706 movups xmm1,XMMWORD[16+r11]
3707 mov rax,r10
3708
; aesdeclast xmm2,xmm7
3709DB 102,15,56,223,215
3710 DB 0F3h,0C3h ;repret
3711
3712
3713global aesni_cbc_encrypt
3714
3715ALIGN 16
3716aesni_cbc_encrypt:
3717 mov QWORD[8+rsp],rdi ;WIN64 prologue
3718 mov QWORD[16+rsp],rsi
3719 mov rax,rsp
3720$L$SEH_begin_aesni_cbc_encrypt:
3721 mov rdi,rcx
3722 mov rsi,rdx
3723 mov rdx,r8
3724 mov rcx,r9
3725 mov r8,QWORD[40+rsp]
3726 mov r9,QWORD[48+rsp]
3727
3728
3729
3730DB 243,15,30,250
3731 test rdx,rdx
3732 jz NEAR $L$cbc_ret
3733
3734 mov r10d,DWORD[240+rcx]
3735 mov r11,rcx
3736 test r9d,r9d
3737 jz NEAR $L$cbc_decrypt
3738
3739 movups xmm2,XMMWORD[r8]
3740 mov eax,r10d
3741 cmp rdx,16
3742 jb NEAR $L$cbc_enc_tail
3743 sub rdx,16
3744 jmp NEAR $L$cbc_enc_loop
3745ALIGN 16
3746$L$cbc_enc_loop:
3747 movups xmm3,XMMWORD[rdi]
3748 lea rdi,[16+rdi]
3749
3750 movups xmm0,XMMWORD[rcx]
3751 movups xmm1,XMMWORD[16+rcx]
3752 xorps xmm3,xmm0
3753 lea rcx,[32+rcx]
3754 xorps xmm2,xmm3
3755$L$oop_enc1_15:
3756DB 102,15,56,220,209
3757 dec eax
3758 movups xmm1,XMMWORD[rcx]
3759 lea rcx,[16+rcx]
3760 jnz NEAR $L$oop_enc1_15
3761DB 102,15,56,221,209
3762 mov eax,r10d
3763 mov rcx,r11
3764 movups XMMWORD[rsi],xmm2
3765 lea rsi,[16+rsi]
3766 sub rdx,16
3767 jnc NEAR $L$cbc_enc_loop
3768 add rdx,16
3769 jnz NEAR $L$cbc_enc_tail
3770 pxor xmm0,xmm0
3771 pxor xmm1,xmm1
3772 movups XMMWORD[r8],xmm2
3773 pxor xmm2,xmm2
3774 pxor xmm3,xmm3
3775 jmp NEAR $L$cbc_ret
3776
3777$L$cbc_enc_tail:
3778 mov rcx,rdx
3779 xchg rsi,rdi
3780 DD 0x9066A4F3
3781 mov ecx,16
3782 sub rcx,rdx
3783 xor eax,eax
3784 DD 0x9066AAF3
3785 lea rdi,[((-16))+rdi]
3786 mov eax,r10d
3787 mov rsi,rdi
3788 mov rcx,r11
3789 xor rdx,rdx
3790 jmp NEAR $L$cbc_enc_loop
3791
3792ALIGN 16
3793$L$cbc_decrypt:
3794 cmp rdx,16
3795 jne NEAR $L$cbc_decrypt_bulk
3796
3797
3798
3799 movdqu xmm2,XMMWORD[rdi]
3800 movdqu xmm3,XMMWORD[r8]
3801 movdqa xmm4,xmm2
3802 movups xmm0,XMMWORD[rcx]
3803 movups xmm1,XMMWORD[16+rcx]
3804 lea rcx,[32+rcx]
3805 xorps xmm2,xmm0
3806$L$oop_dec1_16:
3807DB 102,15,56,222,209
3808 dec r10d
3809 movups xmm1,XMMWORD[rcx]
3810 lea rcx,[16+rcx]
3811 jnz NEAR $L$oop_dec1_16
3812DB 102,15,56,223,209
3813 pxor xmm0,xmm0
3814 pxor xmm1,xmm1
3815 movdqu XMMWORD[r8],xmm4
3816 xorps xmm2,xmm3
3817 pxor xmm3,xmm3
3818 movups XMMWORD[rsi],xmm2
3819 pxor xmm2,xmm2
3820 jmp NEAR $L$cbc_ret
3821ALIGN 16
3822$L$cbc_decrypt_bulk:
3823 lea r11,[rsp]
3824
3825 push rbp
3826
3827 sub rsp,176
3828 and rsp,-16
3829 movaps XMMWORD[16+rsp],xmm6
3830 movaps XMMWORD[32+rsp],xmm7
3831 movaps XMMWORD[48+rsp],xmm8
3832 movaps XMMWORD[64+rsp],xmm9
3833 movaps XMMWORD[80+rsp],xmm10
3834 movaps XMMWORD[96+rsp],xmm11
3835 movaps XMMWORD[112+rsp],xmm12
3836 movaps XMMWORD[128+rsp],xmm13
3837 movaps XMMWORD[144+rsp],xmm14
3838 movaps XMMWORD[160+rsp],xmm15
3839$L$cbc_decrypt_body:
3840 mov rbp,rcx
3841 movups xmm10,XMMWORD[r8]
3842 mov eax,r10d
3843 cmp rdx,0x50
3844 jbe NEAR $L$cbc_dec_tail
3845
3846 movups xmm0,XMMWORD[rcx]
3847 movdqu xmm2,XMMWORD[rdi]
3848 movdqu xmm3,XMMWORD[16+rdi]
3849 movdqa xmm11,xmm2
3850 movdqu xmm4,XMMWORD[32+rdi]
3851 movdqa xmm12,xmm3
3852 movdqu xmm5,XMMWORD[48+rdi]
3853 movdqa xmm13,xmm4
3854 movdqu xmm6,XMMWORD[64+rdi]
3855 movdqa xmm14,xmm5
3856 movdqu xmm7,XMMWORD[80+rdi]
3857 movdqa xmm15,xmm6
3858 mov r9d,DWORD[((OPENSSL_ia32cap_P+4))]
3859 cmp rdx,0x70
3860 jbe NEAR $L$cbc_dec_six_or_seven
3861
3862 and r9d,71303168
3863 sub rdx,0x50
3864 cmp r9d,4194304
3865 je NEAR $L$cbc_dec_loop6_enter
3866 sub rdx,0x20
3867 lea rcx,[112+rcx]
3868 jmp NEAR $L$cbc_dec_loop8_enter
3869ALIGN 16
3870$L$cbc_dec_loop8:
3871 movups XMMWORD[rsi],xmm9
3872 lea rsi,[16+rsi]
3873$L$cbc_dec_loop8_enter:
3874 movdqu xmm8,XMMWORD[96+rdi]
3875 pxor xmm2,xmm0
3876 movdqu xmm9,XMMWORD[112+rdi]
3877 pxor xmm3,xmm0
3878 movups xmm1,XMMWORD[((16-112))+rcx]
3879 pxor xmm4,xmm0
3880 mov rbp,-1
3881 cmp rdx,0x70
3882 pxor xmm5,xmm0
3883 pxor xmm6,xmm0
3884 pxor xmm7,xmm0
3885 pxor xmm8,xmm0
3886
3887DB 102,15,56,222,209
3888 pxor xmm9,xmm0
3889 movups xmm0,XMMWORD[((32-112))+rcx]
3890DB 102,15,56,222,217
3891DB 102,15,56,222,225
3892DB 102,15,56,222,233
3893DB 102,15,56,222,241
3894DB 102,15,56,222,249
3895DB 102,68,15,56,222,193
3896 adc rbp,0
3897 and rbp,128
3898DB 102,68,15,56,222,201
3899 add rbp,rdi
3900 movups xmm1,XMMWORD[((48-112))+rcx]
3901DB 102,15,56,222,208
3902DB 102,15,56,222,216
3903DB 102,15,56,222,224
3904DB 102,15,56,222,232
3905DB 102,15,56,222,240
3906DB 102,15,56,222,248
3907DB 102,68,15,56,222,192
3908DB 102,68,15,56,222,200
3909 movups xmm0,XMMWORD[((64-112))+rcx]
3910 nop
3911DB 102,15,56,222,209
3912DB 102,15,56,222,217
3913DB 102,15,56,222,225
3914DB 102,15,56,222,233
3915DB 102,15,56,222,241
3916DB 102,15,56,222,249
3917DB 102,68,15,56,222,193
3918DB 102,68,15,56,222,201
3919 movups xmm1,XMMWORD[((80-112))+rcx]
3920 nop
3921DB 102,15,56,222,208
3922DB 102,15,56,222,216
3923DB 102,15,56,222,224
3924DB 102,15,56,222,232
3925DB 102,15,56,222,240
3926DB 102,15,56,222,248
3927DB 102,68,15,56,222,192
3928DB 102,68,15,56,222,200
3929 movups xmm0,XMMWORD[((96-112))+rcx]
3930 nop
3931DB 102,15,56,222,209
3932DB 102,15,56,222,217
3933DB 102,15,56,222,225
3934DB 102,15,56,222,233
3935DB 102,15,56,222,241
3936DB 102,15,56,222,249
3937DB 102,68,15,56,222,193
3938DB 102,68,15,56,222,201
3939 movups xmm1,XMMWORD[((112-112))+rcx]
3940 nop
3941DB 102,15,56,222,208
3942DB 102,15,56,222,216
3943DB 102,15,56,222,224
3944DB 102,15,56,222,232
3945DB 102,15,56,222,240
3946DB 102,15,56,222,248
3947DB 102,68,15,56,222,192
3948DB 102,68,15,56,222,200
3949 movups xmm0,XMMWORD[((128-112))+rcx]
3950 nop
3951DB 102,15,56,222,209
3952DB 102,15,56,222,217
3953DB 102,15,56,222,225
3954DB 102,15,56,222,233
3955DB 102,15,56,222,241
3956DB 102,15,56,222,249
3957DB 102,68,15,56,222,193
3958DB 102,68,15,56,222,201
3959 movups xmm1,XMMWORD[((144-112))+rcx]
3960 cmp eax,11
3961DB 102,15,56,222,208
3962DB 102,15,56,222,216
3963DB 102,15,56,222,224
3964DB 102,15,56,222,232
3965DB 102,15,56,222,240
3966DB 102,15,56,222,248
3967DB 102,68,15,56,222,192
3968DB 102,68,15,56,222,200
3969 movups xmm0,XMMWORD[((160-112))+rcx]
3970 jb NEAR $L$cbc_dec_done
3971DB 102,15,56,222,209
3972DB 102,15,56,222,217
3973DB 102,15,56,222,225
3974DB 102,15,56,222,233
3975DB 102,15,56,222,241
3976DB 102,15,56,222,249
3977DB 102,68,15,56,222,193
3978DB 102,68,15,56,222,201
3979 movups xmm1,XMMWORD[((176-112))+rcx]
3980 nop
3981DB 102,15,56,222,208
3982DB 102,15,56,222,216
3983DB 102,15,56,222,224
3984DB 102,15,56,222,232
3985DB 102,15,56,222,240
3986DB 102,15,56,222,248
3987DB 102,68,15,56,222,192
3988DB 102,68,15,56,222,200
3989 movups xmm0,XMMWORD[((192-112))+rcx]
3990 je NEAR $L$cbc_dec_done
3991DB 102,15,56,222,209
3992DB 102,15,56,222,217
3993DB 102,15,56,222,225
3994DB 102,15,56,222,233
3995DB 102,15,56,222,241
3996DB 102,15,56,222,249
3997DB 102,68,15,56,222,193
3998DB 102,68,15,56,222,201
3999 movups xmm1,XMMWORD[((208-112))+rcx]
4000 nop
4001DB 102,15,56,222,208
4002DB 102,15,56,222,216
4003DB 102,15,56,222,224
4004DB 102,15,56,222,232
4005DB 102,15,56,222,240
4006DB 102,15,56,222,248
4007DB 102,68,15,56,222,192
4008DB 102,68,15,56,222,200
4009 movups xmm0,XMMWORD[((224-112))+rcx]
4010 jmp NEAR $L$cbc_dec_done
4011ALIGN 16
4012$L$cbc_dec_done:
4013DB 102,15,56,222,209
4014DB 102,15,56,222,217
4015 pxor xmm10,xmm0
4016 pxor xmm11,xmm0
4017DB 102,15,56,222,225
4018DB 102,15,56,222,233
4019 pxor xmm12,xmm0
4020 pxor xmm13,xmm0
4021DB 102,15,56,222,241
4022DB 102,15,56,222,249
4023 pxor xmm14,xmm0
4024 pxor xmm15,xmm0
4025DB 102,68,15,56,222,193
4026DB 102,68,15,56,222,201
4027 movdqu xmm1,XMMWORD[80+rdi]
4028
4029DB 102,65,15,56,223,210
4030 movdqu xmm10,XMMWORD[96+rdi]
4031 pxor xmm1,xmm0
4032DB 102,65,15,56,223,219
4033 pxor xmm10,xmm0
4034 movdqu xmm0,XMMWORD[112+rdi]
4035DB 102,65,15,56,223,228
4036 lea rdi,[128+rdi]
4037 movdqu xmm11,XMMWORD[rbp]
4038DB 102,65,15,56,223,237
4039DB 102,65,15,56,223,246
4040 movdqu xmm12,XMMWORD[16+rbp]
4041 movdqu xmm13,XMMWORD[32+rbp]
4042DB 102,65,15,56,223,255
4043DB 102,68,15,56,223,193
4044 movdqu xmm14,XMMWORD[48+rbp]
4045 movdqu xmm15,XMMWORD[64+rbp]
4046DB 102,69,15,56,223,202
4047 movdqa xmm10,xmm0
4048 movdqu xmm1,XMMWORD[80+rbp]
4049 movups xmm0,XMMWORD[((-112))+rcx]
4050
4051 movups XMMWORD[rsi],xmm2
4052 movdqa xmm2,xmm11
4053 movups XMMWORD[16+rsi],xmm3
4054 movdqa xmm3,xmm12
4055 movups XMMWORD[32+rsi],xmm4
4056 movdqa xmm4,xmm13
4057 movups XMMWORD[48+rsi],xmm5
4058 movdqa xmm5,xmm14
4059 movups XMMWORD[64+rsi],xmm6
4060 movdqa xmm6,xmm15
4061 movups XMMWORD[80+rsi],xmm7
4062 movdqa xmm7,xmm1
4063 movups XMMWORD[96+rsi],xmm8
4064 lea rsi,[112+rsi]
4065
4066 sub rdx,0x80
4067 ja NEAR $L$cbc_dec_loop8
4068
4069 movaps xmm2,xmm9
4070 lea rcx,[((-112))+rcx]
4071 add rdx,0x70
4072 jle NEAR $L$cbc_dec_clear_tail_collected
4073 movups XMMWORD[rsi],xmm9
4074 lea rsi,[16+rsi]
4075 cmp rdx,0x50
4076 jbe NEAR $L$cbc_dec_tail
4077
4078 movaps xmm2,xmm11
4079$L$cbc_dec_six_or_seven:
4080 cmp rdx,0x60
4081 ja NEAR $L$cbc_dec_seven
4082
4083 movaps xmm8,xmm7
4084 call _aesni_decrypt6
4085 pxor xmm2,xmm10
4086 movaps xmm10,xmm8
4087 pxor xmm3,xmm11
4088 movdqu XMMWORD[rsi],xmm2
4089 pxor xmm4,xmm12
4090 movdqu XMMWORD[16+rsi],xmm3
4091 pxor xmm3,xmm3
4092 pxor xmm5,xmm13
4093 movdqu XMMWORD[32+rsi],xmm4
4094 pxor xmm4,xmm4
4095 pxor xmm6,xmm14
4096 movdqu XMMWORD[48+rsi],xmm5
4097 pxor xmm5,xmm5
4098 pxor xmm7,xmm15
4099 movdqu XMMWORD[64+rsi],xmm6
4100 pxor xmm6,xmm6
4101 lea rsi,[80+rsi]
4102 movdqa xmm2,xmm7
4103 pxor xmm7,xmm7
4104 jmp NEAR $L$cbc_dec_tail_collected
4105
4106ALIGN 16
4107$L$cbc_dec_seven:
4108 movups xmm8,XMMWORD[96+rdi]
4109 xorps xmm9,xmm9
4110 call _aesni_decrypt8
4111 movups xmm9,XMMWORD[80+rdi]
4112 pxor xmm2,xmm10
4113 movups xmm10,XMMWORD[96+rdi]
4114 pxor xmm3,xmm11
4115 movdqu XMMWORD[rsi],xmm2
4116 pxor xmm4,xmm12
4117 movdqu XMMWORD[16+rsi],xmm3
4118 pxor xmm3,xmm3
4119 pxor xmm5,xmm13
4120 movdqu XMMWORD[32+rsi],xmm4
4121 pxor xmm4,xmm4
4122 pxor xmm6,xmm14
4123 movdqu XMMWORD[48+rsi],xmm5
4124 pxor xmm5,xmm5
4125 pxor xmm7,xmm15
4126 movdqu XMMWORD[64+rsi],xmm6
4127 pxor xmm6,xmm6
4128 pxor xmm8,xmm9
4129 movdqu XMMWORD[80+rsi],xmm7
4130 pxor xmm7,xmm7
4131 lea rsi,[96+rsi]
4132 movdqa xmm2,xmm8
4133 pxor xmm8,xmm8
4134 pxor xmm9,xmm9
4135 jmp NEAR $L$cbc_dec_tail_collected
4136
4137ALIGN 16
4138$L$cbc_dec_loop6:
4139 movups XMMWORD[rsi],xmm7
4140 lea rsi,[16+rsi]
4141 movdqu xmm2,XMMWORD[rdi]
4142 movdqu xmm3,XMMWORD[16+rdi]
4143 movdqa xmm11,xmm2
4144 movdqu xmm4,XMMWORD[32+rdi]
4145 movdqa xmm12,xmm3
4146 movdqu xmm5,XMMWORD[48+rdi]
4147 movdqa xmm13,xmm4
4148 movdqu xmm6,XMMWORD[64+rdi]
4149 movdqa xmm14,xmm5
4150 movdqu xmm7,XMMWORD[80+rdi]
4151 movdqa xmm15,xmm6
4152$L$cbc_dec_loop6_enter:
4153 lea rdi,[96+rdi]
4154 movdqa xmm8,xmm7
4155
4156 call _aesni_decrypt6
4157
4158 pxor xmm2,xmm10
4159 movdqa xmm10,xmm8
4160 pxor xmm3,xmm11
4161 movdqu XMMWORD[rsi],xmm2
4162 pxor xmm4,xmm12
4163 movdqu XMMWORD[16+rsi],xmm3
4164 pxor xmm5,xmm13
4165 movdqu XMMWORD[32+rsi],xmm4
4166 pxor xmm6,xmm14
4167 mov rcx,rbp
4168 movdqu XMMWORD[48+rsi],xmm5
4169 pxor xmm7,xmm15
4170 mov eax,r10d
4171 movdqu XMMWORD[64+rsi],xmm6
4172 lea rsi,[80+rsi]
4173 sub rdx,0x60
4174 ja NEAR $L$cbc_dec_loop6
4175
4176 movdqa xmm2,xmm7
4177 add rdx,0x50
4178 jle NEAR $L$cbc_dec_clear_tail_collected
4179 movups XMMWORD[rsi],xmm7
4180 lea rsi,[16+rsi]
4181
4182$L$cbc_dec_tail:
4183 movups xmm2,XMMWORD[rdi]
4184 sub rdx,0x10
4185 jbe NEAR $L$cbc_dec_one
4186
4187 movups xmm3,XMMWORD[16+rdi]
4188 movaps xmm11,xmm2
4189 sub rdx,0x10
4190 jbe NEAR $L$cbc_dec_two
4191
4192 movups xmm4,XMMWORD[32+rdi]
4193 movaps xmm12,xmm3
4194 sub rdx,0x10
4195 jbe NEAR $L$cbc_dec_three
4196
4197 movups xmm5,XMMWORD[48+rdi]
4198 movaps xmm13,xmm4
4199 sub rdx,0x10
4200 jbe NEAR $L$cbc_dec_four
4201
4202 movups xmm6,XMMWORD[64+rdi]
4203 movaps xmm14,xmm5
4204 movaps xmm15,xmm6
4205 xorps xmm7,xmm7
4206 call _aesni_decrypt6
4207 pxor xmm2,xmm10
4208 movaps xmm10,xmm15
4209 pxor xmm3,xmm11
4210 movdqu XMMWORD[rsi],xmm2
4211 pxor xmm4,xmm12
4212 movdqu XMMWORD[16+rsi],xmm3
4213 pxor xmm3,xmm3
4214 pxor xmm5,xmm13
4215 movdqu XMMWORD[32+rsi],xmm4
4216 pxor xmm4,xmm4
4217 pxor xmm6,xmm14
4218 movdqu XMMWORD[48+rsi],xmm5
4219 pxor xmm5,xmm5
4220 lea rsi,[64+rsi]
4221 movdqa xmm2,xmm6
4222 pxor xmm6,xmm6
4223 pxor xmm7,xmm7
4224 sub rdx,0x10
4225 jmp NEAR $L$cbc_dec_tail_collected
4226
4227ALIGN 16
4228$L$cbc_dec_one:
4229 movaps xmm11,xmm2
4230 movups xmm0,XMMWORD[rcx]
4231 movups xmm1,XMMWORD[16+rcx]
4232 lea rcx,[32+rcx]
4233 xorps xmm2,xmm0
4234$L$oop_dec1_17:
4235DB 102,15,56,222,209
4236 dec eax
4237 movups xmm1,XMMWORD[rcx]
4238 lea rcx,[16+rcx]
4239 jnz NEAR $L$oop_dec1_17
4240DB 102,15,56,223,209
4241 xorps xmm2,xmm10
4242 movaps xmm10,xmm11
4243 jmp NEAR $L$cbc_dec_tail_collected
4244ALIGN 16
4245$L$cbc_dec_two:
4246 movaps xmm12,xmm3
4247 call _aesni_decrypt2
4248 pxor xmm2,xmm10
4249 movaps xmm10,xmm12
4250 pxor xmm3,xmm11
4251 movdqu XMMWORD[rsi],xmm2
4252 movdqa xmm2,xmm3
4253 pxor xmm3,xmm3
4254 lea rsi,[16+rsi]
4255 jmp NEAR $L$cbc_dec_tail_collected
4256ALIGN 16
4257$L$cbc_dec_three:
4258 movaps xmm13,xmm4
4259 call _aesni_decrypt3
4260 pxor xmm2,xmm10
4261 movaps xmm10,xmm13
4262 pxor xmm3,xmm11
4263 movdqu XMMWORD[rsi],xmm2
4264 pxor xmm4,xmm12
4265 movdqu XMMWORD[16+rsi],xmm3
4266 pxor xmm3,xmm3
4267 movdqa xmm2,xmm4
4268 pxor xmm4,xmm4
4269 lea rsi,[32+rsi]
4270 jmp NEAR $L$cbc_dec_tail_collected
4271ALIGN 16
4272$L$cbc_dec_four:
4273 movaps xmm14,xmm5
4274 call _aesni_decrypt4
4275 pxor xmm2,xmm10
4276 movaps xmm10,xmm14
4277 pxor xmm3,xmm11
4278 movdqu XMMWORD[rsi],xmm2
4279 pxor xmm4,xmm12
4280 movdqu XMMWORD[16+rsi],xmm3
4281 pxor xmm3,xmm3
4282 pxor xmm5,xmm13
4283 movdqu XMMWORD[32+rsi],xmm4
4284 pxor xmm4,xmm4
4285 movdqa xmm2,xmm5
4286 pxor xmm5,xmm5
4287 lea rsi,[48+rsi]
4288 jmp NEAR $L$cbc_dec_tail_collected
4289
4290ALIGN 16
4291$L$cbc_dec_clear_tail_collected:
4292 pxor xmm3,xmm3
4293 pxor xmm4,xmm4
4294 pxor xmm5,xmm5
4295$L$cbc_dec_tail_collected:
4296 movups XMMWORD[r8],xmm10
4297 and rdx,15
4298 jnz NEAR $L$cbc_dec_tail_partial
4299 movups XMMWORD[rsi],xmm2
4300 pxor xmm2,xmm2
4301 jmp NEAR $L$cbc_dec_ret
4302ALIGN 16
4303$L$cbc_dec_tail_partial:
4304 movaps XMMWORD[rsp],xmm2
4305 pxor xmm2,xmm2
4306 mov rcx,16
4307 mov rdi,rsi
4308 sub rcx,rdx
4309 lea rsi,[rsp]
4310 DD 0x9066A4F3
4311 movdqa XMMWORD[rsp],xmm2
4312
4313$L$cbc_dec_ret:
4314 xorps xmm0,xmm0
4315 pxor xmm1,xmm1
4316 movaps xmm6,XMMWORD[16+rsp]
4317 movaps XMMWORD[16+rsp],xmm0
4318 movaps xmm7,XMMWORD[32+rsp]
4319 movaps XMMWORD[32+rsp],xmm0
4320 movaps xmm8,XMMWORD[48+rsp]
4321 movaps XMMWORD[48+rsp],xmm0
4322 movaps xmm9,XMMWORD[64+rsp]
4323 movaps XMMWORD[64+rsp],xmm0
4324 movaps xmm10,XMMWORD[80+rsp]
4325 movaps XMMWORD[80+rsp],xmm0
4326 movaps xmm11,XMMWORD[96+rsp]
4327 movaps XMMWORD[96+rsp],xmm0
4328 movaps xmm12,XMMWORD[112+rsp]
4329 movaps XMMWORD[112+rsp],xmm0
4330 movaps xmm13,XMMWORD[128+rsp]
4331 movaps XMMWORD[128+rsp],xmm0
4332 movaps xmm14,XMMWORD[144+rsp]
4333 movaps XMMWORD[144+rsp],xmm0
4334 movaps xmm15,XMMWORD[160+rsp]
4335 movaps XMMWORD[160+rsp],xmm0
4336 mov rbp,QWORD[((-8))+r11]
4337
4338 lea rsp,[r11]
4339
4340$L$cbc_ret:
4341 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4342 mov rsi,QWORD[16+rsp]
4343 DB 0F3h,0C3h ;repret
4344
4345$L$SEH_end_aesni_cbc_encrypt:
; aesni_set_decrypt_key
; Builds a decryption key schedule by first calling __aesni_set_encrypt_key
; and then reversing the order of the round keys in place, applying AESIMC
; to every round key except the first and the last.
; In (as consumed by __aesni_set_encrypt_key): rcx = user key pointer,
;     edx = key size in bits, r8 = key schedule buffer.
; Out: eax = value returned by __aesni_set_encrypt_key (0 on success).
; xmm0/xmm1 are zeroed on the success path before returning.
4346global aesni_set_decrypt_key
4347
4348ALIGN 16
4349aesni_set_decrypt_key:
4350
4351DB 0x48,0x83,0xEC,0x08 ; raw encoding of `sub rsp,8`
4352
4353 call __aesni_set_encrypt_key ; returns status in eax, round value in edx
4354 shl edx,4 ; edx = round value * 16 (byte offset)
4355 test eax,eax
4356 jnz NEAR $L$dec_key_ret ; key setup failed: pass the error code through
4357 lea rcx,[16+rdx*1+r8] ; rcx -> last round key, r8 -> first round key
4358
4359 movups xmm0,XMMWORD[r8] ; swap first and last round keys (no AESIMC)
4360 movups xmm1,XMMWORD[rcx]
4361 movups XMMWORD[rcx],xmm0
4362 movups XMMWORD[r8],xmm1
4363 lea r8,[16+r8]
4364 lea rcx,[((-16))+rcx]
4365
4366$L$dec_key_inverse: ; walk inward from both ends of the schedule
4367 movups xmm0,XMMWORD[r8]
4368 movups xmm1,XMMWORD[rcx]
4369DB 102,15,56,219,192 ; aesimc xmm0,xmm0
4370DB 102,15,56,219,201 ; aesimc xmm1,xmm1
4371 lea r8,[16+r8]
4372 lea rcx,[((-16))+rcx]
4373 movups XMMWORD[16+rcx],xmm0 ; store transformed keys in swapped positions
4374 movups XMMWORD[(-16)+r8],xmm1
4375 cmp rcx,r8
4376 ja NEAR $L$dec_key_inverse ; continue until the two pointers meet
4377
4378 movups xmm0,XMMWORD[r8] ; remaining middle round key: transform in place
4379DB 102,15,56,219,192 ; aesimc xmm0,xmm0
4380 pxor xmm1,xmm1 ; scrub key material from xmm1
4381 movups XMMWORD[rcx],xmm0
4382 pxor xmm0,xmm0 ; scrub key material from xmm0
4383$L$dec_key_ret:
4384 add rsp,8 ; undo the 8-byte stack adjustment from the prologue
4385
4386 DB 0F3h,0C3h ;repret
4387
4388$L$SEH_end_set_decrypt_key:
4389
; aesni_set_encrypt_key / __aesni_set_encrypt_key
; Expands a 128-, 192- or 256-bit user key into an AES round-key schedule.
; In:  rcx = pointer to user key, edx = key size in bits (128/192/256),
;      r8  = pointer to the key schedule buffer.
; Out: rax = 0 on success, -1 if rcx or r8 is NULL, -2 for any other key size.
;      edx = 9, 11 or 13 on success; the same value is stored at offset 240
;      of the schedule on every success path.
; Two implementations exist per key size: one built on AESKEYGENASSIST and
; an "_alt" one built on PSHUFB + AESENCLAST, selected from
; OPENSSL_ia32cap_P+4. xmm0..xmm5 are zeroed before returning.
4390global aesni_set_encrypt_key
4391
4392ALIGN 16
4393aesni_set_encrypt_key:
4394__aesni_set_encrypt_key:
4395
4396DB 0x48,0x83,0xEC,0x08 ; raw encoding of `sub rsp,8`
4397
4398 mov rax,-1 ; default return value: NULL-argument error
4399 test rcx,rcx
4400 jz NEAR $L$enc_key_ret
4401 test r8,r8
4402 jz NEAR $L$enc_key_ret
4403
4404 mov r10d,268437504 ; mask 0x10000800 (bits 28 and 11)
4405 movups xmm0,XMMWORD[rcx] ; xmm0 = first 16 bytes of the user key
4406 xorps xmm4,xmm4 ; xmm4 = 0, scratch for the key_expansion helpers
4407 and r10d,DWORD[((OPENSSL_ia32cap_P+4))] ; NOTE(review): presumably the AVX/XOP capability bits - confirm
4408 lea rax,[16+r8] ; rax = write cursor, one round key past the start
4409 cmp edx,256
4410 je NEAR $L$14rounds
4411 cmp edx,192
4412 je NEAR $L$12rounds
4413 cmp edx,128
4414 jne NEAR $L$bad_keybits
4415
4416$L$10rounds:
4417 mov edx,9 ; round value stored in the schedule for 128-bit keys
4418 cmp r10d,268435456 ; bit 28 set and bit 11 clear -> alternative path
4419 je NEAR $L$10rounds_alt
4420
4421 movups XMMWORD[r8],xmm0 ; round key 0 is the user key itself
4422DB 102,15,58,223,200,1 ; aeskeygenassist xmm1,xmm0,1
4423 call $L$key_expansion_128_cold
4424DB 102,15,58,223,200,2 ; aeskeygenassist xmm1,xmm0,2
4425 call $L$key_expansion_128
4426DB 102,15,58,223,200,4 ; aeskeygenassist xmm1,xmm0,4
4427 call $L$key_expansion_128
4428DB 102,15,58,223,200,8 ; aeskeygenassist xmm1,xmm0,8
4429 call $L$key_expansion_128
4430DB 102,15,58,223,200,16 ; aeskeygenassist xmm1,xmm0,16
4431 call $L$key_expansion_128
4432DB 102,15,58,223,200,32 ; aeskeygenassist xmm1,xmm0,32
4433 call $L$key_expansion_128
4434DB 102,15,58,223,200,64 ; aeskeygenassist xmm1,xmm0,64
4435 call $L$key_expansion_128
4436DB 102,15,58,223,200,128 ; aeskeygenassist xmm1,xmm0,128
4437 call $L$key_expansion_128
4438DB 102,15,58,223,200,27 ; aeskeygenassist xmm1,xmm0,27
4439 call $L$key_expansion_128
4440DB 102,15,58,223,200,54 ; aeskeygenassist xmm1,xmm0,54
4441 call $L$key_expansion_128
4442 movups XMMWORD[rax],xmm0 ; store the final round key
4443 mov DWORD[80+rax],edx ; round value lands at offset 240 of the schedule
4444 xor eax,eax ; return 0
4445 jmp NEAR $L$enc_key_ret
4446
4447ALIGN 16
4448$L$10rounds_alt:
4449 movdqa xmm5,XMMWORD[$L$key_rotate] ; pshufb mask
4450 mov r10d,8 ; loop count
4451 movdqa xmm4,XMMWORD[$L$key_rcon1] ; xmm4 = round constant, doubled each iteration
4452 movdqa xmm2,xmm0
4453 movdqu XMMWORD[r8],xmm0 ; round key 0 is the user key itself
4454 jmp NEAR $L$oop_key128
4455
4456ALIGN 16
4457$L$oop_key128:
4458DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4459DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4460 pslld xmm4,1 ; next round constant
4461 lea rax,[16+rax]
4462
4463 movdqa xmm3,xmm2 ; xmm2 = prefix XOR of the previous key's dwords
4464 pslldq xmm2,4
4465 pxor xmm3,xmm2
4466 pslldq xmm2,4
4467 pxor xmm3,xmm2
4468 pslldq xmm2,4
4469 pxor xmm2,xmm3
4470
4471 pxor xmm0,xmm2 ; xmm0 = new round key
4472 movdqu XMMWORD[(-16)+rax],xmm0
4473 movdqa xmm2,xmm0
4474
4475 dec r10d
4476 jnz NEAR $L$oop_key128
4477
4478 movdqa xmm4,XMMWORD[$L$key_rcon1b] ; switch to the 0x1b round constant for the last two keys
4479
4480DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4481DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4482 pslld xmm4,1
4483
4484 movdqa xmm3,xmm2
4485 pslldq xmm2,4
4486 pxor xmm3,xmm2
4487 pslldq xmm2,4
4488 pxor xmm3,xmm2
4489 pslldq xmm2,4
4490 pxor xmm2,xmm3
4491
4492 pxor xmm0,xmm2
4493 movdqu XMMWORD[rax],xmm0
4494
4495 movdqa xmm2,xmm0
4496DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4497DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4498
4499 movdqa xmm3,xmm2
4500 pslldq xmm2,4
4501 pxor xmm3,xmm2
4502 pslldq xmm2,4
4503 pxor xmm3,xmm2
4504 pslldq xmm2,4
4505 pxor xmm2,xmm3
4506
4507 pxor xmm0,xmm2
4508 movdqu XMMWORD[16+rax],xmm0 ; final round key
4509
4510 mov DWORD[96+rax],edx ; round value lands at offset 240 of the schedule
4511 xor eax,eax ; return 0
4512 jmp NEAR $L$enc_key_ret
4513
4514ALIGN 16
4515$L$12rounds:
4516 movq xmm2,QWORD[16+rcx] ; xmm2 = last 8 bytes of the 192-bit user key
4517 mov edx,11 ; round value stored in the schedule for 192-bit keys
4518 cmp r10d,268435456 ; bit 28 set and bit 11 clear -> alternative path
4519 je NEAR $L$12rounds_alt
4520
4521 movups XMMWORD[r8],xmm0
4522DB 102,15,58,223,202,1 ; aeskeygenassist xmm1,xmm2,1
4523 call $L$key_expansion_192a_cold
4524DB 102,15,58,223,202,2 ; aeskeygenassist xmm1,xmm2,2
4525 call $L$key_expansion_192b
4526DB 102,15,58,223,202,4 ; aeskeygenassist xmm1,xmm2,4
4527 call $L$key_expansion_192a
4528DB 102,15,58,223,202,8 ; aeskeygenassist xmm1,xmm2,8
4529 call $L$key_expansion_192b
4530DB 102,15,58,223,202,16 ; aeskeygenassist xmm1,xmm2,16
4531 call $L$key_expansion_192a
4532DB 102,15,58,223,202,32 ; aeskeygenassist xmm1,xmm2,32
4533 call $L$key_expansion_192b
4534DB 102,15,58,223,202,64 ; aeskeygenassist xmm1,xmm2,64
4535 call $L$key_expansion_192a
4536DB 102,15,58,223,202,128 ; aeskeygenassist xmm1,xmm2,128
4537 call $L$key_expansion_192b
4538 movups XMMWORD[rax],xmm0 ; store the final round key
4539 mov DWORD[48+rax],edx ; round value lands at offset 240 of the schedule
4540 xor rax,rax ; return 0
4541 jmp NEAR $L$enc_key_ret
4542
4543ALIGN 16
4544$L$12rounds_alt:
4545 movdqa xmm5,XMMWORD[$L$key_rotate192] ; pshufb mask for the 192-bit variant
4546 movdqa xmm4,XMMWORD[$L$key_rcon1] ; xmm4 = round constant, doubled each iteration
4547 mov r10d,8 ; loop count
4548 movdqu XMMWORD[r8],xmm0
4549 jmp NEAR $L$oop_key192
4550
4551ALIGN 16
4552$L$oop_key192: ; each iteration emits 24 bytes of schedule
4553 movq QWORD[rax],xmm2 ; store low 8 bytes of xmm2
4554 movdqa xmm1,xmm2
4555DB 102,15,56,0,213 ; pshufb xmm2,xmm5
4556DB 102,15,56,221,212 ; aesenclast xmm2,xmm4
4557 pslld xmm4,1 ; next round constant
4558 lea rax,[24+rax]
4559
4560 movdqa xmm3,xmm0 ; xmm0 = prefix XOR of its own dwords
4561 pslldq xmm0,4
4562 pxor xmm3,xmm0
4563 pslldq xmm0,4
4564 pxor xmm3,xmm0
4565 pslldq xmm0,4
4566 pxor xmm0,xmm3
4567
4568 pshufd xmm3,xmm0,0xff ; broadcast the top dword of xmm0
4569 pxor xmm3,xmm1
4570 pslldq xmm1,4
4571 pxor xmm3,xmm1
4572
4573 pxor xmm0,xmm2
4574 pxor xmm2,xmm3
4575 movdqu XMMWORD[(-16)+rax],xmm0
4576
4577 dec r10d
4578 jnz NEAR $L$oop_key192
4579
4580 mov DWORD[32+rax],edx ; round value lands at offset 240 of the schedule
4581 xor eax,eax ; return 0
4582 jmp NEAR $L$enc_key_ret
4583
4584ALIGN 16
4585$L$14rounds:
4586 movups xmm2,XMMWORD[16+rcx] ; xmm2 = second 16 bytes of the 256-bit user key
4587 mov edx,13 ; round value stored in the schedule for 256-bit keys
4588 lea rax,[16+rax] ; write cursor now two round keys past the start
4589 cmp r10d,268435456 ; bit 28 set and bit 11 clear -> alternative path
4590 je NEAR $L$14rounds_alt
4591
4592 movups XMMWORD[r8],xmm0 ; round keys 0 and 1 are the user key itself
4593 movups XMMWORD[16+r8],xmm2
4594DB 102,15,58,223,202,1 ; aeskeygenassist xmm1,xmm2,1
4595 call $L$key_expansion_256a_cold
4596DB 102,15,58,223,200,1 ; aeskeygenassist xmm1,xmm0,1
4597 call $L$key_expansion_256b
4598DB 102,15,58,223,202,2 ; aeskeygenassist xmm1,xmm2,2
4599 call $L$key_expansion_256a
4600DB 102,15,58,223,200,2 ; aeskeygenassist xmm1,xmm0,2
4601 call $L$key_expansion_256b
4602DB 102,15,58,223,202,4 ; aeskeygenassist xmm1,xmm2,4
4603 call $L$key_expansion_256a
4604DB 102,15,58,223,200,4 ; aeskeygenassist xmm1,xmm0,4
4605 call $L$key_expansion_256b
4606DB 102,15,58,223,202,8 ; aeskeygenassist xmm1,xmm2,8
4607 call $L$key_expansion_256a
4608DB 102,15,58,223,200,8 ; aeskeygenassist xmm1,xmm0,8
4609 call $L$key_expansion_256b
4610DB 102,15,58,223,202,16 ; aeskeygenassist xmm1,xmm2,16
4611 call $L$key_expansion_256a
4612DB 102,15,58,223,200,16 ; aeskeygenassist xmm1,xmm0,16
4613 call $L$key_expansion_256b
4614DB 102,15,58,223,202,32 ; aeskeygenassist xmm1,xmm2,32
4615 call $L$key_expansion_256a
4616DB 102,15,58,223,200,32 ; aeskeygenassist xmm1,xmm0,32
4617 call $L$key_expansion_256b
4618DB 102,15,58,223,202,64 ; aeskeygenassist xmm1,xmm2,64
4619 call $L$key_expansion_256a
4620 movups XMMWORD[rax],xmm0 ; store the final round key
4621 mov DWORD[16+rax],edx ; round value lands at offset 240 of the schedule
4622 xor rax,rax ; return 0
4623 jmp NEAR $L$enc_key_ret
4624
4625ALIGN 16
4626$L$14rounds_alt:
4627 movdqa xmm5,XMMWORD[$L$key_rotate] ; pshufb mask
4628 movdqa xmm4,XMMWORD[$L$key_rcon1] ; xmm4 = round constant, doubled each iteration
4629 mov r10d,7 ; loop count
4630 movdqu XMMWORD[r8],xmm0
4631 movdqa xmm1,xmm2
4632 movdqu XMMWORD[16+r8],xmm2
4633 jmp NEAR $L$oop_key256
4634
4635ALIGN 16
4636$L$oop_key256: ; up to two round keys per iteration
4637DB 102,15,56,0,213 ; pshufb xmm2,xmm5
4638DB 102,15,56,221,212 ; aesenclast xmm2,xmm4
4639
4640 movdqa xmm3,xmm0 ; xmm0 = prefix XOR of its own dwords
4641 pslldq xmm0,4
4642 pxor xmm3,xmm0
4643 pslldq xmm0,4
4644 pxor xmm3,xmm0
4645 pslldq xmm0,4
4646 pxor xmm0,xmm3
4647 pslld xmm4,1 ; next round constant
4648
4649 pxor xmm0,xmm2
4650 movdqu XMMWORD[rax],xmm0 ; first round key of this iteration
4651
4652 dec r10d
4653 jz NEAR $L$done_key256 ; last iteration emits only one round key
4654
4655 pshufd xmm2,xmm0,0xff ; broadcast the top dword of the new key
4656 pxor xmm3,xmm3
4657DB 102,15,56,221,211 ; aesenclast xmm2,xmm3 (xmm3 = 0 here)
4658
4659 movdqa xmm3,xmm1 ; xmm1 = prefix XOR of its own dwords
4660 pslldq xmm1,4
4661 pxor xmm3,xmm1
4662 pslldq xmm1,4
4663 pxor xmm3,xmm1
4664 pslldq xmm1,4
4665 pxor xmm1,xmm3
4666
4667 pxor xmm2,xmm1
4668 movdqu XMMWORD[16+rax],xmm2 ; second round key of this iteration
4669 lea rax,[32+rax]
4670 movdqa xmm1,xmm2
4671
4672 jmp NEAR $L$oop_key256
4673
4674$L$done_key256:
4675 mov DWORD[16+rax],edx ; round value lands at offset 240 of the schedule
4676 xor eax,eax ; return 0
4677 jmp NEAR $L$enc_key_ret
4678
4679ALIGN 16
4680$L$bad_keybits:
4681 mov rax,-2 ; unsupported key size
4682$L$enc_key_ret:
4683 pxor xmm0,xmm0 ; zero every xmm register used during key expansion
4684 pxor xmm1,xmm1
4685 pxor xmm2,xmm2
4686 pxor xmm3,xmm3
4687 pxor xmm4,xmm4
4688 pxor xmm5,xmm5
4689 add rsp,8 ; undo the 8-byte stack adjustment from the prologue
4690
4691 DB 0F3h,0C3h ;repret
4692$L$SEH_end_set_encrypt_key:
4693
; $L$key_expansion_128 / $L$key_expansion_128_cold
; Helper called from the 128-bit key setup after each AESKEYGENASSIST.
; In:  xmm0 = current round key, xmm1 = AESKEYGENASSIST result,
;      xmm4 = scratch, rax = schedule write cursor.
; Out: xmm0 = next round key. The non-cold entry first stores the incoming
;      xmm0 at [rax] and advances rax by 16; the cold entry skips that store.
4694ALIGN 16
4695$L$key_expansion_128:
4696 movups XMMWORD[rax],xmm0
4697 lea rax,[16+rax]
4698$L$key_expansion_128_cold:
4699 shufps xmm4,xmm0,16 ; xmm4/xmm0 shuffles fold shifted copies of xmm0 into xmm0
4700 xorps xmm0,xmm4
4701 shufps xmm4,xmm0,140
4702 xorps xmm0,xmm4
4703 shufps xmm1,xmm1,255 ; broadcast dword 3 of xmm1 to all lanes
4704 xorps xmm0,xmm1
4705 DB 0F3h,0C3h ;repret
4706
; $L$key_expansion_192a / _192a_cold / $L$key_expansion_192b_warm
; Helper called from the 192-bit key setup after each AESKEYGENASSIST.
; In:  xmm0, xmm2 = current key state, xmm1 = AESKEYGENASSIST result,
;      xmm4 = scratch, rax = schedule write cursor.
; Out: xmm0 and xmm2 updated; xmm5 = copy of the incoming xmm2 (192a entries).
; The 192a entry stores xmm0 at [rax] and advances rax by 16 first; the cold
; entry skips the store; 192b_warm is the shared tail that
; $L$key_expansion_192b jumps to.
4707ALIGN 16
4708$L$key_expansion_192a:
4709 movups XMMWORD[rax],xmm0
4710 lea rax,[16+rax]
4711$L$key_expansion_192a_cold:
4712 movaps xmm5,xmm2 ; keep the old xmm2 for $L$key_expansion_192b
4713$L$key_expansion_192b_warm:
4714 shufps xmm4,xmm0,16
4715 movdqa xmm3,xmm2
4716 xorps xmm0,xmm4
4717 shufps xmm4,xmm0,140
4718 pslldq xmm3,4 ; xmm3 = xmm2 shifted left by one dword
4719 xorps xmm0,xmm4
4720 pshufd xmm1,xmm1,85 ; broadcast dword 1 of xmm1 to all lanes
4721 pxor xmm2,xmm3
4722 pxor xmm0,xmm1
4723 pshufd xmm3,xmm0,255 ; broadcast dword 3 of the new xmm0
4724 pxor xmm2,xmm3
4725 DB 0F3h,0C3h ;repret
4726
; $L$key_expansion_192b
; Stores 32 bytes of schedule assembled from xmm5, xmm0 and xmm2 at [rax],
; advances rax by 32, then continues in $L$key_expansion_192b_warm (which
; performs the return).
; In: xmm0, xmm2 = current key state, xmm5 = value saved by the preceding
;     192a call, rax = schedule write cursor.
4727ALIGN 16
4728$L$key_expansion_192b:
4729 movaps xmm3,xmm0
4730 shufps xmm5,xmm0,68 ; xmm5 = low qword of xmm5 : low qword of xmm0
4731 movups XMMWORD[rax],xmm5
4732 shufps xmm3,xmm2,78 ; xmm3 = high qword of xmm0 : low qword of xmm2
4733 movups XMMWORD[16+rax],xmm3
4734 lea rax,[32+rax]
4735 jmp NEAR $L$key_expansion_192b_warm
4736
; $L$key_expansion_256a / $L$key_expansion_256a_cold
; Helper called from the 256-bit key setup; updates the xmm0 key half.
; In:  xmm0 = key half to update, xmm1 = AESKEYGENASSIST result,
;      xmm2 = other key half, xmm4 = scratch, rax = schedule write cursor.
; Out: xmm0 = next round key. The non-cold entry first stores xmm2 at [rax]
;      and advances rax by 16; the cold entry skips that store.
4737ALIGN 16
4738$L$key_expansion_256a:
4739 movups XMMWORD[rax],xmm2
4740 lea rax,[16+rax]
4741$L$key_expansion_256a_cold:
4742 shufps xmm4,xmm0,16 ; xmm4/xmm0 shuffles fold shifted copies of xmm0 into xmm0
4743 xorps xmm0,xmm4
4744 shufps xmm4,xmm0,140
4745 xorps xmm0,xmm4
4746 shufps xmm1,xmm1,255 ; broadcast dword 3 of xmm1 to all lanes
4747 xorps xmm0,xmm1
4748 DB 0F3h,0C3h ;repret
4749
; $L$key_expansion_256b
; Helper called from the 256-bit key setup; updates the xmm2 key half.
; Stores xmm0 at [rax], advances rax by 16, then derives the next xmm2.
; In:  xmm0 = round key to store, xmm1 = AESKEYGENASSIST result,
;      xmm2 = key half to update, xmm4 = scratch, rax = write cursor.
; Out: xmm2 = next round key.
4750ALIGN 16
4751$L$key_expansion_256b:
4752 movups XMMWORD[rax],xmm0
4753 lea rax,[16+rax]
4754
4755 shufps xmm4,xmm2,16 ; xmm4/xmm2 shuffles fold shifted copies of xmm2 into xmm2
4756 xorps xmm2,xmm4
4757 shufps xmm4,xmm2,140
4758 xorps xmm2,xmm4
4759 shufps xmm1,xmm1,170 ; broadcast dword 2 of xmm1 to all lanes
4760 xorps xmm2,xmm1
4761 DB 0F3h,0C3h ;repret
4762
4763
4764
; Constant tables, 64-byte aligned. Of these, only $L$key_rotate,
; $L$key_rotate192, $L$key_rcon1 and $L$key_rcon1b are referenced by the
; key-setup code in this part of the file; the remaining tables are used
; elsewhere in the file.
4765ALIGN 64
4766$L$bswap_mask: ; byte indices 15..0, i.e. a full 16-byte reversal mask
4767DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4768$L$increment32:
4769 DD 6,6,6,0
4770$L$increment64:
4771 DD 1,0,0,0
4772$L$xts_magic:
4773 DD 0x87,0,1,0
4774$L$increment1:
4775DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4776$L$key_rotate: ; pshufb mask used by the 128- and 256-bit "_alt" key setup
4777 DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4778$L$key_rotate192: ; pshufb mask used by the 192-bit "_alt" key setup
4779 DD 0x04070605,0x04070605,0x04070605,0x04070605
4780$L$key_rcon1: ; initial round constant, shifted left by one each iteration
4781 DD 1,1,1,1
4782$L$key_rcon1b: ; round constant 0x1b, loaded after eight iterations of the 128-bit loop
4783 DD 0x1b,0x1b,0x1b,0x1b
4784
; NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4785DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
4786DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
4787DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
4788DB 115,108,46,111,114,103,62,0
4789ALIGN 64
4790EXTERN __imp_RtlVirtualUnwind
4791
; ecb_ccm64_se_handler
; Exception handler referenced by the ECB and CCM64 unwind-info records.
; In: r8 = context record, r9 = dispatcher context.
; The two DWORDs at [56+r9] are label offsets relative to the base at [8+r9]
; (the body and return labels listed in .xdata). Depending on where the
; faulting address (context+248) lies relative to those labels, rax is set to
; the frame base and, inside the body, 64 bytes of saved state are copied
; into the context at offset 512. Finishes in $L$common_seh_tail.
; NOTE(review): the numeric offsets presumably follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts (120=Rax, 152=Rsp, 248=Rip, 512=Xmm6) - confirm
; against winnt.h.
4792ALIGN 16
4793ecb_ccm64_se_handler:
4794 push rsi
4795 push rdi
4796 push rbx
4797 push rbp
4798 push r12
4799 push r13
4800 push r14
4801 push r15
4802 pushfq
4803 sub rsp,64 ; room for the RtlVirtualUnwind call made in the common tail
4804
4805 mov rax,QWORD[120+r8]
4806 mov rbx,QWORD[248+r8] ; rbx = faulting instruction address
4807
4808 mov rsi,QWORD[8+r9] ; rsi = base address the handler data is relative to
4809 mov r11,QWORD[56+r9] ; r11 = handler data (label offsets from .xdata)
4810
4811 mov r10d,DWORD[r11]
4812 lea r10,[r10*1+rsi] ; r10 = address of the first label
4813 cmp rbx,r10
4814 jb NEAR $L$common_seh_tail ; fault before the first label
4815
4816 mov rax,QWORD[152+r8]
4817
4818 mov r10d,DWORD[4+r11]
4819 lea r10,[r10*1+rsi] ; r10 = address of the second label
4820 cmp rbx,r10
4821 jae NEAR $L$common_seh_tail ; fault at or after the second label
4822
4823 lea rsi,[rax]
4824 lea rdi,[512+r8]
4825 mov ecx,8 ; 8 qwords = 64 bytes
4826 DD 0xa548f3fc ; raw encoding of `cld; rep movsq`
4827 lea rax,[88+rax] ; step rax past the 88-byte frame
4828
4829 jmp NEAR $L$common_seh_tail
4830
4831
4832
; ctr_xts_se_handler
; Exception handler referenced by the CTR32 and XTS unwind-info records.
; In: r8 = context record, r9 = dispatcher context.
; Uses the two label offsets at [56+r9] (relative to the base at [8+r9]) to
; decide whether the fault happened inside the function body. If so, it takes
; the frame base from context+208, copies 160 bytes of saved state from just
; below it into the context at offset 512, and restores rbp into context+160.
; Finishes in $L$common_seh_tail.
; NOTE(review): the numeric offsets presumably follow the Win64 CONTEXT layout
; (120=Rax, 152=Rsp, 160=Rbp, 208=R11, 248=Rip, 512=Xmm6) - confirm against
; winnt.h.
4833ALIGN 16
4834ctr_xts_se_handler:
4835 push rsi
4836 push rdi
4837 push rbx
4838 push rbp
4839 push r12
4840 push r13
4841 push r14
4842 push r15
4843 pushfq
4844 sub rsp,64 ; room for the RtlVirtualUnwind call made in the common tail
4845
4846 mov rax,QWORD[120+r8]
4847 mov rbx,QWORD[248+r8] ; rbx = faulting instruction address
4848
4849 mov rsi,QWORD[8+r9] ; rsi = base address the handler data is relative to
4850 mov r11,QWORD[56+r9] ; r11 = handler data (label offsets from .xdata)
4851
4852 mov r10d,DWORD[r11]
4853 lea r10,[r10*1+rsi] ; r10 = address of the first label
4854 cmp rbx,r10
4855 jb NEAR $L$common_seh_tail ; fault before the first label
4856
4857 mov rax,QWORD[152+r8]
4858
4859 mov r10d,DWORD[4+r11]
4860 lea r10,[r10*1+rsi] ; r10 = address of the second label
4861 cmp rbx,r10
4862 jae NEAR $L$common_seh_tail ; fault at or after the second label
4863
4864 mov rax,QWORD[208+r8] ; rax = frame base recorded in the context
4865
4866 lea rsi,[((-168))+rax]
4867 lea rdi,[512+r8]
4868 mov ecx,20 ; 20 qwords = 160 bytes
4869 DD 0xa548f3fc ; raw encoding of `cld; rep movsq`
4870
4871 mov rbp,QWORD[((-8))+rax] ; saved rbp sits just below the frame base
4872 mov QWORD[160+r8],rbp
4873 jmp NEAR $L$common_seh_tail
4874
4875
4876
; ocb_se_handler
; Exception handler referenced by the OCB unwind-info records.
; In: r8 = context record, r9 = dispatcher context.
; The handler data at [56+r9] holds three label offsets (relative to the base
; at [8+r9]). A fault before the first or at/after the second label goes
; straight to $L$common_seh_tail. A fault at/after the third label skips the
; 160-byte state copy. In both remaining cases rbx, rbp, r12, r13 and r14 are
; reloaded from just below rax and written into the context.
; NOTE(review): the numeric offsets presumably follow the Win64 CONTEXT layout
; (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 216..232=R12..R14, 248=Rip, 512=Xmm6)
; - confirm against winnt.h.
4877ALIGN 16
4878ocb_se_handler:
4879 push rsi
4880 push rdi
4881 push rbx
4882 push rbp
4883 push r12
4884 push r13
4885 push r14
4886 push r15
4887 pushfq
4888 sub rsp,64 ; room for the RtlVirtualUnwind call made in the common tail
4889
4890 mov rax,QWORD[120+r8]
4891 mov rbx,QWORD[248+r8] ; rbx = faulting instruction address
4892
4893 mov rsi,QWORD[8+r9] ; rsi = base address the handler data is relative to
4894 mov r11,QWORD[56+r9] ; r11 = handler data (label offsets from .xdata)
4895
4896 mov r10d,DWORD[r11]
4897 lea r10,[r10*1+rsi] ; r10 = address of the first label
4898 cmp rbx,r10
4899 jb NEAR $L$common_seh_tail ; fault before the first label
4900
4901 mov r10d,DWORD[4+r11]
4902 lea r10,[r10*1+rsi] ; r10 = address of the second label
4903 cmp rbx,r10
4904 jae NEAR $L$common_seh_tail ; fault at or after the second label
4905
4906 mov r10d,DWORD[8+r11]
4907 lea r10,[r10*1+rsi] ; r10 = address of the third label
4908 cmp rbx,r10
4909 jae NEAR $L$ocb_no_xmm ; fault at or after the third label: skip the state copy
4910
4911 mov rax,QWORD[152+r8]
4912
4913 lea rsi,[rax]
4914 lea rdi,[512+r8]
4915 mov ecx,20 ; 20 qwords = 160 bytes
4916 DD 0xa548f3fc ; raw encoding of `cld; rep movsq`
4917 lea rax,[((160+40))+rax] ; step rax past the 160-byte area plus 40 more bytes
4918
4919$L$ocb_no_xmm:
4920 mov rbx,QWORD[((-8))+rax] ; reload five saved registers from below rax
4921 mov rbp,QWORD[((-16))+rax]
4922 mov r12,QWORD[((-24))+rax]
4923 mov r13,QWORD[((-32))+rax]
4924 mov r14,QWORD[((-40))+rax]
4925
4926 mov QWORD[144+r8],rbx ; and publish them in the context record
4927 mov QWORD[160+r8],rbp
4928 mov QWORD[216+r8],r12
4929 mov QWORD[224+r8],r13
4930 mov QWORD[232+r8],r14
4931
4932 jmp NEAR $L$common_seh_tail
4933
4934
; cbc_se_handler and the shared $L$common_seh_tail
; Exception handler referenced by the CBC unwind-info record.
; In: r8 = context record, r9 = dispatcher context.
; Unlike the other handlers it compares the faulting address directly against
; the labels $L$cbc_decrypt_bulk, $L$cbc_decrypt_body and $L$cbc_ret rather
; than reading offsets from handler data. Inside the decrypt body it copies
; the 160 bytes saved at 16+rsp into the context at offset 512 and recovers
; the frame base and rbp.
; $L$common_seh_tail is entered by every handler with rax = frame base: it
; restores rdi/rsi from [8+rax]/[16+rax], updates the context, copies the
; context to the dispatcher's copy, calls RtlVirtualUnwind and returns 1.
; NOTE(review): the numeric offsets presumably follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts (120=Rax, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi,
; 208=R11, 248=Rip, 512=Xmm6) - confirm against winnt.h.
4935ALIGN 16
4936cbc_se_handler:
4937 push rsi
4938 push rdi
4939 push rbx
4940 push rbp
4941 push r12
4942 push r13
4943 push r14
4944 push r15
4945 pushfq
4946 sub rsp,64 ; room for the RtlVirtualUnwind stack arguments
4947
4948 mov rax,QWORD[152+r8]
4949 mov rbx,QWORD[248+r8] ; rbx = faulting instruction address
4950
4951 lea r10,[$L$cbc_decrypt_bulk]
4952 cmp rbx,r10
4953 jb NEAR $L$common_seh_tail ; fault before $L$cbc_decrypt_bulk
4954
4955 mov rax,QWORD[120+r8]
4956
4957 lea r10,[$L$cbc_decrypt_body]
4958 cmp rbx,r10
4959 jb NEAR $L$common_seh_tail ; fault before $L$cbc_decrypt_body
4960
4961 mov rax,QWORD[152+r8]
4962
4963 lea r10,[$L$cbc_ret]
4964 cmp rbx,r10
4965 jae NEAR $L$common_seh_tail ; fault at or after $L$cbc_ret
4966
4967 lea rsi,[16+rax] ; saved xmm area starts at 16+rsp in the CBC frame
4968 lea rdi,[512+r8]
4969 mov ecx,20 ; 20 qwords = 160 bytes
4970 DD 0xa548f3fc ; raw encoding of `cld; rep movsq`
4971
4972 mov rax,QWORD[208+r8] ; rax = frame base recorded in the context
4973
4974 mov rbp,QWORD[((-8))+rax] ; saved rbp sits just below the frame base
4975 mov QWORD[160+r8],rbp
4976
4977$L$common_seh_tail:
4978 mov rdi,QWORD[8+rax] ; reload rdi/rsi from the slots the WIN64 epilogue uses
4979 mov rsi,QWORD[16+rax]
4980 mov QWORD[152+r8],rax
4981 mov QWORD[168+r8],rsi
4982 mov QWORD[176+r8],rdi
4983
4984 mov rdi,QWORD[40+r9] ; destination: context pointer held by the dispatcher
4985 mov rsi,r8
4986 mov ecx,154 ; 154 qwords = 1232 bytes
4987 DD 0xa548f3fc ; raw encoding of `cld; rep movsq`
4988
4989 mov rsi,r9
4990 xor rcx,rcx ; first argument = 0
4991 mov rdx,QWORD[8+rsi]
4992 mov r8,QWORD[rsi]
4993 mov r9,QWORD[16+rsi]
4994 mov r10,QWORD[40+rsi]
4995 lea r11,[56+rsi]
4996 lea r12,[24+rsi]
4997 mov QWORD[32+rsp],r10 ; stack arguments five to eight
4998 mov QWORD[40+rsp],r11
4999 mov QWORD[48+rsp],r12
5000 mov QWORD[56+rsp],rcx
5001 call QWORD[__imp_RtlVirtualUnwind]
5002
5003 mov eax,1 ; NOTE(review): presumably ExceptionContinueSearch - confirm
5004 add rsp,64
5005 popfq
5006 pop r15
5007 pop r14
5008 pop r13
5009 pop r12
5010 pop rbp
5011 pop rbx
5012 pop rdi
5013 pop rsi
5014 DB 0F3h,0C3h ;repret
5015
5016
; .pdata: one three-DWORD record per function, each an image-relative
; address: function begin label, function end label, and the matching
; $L$SEH_info_* record in .xdata.
; NOTE(review): this matches the Win64 RUNTIME_FUNCTION table layout - confirm.
5017section .pdata rdata align=4
5018ALIGN 4
5019 DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
5020 DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
5021 DD $L$SEH_info_ecb wrt ..imagebase
5022
5023 DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
5024 DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
5025 DD $L$SEH_info_ccm64_enc wrt ..imagebase
5026
5027 DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
5028 DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
5029 DD $L$SEH_info_ccm64_dec wrt ..imagebase
5030
5031 DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
5032 DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
5033 DD $L$SEH_info_ctr32 wrt ..imagebase
5034
5035 DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
5036 DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase
5037 DD $L$SEH_info_xts_enc wrt ..imagebase
5038
5039 DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
5040 DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase
5041 DD $L$SEH_info_xts_dec wrt ..imagebase
5042
5043 DD $L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
5044 DD $L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
5045 DD $L$SEH_info_ocb_enc wrt ..imagebase
5046
5047 DD $L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
5048 DD $L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
5049 DD $L$SEH_info_ocb_dec wrt ..imagebase
5050 DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
5051 DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
5052 DD $L$SEH_info_cbc wrt ..imagebase
5053
; Both key-setup routines share the $L$SEH_info_key record.
5054 DD aesni_set_decrypt_key wrt ..imagebase
5055 DD $L$SEH_end_set_decrypt_key wrt ..imagebase
5056 DD $L$SEH_info_key wrt ..imagebase
5057
5058 DD aesni_set_encrypt_key wrt ..imagebase
5059 DD $L$SEH_end_set_encrypt_key wrt ..imagebase
5060 DD $L$SEH_info_key wrt ..imagebase
; .xdata: unwind-info records referenced from .pdata.
; Records that start with `DB 9,0,0,0` are followed by the image-relative
; address of a handler routine and then by the label addresses that handler
; reads as its handler data (two labels for the ecb/ccm64/ctr/xts handlers,
; three for the ocb handler, none for cbc_se_handler, which uses fixed labels).
; NOTE(review): 9 presumably encodes UNWIND_INFO version 1 with
; UNW_FLAG_EHANDLER - confirm against the Win64 unwind data documentation.
5061section .xdata rdata align=8
5062ALIGN 8
5063$L$SEH_info_ecb:
5064DB 9,0,0,0
5065 DD ecb_ccm64_se_handler wrt ..imagebase
5066 DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
5067$L$SEH_info_ccm64_enc:
5068DB 9,0,0,0
5069 DD ecb_ccm64_se_handler wrt ..imagebase
5070 DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
5071$L$SEH_info_ccm64_dec:
5072DB 9,0,0,0
5073 DD ecb_ccm64_se_handler wrt ..imagebase
5074 DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
5075$L$SEH_info_ctr32:
5076DB 9,0,0,0
5077 DD ctr_xts_se_handler wrt ..imagebase
5078 DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
5079$L$SEH_info_xts_enc:
5080DB 9,0,0,0
5081 DD ctr_xts_se_handler wrt ..imagebase
5082 DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
5083$L$SEH_info_xts_dec:
5084DB 9,0,0,0
5085 DD ctr_xts_se_handler wrt ..imagebase
5086 DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
5087$L$SEH_info_ocb_enc:
5088DB 9,0,0,0
5089 DD ocb_se_handler wrt ..imagebase
5090 DD $L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
5091 DD $L$ocb_enc_pop wrt ..imagebase
5092 DD 0
5093$L$SEH_info_ocb_dec:
5094DB 9,0,0,0
5095 DD ocb_se_handler wrt ..imagebase
5096 DD $L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
5097 DD $L$ocb_dec_pop wrt ..imagebase
5098 DD 0
5099$L$SEH_info_cbc:
5100DB 9,0,0,0
5101 DD cbc_se_handler wrt ..imagebase
; Shared by aesni_set_encrypt_key and aesni_set_decrypt_key; no handler.
; NOTE(review): presumably decodes as version 1, 4-byte prologue, one unwind
; code at offset 4 = UWOP_ALLOC_SMALL of 8 bytes, matching the `sub rsp,8`
; prologue of both routines - confirm.
5102$L$SEH_info_key:
5103DB 0x01,0x04,0x01,0x00
5104DB 0x04,0x02,0x00,0x00
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette