VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/x86_64-mont5.S@ 94081

最後變更 在這個檔案從94081是 83531,由 vboxsync 提交於 5 年 前

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

檔案大小: 39.6 KB
 
; ---------------------------------------------------------------------
; Assembler preamble (NASM syntax).
;  * `default rel` makes bare memory references such as [$L$inc]
;    RIP-relative instead of absolute.
;  * XMMWORD / YMMWORD / ZMMWORD are defined as empty macros, so the
;    size keywords written in operands below expand to nothing.
;  * All code that follows is placed in a 64-byte aligned .text section.
; ---------------------------------------------------------------------
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
; External symbol declaration; it is not referenced by any instruction
; in the part of the file visible here.
8EXTERN OPENSSL_ia32cap_P
9
; ---------------------------------------------------------------------
; bn_mul_mont_gather5 -- exported entry point, Win64 calling convention.
;
; The prologue remaps the incoming Win64 arguments as follows:
;   rdi <- rcx        rsi <- rdx        rdx <- r8        rcx <- r9
;   r8  <- [40+rsp]   r9  <- [48+rsp]   (32 bits of r9 are kept)
; and one more 32-bit argument is later read from [56+rsp].
; NOTE(review): in OpenSSL's C prototype these presumably correspond to
; rp, ap, table, np, n0, num and power -- confirm against the header.
;
; As used by the code below:
;   rdi = output words, rsi = first operand words, rcx = modulus words,
;   rdx = base of a table from which the second operand word is gathered
;         with AND/OR masks (every row is read on every gather),
;   r8  = pointer to a single word that is loaded once and used as the
;         per-iteration multiplier for the reduction factor,
;   r9  = number of 64-bit words.
;
; If (r9 & 7) == 0 control is transferred to $L$mul4x_enter, which lives
; inside bn_mul4x_mont_gather5; otherwise the generic word-by-word path
; at $L$mul_enter is used.
;
; Returns rax = 1. rbx, rbp, r12-r15 are pushed and restored; rdi and rsi
; are saved in the caller's home space at [8+rsp] / [16+rsp] and restored.
; ---------------------------------------------------------------------
10global bn_mul_mont_gather5
11
12ALIGN 64
13bn_mul_mont_gather5:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_bn_mul_mont_gather5:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
26
; Writing r9d clears the upper 32 bits of r9 (zero-extends the count).
27 mov r9d,r9d
; rax keeps the entry-time stack pointer for the epilogue.
28 mov rax,rsp
29
; Dispatch on the word count: not a multiple of 8 -> generic path.
30 test r9d,7
31 jnz NEAR $L$mul_enter
32 jmp NEAR $L$mul4x_enter
33
34ALIGN 16
35$L$mul_enter:
; xmm5 = 32-bit argument at [56+rsp]; it is broadcast later and compared
; against counters to build the table-selection masks.
36 movd xmm5,DWORD[56+rsp]
37 push rbx
38
39 push rbp
40
41 push r12
42
43 push r13
44
45 push r14
46
47 push r15
48
49
; Frame allocation: r10 = (rsp - 280 - 8*r9) rounded down to a multiple
; of 1024. r9 is negated only for the address computation and restored.
50 neg r9
51 mov r11,rsp
52 lea r10,[((-280))+r9*8+rsp]
53 neg r9
54 and r10,-1024
55
56
57
58
59
60
61
62
63
; Move rsp to the highest address <= old rsp that is a whole number of
; 4096-byte steps above r10, then probe it with a load whose value is
; not used.
64 sub r11,r10
65 and r11,-4096
66 lea rsp,[r11*1+r10]
67 mov r11,QWORD[rsp]
68 cmp rsp,r10
69 ja NEAR $L$mul_page_walk
70 jmp NEAR $L$mul_page_walk_done
71
; Step rsp down one 4096-byte page at a time, touching each page, until
; rsp reaches r10.
72$L$mul_page_walk:
73 lea rsp,[((-4096))+rsp]
74 mov r11,QWORD[rsp]
75 cmp rsp,r10
76 ja NEAR $L$mul_page_walk
77$L$mul_page_walk_done:
78
; r10 = address of $L$inc (defined outside the visible part of the file).
79 lea r10,[$L$inc]
; Save the entry-time rsp in the frame, just above the r9+1 scratch words.
80 mov QWORD[8+r9*8+rsp],rax
81
82$L$mul_body:
83
; r12 = table base + 128, so rows are addressed with offsets -128..+112.
84 lea r12,[128+rdx]
; xmm0/xmm1 = two 16-byte constants from $L$inc.
; NOTE(review): presumably the initial counter vector and the increment
; vector -- confirm against the definition of $L$inc.
85 movdqa xmm0,XMMWORD[r10]
86 movdqa xmm1,XMMWORD[16+r10]
; r10 = 16-byte aligned base of the mask area inside the frame.
87 lea r10,[((24-112))+r9*8+rsp]
88 and r10,-16
89
; Broadcast the selector to all four dword lanes of xmm5, then generate
; sixteen masks: each paddd advances a counter vector by xmm4 and each
; pcmpeqd turns "counter == selector" into an all-ones / all-zero lane.
; The masks are stored at [112+r10] .. [352+r10].
90 pshufd xmm5,xmm5,0
91 movdqa xmm4,xmm1
92 movdqa xmm2,xmm1
93 paddd xmm1,xmm0
94 pcmpeqd xmm0,xmm5
; 0x67 is the address-size override prefix emitted as a raw byte.
; NOTE(review): presumably padding for instruction alignment -- confirm.
95DB 0x67
96 movdqa xmm3,xmm4
97 paddd xmm2,xmm1
98 pcmpeqd xmm1,xmm5
99 movdqa XMMWORD[112+r10],xmm0
100 movdqa xmm0,xmm4
101
102 paddd xmm3,xmm2
103 pcmpeqd xmm2,xmm5
104 movdqa XMMWORD[128+r10],xmm1
105 movdqa xmm1,xmm4
106
107 paddd xmm0,xmm3
108 pcmpeqd xmm3,xmm5
109 movdqa XMMWORD[144+r10],xmm2
110 movdqa xmm2,xmm4
111
112 paddd xmm1,xmm0
113 pcmpeqd xmm0,xmm5
114 movdqa XMMWORD[160+r10],xmm3
115 movdqa xmm3,xmm4
116 paddd xmm2,xmm1
117 pcmpeqd xmm1,xmm5
118 movdqa XMMWORD[176+r10],xmm0
119 movdqa xmm0,xmm4
120
121 paddd xmm3,xmm2
122 pcmpeqd xmm2,xmm5
123 movdqa XMMWORD[192+r10],xmm1
124 movdqa xmm1,xmm4
125
126 paddd xmm0,xmm3
127 pcmpeqd xmm3,xmm5
128 movdqa XMMWORD[208+r10],xmm2
129 movdqa xmm2,xmm4
130
131 paddd xmm1,xmm0
132 pcmpeqd xmm0,xmm5
133 movdqa XMMWORD[224+r10],xmm3
134 movdqa xmm3,xmm4
135 paddd xmm2,xmm1
136 pcmpeqd xmm1,xmm5
137 movdqa XMMWORD[240+r10],xmm0
138 movdqa xmm0,xmm4
139
140 paddd xmm3,xmm2
141 pcmpeqd xmm2,xmm5
142 movdqa XMMWORD[256+r10],xmm1
143 movdqa xmm1,xmm4
144
145 paddd xmm0,xmm3
146 pcmpeqd xmm3,xmm5
147 movdqa XMMWORD[272+r10],xmm2
148 movdqa xmm2,xmm4
149
150 paddd xmm1,xmm0
151 pcmpeqd xmm0,xmm5
152 movdqa XMMWORD[288+r10],xmm3
153 movdqa xmm3,xmm4
154 paddd xmm2,xmm1
155 pcmpeqd xmm1,xmm5
156 movdqa XMMWORD[304+r10],xmm0
157
158 paddd xmm3,xmm2
159DB 0x67
160 pcmpeqd xmm2,xmm5
161 movdqa XMMWORD[320+r10],xmm1
162
163 pcmpeqd xmm3,xmm5
164 movdqa XMMWORD[336+r10],xmm2
; First gather. The last four masks are still live in xmm0..xmm3, so the
; rows at +64..+112 are masked straight from registers; the remaining
; twelve rows are masked with the copies stored at [112+r10]..[288+r10].
; Everything is OR-ed into xmm0/xmm1.
165 pand xmm0,XMMWORD[64+r12]
166
167 pand xmm1,XMMWORD[80+r12]
168 pand xmm2,XMMWORD[96+r12]
169 movdqa XMMWORD[352+r10],xmm3
170 pand xmm3,XMMWORD[112+r12]
171 por xmm0,xmm2
172 por xmm1,xmm3
173 movdqa xmm4,XMMWORD[((-128))+r12]
174 movdqa xmm5,XMMWORD[((-112))+r12]
175 movdqa xmm2,XMMWORD[((-96))+r12]
176 pand xmm4,XMMWORD[112+r10]
177 movdqa xmm3,XMMWORD[((-80))+r12]
178 pand xmm5,XMMWORD[128+r10]
179 por xmm0,xmm4
180 pand xmm2,XMMWORD[144+r10]
181 por xmm1,xmm5
182 pand xmm3,XMMWORD[160+r10]
183 por xmm0,xmm2
184 por xmm1,xmm3
185 movdqa xmm4,XMMWORD[((-64))+r12]
186 movdqa xmm5,XMMWORD[((-48))+r12]
187 movdqa xmm2,XMMWORD[((-32))+r12]
188 pand xmm4,XMMWORD[176+r10]
189 movdqa xmm3,XMMWORD[((-16))+r12]
190 pand xmm5,XMMWORD[192+r10]
191 por xmm0,xmm4
192 pand xmm2,XMMWORD[208+r10]
193 por xmm1,xmm5
194 pand xmm3,XMMWORD[224+r10]
195 por xmm0,xmm2
196 por xmm1,xmm3
197 movdqa xmm4,XMMWORD[r12]
198 movdqa xmm5,XMMWORD[16+r12]
199 movdqa xmm2,XMMWORD[32+r12]
200 pand xmm4,XMMWORD[240+r10]
201 movdqa xmm3,XMMWORD[48+r12]
202 pand xmm5,XMMWORD[256+r10]
203 por xmm0,xmm4
204 pand xmm2,XMMWORD[272+r10]
205 por xmm1,xmm5
206 pand xmm3,XMMWORD[288+r10]
207 por xmm0,xmm2
208 por xmm1,xmm3
; Fold the two accumulators, then fold the high qword onto the low one
; (pshufd 0x4e swaps the 64-bit halves).
209 por xmm0,xmm1
210 pshufd xmm1,xmm0,0x4e
211 por xmm0,xmm1
; Advance to the next 256-byte group of table rows.
212 lea r12,[256+r12]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0 (rbx = gathered word).
213DB 102,72,15,126,195
214
; r8 = the word r8 pointed at; rax = first word at rsi.
215 mov r8,QWORD[r8]
216 mov rax,QWORD[rsi]
217
; r14 = outer-loop counter, r15 = inner word index.
218 xor r14,r14
219 xor r15,r15
220
; First pass: r11:r10 = [rsi] * rbx, then rbp = r8 * r10 is the factor
; multiplied against the words at rcx.
221 mov rbp,r8
222 mul rbx
223 mov r10,rax
224 mov rax,QWORD[rcx]
225
226 imul rbp,r10
227 mov r11,rdx
228
229 mul rbp
230 add r10,rax
231 mov rax,QWORD[8+rsi]
232 adc rdx,0
233 mov r13,rdx
234
235 lea r15,[1+r15]
236 jmp NEAR $L$1st_enter
237
; First-pass inner loop: for each word index r15 accumulate
; [rsi+8*r15]*rbx and [rcx+8*r15]*rbp with carries and store the result
; word two slots behind the index, at [rsp + 8*r15 - 16].
238ALIGN 16
239$L$1st:
240 add r13,rax
241 mov rax,QWORD[r15*8+rsi]
242 adc rdx,0
243 add r13,r11
244 mov r11,r10
245 adc rdx,0
246 mov QWORD[((-16))+r15*8+rsp],r13
247 mov r13,rdx
248
249$L$1st_enter:
250 mul rbx
251 add r11,rax
252 mov rax,QWORD[r15*8+rcx]
253 adc rdx,0
254 lea r15,[1+r15]
255 mov r10,rdx
256
257 mul rbp
258 cmp r15,r9
259 jne NEAR $L$1st
260
261
; Loop tail: flush the last product and write the two top words of the
; r9+1 word scratch vector at rsp.
262 add r13,rax
263 adc rdx,0
264 add r13,r11
265 adc rdx,0
266 mov QWORD[((-16))+r9*8+rsp],r13
267 mov r13,rdx
268 mov r11,r10
269
270 xor rdx,rdx
271 add r13,r11
272 adc rdx,0
273 mov QWORD[((-8))+r9*8+rsp],r13
274 mov QWORD[r9*8+rsp],rdx
275
276 lea r14,[1+r14]
277 jmp NEAR $L$outer
; Outer loop, executed for r14 = 1 .. r9-1: gather the next multiplier
; word, then run one multiply-accumulate-reduce pass over the scratch
; vector at rsp.
278ALIGN 16
279$L$outer:
; rdx = mask area re-derived from rsp, biased by +128 so the sixteen
; masks are addressed with offsets -128..+112, mirroring r12.
280 lea rdx,[((24+128))+r9*8+rsp]
281 and rdx,-16
282 pxor xmm4,xmm4
283 pxor xmm5,xmm5
284 movdqa xmm0,XMMWORD[((-128))+r12]
285 movdqa xmm1,XMMWORD[((-112))+r12]
286 movdqa xmm2,XMMWORD[((-96))+r12]
287 movdqa xmm3,XMMWORD[((-80))+r12]
288 pand xmm0,XMMWORD[((-128))+rdx]
289 pand xmm1,XMMWORD[((-112))+rdx]
290 por xmm4,xmm0
291 pand xmm2,XMMWORD[((-96))+rdx]
292 por xmm5,xmm1
293 pand xmm3,XMMWORD[((-80))+rdx]
294 por xmm4,xmm2
295 por xmm5,xmm3
296 movdqa xmm0,XMMWORD[((-64))+r12]
297 movdqa xmm1,XMMWORD[((-48))+r12]
298 movdqa xmm2,XMMWORD[((-32))+r12]
299 movdqa xmm3,XMMWORD[((-16))+r12]
300 pand xmm0,XMMWORD[((-64))+rdx]
301 pand xmm1,XMMWORD[((-48))+rdx]
302 por xmm4,xmm0
303 pand xmm2,XMMWORD[((-32))+rdx]
304 por xmm5,xmm1
305 pand xmm3,XMMWORD[((-16))+rdx]
306 por xmm4,xmm2
307 por xmm5,xmm3
308 movdqa xmm0,XMMWORD[r12]
309 movdqa xmm1,XMMWORD[16+r12]
310 movdqa xmm2,XMMWORD[32+r12]
311 movdqa xmm3,XMMWORD[48+r12]
312 pand xmm0,XMMWORD[rdx]
313 pand xmm1,XMMWORD[16+rdx]
314 por xmm4,xmm0
315 pand xmm2,XMMWORD[32+rdx]
316 por xmm5,xmm1
317 pand xmm3,XMMWORD[48+rdx]
318 por xmm4,xmm2
319 por xmm5,xmm3
320 movdqa xmm0,XMMWORD[64+r12]
321 movdqa xmm1,XMMWORD[80+r12]
322 movdqa xmm2,XMMWORD[96+r12]
323 movdqa xmm3,XMMWORD[112+r12]
324 pand xmm0,XMMWORD[64+rdx]
325 pand xmm1,XMMWORD[80+rdx]
326 por xmm4,xmm0
327 pand xmm2,XMMWORD[96+rdx]
328 por xmm5,xmm1
329 pand xmm3,XMMWORD[112+rdx]
330 por xmm4,xmm2
331 por xmm5,xmm3
332 por xmm4,xmm5
333 pshufd xmm0,xmm4,0x4e
334 por xmm0,xmm4
335 lea r12,[256+r12]
336
337 mov rax,QWORD[rsi]
; Raw encoding of movq rbx,xmm0: rbx = newly gathered multiplier word.
338DB 102,72,15,126,195
339
340 xor r15,r15
341 mov rbp,r8
; r10 = lowest scratch word from the previous pass.
342 mov r10,QWORD[rsp]
343
344 mul rbx
345 add r10,rax
346 mov rax,QWORD[rcx]
347 adc rdx,0
348
; rbp = r8 * (scratch[0] + low([rsi]*rbx)): reduction factor of this pass.
349 imul rbp,r10
350 mov r11,rdx
351
352 mul rbp
353 add r10,rax
354 mov rax,QWORD[8+rsi]
355 adc rdx,0
356 mov r10,QWORD[8+rsp]
357 mov r13,rdx
358
359 lea r15,[1+r15]
360 jmp NEAR $L$inner_enter
361
; Inner loop: same shape as $L$1st, but additionally folds in the
; previous scratch word (loaded into r10) before storing back.
362ALIGN 16
363$L$inner:
364 add r13,rax
365 mov rax,QWORD[r15*8+rsi]
366 adc rdx,0
367 add r13,r10
368 mov r10,QWORD[r15*8+rsp]
369 adc rdx,0
370 mov QWORD[((-16))+r15*8+rsp],r13
371 mov r13,rdx
372
373$L$inner_enter:
374 mul rbx
375 add r11,rax
376 mov rax,QWORD[r15*8+rcx]
377 adc rdx,0
378 add r10,r11
379 mov r11,rdx
380 adc r11,0
381 lea r15,[1+r15]
382
383 mul rbp
384 cmp r15,r9
385 jne NEAR $L$inner
386
; Pass tail: the previous top word [rsp+8*r9] is added in as well.
387 add r13,rax
388 adc rdx,0
389 add r13,r10
390 mov r10,QWORD[r9*8+rsp]
391 adc rdx,0
392 mov QWORD[((-16))+r9*8+rsp],r13
393 mov r13,rdx
394
395 xor rdx,rdx
396 add r13,r11
397 adc rdx,0
398 add r13,r10
399 adc rdx,0
400 mov QWORD[((-8))+r9*8+rsp],r13
401 mov QWORD[r9*8+rsp],rdx
402
403 lea r14,[1+r14]
404 cmp r14,r9
405 jb NEAR $L$outer
406
; Final subtraction: write (scratch - [rcx]) word by word to rdi.
; The xor below also clears CF, so the first sbb starts without borrow;
; mov, lea and dec inside the loop leave CF untouched between sbb's.
407 xor r14,r14
408 mov rax,QWORD[rsp]
409 lea rsi,[rsp]
410 mov r15,r9
411 jmp NEAR $L$sub
412ALIGN 16
413$L$sub: sbb rax,QWORD[r14*8+rcx]
414 mov QWORD[r14*8+rdi],rax
415 mov rax,QWORD[8+r14*8+rsi]
416 lea r14,[1+r14]
417 dec r15
418 jnz NEAR $L$sub
419
; rax = top scratch word minus the final borrow; rbx = NOT rax.
; NOTE(review): rax is expected to be 0 or all-ones here so that the two
; values act as complementary select masks -- confirm the value range.
420 sbb rax,0
421 mov rbx,-1
422 xor rbx,rax
423 xor r14,r14
424 mov r15,r9
425
; Branch-free select per word: rdi[i] = (rdi[i] & rbx) | (scratch[i] & rax).
; Each scratch word is overwritten with the loop index as it is consumed.
426$L$copy:
427 mov rcx,QWORD[r14*8+rdi]
428 mov rdx,QWORD[r14*8+rsp]
429 and rcx,rbx
430 and rdx,rax
431 mov QWORD[r14*8+rsp],r14
432 or rdx,rcx
433 mov QWORD[r14*8+rdi],rdx
434 lea r14,[1+r14]
435 sub r15,1
436 jnz NEAR $L$copy
437
; rsi = entry-time rsp saved at $L$mul_page_walk_done.
438 mov rsi,QWORD[8+r9*8+rsp]
439
; Return value.
440 mov rax,1
441
; Reload the six registers pushed at $L$mul_enter from just below the
; saved stack pointer, then drop the frame.
442 mov r15,QWORD[((-48))+rsi]
443
444 mov r14,QWORD[((-40))+rsi]
445
446 mov r13,QWORD[((-32))+rsi]
447
448 mov r12,QWORD[((-24))+rsi]
449
450 mov rbp,QWORD[((-16))+rsi]
451
452 mov rbx,QWORD[((-8))+rsi]
453
454 lea rsp,[rsi]
455
456$L$mul_epilogue:
457 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
458 mov rsi,QWORD[16+rsp]
459 DB 0F3h,0C3h ;repret
460
461$L$SEH_end_bn_mul_mont_gather5:
462
; ---------------------------------------------------------------------
; bn_mul4x_mont_gather5 -- 4-words-per-iteration variant. No `global`
; directive for this label appears in the visible part of the file;
; bn_mul_mont_gather5 jumps to $L$mul4x_enter below after it has already
; remapped the arguments and set rax = entry-time rsp.
;
; Register contract at $L$mul4x_enter (as established by both prologues):
;   rdi, rsi, rdx, rcx, r8 = remapped arguments 1-5, r9 = word count,
;   rax = entry-time rsp.
; Allocates a 64-byte aligned frame, calls mul4x_internal, returns rax = 1.
; ---------------------------------------------------------------------
463ALIGN 32
464bn_mul4x_mont_gather5:
465 mov QWORD[8+rsp],rdi ;WIN64 prologue
466 mov QWORD[16+rsp],rsi
467 mov rax,rsp
468$L$SEH_begin_bn_mul4x_mont_gather5:
469 mov rdi,rcx
470 mov rsi,rdx
471 mov rdx,r8
472 mov rcx,r9
473 mov r8,QWORD[40+rsp]
474 mov r9,QWORD[48+rsp]
475
476
477
; 0x67 is the address-size override prefix emitted as a raw byte.
; NOTE(review): presumably padding for instruction alignment -- confirm.
478DB 0x67
479 mov rax,rsp
480
481$L$mul4x_enter:
482 push rbx
483
484 push rbp
485
486 push r12
487
488 push r13
489
490 push r14
491
492 push r15
493
494$L$mul4x_prologue:
495
496DB 0x67
; r9 = word count * 8 (bytes; the 32-bit shift also clears the upper
; half), r10 = 3 * r9, then r9 is negated for the address arithmetic.
497 shl r9d,3
498 lea r10,[r9*2+r9]
499 neg r9
500
501
502
503
504
505
506
507
508
509
; r11 = ((rsp - 320 - 2*bytes) - rdi) mod 4096: offset within a page
; between the prospective frame and the output pointer.
; NOTE(review): the two branches below presumably pick the frame address
; so that the frame and the output buffer relate in a controlled way
; modulo 4096 -- confirm the rationale against the generator script.
510 lea r11,[((-320))+r9*2+rsp]
511 mov rbp,rsp
512 sub r11,rdi
513 and r11,4095
514 cmp r10,r11
515 jb NEAR $L$mul4xsp_alt
516 sub rbp,r11
517 lea rbp,[((-320))+r9*2+rbp]
518 jmp NEAR $L$mul4xsp_done
519
520ALIGN 32
521$L$mul4xsp_alt:
; r11 = max(r11 - (4096 - 320 - 2*bytes), 0); cmovc zeroes it when the
; subtraction borrowed.
522 lea r10,[((4096-320))+r9*2]
523 lea rbp,[((-320))+r9*2+rbp]
524 sub r11,r10
525 mov r10,0
526 cmovc r11,r10
527 sub rbp,r11
528$L$mul4xsp_done:
; rbp = final frame address, 64-byte aligned. rsp is first moved to the
; highest address <= old rsp that is a whole number of pages above rbp
; and probed with a load whose value is not used.
529 and rbp,-64
530 mov r11,rsp
531 sub r11,rbp
532 and r11,-4096
533 lea rsp,[rbp*1+r11]
534 mov r10,QWORD[rsp]
535 cmp rsp,rbp
536 ja NEAR $L$mul4x_page_walk
537 jmp NEAR $L$mul4x_page_walk_done
538
; Step down one 4096-byte page at a time, touching each, until rsp == rbp.
539$L$mul4x_page_walk:
540 lea rsp,[((-4096))+rsp]
541 mov r10,QWORD[rsp]
542 cmp rsp,rbp
543 ja NEAR $L$mul4x_page_walk
544$L$mul4x_page_walk_done:
545
; r9 back to a positive byte count.
546 neg r9
547
; Save the entry-time rsp in the frame for the epilogue.
548 mov QWORD[40+rsp],rax
549
550$L$mul4x_body:
551
552 call mul4x_internal
553
; rsi = entry-time rsp saved above.
554 mov rsi,QWORD[40+rsp]
555
; Return value.
556 mov rax,1
557
; Reload the six pushed registers from below the saved stack pointer.
558 mov r15,QWORD[((-48))+rsi]
559
560 mov r14,QWORD[((-40))+rsi]
561
562 mov r13,QWORD[((-32))+rsi]
563
564 mov r12,QWORD[((-24))+rsi]
565
566 mov rbp,QWORD[((-16))+rsi]
567
568 mov rbx,QWORD[((-8))+rsi]
569
570 lea rsp,[rsi]
571
572$L$mul4x_epilogue:
573 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
574 mov rsi,QWORD[16+rsp]
575 DB 0F3h,0C3h ;repret
576
577$L$SEH_end_bn_mul4x_mont_gather5:
578
579
; ---------------------------------------------------------------------
; mul4x_internal -- file-local worker called by bn_mul4x_mont_gather5
; and bn_power5. It does not return with `ret` in this view: it ends by
; jumping to $L$sqr4x_sub_entry, which is defined outside the visible
; part of the file.
;
; Inputs as used below:
;   rax = caller's entry-time rsp (the selector is read from [56+rax])
;   rdx = table base          rsi = first operand words
;   rcx = modulus words       r8  = pointer to the reduction multiplier
;   r9  = operand length in bytes (both callers pass word count * 8)
;   rdi = output pointer (parked at [56+8+rsp] until the tail)
; Frame slots written here: [16+8+rsp] = end-of-table sentinel,
; [56+8+rsp] = output pointer, mask area at [112+r10]..[352+r10],
; scratch vector starting at [64+8+rsp].
; NOTE(review): the "+8" in these offsets presumably accounts for the
; return address pushed by `call` -- confirm against the callers' frames.
; ---------------------------------------------------------------------
580ALIGN 32
581mul4x_internal:
; r13 = rdx + 128 + 32*r9: r12 advances by 256 per gathered word, so
; this is the value r12 reaches after the last word. r9 is restored.
582 shl r9,5
583 movd xmm5,DWORD[56+rax]
584 lea rax,[$L$inc]
585 lea r13,[128+r9*1+rdx]
586 shr r9,5
; xmm0/xmm1 = two 16-byte constants from $L$inc (defined outside this view).
587 movdqa xmm0,XMMWORD[rax]
588 movdqa xmm1,XMMWORD[16+rax]
; r10 = base of the mask area; r12 = table base + 128.
589 lea r10,[((88-112))+r9*1+rsp]
590 lea r12,[128+rdx]
591
; Broadcast the selector and build sixteen compare masks, stored at
; [112+r10] .. [352+r10] (same scheme as in bn_mul_mont_gather5).
592 pshufd xmm5,xmm5,0
593 movdqa xmm4,xmm1
; Raw 0x67 address-size prefixes.
; NOTE(review): presumably alignment padding -- confirm.
594DB 0x67,0x67
595 movdqa xmm2,xmm1
596 paddd xmm1,xmm0
597 pcmpeqd xmm0,xmm5
598DB 0x67
599 movdqa xmm3,xmm4
600 paddd xmm2,xmm1
601 pcmpeqd xmm1,xmm5
602 movdqa XMMWORD[112+r10],xmm0
603 movdqa xmm0,xmm4
604
605 paddd xmm3,xmm2
606 pcmpeqd xmm2,xmm5
607 movdqa XMMWORD[128+r10],xmm1
608 movdqa xmm1,xmm4
609
610 paddd xmm0,xmm3
611 pcmpeqd xmm3,xmm5
612 movdqa XMMWORD[144+r10],xmm2
613 movdqa xmm2,xmm4
614
615 paddd xmm1,xmm0
616 pcmpeqd xmm0,xmm5
617 movdqa XMMWORD[160+r10],xmm3
618 movdqa xmm3,xmm4
619 paddd xmm2,xmm1
620 pcmpeqd xmm1,xmm5
621 movdqa XMMWORD[176+r10],xmm0
622 movdqa xmm0,xmm4
623
624 paddd xmm3,xmm2
625 pcmpeqd xmm2,xmm5
626 movdqa XMMWORD[192+r10],xmm1
627 movdqa xmm1,xmm4
628
629 paddd xmm0,xmm3
630 pcmpeqd xmm3,xmm5
631 movdqa XMMWORD[208+r10],xmm2
632 movdqa xmm2,xmm4
633
634 paddd xmm1,xmm0
635 pcmpeqd xmm0,xmm5
636 movdqa XMMWORD[224+r10],xmm3
637 movdqa xmm3,xmm4
638 paddd xmm2,xmm1
639 pcmpeqd xmm1,xmm5
640 movdqa XMMWORD[240+r10],xmm0
641 movdqa xmm0,xmm4
642
643 paddd xmm3,xmm2
644 pcmpeqd xmm2,xmm5
645 movdqa XMMWORD[256+r10],xmm1
646 movdqa xmm1,xmm4
647
648 paddd xmm0,xmm3
649 pcmpeqd xmm3,xmm5
650 movdqa XMMWORD[272+r10],xmm2
651 movdqa xmm2,xmm4
652
653 paddd xmm1,xmm0
654 pcmpeqd xmm0,xmm5
655 movdqa XMMWORD[288+r10],xmm3
656 movdqa xmm3,xmm4
657 paddd xmm2,xmm1
658 pcmpeqd xmm1,xmm5
659 movdqa XMMWORD[304+r10],xmm0
660
661 paddd xmm3,xmm2
662DB 0x67
663 pcmpeqd xmm2,xmm5
664 movdqa XMMWORD[320+r10],xmm1
665
666 pcmpeqd xmm3,xmm5
667 movdqa XMMWORD[336+r10],xmm2
; First gather: the last four masks are still in xmm0..xmm3; the other
; twelve rows use the stored masks. All sixteen rows are read and OR-ed.
668 pand xmm0,XMMWORD[64+r12]
669
670 pand xmm1,XMMWORD[80+r12]
671 pand xmm2,XMMWORD[96+r12]
672 movdqa XMMWORD[352+r10],xmm3
673 pand xmm3,XMMWORD[112+r12]
674 por xmm0,xmm2
675 por xmm1,xmm3
676 movdqa xmm4,XMMWORD[((-128))+r12]
677 movdqa xmm5,XMMWORD[((-112))+r12]
678 movdqa xmm2,XMMWORD[((-96))+r12]
679 pand xmm4,XMMWORD[112+r10]
680 movdqa xmm3,XMMWORD[((-80))+r12]
681 pand xmm5,XMMWORD[128+r10]
682 por xmm0,xmm4
683 pand xmm2,XMMWORD[144+r10]
684 por xmm1,xmm5
685 pand xmm3,XMMWORD[160+r10]
686 por xmm0,xmm2
687 por xmm1,xmm3
688 movdqa xmm4,XMMWORD[((-64))+r12]
689 movdqa xmm5,XMMWORD[((-48))+r12]
690 movdqa xmm2,XMMWORD[((-32))+r12]
691 pand xmm4,XMMWORD[176+r10]
692 movdqa xmm3,XMMWORD[((-16))+r12]
693 pand xmm5,XMMWORD[192+r10]
694 por xmm0,xmm4
695 pand xmm2,XMMWORD[208+r10]
696 por xmm1,xmm5
697 pand xmm3,XMMWORD[224+r10]
698 por xmm0,xmm2
699 por xmm1,xmm3
700 movdqa xmm4,XMMWORD[r12]
701 movdqa xmm5,XMMWORD[16+r12]
702 movdqa xmm2,XMMWORD[32+r12]
703 pand xmm4,XMMWORD[240+r10]
704 movdqa xmm3,XMMWORD[48+r12]
705 pand xmm5,XMMWORD[256+r10]
706 por xmm0,xmm4
707 pand xmm2,XMMWORD[272+r10]
708 por xmm1,xmm5
709 pand xmm3,XMMWORD[288+r10]
710 por xmm0,xmm2
711 por xmm1,xmm3
712 por xmm0,xmm1
; Fold the high qword onto the low one (0x4e swaps the 64-bit halves).
713 pshufd xmm1,xmm0,0x4e
714 por xmm0,xmm1
715 lea r12,[256+r12]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0 (rbx = gathered word).
716DB 102,72,15,126,195
717
; Park the end-of-table sentinel and the output pointer in the frame;
; rdi is reused as a carry/accumulator register from here on.
718 mov QWORD[((16+8))+rsp],r13
719 mov QWORD[((56+8))+rsp],rdi
720
; r8 = reduction multiplier, rax = first operand word. rsi is advanced
; to the end of the operand and r9 negated, so [r9+rsi] addresses word 0
; and indices count upward towards zero.
721 mov r8,QWORD[r8]
722 mov rax,QWORD[rsi]
723 lea rsi,[r9*1+rsi]
724 neg r9
725
726 mov rbp,r8
727 mul rbx
728 mov r10,rax
729 mov rax,QWORD[rcx]
730
; rbp = r8 * low(word0 * rbx): reduction factor for the first pass.
; r14 = write cursor into the scratch vector.
731 imul rbp,r10
732 lea r14,[((64+8))+rsp]
733 mov r11,rdx
734
735 mul rbp
736 add r10,rax
737 mov rax,QWORD[8+r9*1+rsi]
738 adc rdx,0
739 mov rdi,rdx
740
741 mul rbx
742 add r11,rax
743 mov rax,QWORD[8+rcx]
744 adc rdx,0
745 mov r10,rdx
746
747 mul rbp
748 add rdi,rax
749 mov rax,QWORD[16+r9*1+rsi]
750 adc rdx,0
751 add rdi,r11
; r15 = negative byte index for the unrolled loop; rcx advances 32 bytes.
752 lea r15,[32+r9]
753 lea rcx,[32+rcx]
754 adc rdx,0
755 mov QWORD[r14],rdi
756 mov r13,rdx
757 jmp NEAR $L$1st4x
758
; First pass, four words per iteration. Operand-times-rbx products and
; modulus-times-rbp products are interleaved; carries alternate between
; the r10/r11 and rdi/r13 pairs. Runs until r15 reaches zero.
759ALIGN 32
760$L$1st4x:
761 mul rbx
762 add r10,rax
763 mov rax,QWORD[((-16))+rcx]
764 lea r14,[32+r14]
765 adc rdx,0
766 mov r11,rdx
767
768 mul rbp
769 add r13,rax
770 mov rax,QWORD[((-8))+r15*1+rsi]
771 adc rdx,0
772 add r13,r10
773 adc rdx,0
774 mov QWORD[((-24))+r14],r13
775 mov rdi,rdx
776
777 mul rbx
778 add r11,rax
779 mov rax,QWORD[((-8))+rcx]
780 adc rdx,0
781 mov r10,rdx
782
783 mul rbp
784 add rdi,rax
785 mov rax,QWORD[r15*1+rsi]
786 adc rdx,0
787 add rdi,r11
788 adc rdx,0
789 mov QWORD[((-16))+r14],rdi
790 mov r13,rdx
791
792 mul rbx
793 add r10,rax
794 mov rax,QWORD[rcx]
795 adc rdx,0
796 mov r11,rdx
797
798 mul rbp
799 add r13,rax
800 mov rax,QWORD[8+r15*1+rsi]
801 adc rdx,0
802 add r13,r10
803 adc rdx,0
804 mov QWORD[((-8))+r14],r13
805 mov rdi,rdx
806
807 mul rbx
808 add r11,rax
809 mov rax,QWORD[8+rcx]
810 adc rdx,0
811 mov r10,rdx
812
813 mul rbp
814 add rdi,rax
815 mov rax,QWORD[16+r15*1+rsi]
816 adc rdx,0
817 add rdi,r11
818 lea rcx,[32+rcx]
819 adc rdx,0
820 mov QWORD[r14],rdi
821 mov r13,rdx
822
823 add r15,32
824 jnz NEAR $L$1st4x
825
; Tail of the first pass: the last two word pairs.
826 mul rbx
827 add r10,rax
828 mov rax,QWORD[((-16))+rcx]
829 lea r14,[32+r14]
830 adc rdx,0
831 mov r11,rdx
832
833 mul rbp
834 add r13,rax
835 mov rax,QWORD[((-8))+rsi]
836 adc rdx,0
837 add r13,r10
838 adc rdx,0
839 mov QWORD[((-24))+r14],r13
840 mov rdi,rdx
841
842 mul rbx
843 add r11,rax
844 mov rax,QWORD[((-8))+rcx]
845 adc rdx,0
846 mov r10,rdx
847
848 mul rbp
849 add rdi,rax
; rax = operand word 0 again, preloaded for the next pass.
850 mov rax,QWORD[r9*1+rsi]
851 adc rdx,0
852 add rdi,r11
853 adc rdx,0
854 mov QWORD[((-16))+r14],rdi
855 mov r13,rdx
856
; Rewind the modulus pointer to word 0 (r9 is the negative byte length).
857 lea rcx,[r9*1+rcx]
858
; rdi = carry out of the top word; it is stored at the start of the
; next pass.
859 xor rdi,rdi
860 add r13,r10
861 adc rdi,0
862 mov QWORD[((-8))+r14],r13
863
864 jmp NEAR $L$outer4x
865
; Outer loop: gather the next multiplier word from the table, then run
; one multiply-accumulate-reduce pass over the scratch vector. Repeats
; while r12 is below the sentinel saved at [16+8+rsp].
866ALIGN 32
867$L$outer4x:
; rdx = mask area addressed relative to the scratch cursor, biased by
; +128 so that mask offsets mirror the table-row offsets.
868 lea rdx,[((16+128))+r14]
869 pxor xmm4,xmm4
870 pxor xmm5,xmm5
871 movdqa xmm0,XMMWORD[((-128))+r12]
872 movdqa xmm1,XMMWORD[((-112))+r12]
873 movdqa xmm2,XMMWORD[((-96))+r12]
874 movdqa xmm3,XMMWORD[((-80))+r12]
875 pand xmm0,XMMWORD[((-128))+rdx]
876 pand xmm1,XMMWORD[((-112))+rdx]
877 por xmm4,xmm0
878 pand xmm2,XMMWORD[((-96))+rdx]
879 por xmm5,xmm1
880 pand xmm3,XMMWORD[((-80))+rdx]
881 por xmm4,xmm2
882 por xmm5,xmm3
883 movdqa xmm0,XMMWORD[((-64))+r12]
884 movdqa xmm1,XMMWORD[((-48))+r12]
885 movdqa xmm2,XMMWORD[((-32))+r12]
886 movdqa xmm3,XMMWORD[((-16))+r12]
887 pand xmm0,XMMWORD[((-64))+rdx]
888 pand xmm1,XMMWORD[((-48))+rdx]
889 por xmm4,xmm0
890 pand xmm2,XMMWORD[((-32))+rdx]
891 por xmm5,xmm1
892 pand xmm3,XMMWORD[((-16))+rdx]
893 por xmm4,xmm2
894 por xmm5,xmm3
895 movdqa xmm0,XMMWORD[r12]
896 movdqa xmm1,XMMWORD[16+r12]
897 movdqa xmm2,XMMWORD[32+r12]
898 movdqa xmm3,XMMWORD[48+r12]
899 pand xmm0,XMMWORD[rdx]
900 pand xmm1,XMMWORD[16+rdx]
901 por xmm4,xmm0
902 pand xmm2,XMMWORD[32+rdx]
903 por xmm5,xmm1
904 pand xmm3,XMMWORD[48+rdx]
905 por xmm4,xmm2
906 por xmm5,xmm3
907 movdqa xmm0,XMMWORD[64+r12]
908 movdqa xmm1,XMMWORD[80+r12]
909 movdqa xmm2,XMMWORD[96+r12]
910 movdqa xmm3,XMMWORD[112+r12]
911 pand xmm0,XMMWORD[64+rdx]
912 pand xmm1,XMMWORD[80+rdx]
913 por xmm4,xmm0
914 pand xmm2,XMMWORD[96+rdx]
915 por xmm5,xmm1
916 pand xmm3,XMMWORD[112+rdx]
917 por xmm4,xmm2
918 por xmm5,xmm3
919 por xmm4,xmm5
920 pshufd xmm0,xmm4,0x4e
921 por xmm0,xmm4
922 lea r12,[256+r12]
; Raw encoding of movq rbx,xmm0: rbx = newly gathered multiplier word.
923DB 102,72,15,126,195
924
; r10 = scratch word 0 (r9 is the negative byte length, r14 the cursor).
925 mov r10,QWORD[r9*1+r14]
926 mov rbp,r8
927 mul rbx
928 add r10,rax
929 mov rax,QWORD[rcx]
930 adc rdx,0
931
; rbp = r8 * (scratch[0] + low(word0 * rbx)): reduction factor.
932 imul rbp,r10
933 mov r11,rdx
; Store the carry left over from the previous pass as the top word.
934 mov QWORD[r14],rdi
935
; Rewind the scratch cursor to the start of the vector.
936 lea r14,[r9*1+r14]
937
938 mul rbp
939 add r10,rax
940 mov rax,QWORD[8+r9*1+rsi]
941 adc rdx,0
942 mov rdi,rdx
943
944 mul rbx
945 add r11,rax
946 mov rax,QWORD[8+rcx]
947 adc rdx,0
948 add r11,QWORD[8+r14]
949 adc rdx,0
950 mov r10,rdx
951
952 mul rbp
953 add rdi,rax
954 mov rax,QWORD[16+r9*1+rsi]
955 adc rdx,0
956 add rdi,r11
957 lea r15,[32+r9]
958 lea rcx,[32+rcx]
959 adc rdx,0
960 mov r13,rdx
961 jmp NEAR $L$inner4x
962
; Inner loop, four words per iteration: like $L$1st4x, but each operand
; product also absorbs the existing scratch word, and results are
; written one slot lower than they were read.
963ALIGN 32
964$L$inner4x:
965 mul rbx
966 add r10,rax
967 mov rax,QWORD[((-16))+rcx]
968 adc rdx,0
969 add r10,QWORD[16+r14]
970 lea r14,[32+r14]
971 adc rdx,0
972 mov r11,rdx
973
974 mul rbp
975 add r13,rax
976 mov rax,QWORD[((-8))+r15*1+rsi]
977 adc rdx,0
978 add r13,r10
979 adc rdx,0
980 mov QWORD[((-32))+r14],rdi
981 mov rdi,rdx
982
983 mul rbx
984 add r11,rax
985 mov rax,QWORD[((-8))+rcx]
986 adc rdx,0
987 add r11,QWORD[((-8))+r14]
988 adc rdx,0
989 mov r10,rdx
990
991 mul rbp
992 add rdi,rax
993 mov rax,QWORD[r15*1+rsi]
994 adc rdx,0
995 add rdi,r11
996 adc rdx,0
997 mov QWORD[((-24))+r14],r13
998 mov r13,rdx
999
1000 mul rbx
1001 add r10,rax
1002 mov rax,QWORD[rcx]
1003 adc rdx,0
1004 add r10,QWORD[r14]
1005 adc rdx,0
1006 mov r11,rdx
1007
1008 mul rbp
1009 add r13,rax
1010 mov rax,QWORD[8+r15*1+rsi]
1011 adc rdx,0
1012 add r13,r10
1013 adc rdx,0
1014 mov QWORD[((-16))+r14],rdi
1015 mov rdi,rdx
1016
1017 mul rbx
1018 add r11,rax
1019 mov rax,QWORD[8+rcx]
1020 adc rdx,0
1021 add r11,QWORD[8+r14]
1022 adc rdx,0
1023 mov r10,rdx
1024
1025 mul rbp
1026 add rdi,rax
1027 mov rax,QWORD[16+r15*1+rsi]
1028 adc rdx,0
1029 add rdi,r11
1030 lea rcx,[32+rcx]
1031 adc rdx,0
1032 mov QWORD[((-8))+r14],r13
1033 mov r13,rdx
1034
1035 add r15,32
1036 jnz NEAR $L$inner4x
1037
; Tail of the pass.
1038 mul rbx
1039 add r10,rax
1040 mov rax,QWORD[((-16))+rcx]
1041 adc rdx,0
1042 add r10,QWORD[16+r14]
1043 lea r14,[32+r14]
1044 adc rdx,0
1045 mov r11,rdx
1046
1047 mul rbp
1048 add r13,rax
1049 mov rax,QWORD[((-8))+rsi]
1050 adc rdx,0
1051 add r13,r10
1052 adc rdx,0
1053 mov QWORD[((-32))+r14],rdi
1054 mov rdi,rdx
1055
1056 mul rbx
1057 add r11,rax
; Operands are swapped for the last product: rax takes the reduction
; factor and rbp the top modulus word, so rbp still holds that word for
; the comparison after the outer loop.
1058 mov rax,rbp
1059 mov rbp,QWORD[((-8))+rcx]
1060 adc rdx,0
1061 add r11,QWORD[((-8))+r14]
1062 adc rdx,0
1063 mov r10,rdx
1064
1065 mul rbp
1066 add rdi,rax
1067 mov rax,QWORD[r9*1+rsi]
1068 adc rdx,0
1069 add rdi,r11
1070 adc rdx,0
1071 mov QWORD[((-24))+r14],r13
1072 mov r13,rdx
1073
1074 mov QWORD[((-16))+r14],rdi
; Rewind the modulus pointer to word 0.
1075 lea rcx,[r9*1+rcx]
1076
; rdi = carry out of the top word, including the previous top word.
1077 xor rdi,rdi
1078 add r13,r10
1079 adc rdi,0
1080 add r13,QWORD[r14]
1081 adc rdi,0
1082 mov QWORD[((-8))+r14],r13
1083
1084 cmp r12,QWORD[((16+8))+rsp]
1085 jb NEAR $L$outer4x
; Decide whether a final subtraction is needed and build a mask in rax:
; CF is set when the top modulus word (rbp) is below the top result word
; (r13); r15 is zero on loop exit, so adc turns it into that CF; it is
; OR-ed with the carry-out in rdi, and rax = 0 - rdi is 0 or all-ones.
1086 xor rax,rax
1087 sub rbp,r13
1088 adc r15,r15
1089 or rdi,r15
1090 sub rax,rdi
; Set up registers for the code at $L$sqr4x_sub_entry (outside this
; view): rbx = start of the scratch vector, rbp = modulus pointer,
; r12..r15 = first four modulus words (r12 decremented by one),
; rcx = r9 >> 5 (negative count of 4-word groups), rdi = output pointer
; reloaded from the frame, r10 = 0.
; NOTE(review): the consumer's exact register contract is not visible
; here -- confirm against $L$sqr4x_sub_entry.
1091 lea rbx,[r9*1+r14]
1092 mov r12,QWORD[rcx]
1093 lea rbp,[rcx]
1094 mov rcx,r9
1095 sar rcx,3+2
1096 mov rdi,QWORD[((56+8))+rsp]
1097 dec r12
1098 xor r10,r10
1099 mov r13,QWORD[8+rbp]
1100 mov r14,QWORD[16+rbp]
1101 mov r15,QWORD[24+rbp]
1102 jmp NEAR $L$sqr4x_sub_entry
1103
; ---------------------------------------------------------------------
; bn_power5 -- exported entry point, Win64 calling convention.
;
; The prologue remaps the incoming Win64 arguments:
;   rdi <- rcx        rsi <- rdx        rdx <- r8        rcx <- r9
;   r8  <- [40+rsp]   r9  <- [48+rsp]
; The body runs the pair (__bn_sqr8x_internal, __bn_post4x_internal)
; five times and then calls mul4x_internal, which reads its selector
; from [56 + entry-time rsp].
; NOTE(review): this presumably computes five modular squarings followed
; by one multiplication by a gathered table entry -- confirm against the
; C prototype; __bn_post4x_internal is defined outside this view.
;
; Returns rax = 1. rbx, rbp, r12-r15 are pushed and restored; rdi and rsi
; are saved in the caller's home space and restored in the epilogue.
; ---------------------------------------------------------------------
1104global bn_power5
1105
1106ALIGN 32
1107bn_power5:
1108 mov QWORD[8+rsp],rdi ;WIN64 prologue
1109 mov QWORD[16+rsp],rsi
1110 mov rax,rsp
1111$L$SEH_begin_bn_power5:
1112 mov rdi,rcx
1113 mov rsi,rdx
1114 mov rdx,r8
1115 mov rcx,r9
1116 mov r8,QWORD[40+rsp]
1117 mov r9,QWORD[48+rsp]
1118
1119
1120
; rax keeps the entry-time stack pointer for the epilogue.
1121 mov rax,rsp
1122
1123 push rbx
1124
1125 push rbp
1126
1127 push r12
1128
1129 push r13
1130
1131 push r14
1132
1133 push r15
1134
1135$L$power5_prologue:
1136
; r9 = word count * 8 (bytes), r10 = 3 * r9 (32-bit results, upper
; halves cleared), then r9 is negated for the address arithmetic.
; r8 = the word r8 pointed at.
1137 shl r9d,3
1138 lea r10d,[r9*2+r9]
1139 neg r9
1140 mov r8,QWORD[r8]
1141
1142
1143
1144
1145
1146
1147
1148
; Frame placement, identical in shape to bn_mul4x_mont_gather5:
; r11 = ((rsp - 320 - 2*bytes) - rdi) mod 4096, compared against 3*bytes.
; NOTE(review): presumably controls how the frame and the output buffer
; relate modulo 4096 -- confirm the rationale against the generator.
1149 lea r11,[((-320))+r9*2+rsp]
1150 mov rbp,rsp
1151 sub r11,rdi
1152 and r11,4095
1153 cmp r10,r11
1154 jb NEAR $L$pwr_sp_alt
1155 sub rbp,r11
1156 lea rbp,[((-320))+r9*2+rbp]
1157 jmp NEAR $L$pwr_sp_done
1158
1159ALIGN 32
1160$L$pwr_sp_alt:
; r11 = max(r11 - (4096 - 320 - 2*bytes), 0); cmovc zeroes it when the
; subtraction borrowed.
1161 lea r10,[((4096-320))+r9*2]
1162 lea rbp,[((-320))+r9*2+rbp]
1163 sub r11,r10
1164 mov r10,0
1165 cmovc r11,r10
1166 sub rbp,r11
1167$L$pwr_sp_done:
; rbp = final frame address, 64-byte aligned; rsp is moved there with a
; probe load (value unused) on the way.
1168 and rbp,-64
1169 mov r11,rsp
1170 sub r11,rbp
1171 and r11,-4096
1172 lea rsp,[rbp*1+r11]
1173 mov r10,QWORD[rsp]
1174 cmp rsp,rbp
1175 ja NEAR $L$pwr_page_walk
1176 jmp NEAR $L$pwr_page_walk_done
1177
; Step down one 4096-byte page at a time, touching each, until rsp == rbp.
1178$L$pwr_page_walk:
1179 lea rsp,[((-4096))+rsp]
1180 mov r10,QWORD[rsp]
1181 cmp rsp,rbp
1182 ja NEAR $L$pwr_page_walk
1183$L$pwr_page_walk_done:
1184
; r10 = negative byte length, r9 = positive byte length.
1185 mov r10,r9
1186 neg r9
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
; Frame slots: [32+rsp] = multiplier word loaded from r8,
; [40+rsp] = entry-time rsp.
1197 mov QWORD[32+rsp],r8
1198 mov QWORD[40+rsp],rax
1199
1200$L$power5_body:
; Raw-encoded movq instructions (66 REX.W 0F 6E /r) parking values in
; xmm registers across the calls:
;   xmm1 <- rdi, xmm2 <- rcx, xmm3 <- r10, xmm4 <- rdx.
1201DB 102,72,15,110,207
1202DB 102,72,15,110,209
1203DB 102,73,15,110,218
1204DB 102,72,15,110,226
1205
; Five rounds of squaring followed by post-processing.
1206 call __bn_sqr8x_internal
1207 call __bn_post4x_internal
1208 call __bn_sqr8x_internal
1209 call __bn_post4x_internal
1210 call __bn_sqr8x_internal
1211 call __bn_post4x_internal
1212 call __bn_sqr8x_internal
1213 call __bn_post4x_internal
1214 call __bn_sqr8x_internal
1215 call __bn_post4x_internal
1216
; Raw-encoded movq (66 REX.W 0F 7E /r): rcx <- xmm2, rdx <- xmm4.
1217DB 102,72,15,126,209
1218DB 102,72,15,126,226
; Set up mul4x_internal: rdi = rsi, rax = entry-time rsp, and r8 points
; at the multiplier word saved in the frame.
1219 mov rdi,rsi
1220 mov rax,QWORD[40+rsp]
1221 lea r8,[32+rsp]
1222
1223 call mul4x_internal
1224
; rsi = entry-time rsp saved above.
1225 mov rsi,QWORD[40+rsp]
1226
; Return value.
1227 mov rax,1
; Reload the six pushed registers from below the saved stack pointer.
1228 mov r15,QWORD[((-48))+rsi]
1229
1230 mov r14,QWORD[((-40))+rsi]
1231
1232 mov r13,QWORD[((-32))+rsi]
1233
1234 mov r12,QWORD[((-24))+rsi]
1235
1236 mov rbp,QWORD[((-16))+rsi]
1237
1238 mov rbx,QWORD[((-8))+rsi]
1239
1240 lea rsp,[rsi]
1241
1242$L$power5_epilogue:
1243 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1244 mov rsi,QWORD[16+rsp]
1245 DB 0F3h,0C3h ;repret
1246
1247$L$SEH_end_bn_power5:
1248
1249global bn_sqr8x_internal
1250
1251
1252ALIGN 32
1253bn_sqr8x_internal:
1254__bn_sqr8x_internal:
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328 lea rbp,[32+r10]
1329 lea rsi,[r9*1+rsi]
1330
1331 mov rcx,r9
1332
1333
1334 mov r14,QWORD[((-32))+rbp*1+rsi]
1335 lea rdi,[((48+8))+r9*2+rsp]
1336 mov rax,QWORD[((-24))+rbp*1+rsi]
1337 lea rdi,[((-32))+rbp*1+rdi]
1338 mov rbx,QWORD[((-16))+rbp*1+rsi]
1339 mov r15,rax
1340
1341 mul r14
1342 mov r10,rax
1343 mov rax,rbx
1344 mov r11,rdx
1345 mov QWORD[((-24))+rbp*1+rdi],r10
1346
1347 mul r14
1348 add r11,rax
1349 mov rax,rbx
1350 adc rdx,0
1351 mov QWORD[((-16))+rbp*1+rdi],r11
1352 mov r10,rdx
1353
1354
1355 mov rbx,QWORD[((-8))+rbp*1+rsi]
1356 mul r15
1357 mov r12,rax
1358 mov rax,rbx
1359 mov r13,rdx
1360
1361 lea rcx,[rbp]
1362 mul r14
1363 add r10,rax
1364 mov rax,rbx
1365 mov r11,rdx
1366 adc r11,0
1367 add r10,r12
1368 adc r11,0
1369 mov QWORD[((-8))+rcx*1+rdi],r10
1370 jmp NEAR $L$sqr4x_1st
1371
1372ALIGN 32
1373$L$sqr4x_1st:
1374 mov rbx,QWORD[rcx*1+rsi]
1375 mul r15
1376 add r13,rax
1377 mov rax,rbx
1378 mov r12,rdx
1379 adc r12,0
1380
1381 mul r14
1382 add r11,rax
1383 mov rax,rbx
1384 mov rbx,QWORD[8+rcx*1+rsi]
1385 mov r10,rdx
1386 adc r10,0
1387 add r11,r13
1388 adc r10,0
1389
1390
1391 mul r15
1392 add r12,rax
1393 mov rax,rbx
1394 mov QWORD[rcx*1+rdi],r11
1395 mov r13,rdx
1396 adc r13,0
1397
1398 mul r14
1399 add r10,rax
1400 mov rax,rbx
1401 mov rbx,QWORD[16+rcx*1+rsi]
1402 mov r11,rdx
1403 adc r11,0
1404 add r10,r12
1405 adc r11,0
1406
1407 mul r15
1408 add r13,rax
1409 mov rax,rbx
1410 mov QWORD[8+rcx*1+rdi],r10
1411 mov r12,rdx
1412 adc r12,0
1413
1414 mul r14
1415 add r11,rax
1416 mov rax,rbx
1417 mov rbx,QWORD[24+rcx*1+rsi]
1418 mov r10,rdx
1419 adc r10,0
1420 add r11,r13
1421 adc r10,0
1422
1423
1424 mul r15
1425 add r12,rax
1426 mov rax,rbx
1427 mov QWORD[16+rcx*1+rdi],r11
1428 mov r13,rdx
1429 adc r13,0
1430 lea rcx,[32+rcx]
1431
1432 mul r14
1433 add r10,rax
1434 mov rax,rbx
1435 mov r11,rdx
1436 adc r11,0
1437 add r10,r12
1438 adc r11,0
1439 mov QWORD[((-8))+rcx*1+rdi],r10
1440
1441 cmp rcx,0
1442 jne NEAR $L$sqr4x_1st
1443
1444 mul r15
1445 add r13,rax
1446 lea rbp,[16+rbp]
1447 adc rdx,0
1448 add r13,r11
1449 adc rdx,0
1450
1451 mov QWORD[rdi],r13
1452 mov r12,rdx
1453 mov QWORD[8+rdi],rdx
1454 jmp NEAR $L$sqr4x_outer
1455
1456ALIGN 32
1457$L$sqr4x_outer:
1458 mov r14,QWORD[((-32))+rbp*1+rsi]
1459 lea rdi,[((48+8))+r9*2+rsp]
1460 mov rax,QWORD[((-24))+rbp*1+rsi]
1461 lea rdi,[((-32))+rbp*1+rdi]
1462 mov rbx,QWORD[((-16))+rbp*1+rsi]
1463 mov r15,rax
1464
1465 mul r14
1466 mov r10,QWORD[((-24))+rbp*1+rdi]
1467 add r10,rax
1468 mov rax,rbx
1469 adc rdx,0
1470 mov QWORD[((-24))+rbp*1+rdi],r10
1471 mov r11,rdx
1472
1473 mul r14
1474 add r11,rax
1475 mov rax,rbx
1476 adc rdx,0
1477 add r11,QWORD[((-16))+rbp*1+rdi]
1478 mov r10,rdx
1479 adc r10,0
1480 mov QWORD[((-16))+rbp*1+rdi],r11
1481
1482 xor r12,r12
1483
1484 mov rbx,QWORD[((-8))+rbp*1+rsi]
1485 mul r15
1486 add r12,rax
1487 mov rax,rbx
1488 adc rdx,0
1489 add r12,QWORD[((-8))+rbp*1+rdi]
1490 mov r13,rdx
1491 adc r13,0
1492
1493 mul r14
1494 add r10,rax
1495 mov rax,rbx
1496 adc rdx,0
1497 add r10,r12
1498 mov r11,rdx
1499 adc r11,0
1500 mov QWORD[((-8))+rbp*1+rdi],r10
1501
1502 lea rcx,[rbp]
1503 jmp NEAR $L$sqr4x_inner
1504
1505ALIGN 32
1506$L$sqr4x_inner:
1507 mov rbx,QWORD[rcx*1+rsi]
1508 mul r15
1509 add r13,rax
1510 mov rax,rbx
1511 mov r12,rdx
1512 adc r12,0
1513 add r13,QWORD[rcx*1+rdi]
1514 adc r12,0
1515
1516DB 0x67
1517 mul r14
1518 add r11,rax
1519 mov rax,rbx
1520 mov rbx,QWORD[8+rcx*1+rsi]
1521 mov r10,rdx
1522 adc r10,0
1523 add r11,r13
1524 adc r10,0
1525
1526 mul r15
1527 add r12,rax
1528 mov QWORD[rcx*1+rdi],r11
1529 mov rax,rbx
1530 mov r13,rdx
1531 adc r13,0
1532 add r12,QWORD[8+rcx*1+rdi]
1533 lea rcx,[16+rcx]
1534 adc r13,0
1535
1536 mul r14
1537 add r10,rax
1538 mov rax,rbx
1539 adc rdx,0
1540 add r10,r12
1541 mov r11,rdx
1542 adc r11,0
1543 mov QWORD[((-8))+rcx*1+rdi],r10
1544
1545 cmp rcx,0
1546 jne NEAR $L$sqr4x_inner
1547
1548DB 0x67
1549 mul r15
1550 add r13,rax
1551 adc rdx,0
1552 add r13,r11
1553 adc rdx,0
1554
1555 mov QWORD[rdi],r13
1556 mov r12,rdx
1557 mov QWORD[8+rdi],rdx
1558
1559 add rbp,16
1560 jnz NEAR $L$sqr4x_outer
1561
1562
1563 mov r14,QWORD[((-32))+rsi]
1564 lea rdi,[((48+8))+r9*2+rsp]
1565 mov rax,QWORD[((-24))+rsi]
1566 lea rdi,[((-32))+rbp*1+rdi]
1567 mov rbx,QWORD[((-16))+rsi]
1568 mov r15,rax
1569
1570 mul r14
1571 add r10,rax
1572 mov rax,rbx
1573 mov r11,rdx
1574 adc r11,0
1575
1576 mul r14
1577 add r11,rax
1578 mov rax,rbx
1579 mov QWORD[((-24))+rdi],r10
1580 mov r10,rdx
1581 adc r10,0
1582 add r11,r13
1583 mov rbx,QWORD[((-8))+rsi]
1584 adc r10,0
1585
1586 mul r15
1587 add r12,rax
1588 mov rax,rbx
1589 mov QWORD[((-16))+rdi],r11
1590 mov r13,rdx
1591 adc r13,0
1592
1593 mul r14
1594 add r10,rax
1595 mov rax,rbx
1596 mov r11,rdx
1597 adc r11,0
1598 add r10,r12
1599 adc r11,0
1600 mov QWORD[((-8))+rdi],r10
1601
1602 mul r15
1603 add r13,rax
1604 mov rax,QWORD[((-16))+rsi]
1605 adc rdx,0
1606 add r13,r11
1607 adc rdx,0
1608
1609 mov QWORD[rdi],r13
1610 mov r12,rdx
1611 mov QWORD[8+rdi],rdx
1612
1613 mul rbx
1614 add rbp,16
1615 xor r14,r14
1616 sub rbp,r9
1617 xor r15,r15
1618
1619 add rax,r12
1620 adc rdx,0
1621 mov QWORD[8+rdi],rax
1622 mov QWORD[16+rdi],rdx
1623 mov QWORD[24+rdi],r15
1624
1625 mov rax,QWORD[((-16))+rbp*1+rsi]
1626 lea rdi,[((48+8))+rsp]
1627 xor r10,r10
1628 mov r11,QWORD[8+rdi]
1629
1630 lea r12,[r10*2+r14]
1631 shr r10,63
1632 lea r13,[r11*2+rcx]
1633 shr r11,63
1634 or r13,r10
1635 mov r10,QWORD[16+rdi]
1636 mov r14,r11
1637 mul rax
1638 neg r15
1639 mov r11,QWORD[24+rdi]
1640 adc r12,rax
1641 mov rax,QWORD[((-8))+rbp*1+rsi]
1642 mov QWORD[rdi],r12
1643 adc r13,rdx
1644
1645 lea rbx,[r10*2+r14]
1646 mov QWORD[8+rdi],r13
1647 sbb r15,r15
1648 shr r10,63
1649 lea r8,[r11*2+rcx]
1650 shr r11,63
1651 or r8,r10
1652 mov r10,QWORD[32+rdi]
1653 mov r14,r11
1654 mul rax
1655 neg r15
1656 mov r11,QWORD[40+rdi]
1657 adc rbx,rax
1658 mov rax,QWORD[rbp*1+rsi]
1659 mov QWORD[16+rdi],rbx
1660 adc r8,rdx
1661 lea rbp,[16+rbp]
1662 mov QWORD[24+rdi],r8
1663 sbb r15,r15
1664 lea rdi,[64+rdi]
1665 jmp NEAR $L$sqr4x_shift_n_add
1666
1667ALIGN 32
1668$L$sqr4x_shift_n_add:
1669 lea r12,[r10*2+r14]
1670 shr r10,63
1671 lea r13,[r11*2+rcx]
1672 shr r11,63
1673 or r13,r10
1674 mov r10,QWORD[((-16))+rdi]
1675 mov r14,r11
1676 mul rax
1677 neg r15
1678 mov r11,QWORD[((-8))+rdi]
1679 adc r12,rax
1680 mov rax,QWORD[((-8))+rbp*1+rsi]
1681 mov QWORD[((-32))+rdi],r12
1682 adc r13,rdx
1683
1684 lea rbx,[r10*2+r14]
1685 mov QWORD[((-24))+rdi],r13
1686 sbb r15,r15
1687 shr r10,63
1688 lea r8,[r11*2+rcx]
1689 shr r11,63
1690 or r8,r10
1691 mov r10,QWORD[rdi]
1692 mov r14,r11
1693 mul rax
1694 neg r15
1695 mov r11,QWORD[8+rdi]
1696 adc rbx,rax
1697 mov rax,QWORD[rbp*1+rsi]
1698 mov QWORD[((-16))+rdi],rbx
1699 adc r8,rdx
1700
1701 lea r12,[r10*2+r14]
1702 mov QWORD[((-8))+rdi],r8
1703 sbb r15,r15
1704 shr r10,63
1705 lea r13,[r11*2+rcx]
1706 shr r11,63
1707 or r13,r10
1708 mov r10,QWORD[16+rdi]
1709 mov r14,r11
1710 mul rax
1711 neg r15
1712 mov r11,QWORD[24+rdi]
1713 adc r12,rax
1714 mov rax,QWORD[8+rbp*1+rsi]
1715 mov QWORD[rdi],r12
1716 adc r13,rdx
1717
1718 lea rbx,[r10*2+r14]
1719 mov QWORD[8+rdi],r13
1720 sbb r15,r15
1721 shr r10,63
1722 lea r8,[r11*2+rcx]
1723 shr r11,63
1724 or r8,r10
1725 mov r10,QWORD[32+rdi]
1726 mov r14,r11
1727 mul rax
1728 neg r15
1729 mov r11,QWORD[40+rdi]
1730 adc rbx,rax
1731 mov rax,QWORD[16+rbp*1+rsi]
1732 mov QWORD[16+rdi],rbx
1733 adc r8,rdx
1734 mov QWORD[24+rdi],r8
1735 sbb r15,r15
1736 lea rdi,[64+rdi]
1737 add rbp,32
1738 jnz NEAR $L$sqr4x_shift_n_add
1739
1740 lea r12,[r10*2+r14]
1741DB 0x67
1742 shr r10,63
1743 lea r13,[r11*2+rcx]
1744 shr r11,63
1745 or r13,r10
1746 mov r10,QWORD[((-16))+rdi]
1747 mov r14,r11
1748 mul rax
1749 neg r15
1750 mov r11,QWORD[((-8))+rdi]
1751 adc r12,rax
1752 mov rax,QWORD[((-8))+rsi]
1753 mov QWORD[((-32))+rdi],r12
1754 adc r13,rdx
1755
1756 lea rbx,[r10*2+r14]
1757 mov QWORD[((-24))+rdi],r13
1758 sbb r15,r15
1759 shr r10,63
1760 lea r8,[r11*2+rcx]
1761 shr r11,63
1762 or r8,r10
1763 mul rax
1764 neg r15
1765 adc rbx,rax
1766 adc r8,rdx
1767 mov QWORD[((-16))+rdi],rbx
1768 mov QWORD[((-8))+rdi],r8
1769DB 102,72,15,126,213
; ----------------------------------------------------------------------
; __bn_sqr8x_reduction
; Word-serial reduction of the 2*len-byte working vector kept in the
; caller's stack frame, eight 64-bit words per outer pass.  (The label
; name and the multiply-by-n0 structure suggest Montgomery reduction.)
;
; Reached with CALL, so a slot the caller addresses as [X+rsp] is
; addressed here as [X+8+rsp].
;
; In:   rbp         -> modulus words (read as [rbp], [8+rbp], ...)
;       r9          =  len, the modulus length in bytes (positive)
;       [32+8+rsp]  =  multiplier fed to the IMULs
;       [48+8+rsp]  -> working vector, 2*len bytes
;       xmm2, xmm3  =  values copied back into rbp and r9 at the bottom
;                      of every outer pass
; Out:  rax         =  carry out of the last pass
;       rdx         -> end of the working vector (loop ends once rdi
;                      reaches it); the last 8 words written by each
;                      pass land len bytes above that pass's window
; Uses: rbx, rcx, rsi, rdi, r8-r15, flags, frame slots [0+8+rsp] and
;       [8+8+rsp], and the first eight words of the working vector
;       (the per-step multipliers are parked there).
; ----------------------------------------------------------------------
1770__bn_sqr8x_reduction:
; rax = 0 (no carry yet).  rcx -> end of modulus and rdx -> end of the
; working vector are parked in frame slots 0 and 8.  rdi -> midpoint of
; the working vector.  r9 is then negated so it can rewind rdi below.
1771 xor rax,rax
1772 lea rcx,[rbp*1+r9]
1773 lea rdx,[((48+8))+r9*2+rsp]
1774 mov QWORD[((0+8))+rsp],rcx
1775 lea rdi,[((48+8))+r9*1+rsp]
1776 mov QWORD[((8+8))+rsp],rdx
1777 neg r9
1778 jmp NEAR $L$8x_reduction_loop
1779
1780ALIGN 32
1781$L$8x_reduction_loop:
; Outer pass.  rdi moves back by len (r9 is negative here), which puts
; it on the next 8-word window.  The window is loaded into
; rbx,r9,r10..r15 -- r9 is clobbered as a data word from here on and is
; reloaded from xmm3 at the bottom.  The carry from the previous pass
; (rax) is written to [rdx].
1782 lea rdi,[r9*1+rdi]
; The lone DB 0x66 / DB 0x67 bytes in this routine are instruction
; prefixes glued onto the following instruction (presumably padding).
1783DB 0x66
1784 mov rbx,QWORD[rdi]
1785 mov r9,QWORD[8+rdi]
1786 mov r10,QWORD[16+rdi]
1787 mov r11,QWORD[24+rdi]
1788 mov r12,QWORD[32+rdi]
1789 mov r13,QWORD[40+rdi]
1790 mov r14,QWORD[48+rdi]
1791 mov r15,QWORD[56+rdi]
1792 mov QWORD[rdx],rax
1793 lea rdi,[64+rdi]
1794
1795DB 0x67
; r8 keeps the lowest window word; rbx = that word * multiplier (low 64
; bits) is the factor for the first step.  rax = modulus word 0, ecx
; counts the 8 steps.
1796 mov r8,rbx
1797 imul rbx,QWORD[((32+8))+rsp]
1798 mov rax,QWORD[rbp]
1799 mov ecx,8
1800 jmp NEAR $L$8x_reduce
1801
1802ALIGN 32
1803$L$8x_reduce:
; One step: add rbx * modulus[0..7] into the window r8..r15 and shift
; the window down by one word.
;
; Word 0: the low half of the product is never added -- rax is reloaded
; right after the MUL.  Instead NEG r8 sets CF = (r8 != 0) and that
; carry is folded into the high half (presumably because rbx was chosen
; so that the low word cancels exactly).
1804 mul rbx
1805 mov rax,QWORD[8+rbp]
1806 neg r8
1807 mov r8,rdx
1808 adc r8,0
1809
; Word 1.  The factor in rbx is also saved at [48+rcx*8+rsp]; rcx counts
; down, so the eight factors end up in reverse order in the first eight
; words of the working vector, where the tail loop picks them up.
1810 mul rbx
1811 add r9,rax
1812 mov rax,QWORD[16+rbp]
1813 adc rdx,0
1814 add r8,r9
1815 mov QWORD[((48-8+8))+rcx*8+rsp],rbx
1816 mov r9,rdx
1817 adc r9,0
1818
; Word 2; rsi fetches the multiplier for the next step's factor.
1819 mul rbx
1820 add r10,rax
1821 mov rax,QWORD[24+rbp]
1822 adc rdx,0
1823 add r9,r10
1824 mov rsi,QWORD[((32+8))+rsp]
1825 mov r10,rdx
1826 adc r10,0
1827
; Word 3; rsi = multiplier * new lowest word (r8) = next step's factor.
1828 mul rbx
1829 add r11,rax
1830 mov rax,QWORD[32+rbp]
1831 adc rdx,0
1832 imul rsi,r8
1833 add r10,r11
1834 mov r11,rdx
1835 adc r11,0
1836
; Words 4..6: same multiply / add / shift-down pattern.
1837 mul rbx
1838 add r12,rax
1839 mov rax,QWORD[40+rbp]
1840 adc rdx,0
1841 add r11,r12
1842 mov r12,rdx
1843 adc r12,0
1844
1845 mul rbx
1846 add r13,rax
1847 mov rax,QWORD[48+rbp]
1848 adc rdx,0
1849 add r12,r13
1850 mov r13,rdx
1851 adc r13,0
1852
1853 mul rbx
1854 add r14,rax
1855 mov rax,QWORD[56+rbp]
1856 adc rdx,0
1857 add r13,r14
1858 mov r14,rdx
1859 adc r14,0
1860
; Word 7; rbx switches to the next factor and rax is re-primed with
; modulus word 0 for the next step.
1861 mul rbx
1862 mov rbx,rsi
1863 add r15,rax
1864 mov rax,QWORD[rbp]
1865 adc rdx,0
1866 add r14,r15
1867 mov r15,rdx
1868 adc r15,0
1869
1870 dec ecx
1871 jnz NEAR $L$8x_reduce
1872
; First 8 modulus words done.  Advance rbp, reload rdx = end of vector,
; and skip the tail entirely when rbp already reached the end of the
; modulus (rax = 0 and, because JAE was taken, CF = 0 at $L$8x_no_tail).
1873 lea rbp,[64+rbp]
1874 xor rax,rax
1875 mov rdx,QWORD[((8+8))+rsp]
1876 cmp rbp,QWORD[((0+8))+rsp]
1877 jae NEAR $L$8x_no_tail
1878
1879DB 0x66
; Fold the next 8 vector words into the window; SBB keeps the carry out
; as 0 / -1 in rsi.
1880 add r8,QWORD[rdi]
1881 adc r9,QWORD[8+rdi]
1882 adc r10,QWORD[16+rdi]
1883 adc r11,QWORD[24+rdi]
1884 adc r12,QWORD[32+rdi]
1885 adc r13,QWORD[40+rdi]
1886 adc r14,QWORD[48+rdi]
1887 adc r15,QWORD[56+rdi]
1888 sbb rsi,rsi
1889
; rbx = factor saved by the first reduce step (the one stored with
; rcx = 8); rax = first modulus word of this 8-word group.
1890 mov rbx,QWORD[((48+56+8))+rsp]
1891 mov ecx,8
1892 mov rax,QWORD[rbp]
1893 jmp NEAR $L$8x_tail
1894
1895ALIGN 32
1896$L$8x_tail:
; Tail step: add rbx * modulus[8k..8k+7] into the window, write the
; lowest finished word to [rdi], advance rdi by one word and shift the
; window down.
1897 mul rbx
1898 add r8,rax
1899 mov rax,QWORD[8+rbp]
1900 mov QWORD[rdi],r8
1901 mov r8,rdx
1902 adc r8,0
1903
1904 mul rbx
1905 add r9,rax
1906 mov rax,QWORD[16+rbp]
1907 adc rdx,0
1908 add r8,r9
1909 lea rdi,[8+rdi]
1910 mov r9,rdx
1911 adc r9,0
1912
1913 mul rbx
1914 add r10,rax
1915 mov rax,QWORD[24+rbp]
1916 adc rdx,0
1917 add r9,r10
1918 mov r10,rdx
1919 adc r10,0
1920
1921 mul rbx
1922 add r11,rax
1923 mov rax,QWORD[32+rbp]
1924 adc rdx,0
1925 add r10,r11
1926 mov r11,rdx
1927 adc r11,0
1928
1929 mul rbx
1930 add r12,rax
1931 mov rax,QWORD[40+rbp]
1932 adc rdx,0
1933 add r11,r12
1934 mov r12,rdx
1935 adc r12,0
1936
1937 mul rbx
1938 add r13,rax
1939 mov rax,QWORD[48+rbp]
1940 adc rdx,0
1941 add r12,r13
1942 mov r13,rdx
1943 adc r13,0
1944
1945 mul rbx
1946 add r14,rax
1947 mov rax,QWORD[56+rbp]
1948 adc rdx,0
1949 add r13,r14
1950 mov r14,rdx
1951 adc r14,0
1952
; Last word of the step; rbx is reloaded with the next saved factor
; (slot index follows rcx; the value fetched on the final step is not
; used before rbx is overwritten again).
1953 mul rbx
1954 mov rbx,QWORD[((48-16+8))+rcx*8+rsp]
1955 add r15,rax
1956 adc rdx,0
1957 add r14,r15
1958 mov rax,QWORD[rbp]
1959 mov r15,rdx
1960 adc r15,0
1961
1962 dec ecx
1963 jnz NEAR $L$8x_tail
1964
; Next group of 8 modulus words, or leave the tail when rbp hits the
; end-of-modulus pointer kept in frame slot 0.
1965 lea rbp,[64+rbp]
1966 mov rdx,QWORD[((8+8))+rsp]
1967 cmp rbp,QWORD[((0+8))+rsp]
1968 jae NEAR $L$8x_tail_done
1969
; More modulus words remain: NEG rsi turns the saved 0 / -1 back into
; CF, the next 8 vector words are added with that carry, and the new
; carry is saved in rsi again.
1970 mov rbx,QWORD[((48+56+8))+rsp]
1971 neg rsi
1972 mov rax,QWORD[rbp]
1973 adc r8,QWORD[rdi]
1974 adc r9,QWORD[8+rdi]
1975 adc r10,QWORD[16+rdi]
1976 adc r11,QWORD[24+rdi]
1977 adc r12,QWORD[32+rdi]
1978 adc r13,QWORD[40+rdi]
1979 adc r14,QWORD[48+rdi]
1980 adc r15,QWORD[56+rdi]
1981 sbb rsi,rsi
1982
1983 mov ecx,8
1984 jmp NEAR $L$8x_tail
1985
1986ALIGN 32
1987$L$8x_tail_done:
; Add the carry word that the loop head stored at [rdx] and ripple it
; through the window; anything left over is collected in rax.
1988 xor rax,rax
1989 add r8,QWORD[rdx]
1990 adc r9,0
1991 adc r10,0
1992 adc r11,0
1993 adc r12,0
1994 adc r13,0
1995 adc r14,0
1996 adc r15,0
1997 adc rax,0
1998
; Restore CF from the 0 / -1 kept in rsi for the ADC chain below.
1999 neg rsi
2000$L$8x_no_tail:
; Add the 8 vector words at rdi into the window; rax accumulates the
; carry out, which becomes the carry-in word of the next outer pass.
2001 adc r8,QWORD[rdi]
2002 adc r9,QWORD[8+rdi]
2003 adc r10,QWORD[16+rdi]
2004 adc r11,QWORD[24+rdi]
2005 adc r12,QWORD[32+rdi]
2006 adc r13,QWORD[40+rdi]
2007 adc r14,QWORD[48+rdi]
2008 adc r15,QWORD[56+rdi]
2009 adc rax,0
; rcx = modulus word just below rbp (rbp is at the end of the modulus
; here); no use of this value is visible inside this routine.
2010 mov rcx,QWORD[((-8))+rbp]
2011 xor rsi,rsi
2012
; Bytes 66 48 0F 7E D5 encode "movq rbp,xmm2": rbp back to the value
; the caller left in xmm2.
2013DB 102,72,15,126,213
2014
; Store the finished window; in between, bytes 66 49 0F 7E D9 encode
; "movq r9,xmm3", reloading r9 after its use as a data word.
2015 mov QWORD[rdi],r8
2016 mov QWORD[8+rdi],r9
2017DB 102,73,15,126,217
2018 mov QWORD[16+rdi],r10
2019 mov QWORD[24+rdi],r11
2020 mov QWORD[32+rdi],r12
2021 mov QWORD[40+rdi],r13
2022 mov QWORD[48+rdi],r14
2023 mov QWORD[56+rdi],r15
2024 lea rdi,[64+rdi]
2025
; Another outer pass while rdi is still below the end of the vector.
2026 cmp rdi,rdx
2027 jb NEAR $L$8x_reduction_loop
2028 DB 0F3h,0C3h ;repret
2029
2030
2031ALIGN 32
; ----------------------------------------------------------------------
; __bn_post4x_internal
; Final masked subtraction, four 64-bit words per iteration:
;     dst[i] = src[i] + ((~modulus)[i] adjusted to -modulus) AND mask
; i.e. src - modulus when mask is all-ones, src unchanged when mask is 0.
;
; In:   rbp  -> modulus words
;       r9   =  minus the length in bytes (rcx = r9 >> 5 must count up
;               to zero with INC, so r9 is expected to be negative)
;       rdi  -> end of the source; the source starts at rdi + r9
;       rax  =  select flag; NEG turns it into the AND mask
;               (NOTE(review): assumes rax is 0 or 1 on entry)
;       xmm1 =  destination pointer
; Out:  r10  =  r9 as it was on entry, r9 = -r9 (byte length, positive)
;       rsi  =  destination pointer (from xmm1); rdi, rbx and rbp have
;               advanced past the data they walked
; Uses: rax, rbx, rcx, r12-r15, flags
; ----------------------------------------------------------------------
2032__bn_post4x_internal:
; r12 = modulus word 0, rbx -> start of the source, rcx = r9.
2033 mov r12,QWORD[rbp]
2034 lea rbx,[r9*1+rdi]
2035 mov rcx,r9
; Bytes 66 48 0F 7E CF encode "movq rdi,xmm1": rdi = destination.
2036DB 102,72,15,126,207
; rax becomes the AND mask.
2037 neg rax
; Bytes 66 48 0F 7E CE encode "movq rsi,xmm1": rsi = destination too.
2038DB 102,72,15,126,206
; rcx = r9 / 32 (arithmetic shift keeps the sign): negative count of
; 4-word groups.
2039 sar rcx,3+2
; Word 0 is decremented so that the NOT below yields -n[0]; together
; with the plain NOT of the higher words that forms the two's complement
; of the modulus (valid as long as n[0] is non-zero).
2040 dec r12
; r10 carries CF between iterations as 0 / -1; start with no carry.
2041 xor r10,r10
2042 mov r13,QWORD[8+rbp]
2043 mov r14,QWORD[16+rbp]
2044 mov r15,QWORD[24+rbp]
2045 jmp NEAR $L$sqr4x_sub_entry
2046
2047ALIGN 16
2048$L$sqr4x_sub:
; Later groups: load four modulus words unmodified.
2049 mov r12,QWORD[rbp]
2050 mov r13,QWORD[8+rbp]
2051 mov r14,QWORD[16+rbp]
2052 mov r15,QWORD[24+rbp]
2053$L$sqr4x_sub_entry:
; Complement the modulus words and gate them with the mask.
2054 lea rbp,[32+rbp]
2055 not r12
2056 not r13
2057 not r14
2058 not r15
2059 and r12,rax
2060 and r13,rax
2061 and r14,rax
2062 and r15,rax
2063
; NEG r10 restores CF from the saved 0 / -1, the four source words are
; added with carry, and SBB saves the new carry before LEA/INC (LEA
; leaves flags alone, INC leaves CF alone, but the saved copy is what
; the next iteration uses).
2064 neg r10
2065 adc r12,QWORD[rbx]
2066 adc r13,QWORD[8+rbx]
2067 adc r14,QWORD[16+rbx]
2068 adc r15,QWORD[24+rbx]
2069 mov QWORD[rdi],r12
2070 lea rbx,[32+rbx]
2071 mov QWORD[8+rdi],r13
2072 sbb r10,r10
2073 mov QWORD[16+rdi],r14
2074 mov QWORD[24+rdi],r15
2075 lea rdi,[32+rdi]
2076
2077 inc rcx
2078 jnz NEAR $L$sqr4x_sub
2079
; Hand back both signs of the length: r10 = r9 (entry value), r9 = -r9.
2080 mov r10,r9
2081 neg r9
2082 DB 0F3h,0C3h ;repret
2083
; ----------------------------------------------------------------------
; bn_from_montgomery (exported)
; Entry-point dispatcher.  The sixth argument (Win64: stack slot
; [48+rsp] at entry -- the same slot bn_from_mont8x loads into r9) is
; tested for its low three bits:
;   * multiple of 8  -> jump to bn_from_mont8x with registers and stack
;                       untouched, so its return goes to our caller;
;   * anything else  -> return 0 in eax without doing any work.
; ----------------------------------------------------------------------
2084global bn_from_montgomery
2085
2086ALIGN 32
2087bn_from_montgomery:
2088 test DWORD[48+rsp],7
2089 jz NEAR bn_from_mont8x
2090 xor eax,eax
2091 DB 0F3h,0C3h ;repret
2092
2093
2094
2095ALIGN 32
; ----------------------------------------------------------------------
; bn_from_mont8x
; Worker behind bn_from_montgomery for word counts that are a multiple
; of 8.  It copies the input into a zero-extended 2*len-byte scratch
; vector on the stack, runs __bn_sqr8x_reduction and
; __bn_post4x_internal on it, wipes the scratch vector and returns 1.
;
; Win64 entry; the six arguments (rcx, rdx, r8, r9, [40+rsp], [48+rsp])
; are first moved into rdi, rsi, rdx, rcx, r8, r9:
;   rdi  destination pointer (handed to the helpers through xmm1)
;   rsi  source pointer (read with unaligned 16-byte loads)
;   rdx  third argument -- not read anywhere in this routine
;   rcx  modulus pointer (handed to the helpers in rbp / xmm2)
;   r8   pointer whose first qword is loaded and kept at [32+rsp]
;        (the multiplier __bn_sqr8x_reduction feeds to IMUL)
;   r9   word count; scaled to a byte length with SHL 3
;
; Frame after setup (rsp is 64-byte aligned):
;   [0+rsp],[8+rsp]  scratch slots written by __bn_sqr8x_reduction
;   [32+rsp]         multiplier value
;   [40+rsp]         rsp as it was at function entry
;   [48+rsp] ...     scratch vector, 2*len bytes
; ----------------------------------------------------------------------
2096bn_from_mont8x:
; Save the caller's rdi/rsi in their home slots (restored in the
; epilogue) and remember the entry rsp in rax.
2097 mov QWORD[8+rsp],rdi ;WIN64 prologue
2098 mov QWORD[16+rsp],rsi
2099 mov rax,rsp
2100$L$SEH_begin_bn_from_mont8x:
2101 mov rdi,rcx
2102 mov rsi,rdx
2103 mov rdx,r8
2104 mov rcx,r9
2105 mov r8,QWORD[40+rsp]
2106 mov r9,QWORD[48+rsp]
2107
2108
2109
; Lone prefix byte glued onto the next instruction (presumably padding).
2110DB 0x67
2111 mov rax,rsp
2112
; Callee-saved registers; they sit at [-8+rax] .. [-48+rax] afterwards.
2113 push rbx
2114
2115 push rbp
2116
2117 push r12
2118
2119 push r13
2120
2121 push r14
2122
2123 push r15
2124
2125$L$from_prologue:
2126
; r9 = len (byte length), r10 = 3*len, then r9 = -len; r8 = multiplier
; value fetched through the pointer argument.
2127 shl r9d,3
2128 lea r10,[r9*2+r9]
2129 neg r9
2130 mov r8,QWORD[r8]
2131
2132
2133
2134
2135
2136
2137
2138
; Choose the frame base rbp.  r11 = (rsp - 320 - 2*len - rdi) mod 4096
; is the page offset of the tentative frame relative to the destination
; buffer; depending on how it compares with 3*len the frame is moved
; down by r11 or by a clamped amount.
; NOTE(review): the intent appears to be to control how the frame and
; the caller's buffers line up modulo 4096 -- confirm against the
; generator script's comments.
2139 lea r11,[((-320))+r9*2+rsp]
2140 mov rbp,rsp
2141 sub r11,rdi
2142 and r11,4095
2143 cmp r10,r11
2144 jb NEAR $L$from_sp_alt
2145 sub rbp,r11
2146 lea rbp,[((-320))+r9*2+rbp]
2147 jmp NEAR $L$from_sp_done
2148
2149ALIGN 32
2150$L$from_sp_alt:
; r11 = max(r11 - (4096 - 320 - 2*len), 0).  The zero is loaded with MOV
; rather than XOR so that CF from the SUB survives for CMOVC.
2151 lea r10,[((4096-320))+r9*2]
2152 lea rbp,[((-320))+r9*2+rbp]
2153 sub r11,r10
2154 mov r10,0
2155 cmovc r11,r10
2156 sub rbp,r11
2157$L$from_sp_done:
; Align the frame to 64 bytes, then move rsp down to it touching the
; stack at 4096-byte steps on the way (one read per step), ending with
; rsp == rbp.
2158 and rbp,-64
2159 mov r11,rsp
2160 sub r11,rbp
2161 and r11,-4096
2162 lea rsp,[rbp*1+r11]
2163 mov r10,QWORD[rsp]
2164 cmp rsp,rbp
2165 ja NEAR $L$from_page_walk
2166 jmp NEAR $L$from_page_walk_done
2167
2168$L$from_page_walk:
2169 lea rsp,[((-4096))+rsp]
2170 mov r10,QWORD[rsp]
2171 cmp rsp,rbp
2172 ja NEAR $L$from_page_walk
2173$L$from_page_walk_done:
2174
; r10 = -len, r9 = +len.
2175 mov r10,r9
2176 neg r9
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
; Park the multiplier and the entry rsp in the frame; mul_handler reads
; the latter from [40+rsp] when unwinding out of the body.
2187 mov QWORD[32+rsp],r8
2188 mov QWORD[40+rsp],rax
2189
2190$L$from_body:
; r11 = bytes left to copy, rax -> scratch vector, xmm0 = 0.
2191 mov r11,r9
2192 lea rax,[48+rsp]
2193 pxor xmm0,xmm0
2194 jmp NEAR $L$mul_by_1
2195
2196ALIGN 32
2197$L$mul_by_1:
; Per iteration: copy 64 source bytes into the lower half of the scratch
; vector and clear the matching 64 bytes of the upper half
; ([rax + len]).
2198 movdqu xmm1,XMMWORD[rsi]
2199 movdqu xmm2,XMMWORD[16+rsi]
2200 movdqu xmm3,XMMWORD[32+rsi]
2201 movdqa XMMWORD[r9*1+rax],xmm0
2202 movdqu xmm4,XMMWORD[48+rsi]
2203 movdqa XMMWORD[16+r9*1+rax],xmm0
; Bytes 48 8D B6 40 00 00 00 encode "lea rsi,[rsi+64]" with a 32-bit
; displacement.
2204DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2205 movdqa XMMWORD[rax],xmm1
2206 movdqa XMMWORD[32+r9*1+rax],xmm0
2207 movdqa XMMWORD[16+rax],xmm2
2208 movdqa XMMWORD[48+r9*1+rax],xmm0
2209 movdqa XMMWORD[32+rax],xmm3
2210 movdqa XMMWORD[48+rax],xmm4
2211 lea rax,[64+rax]
2212 sub r11,64
2213 jnz NEAR $L$mul_by_1
2214
; Hand-encoded MOVQs set up the helpers' side inputs:
;   66 48 0F 6E CF  movq xmm1,rdi   (destination pointer)
;   66 48 0F 6E D1  movq xmm2,rcx   (modulus pointer)
;   66 49 0F 6E DA  movq xmm3,r10   (-len)
; rbp = modulus pointer; r9 = +len for __bn_sqr8x_reduction.
2215DB 102,72,15,110,207
2216DB 102,72,15,110,209
2217DB 0x67
2218 mov rbp,rcx
2219DB 102,73,15,110,218
2220 call __bn_sqr8x_reduction
2221 call __bn_post4x_internal
2222
; Wipe the scratch vector.
2223 pxor xmm0,xmm0
2224 lea rax,[48+rsp]
2225 jmp NEAR $L$from_mont_zero
2226
2227ALIGN 32
2228$L$from_mont_zero:
; rsi = saved entry rsp.  The load sits inside the loop, so it is
; repeated (with the same result) on every iteration.
2229 mov rsi,QWORD[40+rsp]
2230
; 64 bytes are cleared for every 32 taken off r9 (= +len after
; __bn_post4x_internal), so 2*len bytes are cleared in total.
2231 movdqa XMMWORD[rax],xmm0
2232 movdqa XMMWORD[16+rax],xmm0
2233 movdqa XMMWORD[32+rax],xmm0
2234 movdqa XMMWORD[48+rax],xmm0
2235 lea rax,[64+rax]
2236 sub r9,32
2237 jnz NEAR $L$from_mont_zero
2238
; Return value 1; reload the callee-saved registers from just below the
; entry rsp and switch back to the caller's stack.
2239 mov rax,1
2240 mov r15,QWORD[((-48))+rsi]
2241
2242 mov r14,QWORD[((-40))+rsi]
2243
2244 mov r13,QWORD[((-32))+rsi]
2245
2246 mov r12,QWORD[((-24))+rsi]
2247
2248 mov rbp,QWORD[((-16))+rsi]
2249
2250 mov rbx,QWORD[((-8))+rsi]
2251
2252 lea rsp,[rsi]
2253
2254$L$from_epilogue:
2255 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2256 mov rsi,QWORD[16+rsp]
2257 DB 0F3h,0C3h ;repret
2258
2259$L$SEH_end_bn_from_mont8x:
; ----------------------------------------------------------------------
; bn_get_bits5 (exported, leaf, no stack use)
; Returns the 5-bit field that starts at a given bit offset of a byte
; buffer.
;
; In:   rcx = buffer pointer
;       edx = bit offset
; Out:  eax = (16-bit word containing the field >> shift) AND 31
; Uses: rcx, rdx, r10, r11, flags
;
; The buffer is read as 16-bit words: word index = offset / 16, shift =
; offset mod 16.  A shift above 11 would leave fewer than 5 bits in the
; word, so in that case the load address is moved up by one byte and the
; shift is reduced by 8 (both CMOVA act on the same CMP).
; ----------------------------------------------------------------------
2260global bn_get_bits5
2261
2262ALIGN 16
2263bn_get_bits5:
2264 lea r10,[rcx]
2265 lea r11,[1+rcx]
2266 mov ecx,edx
2267 shr edx,4
2268 and ecx,15
2269 lea eax,[((-8))+rcx]
2270 cmp ecx,11
2271 cmova r10,r11
2272 cmova ecx,eax
2273 movzx eax,WORD[rdx*2+r10]
2274 shr eax,cl
2275 and eax,31
2276 DB 0F3h,0C3h ;repret
2277
2278
; ----------------------------------------------------------------------
; bn_scatter5 (exported, leaf, no stack use)
; Spreads a run of 64-bit words into one column of a table whose rows
; are 256 bytes apart.
;
; In:   rcx = source pointer (consecutive qwords)
;       edx = number of qwords; 0 returns immediately
;       r8  = table base
;       r9  = column index (scaled by 8 and added to the base)
; Out:  nothing defined; table[row*256 + r9*8] = source[row]
; Uses: rax, rcx, rdx, r8, flags
; ----------------------------------------------------------------------
2279global bn_scatter5
2280
2281ALIGN 16
2282bn_scatter5:
2283 cmp edx,0
2284 jz NEAR $L$scatter_epilogue
; r8 -> slot of the chosen column in row 0.
2285 lea r8,[r9*8+r8]
2286$L$scatter:
; Copy one qword, then step the source by 8 and the table by one row.
2287 mov rax,QWORD[rcx]
2288 lea rcx,[8+rcx]
2289 mov QWORD[r8],rax
2290 lea r8,[256+r8]
2291 sub edx,1
2292 jnz NEAR $L$scatter
2293$L$scatter_epilogue:
2294 DB 0F3h,0C3h ;repret
2295
2296
; ----------------------------------------------------------------------
; bn_gather5 (exported)
; Inverse of bn_scatter5's layout: for each 256-byte table row, extracts
; the qword in column r9d and appends it to the output.  Every row is
; read in full and the wanted column is picked with AND/OR masks
; (presumably so that the memory access pattern does not depend on the
; index).
;
; In:   rcx = output pointer (consecutive qwords)
;       edx = number of rows
;             NOTE(review): unlike bn_scatter5 there is no guard for 0;
;             the loop body always runs at least once.
;       r8  = table base; rows are read with MOVDQA, so the table has to
;             be 16-byte aligned
;       r9d = column index; masks exist for values 0..31
; Uses: rax, rcx, rdx, r10, r11, xmm0-xmm5, flags, 264 bytes of stack
;
; The first two instructions are emitted as raw bytes; their lengths
; (4 and 7 bytes) are what the unwind codes in $L$SEH_info_bn_gather5
; describe, so they must not be re-encoded.
; ----------------------------------------------------------------------
2297global bn_gather5
2298
2299ALIGN 32
2300bn_gather5:
2301$L$SEH_begin_bn_gather5:
2302
; 4C 8D 14 24           lea r10,[rsp]   (r10 keeps the entry rsp)
; 48 81 EC 08 01 00 00  sub rsp,0x108
2303DB 0x4c,0x8d,0x14,0x24
2304DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00
2305 lea rax,[$L$inc]
2306 and rsp,-16
2307
; xmm5 = index; xmm0 = {0,0,1,1}, xmm1 = {2,2,2,2} from $L$inc.
; r11 and rax are biased by +128 so that the 256 bytes of a table row /
; of the mask area are reachable with offsets -128 .. +112.
2308 movd xmm5,r9d
2309 movdqa xmm0,XMMWORD[rax]
2310 movdqa xmm1,XMMWORD[16+rax]
2311 lea r11,[128+r8]
2312 lea rax,[128+rsp]
2313
; Build 16 masks of 16 bytes each.  xmm4 keeps the {2,2,2,2} step; the
; counters run {0,0,1,1}, {2,2,3,3}, ... and PCMPEQD against the
; broadcast index leaves all-ones in the qword whose column number
; matches.  The four registers xmm0..xmm3 are rotated so that each
; compare result is stored one group later.
2314 pshufd xmm5,xmm5,0
2315 movdqa xmm4,xmm1
2316 movdqa xmm2,xmm1
2317 paddd xmm1,xmm0
2318 pcmpeqd xmm0,xmm5
2319 movdqa xmm3,xmm4
2320
2321 paddd xmm2,xmm1
2322 pcmpeqd xmm1,xmm5
2323 movdqa XMMWORD[(-128)+rax],xmm0
2324 movdqa xmm0,xmm4
2325
2326 paddd xmm3,xmm2
2327 pcmpeqd xmm2,xmm5
2328 movdqa XMMWORD[(-112)+rax],xmm1
2329 movdqa xmm1,xmm4
2330
2331 paddd xmm0,xmm3
2332 pcmpeqd xmm3,xmm5
2333 movdqa XMMWORD[(-96)+rax],xmm2
2334 movdqa xmm2,xmm4
2335 paddd xmm1,xmm0
2336 pcmpeqd xmm0,xmm5
2337 movdqa XMMWORD[(-80)+rax],xmm3
2338 movdqa xmm3,xmm4
2339
2340 paddd xmm2,xmm1
2341 pcmpeqd xmm1,xmm5
2342 movdqa XMMWORD[(-64)+rax],xmm0
2343 movdqa xmm0,xmm4
2344
2345 paddd xmm3,xmm2
2346 pcmpeqd xmm2,xmm5
2347 movdqa XMMWORD[(-48)+rax],xmm1
2348 movdqa xmm1,xmm4
2349
2350 paddd xmm0,xmm3
2351 pcmpeqd xmm3,xmm5
2352 movdqa XMMWORD[(-32)+rax],xmm2
2353 movdqa xmm2,xmm4
2354 paddd xmm1,xmm0
2355 pcmpeqd xmm0,xmm5
2356 movdqa XMMWORD[(-16)+rax],xmm3
2357 movdqa xmm3,xmm4
2358
2359 paddd xmm2,xmm1
2360 pcmpeqd xmm1,xmm5
2361 movdqa XMMWORD[rax],xmm0
2362 movdqa xmm0,xmm4
2363
2364 paddd xmm3,xmm2
2365 pcmpeqd xmm2,xmm5
2366 movdqa XMMWORD[16+rax],xmm1
2367 movdqa xmm1,xmm4
2368
2369 paddd xmm0,xmm3
2370 pcmpeqd xmm3,xmm5
2371 movdqa XMMWORD[32+rax],xmm2
2372 movdqa xmm2,xmm4
2373 paddd xmm1,xmm0
2374 pcmpeqd xmm0,xmm5
2375 movdqa XMMWORD[48+rax],xmm3
2376 movdqa xmm3,xmm4
2377
2378 paddd xmm2,xmm1
2379 pcmpeqd xmm1,xmm5
2380 movdqa XMMWORD[64+rax],xmm0
2381 movdqa xmm0,xmm4
2382
2383 paddd xmm3,xmm2
2384 pcmpeqd xmm2,xmm5
2385 movdqa XMMWORD[80+rax],xmm1
2386 movdqa xmm1,xmm4
2387
2388 paddd xmm0,xmm3
2389 pcmpeqd xmm3,xmm5
2390 movdqa XMMWORD[96+rax],xmm2
2391 movdqa xmm2,xmm4
2392 movdqa XMMWORD[112+rax],xmm3
2393 jmp NEAR $L$gather
2394
2395ALIGN 32
2396$L$gather:
; One table row per iteration: AND each of the 16 row vectors with its
; mask and OR everything into two accumulators (xmm4, xmm5).
2397 pxor xmm4,xmm4
2398 pxor xmm5,xmm5
2399 movdqa xmm0,XMMWORD[((-128))+r11]
2400 movdqa xmm1,XMMWORD[((-112))+r11]
2401 movdqa xmm2,XMMWORD[((-96))+r11]
2402 pand xmm0,XMMWORD[((-128))+rax]
2403 movdqa xmm3,XMMWORD[((-80))+r11]
2404 pand xmm1,XMMWORD[((-112))+rax]
2405 por xmm4,xmm0
2406 pand xmm2,XMMWORD[((-96))+rax]
2407 por xmm5,xmm1
2408 pand xmm3,XMMWORD[((-80))+rax]
2409 por xmm4,xmm2
2410 por xmm5,xmm3
2411 movdqa xmm0,XMMWORD[((-64))+r11]
2412 movdqa xmm1,XMMWORD[((-48))+r11]
2413 movdqa xmm2,XMMWORD[((-32))+r11]
2414 pand xmm0,XMMWORD[((-64))+rax]
2415 movdqa xmm3,XMMWORD[((-16))+r11]
2416 pand xmm1,XMMWORD[((-48))+rax]
2417 por xmm4,xmm0
2418 pand xmm2,XMMWORD[((-32))+rax]
2419 por xmm5,xmm1
2420 pand xmm3,XMMWORD[((-16))+rax]
2421 por xmm4,xmm2
2422 por xmm5,xmm3
2423 movdqa xmm0,XMMWORD[r11]
2424 movdqa xmm1,XMMWORD[16+r11]
2425 movdqa xmm2,XMMWORD[32+r11]
2426 pand xmm0,XMMWORD[rax]
2427 movdqa xmm3,XMMWORD[48+r11]
2428 pand xmm1,XMMWORD[16+rax]
2429 por xmm4,xmm0
2430 pand xmm2,XMMWORD[32+rax]
2431 por xmm5,xmm1
2432 pand xmm3,XMMWORD[48+rax]
2433 por xmm4,xmm2
2434 por xmm5,xmm3
2435 movdqa xmm0,XMMWORD[64+r11]
2436 movdqa xmm1,XMMWORD[80+r11]
2437 movdqa xmm2,XMMWORD[96+r11]
2438 pand xmm0,XMMWORD[64+rax]
2439 movdqa xmm3,XMMWORD[112+r11]
2440 pand xmm1,XMMWORD[80+rax]
2441 por xmm4,xmm0
2442 pand xmm2,XMMWORD[96+rax]
2443 por xmm5,xmm1
2444 pand xmm3,XMMWORD[112+rax]
2445 por xmm4,xmm2
2446 por xmm5,xmm3
; Merge the accumulators, then fold the high qword onto the low one
; (PSHUFD 0x4e swaps the two halves) and store the low qword.
2447 por xmm4,xmm5
2448 lea r11,[256+r11]
2449 pshufd xmm0,xmm4,0x4e
2450 por xmm0,xmm4
2451 movq QWORD[rcx],xmm0
2452 lea rcx,[8+rcx]
2453 sub edx,1
2454 jnz NEAR $L$gather
2455
; Back to the entry rsp saved in r10.
2456 lea rsp,[r10]
2457 DB 0F3h,0C3h ;repret
2458$L$SEH_end_bn_gather5:
2459
2460ALIGN 64
; ----------------------------------------------------------------------
; $L$inc -- two 16-byte constants loaded with MOVDQA by the mask
; builders (bn_gather5 here, $L$mul_body earlier in the file):
;   +0   {0,0,1,1}  first pair of column counters
;   +16  {2,2,2,2}  per-step increment
; The DB lines that follow are a NUL-terminated ASCII banner:
;   "Montgomery Multiplication with scatter/gather for x86_64,
;    CRYPTOGAMS by <appro@openssl.org>"
; ----------------------------------------------------------------------
2461$L$inc:
2462 DD 0,0,1,1
2463 DD 2,2,2,2
2464DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
2465DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
2466DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
2467DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
2468DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
2469DB 112,101,110,115,115,108,46,111,114,103,62,0
; ----------------------------------------------------------------------
; mul_handler
; Win64 language-specific exception handler shared by the routines whose
; .xdata entries name it.  It works out where the interrupted routine
; keeps the caller's rsp and saved registers, patches those values into
; the CONTEXT record, lets RtlVirtualUnwind continue the unwind and
; returns 1 (ExceptionContinueSearch).
;
; In (handler ABI):  r8 -> CONTEXT record, r9 -> DISPATCHER_CONTEXT.
; Record offsets used below (Win64 layouts):
;   CONTEXT:  120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi,
;             192 R9, 216..240 R12..R15, 248 Rip
;   DISPATCHER_CONTEXT:  0 ControlPc, 8 ImageBase, 16 FunctionEntry,
;             24 EstablisherFrame, 40 ContextRecord, 56 HandlerData
; HandlerData is the triple of image-relative labels stored after the
; handler RVA in .xdata: { prologue end, body start, epilogue start }.
; ----------------------------------------------------------------------
2470EXTERN __imp_RtlVirtualUnwind
2471
2472ALIGN 16
2473mul_handler:
; Preserve every callee-saved register plus flags, then reserve 64 bytes
; (32 of shadow space + four stack arguments for the call below).
2474 push rsi
2475 push rdi
2476 push rbx
2477 push rbp
2478 push r12
2479 push r13
2480 push r14
2481 push r15
2482 pushfq
2483 sub rsp,64
2484
; rax = context->Rax, rbx = context->Rip.
2485 mov rax,QWORD[120+r8]
2486 mov rbx,QWORD[248+r8]
2487
; rsi = ImageBase, r11 -> HandlerData.
2488 mov rsi,QWORD[8+r9]
2489 mov r11,QWORD[56+r9]
2490
; Rip before HandlerData[0]: still in the prologue ahead of the first
; label, so context->Rax (the routines copy rsp into rax on entry) is
; used as the frame and no pushed registers are recovered.
2491 mov r10d,DWORD[r11]
2492 lea r10,[r10*1+rsi]
2493 cmp rbx,r10
2494 jb NEAR $L$common_seh_tail
2495
; Rip before HandlerData[1]: registers already pushed, rax still holds
; the entry rsp.
2496 mov r10d,DWORD[4+r11]
2497 lea r10,[r10*1+rsi]
2498 cmp rbx,r10
2499 jb NEAR $L$common_pop_regs
2500
; From here on the frame is found through context->Rsp.
2501 mov rax,QWORD[152+r8]
2502
; Rip at or past HandlerData[2]: in the epilogue, registers and rsp are
; already restored.
2503 mov r10d,DWORD[8+r11]
2504 lea r10,[r10*1+rsi]
2505 cmp rbx,r10
2506 jae NEAR $L$common_seh_tail
2507
; Inside a body.  Code located after $L$mul_epilogue keeps the entry rsp
; at [40+rsp]; code before it (bn_mul_mont_gather5) keeps it at
; [8+r9*8+rsp], with r9 taken from context->R9.
2508 lea r10,[$L$mul_epilogue]
2509 cmp rbx,r10
2510 ja NEAR $L$body_40
2511
2512 mov r10,QWORD[192+r8]
2513 mov rax,QWORD[8+r10*8+rax]
2514
2515 jmp NEAR $L$common_pop_regs
2516
2517$L$body_40:
2518 mov rax,QWORD[40+rax]
2519$L$common_pop_regs:
; rax = entry rsp of the interrupted routine; its pushed registers sit
; just below.  Copy them into context->Rbx/Rbp/R12..R15.
2520 mov rbx,QWORD[((-8))+rax]
2521 mov rbp,QWORD[((-16))+rax]
2522 mov r12,QWORD[((-24))+rax]
2523 mov r13,QWORD[((-32))+rax]
2524 mov r14,QWORD[((-40))+rax]
2525 mov r15,QWORD[((-48))+rax]
2526 mov QWORD[144+r8],rbx
2527 mov QWORD[160+r8],rbp
2528 mov QWORD[216+r8],r12
2529 mov QWORD[224+r8],r13
2530 mov QWORD[232+r8],r14
2531 mov QWORD[240+r8],r15
2532
2533$L$common_seh_tail:
; The routines park the caller's rdi/rsi at [8+entry rsp] and
; [16+entry rsp]; write them and the entry rsp into the context.
2534 mov rdi,QWORD[8+rax]
2535 mov rsi,QWORD[16+rax]
2536 mov QWORD[152+r8],rax
2537 mov QWORD[168+r8],rsi
2538 mov QWORD[176+r8],rdi
2539
; Copy the patched CONTEXT (154 qwords) to disp->ContextRecord.  The
; dword 0xa548f3fc is the byte sequence FC F3 48 A5 = cld ; rep movsq.
2540 mov rdi,QWORD[40+r9]
2541 mov rsi,r8
2542 mov ecx,154
2543 DD 0xa548f3fc
2544
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
; -- the first four in rcx, rdx, r8, r9, the rest at [32..56+rsp].
2545 mov rsi,r9
2546 xor rcx,rcx
2547 mov rdx,QWORD[8+rsi]
2548 mov r8,QWORD[rsi]
2549 mov r9,QWORD[16+rsi]
2550 mov r10,QWORD[40+rsi]
2551 lea r11,[56+rsi]
2552 lea r12,[24+rsi]
2553 mov QWORD[32+rsp],r10
2554 mov QWORD[40+rsp],r11
2555 mov QWORD[48+rsp],r12
2556 mov QWORD[56+rsp],rcx
2557 call QWORD[__imp_RtlVirtualUnwind]
2558
; Return 1 and undo the prologue in reverse order.
2559 mov eax,1
2560 add rsp,64
2561 popfq
2562 pop r15
2563 pop r14
2564 pop r13
2565 pop r12
2566 pop rbp
2567 pop rbx
2568 pop rdi
2569 pop rsi
2570 DB 0F3h,0C3h ;repret
2571
2572
; ----------------------------------------------------------------------
; .pdata -- Win64 function table.  One three-dword entry per routine,
; all image-relative: start label, end label, and the routine's unwind
; information record in .xdata.
; ----------------------------------------------------------------------
2573section .pdata rdata align=4
2574ALIGN 4
2575 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
2576 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
2577 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
2578
2579 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
2580 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
2581 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
2582
2583 DD $L$SEH_begin_bn_power5 wrt ..imagebase
2584 DD $L$SEH_end_bn_power5 wrt ..imagebase
2585 DD $L$SEH_info_bn_power5 wrt ..imagebase
2586
2587 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase
2588 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase
2589 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase
2590 DD $L$SEH_begin_bn_gather5 wrt ..imagebase
2591 DD $L$SEH_end_bn_gather5 wrt ..imagebase
2592 DD $L$SEH_info_bn_gather5 wrt ..imagebase
2593
; ----------------------------------------------------------------------
; .xdata -- unwind information records referenced from .pdata.
;
; The first four records share one shape:
;   DB 9,0,0,0      header byte 9 = version 1 with the "has exception
;                   handler" flag; prologue size, unwind-code count and
;                   frame register are all 0
;   DD mul_handler  image-relative address of the handler
;   DD a,b,c        handler data: three image-relative labels that
;                   mul_handler reads as HandlerData[0..2] and compares
;                   with the faulting Rip
; Note that the bn_mul_mont_gather5 record lists $L$mul_body twice.
; ----------------------------------------------------------------------
2594section .xdata rdata align=8
2595ALIGN 8
2596$L$SEH_info_bn_mul_mont_gather5:
2597DB 9,0,0,0
2598 DD mul_handler wrt ..imagebase
2599 DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
2600ALIGN 8
2601$L$SEH_info_bn_mul4x_mont_gather5:
2602DB 9,0,0,0
2603 DD mul_handler wrt ..imagebase
2604 DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
2605ALIGN 8
2606$L$SEH_info_bn_power5:
2607DB 9,0,0,0
2608 DD mul_handler wrt ..imagebase
2609 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
2610ALIGN 8
2611$L$SEH_info_bn_from_mont8x:
2612DB 9,0,0,0
2613 DD mul_handler wrt ..imagebase
2614 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
2615ALIGN 8
; bn_gather5 uses plain unwind codes instead of a handler:
;   0x01,0x0b,0x03,0x0a  version 1, no flags; prologue is 11 bytes;
;                        3 code slots; frame register r10, offset 0
;   0x0b,0x01,0x21,0x00  at prologue offset 11: large allocation of
;                        0x21*8 = 0x108 bytes  (the "sub rsp,0x108")
;   0x04,0xa3,0x00,0x00  at prologue offset 4: set frame register
;                        (the "lea r10,[rsp]"); last two bytes pad the
;                        code array to an even slot count
2616$L$SEH_info_bn_gather5:
2617DB 0x01,0x0b,0x03,0x0a
2618DB 0x0b,0x01,0x21,0x00
2619DB 0x04,0xa3,0x00,0x00
2620ALIGN 8
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette