VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/rsaz-3k-avx512.S@ 100939

此檔案最後變更於修訂版 100939,由 vboxsync 於 19 個月前提交

openssl: adding missed files bugref:10418

檔案大小: 29.2 KB
 
# ossl_rsaz_amm52x30_x1_ifma256
#
# One multiply-and-reduce pass over 30 words, each word holding 52
# significant bits, using the AVX-512 IFMA instructions vpmadd52luq and
# vpmadd52huq on 256-bit registers.
# NOTE(review): the symbol name suggests "almost Montgomery multiplication";
# the C prototype is not visible in this file - confirm against the caller.
#
# Arguments as they are used below (System V AMD64 register order):
#   %rdi  result; 8 x 32 bytes are stored at 0..255(%rdi)
#   %rsi  operand a; read as 8 x 32 bytes at 0..255(%rsi)
#   %rdx  operand b; 30 qwords are consumed, one per step (copied to %r11)
#   %rcx  operand m (presumably the modulus); 8 x 32 bytes at 0..255(%rcx)
#   %r8   scalar k0, passed by value
#
# Working registers:
#   %ymm3..%ymm10  32-lane accumulator (4 qwords per register)
#   %ymm0          all zero during the multiply steps
#   %ymm1, %ymm2   broadcast of the current b word / of y
#   %r9            running low accumulator word, %r10/%r12/%r13 scratch
#   %rax           2^52 - 1
1.text
2
3.globl ossl_rsaz_amm52x30_x1_ifma256
4.type ossl_rsaz_amm52x30_x1_ifma256,@function
5.align 32
6ossl_rsaz_amm52x30_x1_ifma256:
7.cfi_startproc
# Bytes f3 0f 1e fa encode endbr64.
8.byte 243,15,30,250
# Save all six callee-saved general registers; the epilogue reloads them
# from these stack slots.
9 pushq %rbx
10.cfi_adjust_cfa_offset 8
11.cfi_offset %rbx,-16
12 pushq %rbp
13.cfi_adjust_cfa_offset 8
14.cfi_offset %rbp,-24
15 pushq %r12
16.cfi_adjust_cfa_offset 8
17.cfi_offset %r12,-32
18 pushq %r13
19.cfi_adjust_cfa_offset 8
20.cfi_offset %r13,-40
21 pushq %r14
22.cfi_adjust_cfa_offset 8
23.cfi_offset %r14,-48
24 pushq %r15
25.cfi_adjust_cfa_offset 8
26.cfi_offset %r15,-56
27
# Zero %ymm0 and the eight accumulator registers.
28 vpxord %ymm0,%ymm0,%ymm0
29 vmovdqa64 %ymm0,%ymm3
30 vmovdqa64 %ymm0,%ymm4
31 vmovdqa64 %ymm0,%ymm5
32 vmovdqa64 %ymm0,%ymm6
33 vmovdqa64 %ymm0,%ymm7
34 vmovdqa64 %ymm0,%ymm8
35 vmovdqa64 %ymm0,%ymm9
36 vmovdqa64 %ymm0,%ymm10
37
# %r9 = 0: running low accumulator word.
38 xorl %r9d,%r9d
39
# %r11 walks through b so that %rdx is free as the implicit mulx operand.
# %rax = 2^52 - 1.
40 movq %rdx,%r11
41 movq $0xfffffffffffff,%rax
42
43
# 7 loop passes x 4 words = 28 words; the last 2 of the 30 words are
# handled by the two unrolled steps after the loop.
44 movl $7,%ebx
45
46.align 32
47.Lloop7:
# ---- step for the word at 0(%r11) ----
48 movq 0(%r11),%r13
49
# %ymm1 = b word in every lane.
# %r10:%r9 = %r9 + a[0] * b word   (mulx: %r12:%r13 = %rdx * %r13).
50 vpbroadcastq %r13,%ymm1
51 movq 0(%rsi),%rdx
52 mulxq %r13,%r13,%r12
53 addq %r13,%r9
54 movq %r12,%r10
55 adcq $0,%r10
56
# y = (k0 * %r9) & (2^52 - 1)
57 movq %r8,%r13
58 imulq %r9,%r13
59 andq %rax,%r13
60
# %ymm2 = y in every lane.
# %r10:%r9 += m[0] * y
61 vpbroadcastq %r13,%ymm2
62 movq 0(%rcx),%rdx
63 mulxq %r13,%r13,%r12
64 addq %r13,%r9
65 adcq %r12,%r10
66
# %r9 = (%r10:%r9) >> 52
67 shrq $52,%r9
68 salq $12,%r10
69 orq %r10,%r9
70
# Accumulate the low 52 bits of a[j] * b word into every lane.
71 vpmadd52luq 0(%rsi),%ymm1,%ymm3
72 vpmadd52luq 32(%rsi),%ymm1,%ymm4
73 vpmadd52luq 64(%rsi),%ymm1,%ymm5
74 vpmadd52luq 96(%rsi),%ymm1,%ymm6
75 vpmadd52luq 128(%rsi),%ymm1,%ymm7
76 vpmadd52luq 160(%rsi),%ymm1,%ymm8
77 vpmadd52luq 192(%rsi),%ymm1,%ymm9
78 vpmadd52luq 224(%rsi),%ymm1,%ymm10
79
# Accumulate the low 52 bits of m[j] * y into every lane.
80 vpmadd52luq 0(%rcx),%ymm2,%ymm3
81 vpmadd52luq 32(%rcx),%ymm2,%ymm4
82 vpmadd52luq 64(%rcx),%ymm2,%ymm5
83 vpmadd52luq 96(%rcx),%ymm2,%ymm6
84 vpmadd52luq 128(%rcx),%ymm2,%ymm7
85 vpmadd52luq 160(%rcx),%ymm2,%ymm8
86 vpmadd52luq 192(%rcx),%ymm2,%ymm9
87 vpmadd52luq 224(%rcx),%ymm2,%ymm10
88
89
# Shift the whole 32-lane accumulator down by one qword lane; the top
# lane of %ymm10 is filled from the zero register %ymm0.
90 valignq $1,%ymm3,%ymm4,%ymm3
91 valignq $1,%ymm4,%ymm5,%ymm4
92 valignq $1,%ymm5,%ymm6,%ymm5
93 valignq $1,%ymm6,%ymm7,%ymm6
94 valignq $1,%ymm7,%ymm8,%ymm7
95 valignq $1,%ymm8,%ymm9,%ymm8
96 valignq $1,%ymm9,%ymm10,%ymm9
97 valignq $1,%ymm10,%ymm0,%ymm10
98
# Fold the new lowest lane into the scalar accumulator.
99 vmovq %xmm3,%r13
100 addq %r13,%r9
101
# Accumulate the high 52 bits of the same products into the (already
# shifted) lanes.
102 vpmadd52huq 0(%rsi),%ymm1,%ymm3
103 vpmadd52huq 32(%rsi),%ymm1,%ymm4
104 vpmadd52huq 64(%rsi),%ymm1,%ymm5
105 vpmadd52huq 96(%rsi),%ymm1,%ymm6
106 vpmadd52huq 128(%rsi),%ymm1,%ymm7
107 vpmadd52huq 160(%rsi),%ymm1,%ymm8
108 vpmadd52huq 192(%rsi),%ymm1,%ymm9
109 vpmadd52huq 224(%rsi),%ymm1,%ymm10
110
111 vpmadd52huq 0(%rcx),%ymm2,%ymm3
112 vpmadd52huq 32(%rcx),%ymm2,%ymm4
113 vpmadd52huq 64(%rcx),%ymm2,%ymm5
114 vpmadd52huq 96(%rcx),%ymm2,%ymm6
115 vpmadd52huq 128(%rcx),%ymm2,%ymm7
116 vpmadd52huq 160(%rcx),%ymm2,%ymm8
117 vpmadd52huq 192(%rcx),%ymm2,%ymm9
118 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# ---- step for the word at 8(%r11): same sequence as above ----
119 movq 8(%r11),%r13
120
121 vpbroadcastq %r13,%ymm1
122 movq 0(%rsi),%rdx
123 mulxq %r13,%r13,%r12
124 addq %r13,%r9
125 movq %r12,%r10
126 adcq $0,%r10
127
128 movq %r8,%r13
129 imulq %r9,%r13
130 andq %rax,%r13
131
132 vpbroadcastq %r13,%ymm2
133 movq 0(%rcx),%rdx
134 mulxq %r13,%r13,%r12
135 addq %r13,%r9
136 adcq %r12,%r10
137
138 shrq $52,%r9
139 salq $12,%r10
140 orq %r10,%r9
141
142 vpmadd52luq 0(%rsi),%ymm1,%ymm3
143 vpmadd52luq 32(%rsi),%ymm1,%ymm4
144 vpmadd52luq 64(%rsi),%ymm1,%ymm5
145 vpmadd52luq 96(%rsi),%ymm1,%ymm6
146 vpmadd52luq 128(%rsi),%ymm1,%ymm7
147 vpmadd52luq 160(%rsi),%ymm1,%ymm8
148 vpmadd52luq 192(%rsi),%ymm1,%ymm9
149 vpmadd52luq 224(%rsi),%ymm1,%ymm10
150
151 vpmadd52luq 0(%rcx),%ymm2,%ymm3
152 vpmadd52luq 32(%rcx),%ymm2,%ymm4
153 vpmadd52luq 64(%rcx),%ymm2,%ymm5
154 vpmadd52luq 96(%rcx),%ymm2,%ymm6
155 vpmadd52luq 128(%rcx),%ymm2,%ymm7
156 vpmadd52luq 160(%rcx),%ymm2,%ymm8
157 vpmadd52luq 192(%rcx),%ymm2,%ymm9
158 vpmadd52luq 224(%rcx),%ymm2,%ymm10
159
160
161 valignq $1,%ymm3,%ymm4,%ymm3
162 valignq $1,%ymm4,%ymm5,%ymm4
163 valignq $1,%ymm5,%ymm6,%ymm5
164 valignq $1,%ymm6,%ymm7,%ymm6
165 valignq $1,%ymm7,%ymm8,%ymm7
166 valignq $1,%ymm8,%ymm9,%ymm8
167 valignq $1,%ymm9,%ymm10,%ymm9
168 valignq $1,%ymm10,%ymm0,%ymm10
169
170 vmovq %xmm3,%r13
171 addq %r13,%r9
172
173 vpmadd52huq 0(%rsi),%ymm1,%ymm3
174 vpmadd52huq 32(%rsi),%ymm1,%ymm4
175 vpmadd52huq 64(%rsi),%ymm1,%ymm5
176 vpmadd52huq 96(%rsi),%ymm1,%ymm6
177 vpmadd52huq 128(%rsi),%ymm1,%ymm7
178 vpmadd52huq 160(%rsi),%ymm1,%ymm8
179 vpmadd52huq 192(%rsi),%ymm1,%ymm9
180 vpmadd52huq 224(%rsi),%ymm1,%ymm10
181
182 vpmadd52huq 0(%rcx),%ymm2,%ymm3
183 vpmadd52huq 32(%rcx),%ymm2,%ymm4
184 vpmadd52huq 64(%rcx),%ymm2,%ymm5
185 vpmadd52huq 96(%rcx),%ymm2,%ymm6
186 vpmadd52huq 128(%rcx),%ymm2,%ymm7
187 vpmadd52huq 160(%rcx),%ymm2,%ymm8
188 vpmadd52huq 192(%rcx),%ymm2,%ymm9
189 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# ---- step for the word at 16(%r11): same sequence as above ----
190 movq 16(%r11),%r13
191
192 vpbroadcastq %r13,%ymm1
193 movq 0(%rsi),%rdx
194 mulxq %r13,%r13,%r12
195 addq %r13,%r9
196 movq %r12,%r10
197 adcq $0,%r10
198
199 movq %r8,%r13
200 imulq %r9,%r13
201 andq %rax,%r13
202
203 vpbroadcastq %r13,%ymm2
204 movq 0(%rcx),%rdx
205 mulxq %r13,%r13,%r12
206 addq %r13,%r9
207 adcq %r12,%r10
208
209 shrq $52,%r9
210 salq $12,%r10
211 orq %r10,%r9
212
213 vpmadd52luq 0(%rsi),%ymm1,%ymm3
214 vpmadd52luq 32(%rsi),%ymm1,%ymm4
215 vpmadd52luq 64(%rsi),%ymm1,%ymm5
216 vpmadd52luq 96(%rsi),%ymm1,%ymm6
217 vpmadd52luq 128(%rsi),%ymm1,%ymm7
218 vpmadd52luq 160(%rsi),%ymm1,%ymm8
219 vpmadd52luq 192(%rsi),%ymm1,%ymm9
220 vpmadd52luq 224(%rsi),%ymm1,%ymm10
221
222 vpmadd52luq 0(%rcx),%ymm2,%ymm3
223 vpmadd52luq 32(%rcx),%ymm2,%ymm4
224 vpmadd52luq 64(%rcx),%ymm2,%ymm5
225 vpmadd52luq 96(%rcx),%ymm2,%ymm6
226 vpmadd52luq 128(%rcx),%ymm2,%ymm7
227 vpmadd52luq 160(%rcx),%ymm2,%ymm8
228 vpmadd52luq 192(%rcx),%ymm2,%ymm9
229 vpmadd52luq 224(%rcx),%ymm2,%ymm10
230
231
232 valignq $1,%ymm3,%ymm4,%ymm3
233 valignq $1,%ymm4,%ymm5,%ymm4
234 valignq $1,%ymm5,%ymm6,%ymm5
235 valignq $1,%ymm6,%ymm7,%ymm6
236 valignq $1,%ymm7,%ymm8,%ymm7
237 valignq $1,%ymm8,%ymm9,%ymm8
238 valignq $1,%ymm9,%ymm10,%ymm9
239 valignq $1,%ymm10,%ymm0,%ymm10
240
241 vmovq %xmm3,%r13
242 addq %r13,%r9
243
244 vpmadd52huq 0(%rsi),%ymm1,%ymm3
245 vpmadd52huq 32(%rsi),%ymm1,%ymm4
246 vpmadd52huq 64(%rsi),%ymm1,%ymm5
247 vpmadd52huq 96(%rsi),%ymm1,%ymm6
248 vpmadd52huq 128(%rsi),%ymm1,%ymm7
249 vpmadd52huq 160(%rsi),%ymm1,%ymm8
250 vpmadd52huq 192(%rsi),%ymm1,%ymm9
251 vpmadd52huq 224(%rsi),%ymm1,%ymm10
252
253 vpmadd52huq 0(%rcx),%ymm2,%ymm3
254 vpmadd52huq 32(%rcx),%ymm2,%ymm4
255 vpmadd52huq 64(%rcx),%ymm2,%ymm5
256 vpmadd52huq 96(%rcx),%ymm2,%ymm6
257 vpmadd52huq 128(%rcx),%ymm2,%ymm7
258 vpmadd52huq 160(%rcx),%ymm2,%ymm8
259 vpmadd52huq 192(%rcx),%ymm2,%ymm9
260 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# ---- step for the word at 24(%r11): same sequence as above ----
261 movq 24(%r11),%r13
262
263 vpbroadcastq %r13,%ymm1
264 movq 0(%rsi),%rdx
265 mulxq %r13,%r13,%r12
266 addq %r13,%r9
267 movq %r12,%r10
268 adcq $0,%r10
269
270 movq %r8,%r13
271 imulq %r9,%r13
272 andq %rax,%r13
273
274 vpbroadcastq %r13,%ymm2
275 movq 0(%rcx),%rdx
276 mulxq %r13,%r13,%r12
277 addq %r13,%r9
278 adcq %r12,%r10
279
280 shrq $52,%r9
281 salq $12,%r10
282 orq %r10,%r9
283
284 vpmadd52luq 0(%rsi),%ymm1,%ymm3
285 vpmadd52luq 32(%rsi),%ymm1,%ymm4
286 vpmadd52luq 64(%rsi),%ymm1,%ymm5
287 vpmadd52luq 96(%rsi),%ymm1,%ymm6
288 vpmadd52luq 128(%rsi),%ymm1,%ymm7
289 vpmadd52luq 160(%rsi),%ymm1,%ymm8
290 vpmadd52luq 192(%rsi),%ymm1,%ymm9
291 vpmadd52luq 224(%rsi),%ymm1,%ymm10
292
293 vpmadd52luq 0(%rcx),%ymm2,%ymm3
294 vpmadd52luq 32(%rcx),%ymm2,%ymm4
295 vpmadd52luq 64(%rcx),%ymm2,%ymm5
296 vpmadd52luq 96(%rcx),%ymm2,%ymm6
297 vpmadd52luq 128(%rcx),%ymm2,%ymm7
298 vpmadd52luq 160(%rcx),%ymm2,%ymm8
299 vpmadd52luq 192(%rcx),%ymm2,%ymm9
300 vpmadd52luq 224(%rcx),%ymm2,%ymm10
301
302
303 valignq $1,%ymm3,%ymm4,%ymm3
304 valignq $1,%ymm4,%ymm5,%ymm4
305 valignq $1,%ymm5,%ymm6,%ymm5
306 valignq $1,%ymm6,%ymm7,%ymm6
307 valignq $1,%ymm7,%ymm8,%ymm7
308 valignq $1,%ymm8,%ymm9,%ymm8
309 valignq $1,%ymm9,%ymm10,%ymm9
310 valignq $1,%ymm10,%ymm0,%ymm10
311
312 vmovq %xmm3,%r13
313 addq %r13,%r9
314
315 vpmadd52huq 0(%rsi),%ymm1,%ymm3
316 vpmadd52huq 32(%rsi),%ymm1,%ymm4
317 vpmadd52huq 64(%rsi),%ymm1,%ymm5
318 vpmadd52huq 96(%rsi),%ymm1,%ymm6
319 vpmadd52huq 128(%rsi),%ymm1,%ymm7
320 vpmadd52huq 160(%rsi),%ymm1,%ymm8
321 vpmadd52huq 192(%rsi),%ymm1,%ymm9
322 vpmadd52huq 224(%rsi),%ymm1,%ymm10
323
324 vpmadd52huq 0(%rcx),%ymm2,%ymm3
325 vpmadd52huq 32(%rcx),%ymm2,%ymm4
326 vpmadd52huq 64(%rcx),%ymm2,%ymm5
327 vpmadd52huq 96(%rcx),%ymm2,%ymm6
328 vpmadd52huq 128(%rcx),%ymm2,%ymm7
329 vpmadd52huq 160(%rcx),%ymm2,%ymm8
330 vpmadd52huq 192(%rcx),%ymm2,%ymm9
331 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# Advance the b cursor by the 4 words just consumed and loop.
332 leaq 32(%r11),%r11
333 decl %ebx
334 jne .Lloop7
# ---- tail step 1 of 2: word at 0(%r11) (word 28 of b) ----
335 movq 0(%r11),%r13
336
337 vpbroadcastq %r13,%ymm1
338 movq 0(%rsi),%rdx
339 mulxq %r13,%r13,%r12
340 addq %r13,%r9
341 movq %r12,%r10
342 adcq $0,%r10
343
344 movq %r8,%r13
345 imulq %r9,%r13
346 andq %rax,%r13
347
348 vpbroadcastq %r13,%ymm2
349 movq 0(%rcx),%rdx
350 mulxq %r13,%r13,%r12
351 addq %r13,%r9
352 adcq %r12,%r10
353
354 shrq $52,%r9
355 salq $12,%r10
356 orq %r10,%r9
357
358 vpmadd52luq 0(%rsi),%ymm1,%ymm3
359 vpmadd52luq 32(%rsi),%ymm1,%ymm4
360 vpmadd52luq 64(%rsi),%ymm1,%ymm5
361 vpmadd52luq 96(%rsi),%ymm1,%ymm6
362 vpmadd52luq 128(%rsi),%ymm1,%ymm7
363 vpmadd52luq 160(%rsi),%ymm1,%ymm8
364 vpmadd52luq 192(%rsi),%ymm1,%ymm9
365 vpmadd52luq 224(%rsi),%ymm1,%ymm10
366
367 vpmadd52luq 0(%rcx),%ymm2,%ymm3
368 vpmadd52luq 32(%rcx),%ymm2,%ymm4
369 vpmadd52luq 64(%rcx),%ymm2,%ymm5
370 vpmadd52luq 96(%rcx),%ymm2,%ymm6
371 vpmadd52luq 128(%rcx),%ymm2,%ymm7
372 vpmadd52luq 160(%rcx),%ymm2,%ymm8
373 vpmadd52luq 192(%rcx),%ymm2,%ymm9
374 vpmadd52luq 224(%rcx),%ymm2,%ymm10
375
376
377 valignq $1,%ymm3,%ymm4,%ymm3
378 valignq $1,%ymm4,%ymm5,%ymm4
379 valignq $1,%ymm5,%ymm6,%ymm5
380 valignq $1,%ymm6,%ymm7,%ymm6
381 valignq $1,%ymm7,%ymm8,%ymm7
382 valignq $1,%ymm8,%ymm9,%ymm8
383 valignq $1,%ymm9,%ymm10,%ymm9
384 valignq $1,%ymm10,%ymm0,%ymm10
385
386 vmovq %xmm3,%r13
387 addq %r13,%r9
388
389 vpmadd52huq 0(%rsi),%ymm1,%ymm3
390 vpmadd52huq 32(%rsi),%ymm1,%ymm4
391 vpmadd52huq 64(%rsi),%ymm1,%ymm5
392 vpmadd52huq 96(%rsi),%ymm1,%ymm6
393 vpmadd52huq 128(%rsi),%ymm1,%ymm7
394 vpmadd52huq 160(%rsi),%ymm1,%ymm8
395 vpmadd52huq 192(%rsi),%ymm1,%ymm9
396 vpmadd52huq 224(%rsi),%ymm1,%ymm10
397
398 vpmadd52huq 0(%rcx),%ymm2,%ymm3
399 vpmadd52huq 32(%rcx),%ymm2,%ymm4
400 vpmadd52huq 64(%rcx),%ymm2,%ymm5
401 vpmadd52huq 96(%rcx),%ymm2,%ymm6
402 vpmadd52huq 128(%rcx),%ymm2,%ymm7
403 vpmadd52huq 160(%rcx),%ymm2,%ymm8
404 vpmadd52huq 192(%rcx),%ymm2,%ymm9
405 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# ---- tail step 2 of 2: word at 8(%r11) (word 29 of b) ----
406 movq 8(%r11),%r13
407
408 vpbroadcastq %r13,%ymm1
409 movq 0(%rsi),%rdx
410 mulxq %r13,%r13,%r12
411 addq %r13,%r9
412 movq %r12,%r10
413 adcq $0,%r10
414
415 movq %r8,%r13
416 imulq %r9,%r13
417 andq %rax,%r13
418
419 vpbroadcastq %r13,%ymm2
420 movq 0(%rcx),%rdx
421 mulxq %r13,%r13,%r12
422 addq %r13,%r9
423 adcq %r12,%r10
424
425 shrq $52,%r9
426 salq $12,%r10
427 orq %r10,%r9
428
429 vpmadd52luq 0(%rsi),%ymm1,%ymm3
430 vpmadd52luq 32(%rsi),%ymm1,%ymm4
431 vpmadd52luq 64(%rsi),%ymm1,%ymm5
432 vpmadd52luq 96(%rsi),%ymm1,%ymm6
433 vpmadd52luq 128(%rsi),%ymm1,%ymm7
434 vpmadd52luq 160(%rsi),%ymm1,%ymm8
435 vpmadd52luq 192(%rsi),%ymm1,%ymm9
436 vpmadd52luq 224(%rsi),%ymm1,%ymm10
437
438 vpmadd52luq 0(%rcx),%ymm2,%ymm3
439 vpmadd52luq 32(%rcx),%ymm2,%ymm4
440 vpmadd52luq 64(%rcx),%ymm2,%ymm5
441 vpmadd52luq 96(%rcx),%ymm2,%ymm6
442 vpmadd52luq 128(%rcx),%ymm2,%ymm7
443 vpmadd52luq 160(%rcx),%ymm2,%ymm8
444 vpmadd52luq 192(%rcx),%ymm2,%ymm9
445 vpmadd52luq 224(%rcx),%ymm2,%ymm10
446
447
448 valignq $1,%ymm3,%ymm4,%ymm3
449 valignq $1,%ymm4,%ymm5,%ymm4
450 valignq $1,%ymm5,%ymm6,%ymm5
451 valignq $1,%ymm6,%ymm7,%ymm6
452 valignq $1,%ymm7,%ymm8,%ymm7
453 valignq $1,%ymm8,%ymm9,%ymm8
454 valignq $1,%ymm9,%ymm10,%ymm9
455 valignq $1,%ymm10,%ymm0,%ymm10
456
457 vmovq %xmm3,%r13
458 addq %r13,%r9
459
460 vpmadd52huq 0(%rsi),%ymm1,%ymm3
461 vpmadd52huq 32(%rsi),%ymm1,%ymm4
462 vpmadd52huq 64(%rsi),%ymm1,%ymm5
463 vpmadd52huq 96(%rsi),%ymm1,%ymm6
464 vpmadd52huq 128(%rsi),%ymm1,%ymm7
465 vpmadd52huq 160(%rsi),%ymm1,%ymm8
466 vpmadd52huq 192(%rsi),%ymm1,%ymm9
467 vpmadd52huq 224(%rsi),%ymm1,%ymm10
468
469 vpmadd52huq 0(%rcx),%ymm2,%ymm3
470 vpmadd52huq 32(%rcx),%ymm2,%ymm4
471 vpmadd52huq 64(%rcx),%ymm2,%ymm5
472 vpmadd52huq 96(%rcx),%ymm2,%ymm6
473 vpmadd52huq 128(%rcx),%ymm2,%ymm7
474 vpmadd52huq 160(%rcx),%ymm2,%ymm8
475 vpmadd52huq 192(%rcx),%ymm2,%ymm9
476 vpmadd52huq 224(%rcx),%ymm2,%ymm10
477
# ---- normalisation: bring every lane back to 52 bits ----
# Replace the lowest qword of the accumulator with the scalar word %r9
# (vpblendd $3 takes dwords 0 and 1 from %ymm0). %ymm0 is no longer zero
# from here on.
478 vpbroadcastq %r9,%ymm0
479 vpblendd $3,%ymm0,%ymm3,%ymm3
480
481
482
# Per-lane carries: everything above bit 51.
483 vpsrlq $52,%ymm3,%ymm0
484 vpsrlq $52,%ymm4,%ymm1
485 vpsrlq $52,%ymm5,%ymm2
486 vpsrlq $52,%ymm6,%ymm19
487 vpsrlq $52,%ymm7,%ymm20
488 vpsrlq $52,%ymm8,%ymm21
489 vpsrlq $52,%ymm9,%ymm22
490 vpsrlq $52,%ymm10,%ymm23
491
492
# Move each carry one lane up (highest register first so sources are
# still intact); a zero enters at the bottom from .Lzeros.
493 valignq $3,%ymm22,%ymm23,%ymm23
494 valignq $3,%ymm21,%ymm22,%ymm22
495 valignq $3,%ymm20,%ymm21,%ymm21
496 valignq $3,%ymm19,%ymm20,%ymm20
497 valignq $3,%ymm2,%ymm19,%ymm19
498 valignq $3,%ymm1,%ymm2,%ymm2
499 valignq $3,%ymm0,%ymm1,%ymm1
500 valignq $3,.Lzeros(%rip),%ymm0,%ymm0
501
502
# Keep only the low 52 bits of every lane ...
503 vpandq .Lmask52x4(%rip),%ymm3,%ymm3
504 vpandq .Lmask52x4(%rip),%ymm4,%ymm4
505 vpandq .Lmask52x4(%rip),%ymm5,%ymm5
506 vpandq .Lmask52x4(%rip),%ymm6,%ymm6
507 vpandq .Lmask52x4(%rip),%ymm7,%ymm7
508 vpandq .Lmask52x4(%rip),%ymm8,%ymm8
509 vpandq .Lmask52x4(%rip),%ymm9,%ymm9
510 vpandq .Lmask52x4(%rip),%ymm10,%ymm10
511
512
# ... and add the carry that came from the lane below.
513 vpaddq %ymm0,%ymm3,%ymm3
514 vpaddq %ymm1,%ymm4,%ymm4
515 vpaddq %ymm2,%ymm5,%ymm5
516 vpaddq %ymm19,%ymm6,%ymm6
517 vpaddq %ymm20,%ymm7,%ymm7
518 vpaddq %ymm21,%ymm8,%ymm8
519 vpaddq %ymm22,%ymm9,%ymm9
520 vpaddq %ymm23,%ymm10,%ymm10
521
522
523
# Build a 32-bit "lane > 2^52-1" map (predicate 6 = unsigned greater-than),
# one bit per lane, packed 8 lanes per byte into %r14b,%r13b,%r12b,%r11b.
524 vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1
525 vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2
526 kmovb %k1,%r14d
527 kmovb %k2,%r13d
528 shlb $4,%r13b
529 orb %r13b,%r14b
530
531 vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1
532 vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2
533 kmovb %k1,%r13d
534 kmovb %k2,%r12d
535 shlb $4,%r12b
536 orb %r12b,%r13b
537
538 vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1
539 vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2
540 kmovb %k1,%r12d
541 kmovb %k2,%r11d
542 shlb $4,%r11b
543 orb %r11b,%r12b
544
545 vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1
546 vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2
547 kmovb %k1,%r11d
548 kmovb %k2,%r10d
549 shlb $4,%r10b
550 orb %r10b,%r11b
551
# Shift that 32-bit map left by one bit (add/adc chain across the four
# bytes): an overflowing lane sends a carry into the next lane up.
552 addb %r14b,%r14b
553 adcb %r13b,%r13b
554 adcb %r12b,%r12b
555 adcb %r11b,%r11b
556
557
# Build the "lane == 2^52-1" map (predicate 0 = equal) in
# %r9b,%r8b,%dl,%cl. The argument registers are free to be reused here:
# the multiply steps above were their last use.
558 vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1
559 vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2
560 kmovb %k1,%r9d
561 kmovb %k2,%r8d
562 shlb $4,%r8b
563 orb %r8b,%r9b
564
565 vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1
566 vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2
567 kmovb %k1,%r8d
568 kmovb %k2,%edx
569 shlb $4,%dl
570 orb %dl,%r8b
571
572 vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1
573 vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2
574 kmovb %k1,%edx
575 kmovb %k2,%ecx
576 shlb $4,%cl
577 orb %cl,%dl
578
579 vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1
580 vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2
581 kmovb %k1,%ecx
582 kmovb %k2,%ebx
583 shlb $4,%bl
584 orb %bl,%cl
585
# Adding the "equal" map lets an incoming carry ripple through a run of
# saturated lanes; the xor then leaves a 1 for every lane that has to
# absorb a carry.
586 addb %r9b,%r14b
587 adcb %r8b,%r13b
588 adcb %dl,%r12b
589 adcb %cl,%r11b
590
591 xorb %r9b,%r14b
592 xorb %r8b,%r13b
593 xorb %dl,%r12b
594 xorb %cl,%r11b
595
# Split the four bytes into per-register 4-lane write masks: the low
# nibble of each byte belongs to the first register of its pair, the
# shifted-down high nibble to the second.
596 kmovb %r14d,%k1
597 shrb $4,%r14b
598 kmovb %r14d,%k2
599 kmovb %r13d,%k3
600 shrb $4,%r13b
601 kmovb %r13d,%k4
602 kmovb %r12d,%k5
603 shrb $4,%r12b
604 kmovb %r12d,%k6
605 kmovb %r11d,%k7
606
# In the selected lanes subtract 2^52-1; together with the following AND
# this equals "add 1 modulo 2^52".
607 vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1}
608 vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2}
609 vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3}
610 vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4}
611 vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5}
612 vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6}
613 vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7}
614
615 vpandq .Lmask52x4(%rip),%ymm3,%ymm3
616 vpandq .Lmask52x4(%rip),%ymm4,%ymm4
617 vpandq .Lmask52x4(%rip),%ymm5,%ymm5
618 vpandq .Lmask52x4(%rip),%ymm6,%ymm6
619 vpandq .Lmask52x4(%rip),%ymm7,%ymm7
620 vpandq .Lmask52x4(%rip),%ymm8,%ymm8
621 vpandq .Lmask52x4(%rip),%ymm9,%ymm9
622
# Only seven mask registers %k1..%k7 are used, so the eighth register
# (%ymm10) is handled separately by reusing %k1.
623 shrb $4,%r11b
624 kmovb %r11d,%k1
625
626 vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1}
627
628 vpandq .Lmask52x4(%rip),%ymm10,%ymm10
629
# Store the 32 result lanes (256 bytes) to the destination.
630 vmovdqu64 %ymm3,0(%rdi)
631 vmovdqu64 %ymm4,32(%rdi)
632 vmovdqu64 %ymm5,64(%rdi)
633 vmovdqu64 %ymm6,96(%rdi)
634 vmovdqu64 %ymm7,128(%rdi)
635 vmovdqu64 %ymm8,160(%rdi)
636 vmovdqu64 %ymm9,192(%rdi)
637 vmovdqu64 %ymm10,224(%rdi)
638
# Epilogue: reload the callee-saved registers from the six pushed slots
# (in reverse push order) and release the 48 bytes.
639 vzeroupper
640 leaq (%rsp),%rax
641.cfi_def_cfa_register %rax
642 movq 0(%rax),%r15
643.cfi_restore %r15
644 movq 8(%rax),%r14
645.cfi_restore %r14
646 movq 16(%rax),%r13
647.cfi_restore %r13
648 movq 24(%rax),%r12
649.cfi_restore %r12
650 movq 32(%rax),%rbp
651.cfi_restore %rbp
652 movq 40(%rax),%rbx
653.cfi_restore %rbx
654 leaq 48(%rax),%rsp
655.cfi_def_cfa %rsp,8
656.Lossl_rsaz_amm52x30_x1_ifma256_epilogue:
# Bytes f3 c3 encode "rep ret".
657 .byte 0xf3,0xc3
658.cfi_endproc
659.size ossl_rsaz_amm52x30_x1_ifma256, .-ossl_rsaz_amm52x30_x1_ifma256
# .Lmask52x4: four qwords of 2^52 - 1, used as the per-lane 52-bit mask and
# comparison bound by the normalisation code of both amm52x30 routines.
# The table is only ever read, so it is placed in the read-only data
# section instead of writable .data. The trailing .text switches back to
# the code section for the function that follows.
660.section .rodata
661.align 32
662.Lmask52x4:
663.quad 0xfffffffffffff
664.quad 0xfffffffffffff
665.quad 0xfffffffffffff
666.quad 0xfffffffffffff
667.text
668
# ossl_rsaz_amm52x30_x2_ifma256
#
# Two independent multiply-and-reduce passes over 30 words of 52
# significant bits each, interleaved in one loop. The second operand set
# lives 256 bytes after the first in every buffer.
# NOTE(review): the C prototype is not visible in this file - confirm
# against the caller.
#
# Arguments as they are used below (System V AMD64 register order):
#   %rdi  result; 16 x 32 bytes are stored at 0..511(%rdi)
#   %rsi  operands a;  first at 0..255(%rsi), second at 256..511(%rsi)
#   %rdx  operands b;  first at 0(%rdx)..., second at 256(%rdx)...
#                      (copied to %r11, advanced by one qword per pass)
#   %rcx  operands m (presumably moduli); same 0 / 256 split
#   %r8   pointer to two qwords k0: (%r8) for the first pass, 8(%r8) for
#         the second
#
# Working registers:
#   %ymm3..%ymm10   accumulator of the first multiplication, low word in %r9
#   %ymm11..%ymm18  accumulator of the second multiplication, low word in %r15
#   %ymm0 zero during the loop, %ymm1/%ymm2 broadcast b word / y
#   %rax  2^52 - 1
669.globl ossl_rsaz_amm52x30_x2_ifma256
670.type ossl_rsaz_amm52x30_x2_ifma256,@function
671.align 32
672ossl_rsaz_amm52x30_x2_ifma256:
673.cfi_startproc
# Bytes f3 0f 1e fa encode endbr64.
674.byte 243,15,30,250
# Save all six callee-saved general registers.
675 pushq %rbx
676.cfi_adjust_cfa_offset 8
677.cfi_offset %rbx,-16
678 pushq %rbp
679.cfi_adjust_cfa_offset 8
680.cfi_offset %rbp,-24
681 pushq %r12
682.cfi_adjust_cfa_offset 8
683.cfi_offset %r12,-32
684 pushq %r13
685.cfi_adjust_cfa_offset 8
686.cfi_offset %r13,-40
687 pushq %r14
688.cfi_adjust_cfa_offset 8
689.cfi_offset %r14,-48
690 pushq %r15
691.cfi_adjust_cfa_offset 8
692.cfi_offset %r15,-56
693
# Zero %ymm0 and the first accumulator (%ymm3..%ymm10).
694 vpxord %ymm0,%ymm0,%ymm0
695 vmovdqa64 %ymm0,%ymm3
696 vmovdqa64 %ymm0,%ymm4
697 vmovdqa64 %ymm0,%ymm5
698 vmovdqa64 %ymm0,%ymm6
699 vmovdqa64 %ymm0,%ymm7
700 vmovdqa64 %ymm0,%ymm8
701 vmovdqa64 %ymm0,%ymm9
702 vmovdqa64 %ymm0,%ymm10
703
# Zero the second accumulator (%ymm11..%ymm18).
704 vmovdqa64 %ymm0,%ymm11
705 vmovdqa64 %ymm0,%ymm12
706 vmovdqa64 %ymm0,%ymm13
707 vmovdqa64 %ymm0,%ymm14
708 vmovdqa64 %ymm0,%ymm15
709 vmovdqa64 %ymm0,%ymm16
710 vmovdqa64 %ymm0,%ymm17
711 vmovdqa64 %ymm0,%ymm18
712
713
# %r9 / %r15 = running low accumulator words of the first / second pass.
714 xorl %r9d,%r9d
715 xorl %r15d,%r15d
716
# %r11 walks through b so that %rdx is free as the implicit mulx operand.
# %rax = 2^52 - 1.
717 movq %rdx,%r11
718 movq $0xfffffffffffff,%rax
719
# One loop pass per b word: 30 passes.
720 movl $30,%ebx
721
722.align 32
723.Lloop30:
# ---- first multiplication: word at 0(%r11) ----
724 movq 0(%r11),%r13
725
# %r10:%r9 = %r9 + a[0] * b word   (mulx: %r12:%r13 = %rdx * %r13).
726 vpbroadcastq %r13,%ymm1
727 movq 0(%rsi),%rdx
728 mulxq %r13,%r13,%r12
729 addq %r13,%r9
730 movq %r12,%r10
731 adcq $0,%r10
732
# y = (k0[0] * %r9) & (2^52 - 1)
733 movq (%r8),%r13
734 imulq %r9,%r13
735 andq %rax,%r13
736
# %r10:%r9 += m[0] * y
737 vpbroadcastq %r13,%ymm2
738 movq 0(%rcx),%rdx
739 mulxq %r13,%r13,%r12
740 addq %r13,%r9
741 adcq %r12,%r10
742
# %r9 = (%r10:%r9) >> 52
743 shrq $52,%r9
744 salq $12,%r10
745 orq %r10,%r9
746
# Accumulate the low 52 bits of a[j] * b word, then of m[j] * y.
747 vpmadd52luq 0(%rsi),%ymm1,%ymm3
748 vpmadd52luq 32(%rsi),%ymm1,%ymm4
749 vpmadd52luq 64(%rsi),%ymm1,%ymm5
750 vpmadd52luq 96(%rsi),%ymm1,%ymm6
751 vpmadd52luq 128(%rsi),%ymm1,%ymm7
752 vpmadd52luq 160(%rsi),%ymm1,%ymm8
753 vpmadd52luq 192(%rsi),%ymm1,%ymm9
754 vpmadd52luq 224(%rsi),%ymm1,%ymm10
755
756 vpmadd52luq 0(%rcx),%ymm2,%ymm3
757 vpmadd52luq 32(%rcx),%ymm2,%ymm4
758 vpmadd52luq 64(%rcx),%ymm2,%ymm5
759 vpmadd52luq 96(%rcx),%ymm2,%ymm6
760 vpmadd52luq 128(%rcx),%ymm2,%ymm7
761 vpmadd52luq 160(%rcx),%ymm2,%ymm8
762 vpmadd52luq 192(%rcx),%ymm2,%ymm9
763 vpmadd52luq 224(%rcx),%ymm2,%ymm10
764
765
# Shift the first accumulator down by one qword lane; zero enters at the
# top from %ymm0.
766 valignq $1,%ymm3,%ymm4,%ymm3
767 valignq $1,%ymm4,%ymm5,%ymm4
768 valignq $1,%ymm5,%ymm6,%ymm5
769 valignq $1,%ymm6,%ymm7,%ymm6
770 valignq $1,%ymm7,%ymm8,%ymm7
771 valignq $1,%ymm8,%ymm9,%ymm8
772 valignq $1,%ymm9,%ymm10,%ymm9
773 valignq $1,%ymm10,%ymm0,%ymm10
774
# Fold the new lowest lane into the scalar accumulator.
775 vmovq %xmm3,%r13
776 addq %r13,%r9
777
# Accumulate the high 52 bits of the same products.
778 vpmadd52huq 0(%rsi),%ymm1,%ymm3
779 vpmadd52huq 32(%rsi),%ymm1,%ymm4
780 vpmadd52huq 64(%rsi),%ymm1,%ymm5
781 vpmadd52huq 96(%rsi),%ymm1,%ymm6
782 vpmadd52huq 128(%rsi),%ymm1,%ymm7
783 vpmadd52huq 160(%rsi),%ymm1,%ymm8
784 vpmadd52huq 192(%rsi),%ymm1,%ymm9
785 vpmadd52huq 224(%rsi),%ymm1,%ymm10
786
787 vpmadd52huq 0(%rcx),%ymm2,%ymm3
788 vpmadd52huq 32(%rcx),%ymm2,%ymm4
789 vpmadd52huq 64(%rcx),%ymm2,%ymm5
790 vpmadd52huq 96(%rcx),%ymm2,%ymm6
791 vpmadd52huq 128(%rcx),%ymm2,%ymm7
792 vpmadd52huq 160(%rcx),%ymm2,%ymm8
793 vpmadd52huq 192(%rcx),%ymm2,%ymm9
794 vpmadd52huq 224(%rcx),%ymm2,%ymm10
# ---- second multiplication: word at 256(%r11); same step with every
# buffer offset moved by 256, k0 taken from 8(%r8), scalar word in %r15
# and accumulator in %ymm11..%ymm18 ----
795 movq 256(%r11),%r13
796
797 vpbroadcastq %r13,%ymm1
798 movq 256(%rsi),%rdx
799 mulxq %r13,%r13,%r12
800 addq %r13,%r15
801 movq %r12,%r10
802 adcq $0,%r10
803
804 movq 8(%r8),%r13
805 imulq %r15,%r13
806 andq %rax,%r13
807
808 vpbroadcastq %r13,%ymm2
809 movq 256(%rcx),%rdx
810 mulxq %r13,%r13,%r12
811 addq %r13,%r15
812 adcq %r12,%r10
813
814 shrq $52,%r15
815 salq $12,%r10
816 orq %r10,%r15
817
818 vpmadd52luq 256(%rsi),%ymm1,%ymm11
819 vpmadd52luq 288(%rsi),%ymm1,%ymm12
820 vpmadd52luq 320(%rsi),%ymm1,%ymm13
821 vpmadd52luq 352(%rsi),%ymm1,%ymm14
822 vpmadd52luq 384(%rsi),%ymm1,%ymm15
823 vpmadd52luq 416(%rsi),%ymm1,%ymm16
824 vpmadd52luq 448(%rsi),%ymm1,%ymm17
825 vpmadd52luq 480(%rsi),%ymm1,%ymm18
826
827 vpmadd52luq 256(%rcx),%ymm2,%ymm11
828 vpmadd52luq 288(%rcx),%ymm2,%ymm12
829 vpmadd52luq 320(%rcx),%ymm2,%ymm13
830 vpmadd52luq 352(%rcx),%ymm2,%ymm14
831 vpmadd52luq 384(%rcx),%ymm2,%ymm15
832 vpmadd52luq 416(%rcx),%ymm2,%ymm16
833 vpmadd52luq 448(%rcx),%ymm2,%ymm17
834 vpmadd52luq 480(%rcx),%ymm2,%ymm18
835
836
837 valignq $1,%ymm11,%ymm12,%ymm11
838 valignq $1,%ymm12,%ymm13,%ymm12
839 valignq $1,%ymm13,%ymm14,%ymm13
840 valignq $1,%ymm14,%ymm15,%ymm14
841 valignq $1,%ymm15,%ymm16,%ymm15
842 valignq $1,%ymm16,%ymm17,%ymm16
843 valignq $1,%ymm17,%ymm18,%ymm17
844 valignq $1,%ymm18,%ymm0,%ymm18
845
846 vmovq %xmm11,%r13
847 addq %r13,%r15
848
849 vpmadd52huq 256(%rsi),%ymm1,%ymm11
850 vpmadd52huq 288(%rsi),%ymm1,%ymm12
851 vpmadd52huq 320(%rsi),%ymm1,%ymm13
852 vpmadd52huq 352(%rsi),%ymm1,%ymm14
853 vpmadd52huq 384(%rsi),%ymm1,%ymm15
854 vpmadd52huq 416(%rsi),%ymm1,%ymm16
855 vpmadd52huq 448(%rsi),%ymm1,%ymm17
856 vpmadd52huq 480(%rsi),%ymm1,%ymm18
857
858 vpmadd52huq 256(%rcx),%ymm2,%ymm11
859 vpmadd52huq 288(%rcx),%ymm2,%ymm12
860 vpmadd52huq 320(%rcx),%ymm2,%ymm13
861 vpmadd52huq 352(%rcx),%ymm2,%ymm14
862 vpmadd52huq 384(%rcx),%ymm2,%ymm15
863 vpmadd52huq 416(%rcx),%ymm2,%ymm16
864 vpmadd52huq 448(%rcx),%ymm2,%ymm17
865 vpmadd52huq 480(%rcx),%ymm2,%ymm18
# Advance the b cursor by one word and loop.
866 leaq 8(%r11),%r11
867 decl %ebx
868 jne .Lloop30
869
# ---- normalisation of the first result (%ymm3..%ymm10) ----
# Replace the lowest qword of the accumulator with the scalar word %r9.
# %ymm0 is no longer zero from here on.
870 vpbroadcastq %r9,%ymm0
871 vpblendd $3,%ymm0,%ymm3,%ymm3
872
873
874
# Per-lane carries: everything above bit 51.
875 vpsrlq $52,%ymm3,%ymm0
876 vpsrlq $52,%ymm4,%ymm1
877 vpsrlq $52,%ymm5,%ymm2
878 vpsrlq $52,%ymm6,%ymm19
879 vpsrlq $52,%ymm7,%ymm20
880 vpsrlq $52,%ymm8,%ymm21
881 vpsrlq $52,%ymm9,%ymm22
882 vpsrlq $52,%ymm10,%ymm23
883
884
# Move each carry one lane up; a zero enters at the bottom from .Lzeros.
885 valignq $3,%ymm22,%ymm23,%ymm23
886 valignq $3,%ymm21,%ymm22,%ymm22
887 valignq $3,%ymm20,%ymm21,%ymm21
888 valignq $3,%ymm19,%ymm20,%ymm20
889 valignq $3,%ymm2,%ymm19,%ymm19
890 valignq $3,%ymm1,%ymm2,%ymm2
891 valignq $3,%ymm0,%ymm1,%ymm1
892 valignq $3,.Lzeros(%rip),%ymm0,%ymm0
893
894
# Keep the low 52 bits of every lane, then add the incoming carries.
895 vpandq .Lmask52x4(%rip),%ymm3,%ymm3
896 vpandq .Lmask52x4(%rip),%ymm4,%ymm4
897 vpandq .Lmask52x4(%rip),%ymm5,%ymm5
898 vpandq .Lmask52x4(%rip),%ymm6,%ymm6
899 vpandq .Lmask52x4(%rip),%ymm7,%ymm7
900 vpandq .Lmask52x4(%rip),%ymm8,%ymm8
901 vpandq .Lmask52x4(%rip),%ymm9,%ymm9
902 vpandq .Lmask52x4(%rip),%ymm10,%ymm10
903
904
905 vpaddq %ymm0,%ymm3,%ymm3
906 vpaddq %ymm1,%ymm4,%ymm4
907 vpaddq %ymm2,%ymm5,%ymm5
908 vpaddq %ymm19,%ymm6,%ymm6
909 vpaddq %ymm20,%ymm7,%ymm7
910 vpaddq %ymm21,%ymm8,%ymm8
911 vpaddq %ymm22,%ymm9,%ymm9
912 vpaddq %ymm23,%ymm10,%ymm10
913
914
915
# "lane > 2^52-1" map (predicate 6 = unsigned greater-than), one bit per
# lane, packed into %r14b,%r13b,%r12b,%r11b.
916 vpcmpuq $6,.Lmask52x4(%rip),%ymm3,%k1
917 vpcmpuq $6,.Lmask52x4(%rip),%ymm4,%k2
918 kmovb %k1,%r14d
919 kmovb %k2,%r13d
920 shlb $4,%r13b
921 orb %r13b,%r14b
922
923 vpcmpuq $6,.Lmask52x4(%rip),%ymm5,%k1
924 vpcmpuq $6,.Lmask52x4(%rip),%ymm6,%k2
925 kmovb %k1,%r13d
926 kmovb %k2,%r12d
927 shlb $4,%r12b
928 orb %r12b,%r13b
929
930 vpcmpuq $6,.Lmask52x4(%rip),%ymm7,%k1
931 vpcmpuq $6,.Lmask52x4(%rip),%ymm8,%k2
932 kmovb %k1,%r12d
933 kmovb %k2,%r11d
934 shlb $4,%r11b
935 orb %r11b,%r12b
936
937 vpcmpuq $6,.Lmask52x4(%rip),%ymm9,%k1
938 vpcmpuq $6,.Lmask52x4(%rip),%ymm10,%k2
939 kmovb %k1,%r11d
940 kmovb %k2,%r10d
941 shlb $4,%r10b
942 orb %r10b,%r11b
943
# Shift the map left by one bit across the four bytes.
944 addb %r14b,%r14b
945 adcb %r13b,%r13b
946 adcb %r12b,%r12b
947 adcb %r11b,%r11b
948
949
# "lane == 2^52-1" map (predicate 0 = equal) in %r9b,%r8b,%dl,%cl. The
# argument registers %r8, %rdx and %rcx are overwritten here; the loop
# above was their last use as arguments.
950 vpcmpuq $0,.Lmask52x4(%rip),%ymm3,%k1
951 vpcmpuq $0,.Lmask52x4(%rip),%ymm4,%k2
952 kmovb %k1,%r9d
953 kmovb %k2,%r8d
954 shlb $4,%r8b
955 orb %r8b,%r9b
956
957 vpcmpuq $0,.Lmask52x4(%rip),%ymm5,%k1
958 vpcmpuq $0,.Lmask52x4(%rip),%ymm6,%k2
959 kmovb %k1,%r8d
960 kmovb %k2,%edx
961 shlb $4,%dl
962 orb %dl,%r8b
963
964 vpcmpuq $0,.Lmask52x4(%rip),%ymm7,%k1
965 vpcmpuq $0,.Lmask52x4(%rip),%ymm8,%k2
966 kmovb %k1,%edx
967 kmovb %k2,%ecx
968 shlb $4,%cl
969 orb %cl,%dl
970
971 vpcmpuq $0,.Lmask52x4(%rip),%ymm9,%k1
972 vpcmpuq $0,.Lmask52x4(%rip),%ymm10,%k2
973 kmovb %k1,%ecx
974 kmovb %k2,%ebx
975 shlb $4,%bl
976 orb %bl,%cl
977
# Ripple carries through saturated lanes, then xor to get the lanes that
# must absorb a carry.
978 addb %r9b,%r14b
979 adcb %r8b,%r13b
980 adcb %dl,%r12b
981 adcb %cl,%r11b
982
983 xorb %r9b,%r14b
984 xorb %r8b,%r13b
985 xorb %dl,%r12b
986 xorb %cl,%r11b
987
# Low / high nibble of each byte -> write mask for each register of a pair.
988 kmovb %r14d,%k1
989 shrb $4,%r14b
990 kmovb %r14d,%k2
991 kmovb %r13d,%k3
992 shrb $4,%r13b
993 kmovb %r13d,%k4
994 kmovb %r12d,%k5
995 shrb $4,%r12b
996 kmovb %r12d,%k6
997 kmovb %r11d,%k7
998
# Masked subtract of 2^52-1 followed by AND = "add 1 modulo 2^52".
999 vpsubq .Lmask52x4(%rip),%ymm3,%ymm3{%k1}
1000 vpsubq .Lmask52x4(%rip),%ymm4,%ymm4{%k2}
1001 vpsubq .Lmask52x4(%rip),%ymm5,%ymm5{%k3}
1002 vpsubq .Lmask52x4(%rip),%ymm6,%ymm6{%k4}
1003 vpsubq .Lmask52x4(%rip),%ymm7,%ymm7{%k5}
1004 vpsubq .Lmask52x4(%rip),%ymm8,%ymm8{%k6}
1005 vpsubq .Lmask52x4(%rip),%ymm9,%ymm9{%k7}
1006
1007 vpandq .Lmask52x4(%rip),%ymm3,%ymm3
1008 vpandq .Lmask52x4(%rip),%ymm4,%ymm4
1009 vpandq .Lmask52x4(%rip),%ymm5,%ymm5
1010 vpandq .Lmask52x4(%rip),%ymm6,%ymm6
1011 vpandq .Lmask52x4(%rip),%ymm7,%ymm7
1012 vpandq .Lmask52x4(%rip),%ymm8,%ymm8
1013 vpandq .Lmask52x4(%rip),%ymm9,%ymm9
1014
# Eighth register handled separately by reusing %k1.
1015 shrb $4,%r11b
1016 kmovb %r11d,%k1
1017
1018 vpsubq .Lmask52x4(%rip),%ymm10,%ymm10{%k1}
1019
1020 vpandq .Lmask52x4(%rip),%ymm10,%ymm10
1021
# ---- normalisation of the second result (%ymm11..%ymm18): the same
# sequence as above, starting from the scalar word %r15 ----
1022 vpbroadcastq %r15,%ymm0
1023 vpblendd $3,%ymm0,%ymm11,%ymm11
1024
1025
1026
1027 vpsrlq $52,%ymm11,%ymm0
1028 vpsrlq $52,%ymm12,%ymm1
1029 vpsrlq $52,%ymm13,%ymm2
1030 vpsrlq $52,%ymm14,%ymm19
1031 vpsrlq $52,%ymm15,%ymm20
1032 vpsrlq $52,%ymm16,%ymm21
1033 vpsrlq $52,%ymm17,%ymm22
1034 vpsrlq $52,%ymm18,%ymm23
1035
1036
1037 valignq $3,%ymm22,%ymm23,%ymm23
1038 valignq $3,%ymm21,%ymm22,%ymm22
1039 valignq $3,%ymm20,%ymm21,%ymm21
1040 valignq $3,%ymm19,%ymm20,%ymm20
1041 valignq $3,%ymm2,%ymm19,%ymm19
1042 valignq $3,%ymm1,%ymm2,%ymm2
1043 valignq $3,%ymm0,%ymm1,%ymm1
1044 valignq $3,.Lzeros(%rip),%ymm0,%ymm0
1045
1046
1047 vpandq .Lmask52x4(%rip),%ymm11,%ymm11
1048 vpandq .Lmask52x4(%rip),%ymm12,%ymm12
1049 vpandq .Lmask52x4(%rip),%ymm13,%ymm13
1050 vpandq .Lmask52x4(%rip),%ymm14,%ymm14
1051 vpandq .Lmask52x4(%rip),%ymm15,%ymm15
1052 vpandq .Lmask52x4(%rip),%ymm16,%ymm16
1053 vpandq .Lmask52x4(%rip),%ymm17,%ymm17
1054 vpandq .Lmask52x4(%rip),%ymm18,%ymm18
1055
1056
1057 vpaddq %ymm0,%ymm11,%ymm11
1058 vpaddq %ymm1,%ymm12,%ymm12
1059 vpaddq %ymm2,%ymm13,%ymm13
1060 vpaddq %ymm19,%ymm14,%ymm14
1061 vpaddq %ymm20,%ymm15,%ymm15
1062 vpaddq %ymm21,%ymm16,%ymm16
1063 vpaddq %ymm22,%ymm17,%ymm17
1064 vpaddq %ymm23,%ymm18,%ymm18
1065
1066
1067
1068 vpcmpuq $6,.Lmask52x4(%rip),%ymm11,%k1
1069 vpcmpuq $6,.Lmask52x4(%rip),%ymm12,%k2
1070 kmovb %k1,%r14d
1071 kmovb %k2,%r13d
1072 shlb $4,%r13b
1073 orb %r13b,%r14b
1074
1075 vpcmpuq $6,.Lmask52x4(%rip),%ymm13,%k1
1076 vpcmpuq $6,.Lmask52x4(%rip),%ymm14,%k2
1077 kmovb %k1,%r13d
1078 kmovb %k2,%r12d
1079 shlb $4,%r12b
1080 orb %r12b,%r13b
1081
1082 vpcmpuq $6,.Lmask52x4(%rip),%ymm15,%k1
1083 vpcmpuq $6,.Lmask52x4(%rip),%ymm16,%k2
1084 kmovb %k1,%r12d
1085 kmovb %k2,%r11d
1086 shlb $4,%r11b
1087 orb %r11b,%r12b
1088
1089 vpcmpuq $6,.Lmask52x4(%rip),%ymm17,%k1
1090 vpcmpuq $6,.Lmask52x4(%rip),%ymm18,%k2
1091 kmovb %k1,%r11d
1092 kmovb %k2,%r10d
1093 shlb $4,%r10b
1094 orb %r10b,%r11b
1095
1096 addb %r14b,%r14b
1097 adcb %r13b,%r13b
1098 adcb %r12b,%r12b
1099 adcb %r11b,%r11b
1100
1101
1102 vpcmpuq $0,.Lmask52x4(%rip),%ymm11,%k1
1103 vpcmpuq $0,.Lmask52x4(%rip),%ymm12,%k2
1104 kmovb %k1,%r9d
1105 kmovb %k2,%r8d
1106 shlb $4,%r8b
1107 orb %r8b,%r9b
1108
1109 vpcmpuq $0,.Lmask52x4(%rip),%ymm13,%k1
1110 vpcmpuq $0,.Lmask52x4(%rip),%ymm14,%k2
1111 kmovb %k1,%r8d
1112 kmovb %k2,%edx
1113 shlb $4,%dl
1114 orb %dl,%r8b
1115
1116 vpcmpuq $0,.Lmask52x4(%rip),%ymm15,%k1
1117 vpcmpuq $0,.Lmask52x4(%rip),%ymm16,%k2
1118 kmovb %k1,%edx
1119 kmovb %k2,%ecx
1120 shlb $4,%cl
1121 orb %cl,%dl
1122
1123 vpcmpuq $0,.Lmask52x4(%rip),%ymm17,%k1
1124 vpcmpuq $0,.Lmask52x4(%rip),%ymm18,%k2
1125 kmovb %k1,%ecx
1126 kmovb %k2,%ebx
1127 shlb $4,%bl
1128 orb %bl,%cl
1129
1130 addb %r9b,%r14b
1131 adcb %r8b,%r13b
1132 adcb %dl,%r12b
1133 adcb %cl,%r11b
1134
1135 xorb %r9b,%r14b
1136 xorb %r8b,%r13b
1137 xorb %dl,%r12b
1138 xorb %cl,%r11b
1139
1140 kmovb %r14d,%k1
1141 shrb $4,%r14b
1142 kmovb %r14d,%k2
1143 kmovb %r13d,%k3
1144 shrb $4,%r13b
1145 kmovb %r13d,%k4
1146 kmovb %r12d,%k5
1147 shrb $4,%r12b
1148 kmovb %r12d,%k6
1149 kmovb %r11d,%k7
1150
1151 vpsubq .Lmask52x4(%rip),%ymm11,%ymm11{%k1}
1152 vpsubq .Lmask52x4(%rip),%ymm12,%ymm12{%k2}
1153 vpsubq .Lmask52x4(%rip),%ymm13,%ymm13{%k3}
1154 vpsubq .Lmask52x4(%rip),%ymm14,%ymm14{%k4}
1155 vpsubq .Lmask52x4(%rip),%ymm15,%ymm15{%k5}
1156 vpsubq .Lmask52x4(%rip),%ymm16,%ymm16{%k6}
1157 vpsubq .Lmask52x4(%rip),%ymm17,%ymm17{%k7}
1158
1159 vpandq .Lmask52x4(%rip),%ymm11,%ymm11
1160 vpandq .Lmask52x4(%rip),%ymm12,%ymm12
1161 vpandq .Lmask52x4(%rip),%ymm13,%ymm13
1162 vpandq .Lmask52x4(%rip),%ymm14,%ymm14
1163 vpandq .Lmask52x4(%rip),%ymm15,%ymm15
1164 vpandq .Lmask52x4(%rip),%ymm16,%ymm16
1165 vpandq .Lmask52x4(%rip),%ymm17,%ymm17
1166
1167 shrb $4,%r11b
1168 kmovb %r11d,%k1
1169
1170 vpsubq .Lmask52x4(%rip),%ymm18,%ymm18{%k1}
1171
1172 vpandq .Lmask52x4(%rip),%ymm18,%ymm18
1173
# Store the first result at 0..255(%rdi) ...
1174 vmovdqu64 %ymm3,0(%rdi)
1175 vmovdqu64 %ymm4,32(%rdi)
1176 vmovdqu64 %ymm5,64(%rdi)
1177 vmovdqu64 %ymm6,96(%rdi)
1178 vmovdqu64 %ymm7,128(%rdi)
1179 vmovdqu64 %ymm8,160(%rdi)
1180 vmovdqu64 %ymm9,192(%rdi)
1181 vmovdqu64 %ymm10,224(%rdi)
1182
# ... and the second result at 256..511(%rdi).
1183 vmovdqu64 %ymm11,256(%rdi)
1184 vmovdqu64 %ymm12,288(%rdi)
1185 vmovdqu64 %ymm13,320(%rdi)
1186 vmovdqu64 %ymm14,352(%rdi)
1187 vmovdqu64 %ymm15,384(%rdi)
1188 vmovdqu64 %ymm16,416(%rdi)
1189 vmovdqu64 %ymm17,448(%rdi)
1190 vmovdqu64 %ymm18,480(%rdi)
1191
# Epilogue: reload the callee-saved registers from the six pushed slots
# (in reverse push order) and release the 48 bytes.
1192 vzeroupper
1193 leaq (%rsp),%rax
1194.cfi_def_cfa_register %rax
1195 movq 0(%rax),%r15
1196.cfi_restore %r15
1197 movq 8(%rax),%r14
1198.cfi_restore %r14
1199 movq 16(%rax),%r13
1200.cfi_restore %r13
1201 movq 24(%rax),%r12
1202.cfi_restore %r12
1203 movq 32(%rax),%rbp
1204.cfi_restore %rbp
1205 movq 40(%rax),%rbx
1206.cfi_restore %rbx
1207 leaq 48(%rax),%rsp
1208.cfi_def_cfa %rsp,8
1209.Lossl_rsaz_amm52x30_x2_ifma256_epilogue:
# Bytes f3 c3 encode "rep ret".
1210 .byte 0xf3,0xc3
1211.cfi_endproc
1212.size ossl_rsaz_amm52x30_x2_ifma256, .-ossl_rsaz_amm52x30_x2_ifma256
# ossl_extract_multiplier_2x30_win5
#
# Copies one 512-byte table entry to the output, choosing the first 256
# bytes by one index and the second 256 bytes by another. The loop loads
# every one of the 32 entries (16384 / 512) and keeps the matching one with
# masked blends, so the addresses read do not depend on the indices.
#
# Arguments as they are used below (System V AMD64 register order):
#   %rdi  output, 512 bytes
#   %rsi  table base, 32 entries x 512 bytes
#   %rdx  index selecting the first half  (compared against the counter)
#   %rcx  index selecting the second half (compared against the counter)
#
# Leaf routine: uses no stack and no callee-saved registers.
# NOTE(review): if an index is outside 0..31 no entry matches and the
# corresponding output half stays zero - confirm callers never rely on that.
1213.text
1214
1215.align 32
1216.globl ossl_extract_multiplier_2x30_win5
1217.type ossl_extract_multiplier_2x30_win5,@function
1218ossl_extract_multiplier_2x30_win5:
1219.cfi_startproc
# Bytes f3 0f 1e fa encode endbr64.
1220.byte 243,15,30,250
# %ymm30 = {1,1,1,1} (counter increment), %ymm28 / %ymm29 = the two
# indices broadcast to all lanes, %rax = end of table.
1221 vmovdqa64 .Lones(%rip),%ymm30
1222 vpbroadcastq %rdx,%ymm28
1223 vpbroadcastq %rcx,%ymm29
1224 leaq 16384(%rsi),%rax
1225
1226
# Zero the entry counter %ymm27 and the sixteen result registers:
# %ymm0..%ymm5,%ymm16,%ymm17 (first half), %ymm18..%ymm25 (second half).
1227 vpxor %xmm0,%xmm0,%xmm0
1228 vmovdqa64 %ymm0,%ymm27
1229 vmovdqa64 %ymm0,%ymm1
1230 vmovdqa64 %ymm0,%ymm2
1231 vmovdqa64 %ymm0,%ymm3
1232 vmovdqa64 %ymm0,%ymm4
1233 vmovdqa64 %ymm0,%ymm5
1234 vmovdqa64 %ymm0,%ymm16
1235 vmovdqa64 %ymm0,%ymm17
1236 vmovdqa64 %ymm0,%ymm18
1237 vmovdqa64 %ymm0,%ymm19
1238 vmovdqa64 %ymm0,%ymm20
1239 vmovdqa64 %ymm0,%ymm21
1240 vmovdqa64 %ymm0,%ymm22
1241 vmovdqa64 %ymm0,%ymm23
1242 vmovdqa64 %ymm0,%ymm24
1243 vmovdqa64 %ymm0,%ymm25
1244
1245.align 32
1246.Lloop:
# %k1 / %k2 = all-ones when the counter equals the first / second index
# (predicate 0 = equal), all-zeros otherwise.
1247 vpcmpq $0,%ymm27,%ymm28,%k1
1248 vpcmpq $0,%ymm27,%ymm29,%k2
# For each 32-byte chunk: load it into %ymm26, then take it where the
# mask is set and keep the previous register contents elsewhere.
1249 vmovdqu64 0(%rsi),%ymm26
1250 vpblendmq %ymm26,%ymm0,%ymm0{%k1}
1251 vmovdqu64 32(%rsi),%ymm26
1252 vpblendmq %ymm26,%ymm1,%ymm1{%k1}
1253 vmovdqu64 64(%rsi),%ymm26
1254 vpblendmq %ymm26,%ymm2,%ymm2{%k1}
1255 vmovdqu64 96(%rsi),%ymm26
1256 vpblendmq %ymm26,%ymm3,%ymm3{%k1}
1257 vmovdqu64 128(%rsi),%ymm26
1258 vpblendmq %ymm26,%ymm4,%ymm4{%k1}
1259 vmovdqu64 160(%rsi),%ymm26
1260 vpblendmq %ymm26,%ymm5,%ymm5{%k1}
1261 vmovdqu64 192(%rsi),%ymm26
1262 vpblendmq %ymm26,%ymm16,%ymm16{%k1}
1263 vmovdqu64 224(%rsi),%ymm26
1264 vpblendmq %ymm26,%ymm17,%ymm17{%k1}
# Second half of the entry, selected by the second index.
1265 vmovdqu64 256(%rsi),%ymm26
1266 vpblendmq %ymm26,%ymm18,%ymm18{%k2}
1267 vmovdqu64 288(%rsi),%ymm26
1268 vpblendmq %ymm26,%ymm19,%ymm19{%k2}
1269 vmovdqu64 320(%rsi),%ymm26
1270 vpblendmq %ymm26,%ymm20,%ymm20{%k2}
1271 vmovdqu64 352(%rsi),%ymm26
1272 vpblendmq %ymm26,%ymm21,%ymm21{%k2}
1273 vmovdqu64 384(%rsi),%ymm26
1274 vpblendmq %ymm26,%ymm22,%ymm22{%k2}
1275 vmovdqu64 416(%rsi),%ymm26
1276 vpblendmq %ymm26,%ymm23,%ymm23{%k2}
1277 vmovdqu64 448(%rsi),%ymm26
1278 vpblendmq %ymm26,%ymm24,%ymm24{%k2}
1279 vmovdqu64 480(%rsi),%ymm26
1280 vpblendmq %ymm26,%ymm25,%ymm25{%k2}
# Next entry: counter += 1, table pointer += 512; stop at the table end.
1281 vpaddq %ymm30,%ymm27,%ymm27
1282 addq $512,%rsi
1283 cmpq %rsi,%rax
1284 jne .Lloop
# Write the selected entry (512 bytes) to the output.
1285 vmovdqu64 %ymm0,0(%rdi)
1286 vmovdqu64 %ymm1,32(%rdi)
1287 vmovdqu64 %ymm2,64(%rdi)
1288 vmovdqu64 %ymm3,96(%rdi)
1289 vmovdqu64 %ymm4,128(%rdi)
1290 vmovdqu64 %ymm5,160(%rdi)
1291 vmovdqu64 %ymm16,192(%rdi)
1292 vmovdqu64 %ymm17,224(%rdi)
1293 vmovdqu64 %ymm18,256(%rdi)
1294 vmovdqu64 %ymm19,288(%rdi)
1295 vmovdqu64 %ymm20,320(%rdi)
1296 vmovdqu64 %ymm21,352(%rdi)
1297 vmovdqu64 %ymm22,384(%rdi)
1298 vmovdqu64 %ymm23,416(%rdi)
1299 vmovdqu64 %ymm24,448(%rdi)
1300 vmovdqu64 %ymm25,480(%rdi)
1301
# Bytes f3 c3 encode "rep ret".
1302 .byte 0xf3,0xc3
1303.cfi_endproc
1304.size ossl_extract_multiplier_2x30_win5, .-ossl_extract_multiplier_2x30_win5
# Vector constants, placed in the read-only data section because they are
# never written:
#   .Lones   {1,1,1,1}  entry-counter increment in
#                       ossl_extract_multiplier_2x30_win5; it is loaded with
#                       the aligned vmovdqa64, hence the 32-byte alignment.
#   .Lzeros  {0,0,0,0}  zero source for the lowest lane when the amm52x30
#                       routines shift their carries up by one lane.
1305.section .rodata
1306.align 32
1307.Lones:
1308.quad 1,1,1,1
1309.Lzeros:
1310.quad 0,0,0,0
# ELF note describing properties of this object file. Layout: a note
# header (name size, descriptor size, type), the name, then one property
# record (type, data size, data), each part padded to 8 bytes.
1311 .section ".note.gnu.property", "a"
1312 .p2align 3
# Name size = distance between labels 0 and 1 (the "GNU\0" string).
1313 .long 1f - 0f
# Descriptor size = distance between labels 1 and 4.
1314 .long 4f - 1f
# Note type 5 (NT_GNU_PROPERTY_TYPE_0 in the GNU property note format).
1315 .long 5
13160:
1317 # "GNU" encoded with .byte, since .asciz isn't supported
1318 # on Solaris.
1319 .byte 0x47
1320 .byte 0x4e
1321 .byte 0x55
1322 .byte 0
13231:
1324 .p2align 3
# Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND).
1325 .long 0xc0000002
# Property data size = distance between labels 2 and 3 (4 bytes).
1326 .long 3f - 2f
13272:
# Feature bits: 3 = bit 0 | bit 1 (IBT and SHSTK in the x86 psABI).
1328 .long 3
13293:
1330 .p2align 3
13314:
注意: 請參閱 TracBrowser 以瞭解如何使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette