keccak1600-x86_64.pl@ 99507

最後變更在這個檔案從99507是 99366,由 vboxsync 提交於 2 年前
openssl-3.1.0: Applied and adjusted our OpenSSL changes to 3.0.7. bugref:10418
檔案大小: 14.1 KB

行
1	#!/usr/bin/env perl
2	# Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved.
3	#
4	# Licensed under the Apache License 2.0 (the "License"). You may not use
5	# this file except in compliance with the License. You can obtain a copy
6	# in the file LICENSE in the source distribution or at
7	# https://www.openssl.org/source/license.html
8	#
9	# ====================================================================
10	# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11	# project. The module is, however, dual licensed under OpenSSL and
12	# CRYPTOGAMS licenses depending on where you obtain it. For further
13	# details see http://www.openssl.org/~appro/cryptogams/.
14	# ====================================================================
15	#
16	# Keccak-1600 for x86_64.
17	#
18	# June 2017.
19	#
20	# Below code is [lane complementing] KECCAK_2X implementation (see
21	# sha/keccak1600.c) with C[5] and D[5] held in register bank. Though
22	# instead of actually unrolling the loop pair-wise I simply flip
23	# pointers to T[][] and A[][] at the end of round. Since number of
24	# rounds is even, last round writes to A[][] and everything works out.
25	# How does it compare to x86_64 assembly module in Keccak Code Package?
26	# Depending on processor it's either as fast or faster by up to 15%...
27	#
28	########################################################################
29	# Numbers are cycles per processed byte out of large message.
30	#
31	# r=1088(*)
32	#
33	# P4 25.8
34	# Core 2 12.9
35	# Westmere 13.7
36	# Sandy Bridge 12.9(**)
37	# Haswell 9.6
38	# Skylake 9.4
39	# Silvermont 22.8
40	# Goldmont 15.8
41	# VIA Nano 17.3
42	# Sledgehammer 13.3
43	# Bulldozer 16.5
44	# Ryzen 8.8
45	#
46	# (*) Corresponds to SHA3-256. Improvement over compiler-generated code
47	# varies a lot, most common coefficient is 15% in comparison to
48	# gcc-5.x, 50% for gcc-4.x, 90% for gcc-3.x.
49	# (**) Sandy Bridge has broken rotate instruction. Performance can be
50	# improved by 14% by replacing rotates with double-precision
51	# shift with same register as source and destination.
52
53	# $output is the last argument if it looks like a file (it has an extension)
54	# $flavour is the first argument if it doesn't look like a file
55	$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
56	$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
57	# $win64 is set for the nasm/masm/mingw64 flavours or when the output file ends in .asm
58	$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
59	# Look for x86_64-xlate.pl beside this script, then in ../../perlasm/ relative to it
60	$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
61	( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
62	( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
63	die "can't locate x86_64-xlate.pl";
64	# Pipe everything printed to STDOUT through the translator, run with the current perl ($^X)
65	open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
66	or die "can't call $xlate: $!";
67	*STDOUT=*OUT;
68
69	my @A = map([ 8*$_-100, 8*($_+1)-100, 8*($_+2)-100,	# $A[i][j] = 8*(5*i+j)-100: byte offset of a lane,
70	8*($_+3)-100, 8*($_+4)-100 ], (0,5,10,15,20));	# biased by -100 to match the "lea 100(...)" pointers (range -100..92)
71
72	my @C = ("%rax","%rbx","%rcx","%rdx","%rbp");	# registers holding C[0..4]
73	my @D = map("%r$_",(8..12));	# registers holding D[0..4] (%r8..%r12)
74	my @T = map("%r$_",(13..14));	# two temporaries (%r13, %r14)
75	my $iotas = "%r15";	# pointer into the iotas round-constant table
76
77	my @rhotates = ([ 0, 1, 62, 28, 27 ],	# per-lane rotate-left counts used by the rol
78	[ 36, 44, 6, 55, 20 ],	# instructions below, indexed the same way as @A
79	[ 3, 10, 43, 25, 39 ],
80	[ 41, 45, 15, 21, 8 ],
81	[ 18, 2, 61, 56, 14 ]);
82
83	$code.=<<___;	# __KeccakF1600 entry, then the head of each round: xor the columns into C[] and derive the D[] values
84	.text
85
86	.type __KeccakF1600,\@abi-omnipotent
87	.align 32
88	__KeccakF1600:
89	.cfi_startproc
90	mov $A[4][0](%rdi),@C[0]
91	mov $A[4][1](%rdi),@C[1]
92	mov $A[4][2](%rdi),@C[2]
93	mov $A[4][3](%rdi),@C[3]
94	mov $A[4][4](%rdi),@C[4]
95	jmp .Loop
96
97	.align 32
98	.Loop:
99	mov $A[0][0](%rdi),@D[0]
100	mov $A[1][1](%rdi),@D[1]
101	mov $A[2][2](%rdi),@D[2]
102	mov $A[3][3](%rdi),@D[3]
103
104	xor $A[0][2](%rdi),@C[2]
105	xor $A[0][3](%rdi),@C[3]
106	xor @D[0], @C[0]
107	xor $A[0][1](%rdi),@C[1]
108	xor $A[1][2](%rdi),@C[2]
109	xor $A[1][0](%rdi),@C[0]
110	mov @C[4],@D[4]
111	xor $A[0][4](%rdi),@C[4]
112
113	xor @D[2], @C[2]
114	xor $A[2][0](%rdi),@C[0]
115	xor $A[1][3](%rdi),@C[3]
116	xor @D[1], @C[1]
117	xor $A[1][4](%rdi),@C[4]
118
119	xor $A[3][2](%rdi),@C[2]
120	xor $A[3][0](%rdi),@C[0]
121	xor $A[2][3](%rdi),@C[3]
122	xor $A[2][1](%rdi),@C[1]
123	xor $A[2][4](%rdi),@C[4]
124
125	mov @C[2],@T[0]
126	rol \$1,@C[2]
127	xor @C[0],@C[2] # D[1] = ROL64(C[2], 1) ^ C[0]
128	xor @D[3], @C[3]
129
130	rol \$1,@C[0]
131	xor @C[3],@C[0] # D[4] = ROL64(C[0], 1) ^ C[3]
132	xor $A[3][1](%rdi),@C[1]
133
134	rol \$1,@C[3]
135	xor @C[1],@C[3] # D[2] = ROL64(C[3], 1) ^ C[1]
136	xor $A[3][4](%rdi),@C[4]
137
138	rol \$1,@C[1]
139	xor @C[4],@C[1] # D[0] = ROL64(C[1], 1) ^ C[4]
140
141	rol \$1,@C[4]
142	xor @T[0],@C[4] # D[3] = ROL64(C[4], 1) ^ C[2]
143	___
144	(@D[0..4], @C) = (@C[1..4,0], @D);	# generation-time rename: registers now holding D[0..4] become @D; the old @D (A[0][0], A[1][1], A[2][2], A[3][3], A[4][4]) becomes @C
145	$code.=<<___;	# output rows R[0..3]: xor in D[], rotate by \@rhotates, combine, store via %rsi; then begin row 4 and swap %rsi/%rdi
146	xor @D[1],@C[1]
147	xor @D[2],@C[2]
148	rol \$$rhotates[1][1],@C[1]
149	xor @D[3],@C[3]
150	xor @D[4],@C[4]
151	rol \$$rhotates[2][2],@C[2]
152	xor @D[0],@C[0]
153	mov @C[1],@T[0]
154	rol \$$rhotates[3][3],@C[3]
155	or @C[2],@C[1]
156	xor @C[0],@C[1] # C[0] ^ ( C[1] \| C[2])
157	rol \$$rhotates[4][4],@C[4]
158
159	xor ($iotas),@C[1]
160	lea 8($iotas),$iotas
161
162	mov @C[4],@T[1]
163	and @C[3],@C[4]
164	mov @C[1],$A[0][0](%rsi) # R[0][0] = C[0] ^ ( C[1] \| C[2]) ^ iotas[i]
165	xor @C[2],@C[4] # C[2] ^ ( C[4] & C[3])
166	not @C[2]
167	mov @C[4],$A[0][2](%rsi) # R[0][2] = C[2] ^ ( C[4] & C[3])
168
169	or @C[3],@C[2]
170	mov $A[4][2](%rdi),@C[4]
171	xor @T[0],@C[2] # C[1] ^ (~C[2] \| C[3])
172	mov @C[2],$A[0][1](%rsi) # R[0][1] = C[1] ^ (~C[2] \| C[3])
173
174	and @C[0],@T[0]
175	mov $A[1][4](%rdi),@C[1]
176	xor @T[1],@T[0] # C[4] ^ ( C[1] & C[0])
177	mov $A[2][0](%rdi),@C[2]
178	mov @T[0],$A[0][4](%rsi) # R[0][4] = C[4] ^ ( C[1] & C[0])
179
180	or @C[0],@T[1]
181	mov $A[0][3](%rdi),@C[0]
182	xor @C[3],@T[1] # C[3] ^ ( C[4] \| C[0])
183	mov $A[3][1](%rdi),@C[3]
184	mov @T[1],$A[0][3](%rsi) # R[0][3] = C[3] ^ ( C[4] \| C[0])
185
186
187	xor @D[3],@C[0]
188	xor @D[2],@C[4]
189	rol \$$rhotates[0][3],@C[0]
190	xor @D[1],@C[3]
191	xor @D[4],@C[1]
192	rol \$$rhotates[4][2],@C[4]
193	rol \$$rhotates[3][1],@C[3]
194	xor @D[0],@C[2]
195	rol \$$rhotates[1][4],@C[1]
196	mov @C[0],@T[0]
197	or @C[4],@C[0]
198	rol \$$rhotates[2][0],@C[2]
199
200	xor @C[3],@C[0] # C[3] ^ (C[0] \| C[4])
201	mov @C[0],$A[1][3](%rsi) # R[1][3] = C[3] ^ (C[0] \| C[4])
202
203	mov @C[1],@T[1]
204	and @T[0],@C[1]
205	mov $A[0][1](%rdi),@C[0]
206	xor @C[4],@C[1] # C[4] ^ (C[1] & C[0])
207	not @C[4]
208	mov @C[1],$A[1][4](%rsi) # R[1][4] = C[4] ^ (C[1] & C[0])
209
210	or @C[3],@C[4]
211	mov $A[1][2](%rdi),@C[1]
212	xor @C[2],@C[4] # C[2] ^ (~C[4] \| C[3])
213	mov @C[4],$A[1][2](%rsi) # R[1][2] = C[2] ^ (~C[4] \| C[3])
214
215	and @C[2],@C[3]
216	mov $A[4][0](%rdi),@C[4]
217	xor @T[1],@C[3] # C[1] ^ (C[3] & C[2])
218	mov @C[3],$A[1][1](%rsi) # R[1][1] = C[1] ^ (C[3] & C[2])
219
220	or @C[2],@T[1]
221	mov $A[2][3](%rdi),@C[2]
222	xor @T[0],@T[1] # C[0] ^ (C[1] \| C[2])
223	mov $A[3][4](%rdi),@C[3]
224	mov @T[1],$A[1][0](%rsi) # R[1][0] = C[0] ^ (C[1] \| C[2])
225
226
227	xor @D[3],@C[2]
228	xor @D[4],@C[3]
229	rol \$$rhotates[2][3],@C[2]
230	xor @D[2],@C[1]
231	rol \$$rhotates[3][4],@C[3]
232	xor @D[0],@C[4]
233	rol \$$rhotates[1][2],@C[1]
234	xor @D[1],@C[0]
235	rol \$$rhotates[4][0],@C[4]
236	mov @C[2],@T[0]
237	and @C[3],@C[2]
238	rol \$$rhotates[0][1],@C[0]
239
240	not @C[3]
241	xor @C[1],@C[2] # C[1] ^ ( C[2] & C[3])
242	mov @C[2],$A[2][1](%rsi) # R[2][1] = C[1] ^ ( C[2] & C[3])
243
244	mov @C[4],@T[1]
245	and @C[3],@C[4]
246	mov $A[2][1](%rdi),@C[2]
247	xor @T[0],@C[4] # C[2] ^ ( C[4] & ~C[3])
248	mov @C[4],$A[2][2](%rsi) # R[2][2] = C[2] ^ ( C[4] & ~C[3])
249
250	or @C[1],@T[0]
251	mov $A[4][3](%rdi),@C[4]
252	xor @C[0],@T[0] # C[0] ^ ( C[2] \| C[1])
253	mov @T[0],$A[2][0](%rsi) # R[2][0] = C[0] ^ ( C[2] \| C[1])
254
255	and @C[0],@C[1]
256	xor @T[1],@C[1] # C[4] ^ ( C[1] & C[0])
257	mov @C[1],$A[2][4](%rsi) # R[2][4] = C[4] ^ ( C[1] & C[0])
258
259	or @C[0],@T[1]
260	mov $A[1][0](%rdi),@C[1]
261	xor @C[3],@T[1] # ~C[3] ^ ( C[0] \| C[4])
262	mov $A[3][2](%rdi),@C[3]
263	mov @T[1],$A[2][3](%rsi) # R[2][3] = ~C[3] ^ ( C[0] \| C[4])
264
265
266	mov $A[0][4](%rdi),@C[0]
267
268	xor @D[1],@C[2]
269	xor @D[2],@C[3]
270	rol \$$rhotates[2][1],@C[2]
271	xor @D[0],@C[1]
272	rol \$$rhotates[3][2],@C[3]
273	xor @D[3],@C[4]
274	rol \$$rhotates[1][0],@C[1]
275	xor @D[4],@C[0]
276	rol \$$rhotates[4][3],@C[4]
277	mov @C[2],@T[0]
278	or @C[3],@C[2]
279	rol \$$rhotates[0][4],@C[0]
280
281	not @C[3]
282	xor @C[1],@C[2] # C[1] ^ ( C[2] \| C[3])
283	mov @C[2],$A[3][1](%rsi) # R[3][1] = C[1] ^ ( C[2] \| C[3])
284
285	mov @C[4],@T[1]
286	or @C[3],@C[4]
287	xor @T[0],@C[4] # C[2] ^ ( C[4] \| ~C[3])
288	mov @C[4],$A[3][2](%rsi) # R[3][2] = C[2] ^ ( C[4] \| ~C[3])
289
290	and @C[1],@T[0]
291	xor @C[0],@T[0] # C[0] ^ ( C[2] & C[1])
292	mov @T[0],$A[3][0](%rsi) # R[3][0] = C[0] ^ ( C[2] & C[1])
293
294	or @C[0],@C[1]
295	xor @T[1],@C[1] # C[4] ^ ( C[1] \| C[0])
296	mov @C[1],$A[3][4](%rsi) # R[3][4] = C[4] ^ ( C[1] \| C[0])
297
298	and @T[1],@C[0]
299	xor @C[3],@C[0] # ~C[3] ^ ( C[0] & C[4])
300	mov @C[0],$A[3][3](%rsi) # R[3][3] = ~C[3] ^ ( C[0] & C[4])
301
302
303	xor $A[0][2](%rdi),@D[2]
304	xor $A[1][3](%rdi),@D[3]
305	rol \$$rhotates[0][2],@D[2]
306	xor $A[4][1](%rdi),@D[1]
307	rol \$$rhotates[1][3],@D[3]
308	xor $A[2][4](%rdi),@D[4]
309	rol \$$rhotates[4][1],@D[1]
310	xor $A[3][0](%rdi),@D[0]
311	xchg %rsi,%rdi
312	rol \$$rhotates[2][4],@D[4]
313	rol \$$rhotates[3][0],@D[0]
314	___
315	@C = @D[2..4,0,1];	# generation-time rename: the @D registers, now holding the rotated row-4 inputs, are addressed as C[0..4]
316	$code.=<<___;	# output row R[4], loop control for __KeccakF1600, then the KeccakF1600 wrapper that saves registers and provides the stack buffer
317	mov @C[0],@T[0]
318	and @C[1],@C[0]
319	not @C[1]
320	xor @C[4],@C[0] # C[4] ^ ( C[0] & C[1])
321	mov @C[0],$A[4][4](%rdi) # R[4][4] = C[4] ^ ( C[0] & C[1])
322
323	mov @C[2],@T[1]
324	and @C[1],@C[2]
325	xor @T[0],@C[2] # C[0] ^ ( C[2] & ~C[1])
326	mov @C[2],$A[4][0](%rdi) # R[4][0] = C[0] ^ ( C[2] & ~C[1])
327
328	or @C[4],@T[0]
329	xor @C[3],@T[0] # C[3] ^ ( C[0] \| C[4])
330	mov @T[0],$A[4][3](%rdi) # R[4][3] = C[3] ^ ( C[0] \| C[4])
331
332	and @C[3],@C[4]
333	xor @T[1],@C[4] # C[2] ^ ( C[4] & C[3])
334	mov @C[4],$A[4][2](%rdi) # R[4][2] = C[2] ^ ( C[4] & C[3])
335
336	or @T[1],@C[3]
337	xor @C[1],@C[3] # ~C[1] ^ ( C[2] \| C[3])
338	mov @C[3],$A[4][1](%rdi) # R[4][1] = ~C[1] ^ ( C[2] \| C[3])
339
340	mov @C[0],@C[1] # harmonize with the loop top
341	mov @T[0],@C[0]
342
343	test \$255,$iotas
344	jnz .Loop
345
346	lea -192($iotas),$iotas # rewind iotas
347	ret
348	.cfi_endproc
349	.size __KeccakF1600,.-__KeccakF1600
350
351	.type KeccakF1600,\@abi-omnipotent
352	.align 32
353	KeccakF1600:
354	.cfi_startproc
355	push %rbx
356	.cfi_push %rbx
357	push %rbp
358	.cfi_push %rbp
359	push %r12
360	.cfi_push %r12
361	push %r13
362	.cfi_push %r13
363	push %r14
364	.cfi_push %r14
365	push %r15
366	.cfi_push %r15
367
368	lea 100(%rdi),%rdi # size optimization
369	sub \$200,%rsp
370	.cfi_adjust_cfa_offset 200
371
372	notq $A[0][1](%rdi)
373	notq $A[0][2](%rdi)
374	notq $A[1][3](%rdi)
375	notq $A[2][2](%rdi)
376	notq $A[3][2](%rdi)
377	notq $A[4][0](%rdi)
378
379	lea iotas(%rip),$iotas
380	lea 100(%rsp),%rsi # size optimization
381
382	call __KeccakF1600
383
384	notq $A[0][1](%rdi)
385	notq $A[0][2](%rdi)
386	notq $A[1][3](%rdi)
387	notq $A[2][2](%rdi)
388	notq $A[3][2](%rdi)
389	notq $A[4][0](%rdi)
390	lea -100(%rdi),%rdi # preserve A[][]
391
392	add \$200,%rsp
393	.cfi_adjust_cfa_offset -200
394
395	pop %r15
396	.cfi_pop %r15
397	pop %r14
398	.cfi_pop %r14
399	pop %r13
400	.cfi_pop %r13
401	pop %r12
402	.cfi_pop %r12
403	pop %rbp
404	.cfi_pop %rbp
405	pop %rbx
406	.cfi_pop %rbx
407	ret
408	.cfi_endproc
409	.size KeccakF1600,.-KeccakF1600
410	___
411
412	{ my ($A_flat,$inp,$len,$bsz) = ("%rdi","%rsi","%rdx","%rcx");	# the four arguments as the code below first sees them
413	($A_flat,$inp) = ("%r8","%r9");	# %rdi/%rsi serve as the biased state and stack-buffer pointers for __KeccakF1600, so the flat cursor and input pointer use %r8/%r9
414	$code.=<<___;	# SHA3_absorb: while len >= bsz, xor bsz bytes of input into the state and call __KeccakF1600; returns the leftover len in %rax
415	.globl SHA3_absorb
416	.type SHA3_absorb,\@function,4
417	.align 32
418	SHA3_absorb:
419	.cfi_startproc
420	push %rbx
421	.cfi_push %rbx
422	push %rbp
423	.cfi_push %rbp
424	push %r12
425	.cfi_push %r12
426	push %r13
427	.cfi_push %r13
428	push %r14
429	.cfi_push %r14
430	push %r15
431	.cfi_push %r15
432
433	lea 100(%rdi),%rdi # size optimization
434	sub \$232,%rsp
435	.cfi_adjust_cfa_offset 232
436
437	mov %rsi,$inp
438	lea 100(%rsp),%rsi # size optimization
439
440	notq $A[0][1](%rdi)
441	notq $A[0][2](%rdi)
442	notq $A[1][3](%rdi)
443	notq $A[2][2](%rdi)
444	notq $A[3][2](%rdi)
445	notq $A[4][0](%rdi)
446	lea iotas(%rip),$iotas
447
448	mov $bsz,216-100(%rsi) # save bsz
449
450	.Loop_absorb:
451	cmp $bsz,$len
452	jc .Ldone_absorb
453
454	shr \$3,$bsz
455	lea -100(%rdi),$A_flat
456
457	.Lblock_absorb:
458	mov ($inp),%rax
459	lea 8($inp),$inp
460	xor ($A_flat),%rax
461	lea 8($A_flat),$A_flat
462	sub \$8,$len
463	mov %rax,-8($A_flat)
464	sub \$1,$bsz
465	jnz .Lblock_absorb
466
467	mov $inp,200-100(%rsi) # save inp
468	mov $len,208-100(%rsi) # save len
469	call __KeccakF1600
470	mov 200-100(%rsi),$inp # pull inp
471	mov 208-100(%rsi),$len # pull len
472	mov 216-100(%rsi),$bsz # pull bsz
473	jmp .Loop_absorb
474
475	.align 32
476	.Ldone_absorb:
477	mov $len,%rax # return value
478
479	notq $A[0][1](%rdi)
480	notq $A[0][2](%rdi)
481	notq $A[1][3](%rdi)
482	notq $A[2][2](%rdi)
483	notq $A[3][2](%rdi)
484	notq $A[4][0](%rdi)
485
486	add \$232,%rsp
487	.cfi_adjust_cfa_offset -232
488
489	pop %r15
490	.cfi_pop %r15
491	pop %r14
492	.cfi_pop %r14
493	pop %r13
494	.cfi_pop %r13
495	pop %r12
496	.cfi_pop %r12
497	pop %rbp
498	.cfi_pop %rbp
499	pop %rbx
500	.cfi_pop %rbx
501	ret
502	.cfi_endproc
503	.size SHA3_absorb,.-SHA3_absorb
504	___
505	}
506	{ my ($A_flat,$out,$len,$bsz) = ("%rdi","%rsi","%rdx","%rcx");	# the four arguments as the code below first sees them
507	($out,$len,$bsz) = ("%r12","%r13","%r14");	# copies kept in %r12-%r14, which KeccakF1600 pushes and pops, so they survive the call
508
509	$code.=<<___;	# SHA3_squeeze: copy len bytes of state to out, 8 bytes at a time, calling KeccakF1600 after every bsz bytes; a final <8-byte tail uses rep movsb
510	.globl SHA3_squeeze
511	.type SHA3_squeeze,\@function,4
512	.align 32
513	SHA3_squeeze:
514	.cfi_startproc
515	push %r12
516	.cfi_push %r12
517	push %r13
518	.cfi_push %r13
519	push %r14
520	.cfi_push %r14
521
522	shr \$3,%rcx
523	mov $A_flat,%r8
524	mov %rsi,$out
525	mov %rdx,$len
526	mov %rcx,$bsz
527	jmp .Loop_squeeze
528
529	.align 32
530	.Loop_squeeze:
531	cmp \$8,$len
532	jb .Ltail_squeeze
533
534	mov (%r8),%rax
535	lea 8(%r8),%r8
536	mov %rax,($out)
537	lea 8($out),$out
538	sub \$8,$len # len -= 8
539	jz .Ldone_squeeze
540
541	sub \$1,%rcx # bsz--
542	jnz .Loop_squeeze
543
544	call KeccakF1600
545	mov $A_flat,%r8
546	mov $bsz,%rcx
547	jmp .Loop_squeeze
548
549	.Ltail_squeeze:
550	mov %r8, %rsi
551	mov $out,%rdi
552	mov $len,%rcx
553	.byte 0xf3,0xa4 # rep movsb
554
555	.Ldone_squeeze:
556	pop %r14
557	.cfi_pop %r14
558	pop %r13
559	.cfi_pop %r13
560	pop %r12
561	.cfi_pop %r12
562	ret
563	.cfi_endproc
564	.size SHA3_squeeze,.-SHA3_squeeze
565	___
566	}
567	$code.=<<___;	# 24-entry round-constant table; 64 bytes of padding after the 256-byte alignment make the table end on a 256-byte boundary, which the "test $255,$iotas" loop exit relies on
568	.align 256
569	.quad 0,0,0,0,0,0,0,0
570	.type iotas,\@object
571	iotas:
572	.quad 0x0000000000000001
573	.quad 0x0000000000008082
574	.quad 0x800000000000808a
575	.quad 0x8000000080008000
576	.quad 0x000000000000808b
577	.quad 0x0000000080000001
578	.quad 0x8000000080008081
579	.quad 0x8000000000008009
580	.quad 0x000000000000008a
581	.quad 0x0000000000000088
582	.quad 0x0000000080008009
583	.quad 0x000000008000000a
584	.quad 0x000000008000808b
585	.quad 0x800000000000008b
586	.quad 0x8000000000008089
587	.quad 0x8000000000008003
588	.quad 0x8000000000008002
589	.quad 0x8000000000000080
590	.quad 0x000000000000800a
591	.quad 0x800000008000000a
592	.quad 0x8000000080008081
593	.quad 0x8000000000008080
594	.quad 0x0000000080000001
595	.quad 0x8000000080008008
596	.size iotas,.-iotas
597	.asciz "Keccak-1600 absorb and squeeze for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
598	___
599
600	for my $line (split("\n",$code)) {
601	# Disabled tuning: turning rol into shld gives 11.2 on Sandy Bridge and
602	# 9.4 on Haswell, but slows other processors down by a factor of 2-4.
603	#$line =~ s/rol\s+(\$[0-9]+),(%[a-z][a-z0-9]+)/shld\t$1,$2,$2/;
604	# Disabled tuning: turning rol into rorx gives 9.3 on Haswell and also
605	# 9.3 on Ryzen, which is a slowdown for Ryzen.
606	#$line =~ s/rol\s+\$([0-9]+),(%[a-z][a-z0-9]+)/rorx\t\$64-$1,$2,$2/;
607
608	print "$line\n";
609	}
610
611	close STDOUT or die "error closing STDOUT: $!";

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/openssl-3.1.0/crypto/sha/asm/keccak1600-x86_64.pl@ 99507

以其他格式下載: