sha1-ia64.pl@ 94081

最後變更在這個檔案從94081是 91772,由 vboxsync 提交於 3 年前
openssl-1.1.1l: Applied and adjusted our OpenSSL changes to 1.1.1l. bugref:10126
檔案大小: 8.9 KB

行
1	#! /usr/bin/env perl
2	# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
3	#
4	# Licensed under the OpenSSL license (the "License"). You may not use
5	# this file except in compliance with the License. You can obtain a copy
6	# in the file LICENSE in the source distribution or at
7	# https://www.openssl.org/source/license.html
8
9	#
10	# ====================================================================
11	# Written by Andy Polyakov <[email protected]> for the OpenSSL
12	# project. The module is, however, dual licensed under OpenSSL and
13	# CRYPTOGAMS licenses depending on where you obtain it. For further
14	# details see http://www.openssl.org/~appro/cryptogams/.
15	# ====================================================================
16	#
17	# Eternal question is what's wrong with compiler generated code? The
18	# trick is that it's possible to reduce the number of shifts required
19	# to perform rotations by maintaining copy of 32-bit value in upper
20	# bits of 64-bit register. Just follow mux2 and shrp instructions...
21	# Performance under big-endian OS such as HP-UX is 179MBps*1GHz, which
22	# is >50% better than HP C and >2x better than gcc.
23
# The last command-line argument names the output file; the arguments
# before it stay in @ARGV and are inspected by the HP-UX check below.
$output = pop @ARGV;

# Open the generated assembly with its identification directives.
$code = <<___;
.ident \"sha1-ia64.s, version 1.3\"
.ident \"IA-64 ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>\"
.explicit

___

# Mnemonic used for the pointer adjustments in the function prologue.
# "addp4" is chosen only on HP-UX, and only when none of the remaining
# command-line arguments matches the pattern; everywhere else it is "add".
# NOTE(review): the pattern is a bracketed character class, so it matches
# any ONE of the listed characters followed by "64", not the literal
# alternatives "+DD" / "-mlp" -- confirm that this is intended.
$ADDP = "add";
if ($^O eq "hpux" && !grep { /[\+DD\|\-mlp]64/ } @ARGV) {
    $ADDP = "addp4";
}

# Names substituted into the assembly templates. Uncomment the next line
# to get symbolic names instead of registers (useful for visual code
# auditing).
#$human=1;
if (!$human) {
    # Register assignment used for the generated code.
    ($A,  $B,  $C,  $D,  $E)  = map { "loc$_" } 0 .. 4;
    ($h0, $h1, $h2, $h3, $h4) = map { "loc$_" } 5 .. 9;
    ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
        ("r14", "r15", "loc10", "loc11");
    @X = map { "r$_" } 16 .. 31;
}
else {
    # Symbolic placeholders.
    ($A,  $B,  $C,  $D,  $E)  = qw(A B C D E);
    ($h0, $h1, $h2, $h3, $h4) = qw(h0 h1 h2 h3 h4);
    ($K_00_19, $K_20_39, $K_40_59, $K_60_79) =
        qw(K_00_19 K_20_39 K_40_59 K_60_79);
    @X = map { "X$_" } 0 .. 15;
}
56
# BODY_00_15(\$code, $i, $a, $b, $c, $d, $e)
#
# Appends the assembly for SHA-1 round $i (0..15) to the string referenced
# by the first argument.
#
#   \$code           reference to the output string (aliased as $code)
#   $i               round index
#   $a,$b,$c,$d,$e   names of the registers holding the five working
#                    variables in their roles for this round
#
# Three templates are involved:
#   * $i == 0 additionally emits the byte loads that start assembling X[0];
#   * $i < 15 emits the round and, interleaved with it, the byte loads for
#     the next message word ("forward Xload");
#   * $i == 15 emits the round and the first schedule expansion
#     ("forward Xupdate") for the word used by round 16.
#
# The templates read the globals @X and $K_00_19 and refer to the assembler
# symbols tmp0..tmp6, inp that are defined in the function prologue.
sub BODY_00_15 {
    local *code=shift;      # make $code an alias for the caller's string
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    # Register of the NEXT message word; a scalar element, not a slice.
    my $Xn=$X[$j%16];

    $code.=<<___ if ($i==0);
{ .mmi; ld1 $X[$i]=[inp],2 // MSB
ld1 tmp2=[tmp3],2 };;
{ .mmi; ld1 tmp0=[inp],2
ld1 tmp4=[tmp3],2 // LSB
dep $X[$i]=$X[$i],tmp2,8,8 };;
___
    if ($i<15) {
        $code.=<<___;
{ .mmi; ld1 $Xn=[inp],2 // forward Xload
nop.m 0x0
dep tmp1=tmp0,tmp4,8,8 };;
{ .mmi; ld1 tmp2=[tmp3],2 // forward Xload
and tmp4=$c,$b
dep $X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19
andcm tmp1=$d,$b
dep.z tmp5=$a,5,27 };; // a<<5
{ .mmi; add $e=$e,$X[$i] // e+=Xload
or tmp4=tmp4,tmp1 // F_00_19(b,c,d)=(b&c)\|(~b&d)
extr.u tmp1=$a,27,5 };; // a>>27
{ .mmi; ld1 tmp0=[inp],2 // forward Xload
add $e=$e,tmp4 // e+=F_00_19(b,c,d)
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
{ .mmi; ld1 tmp4=[tmp3],2 // forward Xload
or tmp5=tmp1,tmp5 // ROTATE(a,5)
mux2 tmp6=$a,0x44 };; // see b in next iteration
{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)
dep $Xn=$Xn,tmp2,8,8 // forward Xload
mux2 $X[$i]=$X[$i],0x44 } //;;

___
    }
    else {
        $code.=<<___;
{ .mii; and tmp3=$c,$b
dep tmp1=tmp0,tmp4,8,8;;
dep $X[$i]=$X[$i],tmp1,16,16} //;;
{ .mmi; add $e=$e,$K_00_19 // e+=K_00_19
andcm tmp1=$d,$b
dep.z tmp5=$a,5,27 };; // a<<5
{ .mmi; add $e=$e,$X[$i] // e+=Xupdate
or tmp4=tmp3,tmp1 // F_00_19(b,c,d)=(b&c)\|(~b&d)
extr.u tmp1=$a,27,5 } // a>>27
{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
nop.i 0 };;
{ .mmi; add $e=$e,tmp4 // e+=F_00_19(b,c,d)
xor $Xn=$Xn,tmp3 // forward Xupdate
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
mux2 tmp6=$a,0x44 };; // see b in next iteration
{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
mux2 $X[$i]=$X[$i],0x44 };;

___
    }
}
122
# BODY_16_19(\$code, $i, $a, $b, $c, $d, $e)
#
# Appends the assembly for SHA-1 round $i (16..19) to the string referenced
# by the first argument. These rounds still use F_00_19 and K_00_19, but
# take their message word from the schedule in @X (indexed modulo 16) and
# compute the next schedule word alongside ("forward Xupdate").
#
#   \$code           reference to the output string (aliased as $code)
#   $i               round index
#   $a,$b,$c,$d,$e   names of the registers holding the five working
#                    variables in their roles for this round
#
# Reads the globals @X and $K_00_19; tmp0..tmp6 are assembler symbols
# defined in the function prologue.
sub BODY_16_19 {
    local *code=shift;      # make $code an alias for the caller's string
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    # Register of the NEXT schedule word; a scalar element, not a slice.
    my $Xn=$X[$j%16];

    $code.=<<___;
{ .mib; add $e=$e,$K_00_19 // e+=K_00_19
dep.z tmp5=$a,5,27 } // a<<5
{ .mib; andcm tmp1=$d,$b
and tmp0=$c,$b };;
{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate
or tmp0=tmp0,tmp1 // F_00_19(b,c,d)=(b&c)\|(~b&d)
extr.u tmp1=$a,27,5 } // a>>27
{ .mmi; xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
xor tmp3=$X[($j+8)%16],$X[($j+13)%16] // forward Xupdate
nop.i 0 };;
{ .mmi; add $e=$e,tmp0 // f+=F_00_19(b,c,d)
xor $Xn=$Xn,tmp3 // forward Xupdate
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
mux2 tmp6=$a,0x44 };; // see b in next iteration
{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
nop.i 0 };;

___
}
151
# BODY_20_39(\$code, $i, $a, $b, $c, $d, $e [, $Konst])
#
# Appends the assembly for one SHA-1 round that uses the parity function
# b^c^d to the string referenced by the first argument. It serves rounds
# 20..39 directly and rounds 60..79 through BODY_60_79, which supplies the
# other round constant.
#
#   \$code           reference to the output string (aliased as $code)
#   $i               round index
#   $a,$b,$c,$d,$e   names of the registers holding the five working
#                    variables in their roles for this round
#   $Konst           optional name of the register holding the round
#                    constant; defaults to $K_20_39
#
# For $i < 79 the round is emitted together with the schedule update for
# the next word. For the final round ($i == 79) no schedule update is
# emitted; instead the template folds the working variables into $h1, $h3
# and $h4 ("wrap up") and recomputes tmp3 = inp + 1.
#
# Reads the globals @X, $K_20_39, $h1, $h3, $h4; tmp0..tmp6 and inp are
# assembler symbols defined in the function prologue.
sub BODY_20_39 {
    local *code=shift;      # make $code an alias for the caller's string
    my ($i,$a,$b,$c,$d,$e,$Konst)=@_;
    $Konst = $K_20_39 if (!defined($Konst));
    my $j=$i+1;
    # Register of the NEXT schedule word; a scalar element, not a slice.
    my $Xn=$X[$j%16];

    if ($i<79) {
        $code.=<<___;
{ .mib; add $e=$e,$Konst // e+=K_XX_XX
dep.z tmp5=$a,5,27 } // a<<5
{ .mib; xor tmp0=$c,$b
xor $Xn=$Xn,$X[($j+2)%16] };; // forward Xupdate
{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate
extr.u tmp1=$a,27,5 } // a>>27
{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d
xor $Xn=$Xn,$X[($j+8)%16] };; // forward Xupdate
{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d)
xor $Xn=$Xn,$X[($j+13)%16] // forward Xupdate
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
{ .mmi; or tmp1=tmp1,tmp5 // ROTATE(a,5)
mux2 tmp6=$a,0x44 };; // see b in next iteration
{ .mii; add $e=$e,tmp1 // e+=ROTATE(a,5)
shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
nop.i 0 };;

___
    }
    else {
        $code.=<<___;
{ .mib; add $e=$e,$Konst // e+=K_60_79
dep.z tmp5=$a,5,27 } // a<<5
{ .mib; xor tmp0=$c,$b
add $h1=$h1,$a };; // wrap up
{ .mib; add $e=$e,$X[$i%16] // e+=Xupdate
extr.u tmp1=$a,27,5 } // a>>27
{ .mib; xor tmp0=tmp0,$d // F_20_39(b,c,d)=b^c^d
add $h3=$h3,$c };; // wrap up
{ .mmi; add $e=$e,tmp0 // e+=F_20_39(b,c,d)
or tmp1=tmp1,tmp5 // ROTATE(a,5)
shrp $b=tmp6,tmp6,2 };; // b=ROTATE(b,30) ;;?
{ .mmi; add $e=$e,tmp1 // e+=ROTATE(a,5)
add tmp3=1,inp // used in unaligned codepath
add $h4=$h4,$d };; // wrap up

___
    }
}
200
# BODY_40_59(\$code, $i, $a, $b, $c, $d, $e)
#
# Appends the assembly for SHA-1 round $i (40..59) to the string referenced
# by the first argument. As the template's own comments show, the round
# function is accumulated as (c&d) + (b&(c^d)): the c&d term is added to
# the rotated a, and b&(c^d) is added to e separately. The next schedule
# word is computed alongside ("forward Xupdate").
#
#   \$code           reference to the output string (aliased as $code)
#   $i               round index
#   $a,$b,$c,$d,$e   names of the registers holding the five working
#                    variables in their roles for this round
#
# Reads the globals @X and $K_40_59; tmp0..tmp6 are assembler symbols
# defined in the function prologue.
sub BODY_40_59 {
    local *code=shift;      # make $code an alias for the caller's string
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;
    # Register of the NEXT schedule word; a scalar element, not a slice.
    my $Xn=$X[$j%16];

    $code.=<<___;
{ .mib; add $e=$e,$K_40_59 // e+=K_40_59
dep.z tmp5=$a,5,27 } // a<<5
{ .mib; and tmp1=$c,$d
xor tmp0=$c,$d };;
{ .mmi; add $e=$e,$X[$i%16] // e+=Xupdate
add tmp5=tmp5,tmp1 // a<<5+(c&d)
extr.u tmp1=$a,27,5 } // a>>27
{ .mmi; and tmp0=tmp0,$b
xor $Xn=$Xn,$X[($j+2)%16] // forward Xupdate
xor tmp3=$X[($j+8)%16],$X[($j+13)%16] };; // forward Xupdate
{ .mmi; add $e=$e,tmp0 // e+=b&(c^d)
add tmp5=tmp5,tmp1 // ROTATE(a,5)+(c&d)
shrp $b=tmp6,tmp6,2 } // b=ROTATE(b,30)
{ .mmi; xor $Xn=$Xn,tmp3
mux2 tmp6=$a,0x44 };; // see b in next iteration
{ .mii; add $e=$e,tmp5 // e+=ROTATE(a,5)+(c&d)
shrp $Xn=$Xn,$Xn,31 // ROTATE(x[0]^x[2]^x[8]^x[13],1)
nop.i 0x0 };;

___
}
# BODY_60_79(\$code, $i, $a, $b, $c, $d, $e)
#
# Rounds 60..79 use the same template as rounds 20..39; only the round
# constant differs, so the call is forwarded with $K_60_79 appended as
# the constant-register argument.
sub BODY_60_79 {
    my @round_args = @_;
    return BODY_20_39(@round_args, $K_60_79);
}
230
# Function prologue and loop head of sha1_block_data_order.
#
# The template defines the assembler symbols tmp0..tmp6, ctx and inp,
# saves ar.lc and the predicates (r3, r2), loads the five chaining values
# from ctx into $h0..$h4 and the four round constants into their
# registers, and sets ar.lc to num-1. At .Ldtop, which is re-entered for
# every input block, the chaining values are copied into the working
# variables $A..$E and tmp6 is primed from $h1 for the first round's
# b=ROTATE(b,30).
#
# The prototype comment in the emitted assembly takes pointer arguments
# (SHA_CTX *c, const void *p); the stars are restored here.
$code.=<<___;
.text

tmp0=r8;
tmp1=r9;
tmp2=r10;
tmp3=r11;
ctx=r32; // in0
inp=r33; // in1

// void sha1_block_data_order(SHA_CTX *c,const void *p,size_t num);
.global sha1_block_data_order#
.proc sha1_block_data_order#
.align 32
sha1_block_data_order:
.prologue
{ .mmi; alloc tmp1=ar.pfs,3,14,0,0
$ADDP tmp0=4,ctx
.save ar.lc,r3
mov r3=ar.lc }
{ .mmi; $ADDP ctx=0,ctx
$ADDP inp=0,inp
mov r2=pr };;
tmp4=in2;
tmp5=loc12;
tmp6=loc13;
.body
{ .mlx; ld4 $h0=[ctx],8
movl $K_00_19=0x5a827999 }
{ .mlx; ld4 $h1=[tmp0],8
movl $K_20_39=0x6ed9eba1 };;
{ .mlx; ld4 $h2=[ctx],8
movl $K_40_59=0x8f1bbcdc }
{ .mlx; ld4 $h3=[tmp0]
movl $K_60_79=0xca62c1d6 };;
{ .mmi; ld4 $h4=[ctx],-16
add in2=-1,in2 // adjust num for ar.lc
mov ar.ec=1 };;
{ .mmi; nop.m 0
add tmp3=1,inp
mov ar.lc=in2 };; // brp.loop.imp: too far

.Ldtop:
{ .mmi; mov $A=$h0
mov $B=$h1
mux2 tmp6=$h1,0x44 }
{ .mmi; mov $C=$h2
mov $D=$h3
mov $E=$h4 };;

___
282
# Emit all 80 rounds. After each round the list of register names is
# rotated right by one, so the register that played "e" becomes "a" for the
# next round, and so on; no register moves are generated for the rotation.
{
    my @V = ($A, $B, $C, $D, $E);

    # [ last round index handled, generator for that range ]
    my @stages = (
        [ 15, \&BODY_00_15 ],
        [ 19, \&BODY_16_19 ],
        [ 39, \&BODY_20_39 ],
        [ 59, \&BODY_40_59 ],
        [ 79, \&BODY_60_79 ],
    );

    my $i = 0;
    for my $stage (@stages) {
        my ($last, $emit) = @$stage;
        while ($i <= $last) {
            $emit->(\$code, $i, @V);
            unshift(@V, pop(@V));
            $i++;
        }
    }

    # 80 rotations of a five-element list must restore the original order.
    ($V[0] eq $A and $V[4] eq $E) or die; # double-check
}
294
# Loop tail and function epilogue.
#
# The working variables A and C are folded into $h0 and $h2 here; $h1, $h3
# and $h4 were already updated by the final-round template (the $i == 79
# branch of BODY_20_39). br.ctop loops back to .Ldtop for the next block.
# After the loop, ar.lc is restored, the five chaining values are stored
# back through ctx, the predicates are restored and the function returns.
$code.=<<___;
{ .mmb; add $h0=$h0,$A
add $h2=$h2,$C
br.ctop.dptk.many .Ldtop };;
.Ldend:
{ .mmi; add tmp0=4,ctx
mov ar.lc=r3 };;
{ .mmi; st4 [ctx]=$h0,8
st4 [tmp0]=$h1,8 };;
{ .mmi; st4 [ctx]=$h2,8
st4 [tmp0]=$h3 };;
{ .mib; st4 [ctx]=$h4,-16
mov pr=r2,0x1ffff
br.ret.sptk.many b0 };;
.endp sha1_block_data_order#
stringz "SHA1 block transform for IA64, CRYPTOGAMS by <appro\@openssl.org>"
___

# Write the generated assembly to the requested file, or to the inherited
# STDOUT when no output name was given. Failures are fatal so that a build
# never continues with a missing or truncated .s file: a failed open used
# to be ignored, and buffered write errors only surface at close.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
print $code;
close(STDOUT) or die "error closing STDOUT: $!";

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/sha/asm/sha1-ia64.pl@ 94081

以其他格式下載: