1 | #! /usr/bin/env perl
|
---|
2 | # Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | #
|
---|
4 | # Licensed under the Apache License 2.0 (the "License"). You may not use
|
---|
5 | # this file except in compliance with the License. You can obtain a copy
|
---|
6 | # in the file LICENSE in the source distribution or at
|
---|
7 | # https://www.openssl.org/source/license.html
|
---|
8 |
|
---|
9 |
|
---|
10 | # ====================================================================
|
---|
11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
---|
12 | # project. The module is, however, dual licensed under OpenSSL and
|
---|
13 | # CRYPTOGAMS licenses depending on where you obtain it. For further
|
---|
14 | # details see http://www.openssl.org/~appro/cryptogams/.
|
---|
15 | # ====================================================================
|
---|
16 |
|
---|
17 | # RC4 for PA-RISC.
|
---|
18 |
|
---|
19 | # June 2009.
|
---|
20 | #
|
---|
21 | # Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
|
---|
22 | # For reference, [4x] unrolled loop is >40% faster than folded one.
|
---|
23 | # It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
|
---|
24 | # is believed to be not sufficient to justify the effort...
|
---|
25 | #
|
---|
26 | # Special thanks to polarhome.com for providing HP-UX account.
|
---|
27 |
|
---|
# Directory part of the script path, including the trailing separator.
# It is used further down to locate opensslconf.h.  List assignment leaves
# $dir undefined when $0 carries no directory, instead of picking up a
# stale $1.
($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Send everything printed below to $output.  Failing to open it is fatal:
# otherwise the generated assembly would silently go to the inherited
# STDOUT and the requested file would never be written.
if ($output) {
    open STDOUT,">",$output or die "can't open $output: $!";
}
|
---|
36 |
|
---|
# ABI dependent parameters, selected by whether $flavour mentions "64":
#   $LEVEL        argument of the .LEVEL directive
#   $SIZE_T       size of a saved register slot in bytes
#   $FRAME_MARKER size of the frame marker
#   $SAVED_RP     offset (below %sp) where the return pointer is saved
#   $PUSH/$POP    store/load mnemonics for register-sized values
#   $PUSHMA/$POPMB the same with stack pointer modification
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
    ($flavour =~ /64/)
    ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
    : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");

# frame marker + 4 saved regs [+ argument transfer]
$FRAME = $FRAME_MARKER + 4*$SIZE_T;
|
---|
# Classify the replacement text of "#define RC4_INT <type>".  Returns the
# size in bytes of one key table element: 1 when the type ends in "char",
# 4 otherwise.  Trailing white space, including the CR of a CRLF line
# ending, is ignored so that it cannot turn a char type into a 4-byte one.
sub _rc4_int_width {
    my ($type) = @_;
    return ($type =~ /char\s*$/) ? 1 : 4;
}

$SZ=1;				# defaults to RC4_CHAR
{
    my $conf = (defined $dir ? $dir : "") . "../../opensslconf.h";

    # A missing or unreadable opensslconf.h is not an error; the default
    # above simply stays in effect.
    if (open my $fh, "<", $conf) {
	while (my $line = <$fh>) {
	    if ($line =~ m/#\s*define\s+RC4_INT\s+(.*)/) {
		$SZ = _rc4_int_width($1);
		last;			# first definition wins
	    }
	}
	close $fh;
    }
}

# Mnemonics matching the element size: plain load, indexed load, the
# instruction that forms an element address from index and base, and store.
if ($SZ==1) {	# RC4_CHAR
    $LD="ldb";
    $LDX="ldbx";
    $MKX="addl";
    $ST="stb";
} else {	# RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
    $LD="ldw";
    $LDX="ldwx,s";
    $MKX="sh2addl";
    $ST="stw";
}
|
---|
81 |
|
---|
# Registers holding the four arguments of RC4 (the routine is exported
# with ARGW0..ARGW3=GR).
($key, $len, $inp, $out) = ("%r26", "%r25", "%r24", "%r23");

# Index pair and table-value pair; both are "rotated" by unrolledloopbody.
@XX = ("%r19", "%r20");
@TX = ("%r21", "%r22");
($YY, $TY) = ("%r28", "%r29");

# Scratch registers.  %r2..%r6 are the ones the RC4 prologue saves.
($acc, $ix, $iy)      = ("%r1", "%r2", "%r3");
($dat0, $dat1, $rem)  = ("%r4", "%r5", "%r6");
$mask = "%r31";
|
---|
99 |
|
---|
# Append four unrolled rounds to $code.
#
# Each round emits the same instruction sequence, with the roles of the
# two @XX and the two @TX registers swapped from one round to the next.
# Rounds 1..3 additionally emit, through `sprintf(...)` expressions that
# are evaluated by the post-processing loop at the end of the script, an
# indexed load through $TY (as left by the previous round) into $dat1 and
# a zdep/dep of $dat1 into $acc (zdep in round 1, dep afterwards).  In
# round 0 both expressions evaluate to nothing.
#
# Takes no arguments; reads and updates the globals $code, $i, @XX, @TX.
sub unrolledloopbody {
    $i = 0;
    while ($i < 4) {
	$code .= <<___;
	ldo 1($XX[0]),$XX[1]
	`sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
	and $mask,$XX[1],$XX[1]
	$LDX $YY($key),$TY
	$MKX $YY,$key,$ix
	$LDX $XX[1]($key),$TX[1]
	$MKX $XX[0],$key,$iy
	$ST $TX[0],0($ix)
	comclr,<> $XX[1],$YY,%r0 ; conditional
	copy $TX[0],$TX[1] ; move
	`sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
	$ST $TY,0($iy)
	addl $TX[0],$TY,$TY
	addl $TX[1],$YY,$YY
	and $mask,$TY,$TY
	and $mask,$YY,$YY
___
	# "rotate" registers: the first element moves to the end
	@TX = (@TX[1..$#TX], $TX[0]);
	@XX = (@XX[1..$#XX], $XX[0]);
	$i++;
    }
}
|
---|
122 |
|
---|
# Append a byte-at-a-time ("folded") loop to $code.
#
#   $label  assembler label placed at the top of the loop and used as the
#           target of the closing addib
#   $count  register holding the iteration count; it is decremented by one
#           per iteration and the loop ends when it reaches zero
#
# Per iteration the emitted code loads one input byte via $inp($out),
# xors it with a table byte and stores the result at $out, which is
# advanced by one.
sub foldedloop {
    my $label = shift;
    my $count = shift;

    $code .= <<___;
$label
	$MKX $YY,$key,$iy
	$LDX $YY($key),$TY
	$MKX $XX[0],$key,$ix
	$ST $TX[0],0($iy)
	ldo 1($XX[0]),$XX[0]
	$ST $TY,0($ix)
	addl $TX[0],$TY,$TY
	ldbx $inp($out),$dat1
	and $mask,$TY,$TY
	and $mask,$XX[0],$XX[0]
	$LDX $TY($key),$acc
	$LDX $XX[0]($key),$TX[0]
	ldo 1($out),$out
	xor $dat1,$acc,$acc
	addl $TX[0],$YY,$YY
	stb $acc,-1($out)
	addib,<> -1,$count,$label ; $count is always small
	and $mask,$YY,$YY
___
}
|
---|
147 |
|
---|
# Start of the generated text: assembler directives and the entry of RC4.
#
# The emitted code saves %r2 and %r3..%r6, leaves through L$abort when
# $len is zero, and turns $inp into the distance between $inp and $out.
# It then loads the two values stored in front of the table into $XX[0]
# and $YY and advances $key past them.  The "warm up" increments $XX[0],
# loads the table entry for it into $TX[0] and adds that to $YY.  $len is
# compared against 6 and short inputs go straight to L$oop1.  If $out is
# not word aligned, $rem becomes 4 minus ($out & 3) and is subtracted
# from $len.
#
# Expressions in `backticks` stay in the text and are evaluated by the
# post-processing loop at the end of the script.
$code=<<___;
	.LEVEL $LEVEL
	.SPACE \$TEXT\$
	.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
RC4
	.PROC
	.CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
	.ENTRY
	$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
	$PUSHMA %r3,$FRAME(%sp)
	$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)

	cmpib,*= 0,$len,L\$abort
	sub $inp,$out,$inp ; distance between $inp and $out

	$LD `0*$SZ`($key),$XX[0]
	$LD `1*$SZ`($key),$YY
	ldo `2*$SZ`($key),$key

	ldi 0xff,$mask
	ldi 3,$dat0

	ldo 1($XX[0]),$XX[0] ; warm up loop
	and $mask,$XX[0],$XX[0]
	$LDX $XX[0]($key),$TX[0]
	addl $TX[0],$YY,$YY
	cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
	and $mask,$YY,$YY

	and,<> $out,$dat0,$rem ; is $out aligned?
	b L\$alignedout
	subi 4,$rem,$rem
	sub $len,$rem,$len
___
# Byte-wise loop labelled L$alignout that runs $rem times.
&foldedloop("L\$alignout",$rem); # process till $out is aligned
|
---|
187 |
|
---|
# Word-at-a-time processing, entered at L$alignedout.
#
# The low two bits of $inp (at this point the $inp-$out distance) decide
# the path: zero branches to L$oop4, anything else prepares the
# misaligned-input loop.  For the latter, $rem becomes the distance with
# those bits cleared, %cr11 is loaded with 32 minus 8 times the byte
# offset, and the first source word is preloaded into $dat0.
$code.=<<___;
L\$alignedout ; $len is at least 4 here
	and,<> $inp,$dat0,$acc ; is $inp aligned?
	b L\$oop4
	sub $inp,$acc,$rem ; align $inp

	sh3addl $acc,%r0,$acc
	subi 32,$acc,$acc
	mtctl $acc,%cr11 ; load %sar with vshd align factor
	ldwx $rem($out),$dat0
	ldo 4($rem),$rem
L\$oop4misalignedinp
___
# Four rounds; three of the four table bytes are deposited into $acc.
&unrolledloopbody();
# Tail of the misaligned-input loop: fetch the fourth table byte and or it
# into $acc, load the next source word, combine it with the previous one
# using vshd, xor and store one word, then loop while $len exceeds 3.
# Afterwards either finish at L$done ($len is zero) or hand the remaining
# bytes to L$oop1.  The text ends by opening the aligned-input loop L$oop4.
$code.=<<___;
	$LDX $TY($key),$ix
	ldwx $rem($out),$dat1
	ldo -4($len),$len
	or $ix,$acc,$acc ; last piece, no need to dep
	vshd $dat0,$dat1,$iy ; align data
	copy $dat1,$dat0
	xor $iy,$acc,$acc
	stw $acc,0($out)
	cmpib,*<< 3,$len,L\$oop4misalignedinp
	ldo 4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
	b L\$oop1
	nop

	.ALIGN 8
L\$oop4
___
# Body of the aligned-input loop, same four rounds as above.
&unrolledloopbody();
# Tail of the aligned-input loop: the source word is loaded directly via
# $inp($out), xored with $acc and stored.  Loops while $len exceeds 3 and
# branches to L$done when nothing is left; otherwise execution falls
# through into L$oop1, which is emitted right after.
$code.=<<___;
	$LDX $TY($key),$ix
	ldwx $inp($out),$dat0
	ldo -4($len),$len
	or $ix,$acc,$acc ; last piece, no need to dep
	xor $dat0,$acc,$acc
	stw $acc,0($out)
	cmpib,*<< 3,$len,L\$oop4
	ldo 4($out),$out
	cmpib,*= 0,$len,L\$done
	nop
___
# Byte-wise loop for the remaining $len bytes (also the target for short
# inputs).
&foldedloop("L\$oop1",$len);
|
---|
# Epilogue of RC4, entered at L$done.
#
# The "chill out" undoes the look-ahead kept by the loops: $XX[0] is
# decremented and $TX[0] is subtracted from $YY, both masked with $mask,
# and the two values are stored back into the slots just below $key (which
# was advanced by 2*$SZ on entry).  %r2 and %r4..%r6 are reloaded from the
# frame; %r3 is reloaded by the $POPMB that follows the return branch at
# L$abort.
$code.=<<___;
L\$done
	$POP `-$FRAME-$SAVED_RP`(%sp),%r2
	ldo -1($XX[0]),$XX[0] ; chill out loop
	sub $YY,$TX[0],$YY
	and $mask,$XX[0],$XX[0]
	and $mask,$YY,$YY
	$ST $XX[0],`-2*$SZ`($key)
	$ST $YY,`-1*$SZ`($key)
	$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
L\$abort
	bv (%r2)
	.EXIT
	$POPMB -$FRAME(%sp),%r3
	.PROCEND
___
|
---|
253 |
|
---|
# RC4_set_key and RC4_options, followed by the option/banner strings.
#
# RC4_set_key (arguments arrive in $key, $len and $inp):
#   - stores zero into the two slots at the start of the key structure and
#     advances $key past them;
#   - L$1st fills the table with the running counter @XX[0], looping as
#     long as the counter is below 256, then $key is rewound by 256*$SZ;
#   - $inp is moved to the end of the user key and %r23 becomes the
#     negated length ("inverse index"), which is reset to -$len when the
#     increment wraps it;
#   - L$2nd adds the table entry at @XX[0] and the current user key byte
#     to @XX[1] (masked with $mask), then stores the two table entries at
#     @XX[0] and @XX[1] into each other's place; it loops as long as
#     @XX[0] is below 256.
#
# RC4_options returns, in %r28, an address computed relative to L$pic
# that points at L$opts, i.e. at the "rc4(4x,char)" or "rc4(4x,int)"
# string selected by $SZ.
#
# @XX[0] etc. interpolate to the same register names as $XX[0] would.
$code.=<<___;

	.EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN 8
RC4_set_key
	.PROC
	.CALLINFO NO_CALLS
	.ENTRY
	$ST %r0,`0*$SZ`($key)
	$ST %r0,`1*$SZ`($key)
	ldo `2*$SZ`($key),$key
	copy %r0,@XX[0]
L\$1st
	$ST @XX[0],0($key)
	ldo 1(@XX[0]),@XX[0]
	bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
	ldo $SZ($key),$key

	ldo `-256*$SZ`($key),$key ; rewind $key
	addl $len,$inp,$inp ; $inp to point at the end
	sub %r0,$len,%r23 ; inverse index
	copy %r0,@XX[0]
	copy %r0,@XX[1]
	ldi 0xff,$mask

L\$2nd
	$LDX @XX[0]($key),@TX[0]
	ldbx %r23($inp),@TX[1]
	addi,nuv 1,%r23,%r23 ; increment and conditional
	sub %r0,$len,%r23 ; inverse index
	addl @TX[0],@XX[1],@XX[1]
	addl @TX[1],@XX[1],@XX[1]
	and $mask,@XX[1],@XX[1]
	$MKX @XX[0],$key,$TY
	$LDX @XX[1]($key),@TX[1]
	$MKX @XX[1],$key,$YY
	ldo 1(@XX[0]),@XX[0]
	$ST @TX[0],0($YY)
	bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
	$ST @TX[1],0($TY)

	bv,n (%r2)
	.EXIT
	nop
	.PROCEND

	.EXPORT RC4_options,ENTRY
	.ALIGN 8
RC4_options
	.PROC
	.CALLINFO NO_CALLS
	.ENTRY
	blr %r0,%r28
	ldi 3,%r1
L\$pic
	andcm %r28,%r1,%r28
	bv (%r2)
	.EXIT
	ldo L\$opts-L\$pic(%r28),%r28
	.PROCEND
	.ALIGN 8
L\$opts
	.STRINGZ "rc4(4x,`$SZ==1?"char":"int"`)"
	.STRINGZ "RC4 for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
|
---|
319 |
|
---|
# Find out whether the compiler driver named by $ENV{CC} uses the GNU
# assembler, by asking it to assemble /dev/null verbosely.  The probe is
# skipped when CC is not set, so no meaningless shell command is run.
if (defined $ENV{CC} && $ENV{CC} ne ""
    && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
	=~ /GNU assembler/) {
    $gnuas = 1;
}

# Post-process the generated text line by line and print it.
foreach(split("\n",$code)) {
	# Replace every `expression` with its value.  A failing expression
	# is fatal: silently substituting nothing would produce broken
	# assembly that is much harder to diagnose.
	s{\`([^\`]*)\`}{
	    my $expr  = $1;
	    my $value = eval $expr;
	    die "failed to evaluate '$expr': $@" if $@;
	    $value;
	}ge;

	# Directive spellings for the GNU assembler in 64-bit builds.
	s/(\.LEVEL\s+2\.0)W/$1w/ if ($gnuas && $SIZE_T==8);
	s/\.SPACE\s+\$TEXT\$/.text/ if ($gnuas && $SIZE_T==8);
	s/\.SUBSPA.*// if ($gnuas && $SIZE_T==8);
	# 32-bit builds use comib in place of cmpib,*
	s/cmpib,\*/comib,/ if ($SIZE_T==4);
	# 64-bit builds use bve in place of bv
	s/\bbv\b/bve/ if ($SIZE_T==8);

	print $_,"\n";
}
# Buffered write errors surface at close time.
close STDOUT or die "error closing STDOUT: $!";
|
---|