VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/sha/asm/keccak1600-ppc64.pl@ 108351

最後變更 在這個檔案從108351是 104078,由 vboxsync 提交於 12 月 前

openssl-3.1.5: Applied and adjusted our OpenSSL changes to 3.1.4. bugref:10638

檔案大小: 18.9 KB
 
1#!/usr/bin/env perl
2# Copyright 2017-2022 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8#
9# ====================================================================
10# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11# project. The module is, however, dual licensed under OpenSSL and
12# CRYPTOGAMS licenses depending on where you obtain it. For further
13# details see http://www.openssl.org/~appro/cryptogams/.
14# ====================================================================
15#
16# Keccak-1600 for PPC64.
17#
18# June 2017.
19#
20# This is a straightforward KECCAK_1X_ALT implementation that works on
21# *any* PPC64. PowerISA 2.07 adds a 2x64-bit vector rotate, which makes
22# it possible to achieve better performance than shown below, but that
23# is naturally an option only for POWER8 and its successors...
24#
25######################################################################
26# Numbers are cycles per processed byte.
27#
28# r=1088(*)
29#
30# PPC970/G5 14.0/+130%
31# POWER7 9.7/+110%
32# POWER8 10.6/+100%
33# POWER9 8.2/+66%
34#
35# (*) Corresponds to SHA3-256. The percentage after the slash is the
36# improvement over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers
37# do much better (but watch out for them generating code specific
38# to the processor they execute on).
39
# Command-line handling. Both arguments are optional; each variable stays
# undef when its test fails.
40# $output is the last argument if it looks like a file (it has an extension)
# i.e. it ends in a dot followed by word characters; it is popped off @ARGV.
41# $flavour is the first argument if it doesn't look like a file
# i.e. it contains no dot at all; it is shifted off @ARGV.
42$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
43$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
44
# Only flavours whose name contains "64" are accepted; anything else aborts
# the script. The variables set here are interpolated into the assembly
# templates below: $SIZE_T is the stack-slot size in bytes, $LRSAVE the
# offset (from the caller's frame) at which the link register is saved,
# and $UCMP/$STU/$POP/$PUSH the 64-bit compare/store-with-update/load/store
# mnemonics.
45if ($flavour =~ /64/) {
46 $SIZE_T =8;
47 $LRSAVE =2*$SIZE_T;
48 $UCMP ="cmpld";
49 $STU ="stdu";
50 $POP ="ld";
51 $PUSH ="std";
52} else { die "nonsense $flavour"; }
53
# A flavour name ending in "le" selects little-endian code generation.
54$LITTLE_ENDIAN = ($flavour=~/le$/) ? 1 : 0;
55
# $DWORD_LE_LOAD is the instruction SHA3_absorb uses to fetch the next
# 64-bit input word into r0 while advancing r3 by 8: a single ldu on
# little-endian targets, otherwise a call to the dword_le_load helper
# (emitted further down only for big-endian builds).
# $LE_LOAD_SIZE is what SHA3_absorb subtracts from the input pointer up
# front, so that the first pre-incrementing access (offset 8 for ldu,
# offset 1 for the helper's first lbz) lands on the first input byte.
56if ($LITTLE_ENDIAN) {
57 $DWORD_LE_LOAD = "ldu r0,8(r3)";
58 $LE_LOAD_SIZE = "8";
59} else {
60 $DWORD_LE_LOAD = "bl dword_le_load";
61 $LE_LOAD_SIZE = "1";
62}
63
# Locate the ppc-xlate.pl translator relative to this script: $dir is the
# directory part of $0 (up to and including the last / or \). The script's
# own directory is tried first, then ../../perlasm/ beneath it; the script
# dies if neither candidate is a plain file.
64$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
65( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
66( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
67die "can't locate ppc-xlate.pl";
68
# Re-open STDOUT as a pipe into the translator, run with the same perl
# binary ($^X) and given the flavour and the quoted output name, so that
# everything printed at the end of this script is filtered through it.
69open STDOUT,"| $^X $xlate $flavour \"$output\""
70 or die "can't call $xlate: $!";
71
# Stack frame layout used by KeccakF1600 and SHA3_absorb (byte offsets
# from the stack pointer after the frame has been allocated):
#   $FRAME  - total frame size;
#   $LOCALS - start of the local slots (saved state pointer, inp, len,
#             bsz and the round-constant pointer, in that order);
#   $TEMP   - start of the scratch area where KeccakF1600_int spills
#             four state registers during Theta.
72$FRAME=24*$SIZE_T+6*$SIZE_T+32;
73$LOCALS=6*$SIZE_T;
74$TEMP=$LOCALS+6*$SIZE_T;
75
# Stack pointer register name.
76my $sp ="r1";
77
# 5x5 table of register names holding the Keccak state: row i starts at
# r7, r12, r17, r22, r27 and takes five consecutive registers, except
# that A[1][1] is moved from r13 to r6.
78my @A = map([ "r$_", "r".($_+1), "r".($_+2), "r".($_+3), "r".($_+4) ],
79 (7, 12, 17, 22, 27));
80 $A[1][1] = "r6"; # r13 is reserved
81
# Four scratch registers (r0, r3, r4, r5) used as temporaries by the
# round function.
82my @C = map("r$_", (0,3,4,5));
83
# Left-rotation amounts, indexed like @A, used as the rotldi counts in
# the Rho+Pi step.
84my @rhotates = ([ 0, 1, 62, 28, 27 ],
85 [ 36, 44, 6, 55, 20 ],
86 [ 3, 10, 43, 25, 39 ],
87 [ 41, 45, 15, 21, 8 ],
88 [ 18, 2, 61, 56, 14 ]);
89
# Emit the start of KeccakF1600_int, which operates on the state held in
# the @A registers. The count register is loaded with 24 and .Loop is
# repeated until the closing bdnz runs it down, i.e. 24 rounds.
# This first chunk begins Theta: rows 0 and 1 of columns 0-3 are XORed
# into $C[0..3], interleaved with stores that spill the $A[0..3][4]
# registers to the $TEMP area of the caller's frame.
90$code.=<<___;
91.text
92
93.type KeccakF1600_int,\@function
94.align 5
95KeccakF1600_int:
96 li r0,24
97 mtctr r0
98 b .Loop
99.align 4
100.Loop:
101 xor $C[0],$A[0][0],$A[1][0] ; Theta
102 std $A[0][4],`$TEMP+0`($sp)
103 xor $C[1],$A[0][1],$A[1][1]
104 std $A[1][4],`$TEMP+8`($sp)
105 xor $C[2],$A[0][2],$A[1][2]
106 std $A[2][4],`$TEMP+16`($sp)
107 xor $C[3],$A[0][3],$A[1][3]
108 std $A[3][4],`$TEMP+24`($sp)
109___
# The four registers just spilled are reused as extra scratch: @C
# temporarily gains the names $C[4..7], aliasing $A[0..3][4].
110 $C[4]=$A[0][4];
111 $C[5]=$A[1][4];
112 $C[6]=$A[2][4];
113 $C[7]=$A[3][4];
# Finish the five column parities in $C[0..4], then build the Theta
# correction terms (a parity XORed with another parity rotated left by
# one) and XOR them into state columns 1, 2 and 0. The terms for columns
# 3 and 4 are left in $C[2] and $C[3] for the code that follows. The
# updated A[0][2] is written to $C[1] rather than to its home register
# (see the inline ";mr" note).
114$code.=<<___;
115 xor $C[4],$A[0][4],$A[1][4]
116 xor $C[0],$C[0],$A[2][0]
117 xor $C[1],$C[1],$A[2][1]
118 xor $C[2],$C[2],$A[2][2]
119 xor $C[3],$C[3],$A[2][3]
120 xor $C[4],$C[4],$A[2][4]
121 xor $C[0],$C[0],$A[3][0]
122 xor $C[1],$C[1],$A[3][1]
123 xor $C[2],$C[2],$A[3][2]
124 xor $C[3],$C[3],$A[3][3]
125 xor $C[4],$C[4],$A[3][4]
126 xor $C[0],$C[0],$A[4][0]
127 xor $C[2],$C[2],$A[4][2]
128 xor $C[1],$C[1],$A[4][1]
129 xor $C[3],$C[3],$A[4][3]
130 rotldi $C[5],$C[2],1
131 xor $C[4],$C[4],$A[4][4]
132 rotldi $C[6],$C[3],1
133 xor $C[5],$C[5],$C[0]
134 rotldi $C[7],$C[4],1
135
136 xor $A[0][1],$A[0][1],$C[5]
137 xor $A[1][1],$A[1][1],$C[5]
138 xor $A[2][1],$A[2][1],$C[5]
139 xor $A[3][1],$A[3][1],$C[5]
140 xor $A[4][1],$A[4][1],$C[5]
141
142 rotldi $C[5],$C[0],1
143 xor $C[6],$C[6],$C[1]
144 xor $C[2],$C[2],$C[7]
145 rotldi $C[7],$C[1],1
146 xor $C[3],$C[3],$C[5]
147 xor $C[4],$C[4],$C[7]
148
149 xor $C[1], $A[0][2],$C[6] ;mr $C[1],$A[0][2]
150 xor $A[1][2],$A[1][2],$C[6]
151 xor $A[2][2],$A[2][2],$C[6]
152 xor $A[3][2],$A[3][2],$C[6]
153 xor $A[4][2],$A[4][2],$C[6]
154
155 xor $A[0][0],$A[0][0],$C[4]
156 xor $A[1][0],$A[1][0],$C[4]
157 xor $A[2][0],$A[2][0],$C[4]
158 xor $A[3][0],$A[3][0],$C[4]
159 xor $A[4][0],$A[4][0],$C[4]
160___
# The registers aliased by $C[4..7] are reloaded with their A[0..3][4]
# values from the $TEMP area right below, so retire the temporary names.
161 $C[4]=undef;
162 $C[5]=undef;
163 $C[6]=undef;
164 $C[7]=undef;
# Remainder of the round and of KeccakF1600_int, followed by KeccakF1600.
#
# Round body:
#  - reload the spilled A[0..3][4] registers and apply the pending Theta
#    terms held in $C[2] and $C[3] to columns 3 and 4; the updated
#    A[0][3] and A[0][4] are written to $C[0] and $C[2] (inline ";mr"
#    notes) instead of their home registers;
#  - Rho+Pi: a chain of rotldi instructions that moves each lane to its
#    new register while rotating it by the matching @rhotates entry;
#  - Chi+Iota: row-wise andc/xor. The round-constant pointer lives in the
#    frame slot at $LOCALS+4*$SIZE_T; it is loaded, advanced by ldu with
#    an 8-byte pre-increment, and stored back every round;
#  - bdnz closes the loop started with the count register set to 24.
#
# KeccakF1600 takes a pointer to 25 64-bit state words in r3. It builds a
# $FRAME-byte frame, saves r14-r31 and LR, calls PICmeup and stores the
# returned r12 minus 8 as the round-constant pointer (minus 8 because of
# the pre-incrementing ldu), loads the state into the @A registers, calls
# KeccakF1600_int, writes the registers back through the saved r3 and
# restores the saved registers.
165$code.=<<___;
166 ld $A[0][4],`$TEMP+0`($sp)
167 xor $C[0], $A[0][3],$C[2] ;mr $C[0],$A[0][3]
168 ld $A[1][4],`$TEMP+8`($sp)
169 xor $A[1][3],$A[1][3],$C[2]
170 ld $A[2][4],`$TEMP+16`($sp)
171 xor $A[2][3],$A[2][3],$C[2]
172 ld $A[3][4],`$TEMP+24`($sp)
173 xor $A[3][3],$A[3][3],$C[2]
174 xor $A[4][3],$A[4][3],$C[2]
175
176 xor $C[2], $A[0][4],$C[3] ;mr $C[2],$A[0][4]
177 xor $A[1][4],$A[1][4],$C[3]
178 xor $A[2][4],$A[2][4],$C[3]
179 xor $A[3][4],$A[3][4],$C[3]
180 xor $A[4][4],$A[4][4],$C[3]
181
182 mr $C[3],$A[0][1] ; Rho+Pi
183 rotldi $A[0][1],$A[1][1],$rhotates[1][1]
184 ;mr $C[1],$A[0][2]
185 rotldi $A[0][2],$A[2][2],$rhotates[2][2]
186 ;mr $C[0],$A[0][3]
187 rotldi $A[0][3],$A[3][3],$rhotates[3][3]
188 ;mr $C[2],$A[0][4]
189 rotldi $A[0][4],$A[4][4],$rhotates[4][4]
190
191 rotldi $A[1][1],$A[1][4],$rhotates[1][4]
192 rotldi $A[2][2],$A[2][3],$rhotates[2][3]
193 rotldi $A[3][3],$A[3][2],$rhotates[3][2]
194 rotldi $A[4][4],$A[4][1],$rhotates[4][1]
195
196 rotldi $A[1][4],$A[4][2],$rhotates[4][2]
197 rotldi $A[2][3],$A[3][4],$rhotates[3][4]
198 rotldi $A[3][2],$A[2][1],$rhotates[2][1]
199 rotldi $A[4][1],$A[1][3],$rhotates[1][3]
200
201 rotldi $A[4][2],$A[2][4],$rhotates[2][4]
202 rotldi $A[3][4],$A[4][3],$rhotates[4][3]
203 rotldi $A[2][1],$A[1][2],$rhotates[1][2]
204 rotldi $A[1][3],$A[3][1],$rhotates[3][1]
205
206 rotldi $A[2][4],$A[4][0],$rhotates[4][0]
207 rotldi $A[4][3],$A[3][0],$rhotates[3][0]
208 rotldi $A[1][2],$A[2][0],$rhotates[2][0]
209 rotldi $A[3][1],$A[1][0],$rhotates[1][0]
210
211 rotldi $A[1][0],$C[0],$rhotates[0][3]
212 rotldi $A[2][0],$C[3],$rhotates[0][1]
213 rotldi $A[3][0],$C[2],$rhotates[0][4]
214 rotldi $A[4][0],$C[1],$rhotates[0][2]
215
216 andc $C[0],$A[0][2],$A[0][1] ; Chi+Iota
217 andc $C[1],$A[0][3],$A[0][2]
218 andc $C[2],$A[0][0],$A[0][4]
219 andc $C[3],$A[0][1],$A[0][0]
220 xor $A[0][0],$A[0][0],$C[0]
221 andc $C[0],$A[0][4],$A[0][3]
222 xor $A[0][1],$A[0][1],$C[1]
223 ld $C[1],`$LOCALS+4*$SIZE_T`($sp)
224 xor $A[0][3],$A[0][3],$C[2]
225 xor $A[0][4],$A[0][4],$C[3]
226 xor $A[0][2],$A[0][2],$C[0]
227 ldu $C[3],8($C[1]) ; Iota[i++]
228
229 andc $C[0],$A[1][2],$A[1][1]
230 std $C[1],`$LOCALS+4*$SIZE_T`($sp)
231 andc $C[1],$A[1][3],$A[1][2]
232 andc $C[2],$A[1][0],$A[1][4]
233 xor $A[0][0],$A[0][0],$C[3] ; A[0][0] ^= Iota
234 andc $C[3],$A[1][1],$A[1][0]
235 xor $A[1][0],$A[1][0],$C[0]
236 andc $C[0],$A[1][4],$A[1][3]
237 xor $A[1][1],$A[1][1],$C[1]
238 xor $A[1][3],$A[1][3],$C[2]
239 xor $A[1][4],$A[1][4],$C[3]
240 xor $A[1][2],$A[1][2],$C[0]
241
242 andc $C[0],$A[2][2],$A[2][1]
243 andc $C[1],$A[2][3],$A[2][2]
244 andc $C[2],$A[2][0],$A[2][4]
245 andc $C[3],$A[2][1],$A[2][0]
246 xor $A[2][0],$A[2][0],$C[0]
247 andc $C[0],$A[2][4],$A[2][3]
248 xor $A[2][1],$A[2][1],$C[1]
249 xor $A[2][3],$A[2][3],$C[2]
250 xor $A[2][4],$A[2][4],$C[3]
251 xor $A[2][2],$A[2][2],$C[0]
252
253 andc $C[0],$A[3][2],$A[3][1]
254 andc $C[1],$A[3][3],$A[3][2]
255 andc $C[2],$A[3][0],$A[3][4]
256 andc $C[3],$A[3][1],$A[3][0]
257 xor $A[3][0],$A[3][0],$C[0]
258 andc $C[0],$A[3][4],$A[3][3]
259 xor $A[3][1],$A[3][1],$C[1]
260 xor $A[3][3],$A[3][3],$C[2]
261 xor $A[3][4],$A[3][4],$C[3]
262 xor $A[3][2],$A[3][2],$C[0]
263
264 andc $C[0],$A[4][2],$A[4][1]
265 andc $C[1],$A[4][3],$A[4][2]
266 andc $C[2],$A[4][0],$A[4][4]
267 andc $C[3],$A[4][1],$A[4][0]
268 xor $A[4][0],$A[4][0],$C[0]
269 andc $C[0],$A[4][4],$A[4][3]
270 xor $A[4][1],$A[4][1],$C[1]
271 xor $A[4][3],$A[4][3],$C[2]
272 xor $A[4][4],$A[4][4],$C[3]
273 xor $A[4][2],$A[4][2],$C[0]
274
275 bdnz .Loop
276
277 blr
278 .long 0
279 .byte 0,12,0x14,0,0,0,0,0
280.size KeccakF1600_int,.-KeccakF1600_int
281
282.type KeccakF1600,\@function
283.align 5
284KeccakF1600:
285 $STU $sp,-$FRAME($sp)
286 mflr r0
287 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
288 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
289 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
290 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
291 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
292 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
293 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
294 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
295 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
296 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
297 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
298 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
299 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
300 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
301 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
302 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
303 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
304 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
305 $PUSH r0,`$FRAME+$LRSAVE`($sp)
306
307 bl PICmeup
308 subi r12,r12,8 ; prepare for ldu
309
310 $PUSH r3,`$LOCALS+0*$SIZE_T`($sp)
311 ;$PUSH r4,`$LOCALS+1*$SIZE_T`($sp)
312 ;$PUSH r5,`$LOCALS+2*$SIZE_T`($sp)
313 ;$PUSH r6,`$LOCALS+3*$SIZE_T`($sp)
314 $PUSH r12,`$LOCALS+4*$SIZE_T`($sp)
315
316 ld $A[0][0],`8*0`(r3) ; load A[5][5]
317 ld $A[0][1],`8*1`(r3)
318 ld $A[0][2],`8*2`(r3)
319 ld $A[0][3],`8*3`(r3)
320 ld $A[0][4],`8*4`(r3)
321 ld $A[1][0],`8*5`(r3)
322 ld $A[1][1],`8*6`(r3)
323 ld $A[1][2],`8*7`(r3)
324 ld $A[1][3],`8*8`(r3)
325 ld $A[1][4],`8*9`(r3)
326 ld $A[2][0],`8*10`(r3)
327 ld $A[2][1],`8*11`(r3)
328 ld $A[2][2],`8*12`(r3)
329 ld $A[2][3],`8*13`(r3)
330 ld $A[2][4],`8*14`(r3)
331 ld $A[3][0],`8*15`(r3)
332 ld $A[3][1],`8*16`(r3)
333 ld $A[3][2],`8*17`(r3)
334 ld $A[3][3],`8*18`(r3)
335 ld $A[3][4],`8*19`(r3)
336 ld $A[4][0],`8*20`(r3)
337 ld $A[4][1],`8*21`(r3)
338 ld $A[4][2],`8*22`(r3)
339 ld $A[4][3],`8*23`(r3)
340 ld $A[4][4],`8*24`(r3)
341
342 bl KeccakF1600_int
343
344 $POP r3,`$LOCALS+0*$SIZE_T`($sp)
345 std $A[0][0],`8*0`(r3) ; return A[5][5]
346 std $A[0][1],`8*1`(r3)
347 std $A[0][2],`8*2`(r3)
348 std $A[0][3],`8*3`(r3)
349 std $A[0][4],`8*4`(r3)
350 std $A[1][0],`8*5`(r3)
351 std $A[1][1],`8*6`(r3)
352 std $A[1][2],`8*7`(r3)
353 std $A[1][3],`8*8`(r3)
354 std $A[1][4],`8*9`(r3)
355 std $A[2][0],`8*10`(r3)
356 std $A[2][1],`8*11`(r3)
357 std $A[2][2],`8*12`(r3)
358 std $A[2][3],`8*13`(r3)
359 std $A[2][4],`8*14`(r3)
360 std $A[3][0],`8*15`(r3)
361 std $A[3][1],`8*16`(r3)
362 std $A[3][2],`8*17`(r3)
363 std $A[3][3],`8*18`(r3)
364 std $A[3][4],`8*19`(r3)
365 std $A[4][0],`8*20`(r3)
366 std $A[4][1],`8*21`(r3)
367 std $A[4][2],`8*22`(r3)
368 std $A[4][3],`8*23`(r3)
369 std $A[4][4],`8*24`(r3)
370
371 $POP r0,`$FRAME+$LRSAVE`($sp)
372 $POP r14,`$FRAME-$SIZE_T*18`($sp)
373 $POP r15,`$FRAME-$SIZE_T*17`($sp)
374 $POP r16,`$FRAME-$SIZE_T*16`($sp)
375 $POP r17,`$FRAME-$SIZE_T*15`($sp)
376 $POP r18,`$FRAME-$SIZE_T*14`($sp)
377 $POP r19,`$FRAME-$SIZE_T*13`($sp)
378 $POP r20,`$FRAME-$SIZE_T*12`($sp)
379 $POP r21,`$FRAME-$SIZE_T*11`($sp)
380 $POP r22,`$FRAME-$SIZE_T*10`($sp)
381 $POP r23,`$FRAME-$SIZE_T*9`($sp)
382 $POP r24,`$FRAME-$SIZE_T*8`($sp)
383 $POP r25,`$FRAME-$SIZE_T*7`($sp)
384 $POP r26,`$FRAME-$SIZE_T*6`($sp)
385 $POP r27,`$FRAME-$SIZE_T*5`($sp)
386 $POP r28,`$FRAME-$SIZE_T*4`($sp)
387 $POP r29,`$FRAME-$SIZE_T*3`($sp)
388 $POP r30,`$FRAME-$SIZE_T*2`($sp)
389 $POP r31,`$FRAME-$SIZE_T*1`($sp)
390 mtlr r0
391 addi $sp,$sp,$FRAME
392 blr
393 .long 0
394 .byte 0,12,4,1,0x80,18,1,0
395 .long 0
396.size KeccakF1600,.-KeccakF1600
397___
# Big-endian builds only: emit dword_le_load, the helper that
# $DWORD_LE_LOAD calls instead of a plain ldu. It reads the eight bytes
# at offsets 1..8 from r3 one at a time and merges them into r0 with
# insrdi, the byte at offset 1 ending up least significant, so the result
# is the little-endian interpretation of the input. The final lbzu
# advances r3 by 8. r4 and r5 are used as scratch.
398if (!$LITTLE_ENDIAN) {
399$code.=<<___;
400.type dword_le_load,\@function
401.align 5
402dword_le_load:
403 lbz r0,1(r3)
404 lbz r4,2(r3)
405 lbz r5,3(r3)
406 insrdi r0,r4,8,48
407 lbz r4,4(r3)
408 insrdi r0,r5,8,40
409 lbz r5,5(r3)
410 insrdi r0,r4,8,32
411 lbz r4,6(r3)
412 insrdi r0,r5,8,24
413 lbz r5,7(r3)
414 insrdi r0,r4,8,16
415 lbzu r4,8(r3)
416 insrdi r0,r5,8,8
417 insrdi r0,r4,8,0
418 blr
419 .long 0
420 .byte 0,12,0x14,0,0,0,1,0
421 .long 0
422.size dword_le_load,.-dword_le_load
423___
424}
425
# Emit the exported SHA3_absorb. Per the inline comments the arguments
# are r3 = A[][] (state), r4 = inp, r5 = len, r6 = bsz.
#
# Prologue: build a $FRAME-byte frame, save r14-r31 and LR, fetch the
# round-constant table address via PICmeup (stored minus 8 for the
# pre-incrementing ldu in KeccakF1600_int), pre-decrement inp by
# $LE_LOAD_SIZE for the pre-incrementing loads done by $DWORD_LE_LOAD,
# stash the arguments in the $LOCALS slots and load the 25 state words
# into the @A registers. inp/len/bsz are then moved to r3/r4/r5.
#
# .Loop_absorb: while len >= bsz (unsigned compare) subtract bsz from
# len, load the count register with bsz shifted right by 3 (the number of
# 64-bit words per block) and XOR that many input words into successive
# state registers; each bdz leaves the unrolled sequence as soon as the
# count reaches zero. .Lprocess_block then saves inp, runs
# KeccakF1600_int, restores bsz/len/inp and rewinds the round-constant
# pointer by 8*24 bytes.
#
# .Labsorbed: write the state back through the saved pointer, return the
# remaining len in r3 and restore the saved registers.
426$code.=<<___;
427.globl SHA3_absorb
428.type SHA3_absorb,\@function
429.align 5
430SHA3_absorb:
431 $STU $sp,-$FRAME($sp)
432 mflr r0
433 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
434 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
435 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
436 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
437 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
438 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
439 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
440 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
441 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
442 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
443 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
444 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
445 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
446 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
447 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
448 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
449 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
450 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
451 $PUSH r0,`$FRAME+$LRSAVE`($sp)
452
453 bl PICmeup
454 subi r4,r4,$LE_LOAD_SIZE ; prepare for ldu or lbzu
455 subi r12,r12,8 ; prepare for ldu
456
457 $PUSH r3,`$LOCALS+0*$SIZE_T`($sp) ; save A[][]
458 $PUSH r4,`$LOCALS+1*$SIZE_T`($sp) ; save inp
459 $PUSH r5,`$LOCALS+2*$SIZE_T`($sp) ; save len
460 $PUSH r6,`$LOCALS+3*$SIZE_T`($sp) ; save bsz
461 mr r0,r6
462 $PUSH r12,`$LOCALS+4*$SIZE_T`($sp)
463
464 ld $A[0][0],`8*0`(r3) ; load A[5][5]
465 ld $A[0][1],`8*1`(r3)
466 ld $A[0][2],`8*2`(r3)
467 ld $A[0][3],`8*3`(r3)
468 ld $A[0][4],`8*4`(r3)
469 ld $A[1][0],`8*5`(r3)
470 ld $A[1][1],`8*6`(r3)
471 ld $A[1][2],`8*7`(r3)
472 ld $A[1][3],`8*8`(r3)
473 ld $A[1][4],`8*9`(r3)
474 ld $A[2][0],`8*10`(r3)
475 ld $A[2][1],`8*11`(r3)
476 ld $A[2][2],`8*12`(r3)
477 ld $A[2][3],`8*13`(r3)
478 ld $A[2][4],`8*14`(r3)
479 ld $A[3][0],`8*15`(r3)
480 ld $A[3][1],`8*16`(r3)
481 ld $A[3][2],`8*17`(r3)
482 ld $A[3][3],`8*18`(r3)
483 ld $A[3][4],`8*19`(r3)
484 ld $A[4][0],`8*20`(r3)
485 ld $A[4][1],`8*21`(r3)
486 ld $A[4][2],`8*22`(r3)
487 ld $A[4][3],`8*23`(r3)
488 ld $A[4][4],`8*24`(r3)
489
490 mr r3,r4
491 mr r4,r5
492 mr r5,r0
493
494 b .Loop_absorb
495
496.align 4
497.Loop_absorb:
498 $UCMP r4,r5 ; len < bsz?
499 blt .Labsorbed
500
501 sub r4,r4,r5 ; len -= bsz
502 srwi r5,r5,3
503 $PUSH r4,`$LOCALS+2*$SIZE_T`($sp) ; save len
504 mtctr r5
505 $DWORD_LE_LOAD ; *inp++
506 xor $A[0][0],$A[0][0],r0
507 bdz .Lprocess_block
508 $DWORD_LE_LOAD ; *inp++
509 xor $A[0][1],$A[0][1],r0
510 bdz .Lprocess_block
511 $DWORD_LE_LOAD ; *inp++
512 xor $A[0][2],$A[0][2],r0
513 bdz .Lprocess_block
514 $DWORD_LE_LOAD ; *inp++
515 xor $A[0][3],$A[0][3],r0
516 bdz .Lprocess_block
517 $DWORD_LE_LOAD ; *inp++
518 xor $A[0][4],$A[0][4],r0
519 bdz .Lprocess_block
520 $DWORD_LE_LOAD ; *inp++
521 xor $A[1][0],$A[1][0],r0
522 bdz .Lprocess_block
523 $DWORD_LE_LOAD ; *inp++
524 xor $A[1][1],$A[1][1],r0
525 bdz .Lprocess_block
526 $DWORD_LE_LOAD ; *inp++
527 xor $A[1][2],$A[1][2],r0
528 bdz .Lprocess_block
529 $DWORD_LE_LOAD ; *inp++
530 xor $A[1][3],$A[1][3],r0
531 bdz .Lprocess_block
532 $DWORD_LE_LOAD ; *inp++
533 xor $A[1][4],$A[1][4],r0
534 bdz .Lprocess_block
535 $DWORD_LE_LOAD ; *inp++
536 xor $A[2][0],$A[2][0],r0
537 bdz .Lprocess_block
538 $DWORD_LE_LOAD ; *inp++
539 xor $A[2][1],$A[2][1],r0
540 bdz .Lprocess_block
541 $DWORD_LE_LOAD ; *inp++
542 xor $A[2][2],$A[2][2],r0
543 bdz .Lprocess_block
544 $DWORD_LE_LOAD ; *inp++
545 xor $A[2][3],$A[2][3],r0
546 bdz .Lprocess_block
547 $DWORD_LE_LOAD ; *inp++
548 xor $A[2][4],$A[2][4],r0
549 bdz .Lprocess_block
550 $DWORD_LE_LOAD ; *inp++
551 xor $A[3][0],$A[3][0],r0
552 bdz .Lprocess_block
553 $DWORD_LE_LOAD ; *inp++
554 xor $A[3][1],$A[3][1],r0
555 bdz .Lprocess_block
556 $DWORD_LE_LOAD ; *inp++
557 xor $A[3][2],$A[3][2],r0
558 bdz .Lprocess_block
559 $DWORD_LE_LOAD ; *inp++
560 xor $A[3][3],$A[3][3],r0
561 bdz .Lprocess_block
562 $DWORD_LE_LOAD ; *inp++
563 xor $A[3][4],$A[3][4],r0
564 bdz .Lprocess_block
565 $DWORD_LE_LOAD ; *inp++
566 xor $A[4][0],$A[4][0],r0
567 bdz .Lprocess_block
568 $DWORD_LE_LOAD ; *inp++
569 xor $A[4][1],$A[4][1],r0
570 bdz .Lprocess_block
571 $DWORD_LE_LOAD ; *inp++
572 xor $A[4][2],$A[4][2],r0
573 bdz .Lprocess_block
574 $DWORD_LE_LOAD ; *inp++
575 xor $A[4][3],$A[4][3],r0
576 bdz .Lprocess_block
577 $DWORD_LE_LOAD ; *inp++
578 xor $A[4][4],$A[4][4],r0
579
580.Lprocess_block:
581 $PUSH r3,`$LOCALS+1*$SIZE_T`($sp) ; save inp
582
583 bl KeccakF1600_int
584
585 $POP r0,`$LOCALS+4*$SIZE_T`($sp) ; pull iotas[24]
586 $POP r5,`$LOCALS+3*$SIZE_T`($sp) ; restore bsz
587 $POP r4,`$LOCALS+2*$SIZE_T`($sp) ; restore len
588 $POP r3,`$LOCALS+1*$SIZE_T`($sp) ; restore inp
589 addic r0,r0,`-8*24` ; rewind iotas
590 $PUSH r0,`$LOCALS+4*$SIZE_T`($sp)
591
592 b .Loop_absorb
593
594.align 4
595.Labsorbed:
596 $POP r3,`$LOCALS+0*$SIZE_T`($sp)
597 std $A[0][0],`8*0`(r3) ; return A[5][5]
598 std $A[0][1],`8*1`(r3)
599 std $A[0][2],`8*2`(r3)
600 std $A[0][3],`8*3`(r3)
601 std $A[0][4],`8*4`(r3)
602 std $A[1][0],`8*5`(r3)
603 std $A[1][1],`8*6`(r3)
604 std $A[1][2],`8*7`(r3)
605 std $A[1][3],`8*8`(r3)
606 std $A[1][4],`8*9`(r3)
607 std $A[2][0],`8*10`(r3)
608 std $A[2][1],`8*11`(r3)
609 std $A[2][2],`8*12`(r3)
610 std $A[2][3],`8*13`(r3)
611 std $A[2][4],`8*14`(r3)
612 std $A[3][0],`8*15`(r3)
613 std $A[3][1],`8*16`(r3)
614 std $A[3][2],`8*17`(r3)
615 std $A[3][3],`8*18`(r3)
616 std $A[3][4],`8*19`(r3)
617 std $A[4][0],`8*20`(r3)
618 std $A[4][1],`8*21`(r3)
619 std $A[4][2],`8*22`(r3)
620 std $A[4][3],`8*23`(r3)
621 std $A[4][4],`8*24`(r3)
622
623 mr r3,r4 ; return value
624 $POP r0,`$FRAME+$LRSAVE`($sp)
625 $POP r14,`$FRAME-$SIZE_T*18`($sp)
626 $POP r15,`$FRAME-$SIZE_T*17`($sp)
627 $POP r16,`$FRAME-$SIZE_T*16`($sp)
628 $POP r17,`$FRAME-$SIZE_T*15`($sp)
629 $POP r18,`$FRAME-$SIZE_T*14`($sp)
630 $POP r19,`$FRAME-$SIZE_T*13`($sp)
631 $POP r20,`$FRAME-$SIZE_T*12`($sp)
632 $POP r21,`$FRAME-$SIZE_T*11`($sp)
633 $POP r22,`$FRAME-$SIZE_T*10`($sp)
634 $POP r23,`$FRAME-$SIZE_T*9`($sp)
635 $POP r24,`$FRAME-$SIZE_T*8`($sp)
636 $POP r25,`$FRAME-$SIZE_T*7`($sp)
637 $POP r26,`$FRAME-$SIZE_T*6`($sp)
638 $POP r27,`$FRAME-$SIZE_T*5`($sp)
639 $POP r28,`$FRAME-$SIZE_T*4`($sp)
640 $POP r29,`$FRAME-$SIZE_T*3`($sp)
641 $POP r30,`$FRAME-$SIZE_T*2`($sp)
642 $POP r31,`$FRAME-$SIZE_T*1`($sp)
643 mtlr r0
644 addi $sp,$sp,$FRAME
645 blr
646 .long 0
647 .byte 0,12,4,1,0x80,18,4,0
648 .long 0
649.size SHA3_absorb,.-SHA3_absorb
650___
# Emit the exported SHA3_squeeze. The incoming r3, r4, r5 and r6 are
# copied into the registers named below ($A_flat, $out, $len, $bsz); r3
# and r6 themselves then serve as the running read pointer into the state
# and the count of bytes still available in the current block.
#
# .Loop_squeeze loads the next state word with ldu. If fewer than 8 bytes
# are wanted it jumps to .Lsqueeze_tail; otherwise it stores the word
# byte by byte, least significant byte first, and subtracts 8 from $len
# (done when that hits zero) and from r6. Once r6 is no longer positive,
# KeccakF1600 is called on $A_flat and the read pointer and r6 are reset.
#
# .Lsqueeze_tail stores the remaining $len low-order bytes of the word
# using the count register.
# NOTE(review): with $len == 0 on entry the tail path loads the count
# register with 0 before a bdnz loop; presumably callers never request
# zero bytes - confirm against the C callers.
651{
# Callee-saved registers r28-r31 hold the arguments across the call to
# KeccakF1600; only these four (plus LR) are saved in the 10-slot frame.
652my ($A_flat,$out,$len,$bsz) = map("r$_",(28..31));
653$code.=<<___;
654.globl SHA3_squeeze
655.type SHA3_squeeze,\@function
656.align 5
657SHA3_squeeze:
658 $STU $sp,`-10*$SIZE_T`($sp)
659 mflr r0
660 $PUSH r28,`6*$SIZE_T`($sp)
661 $PUSH r29,`7*$SIZE_T`($sp)
662 $PUSH r30,`8*$SIZE_T`($sp)
663 $PUSH r31,`9*$SIZE_T`($sp)
664 $PUSH r0,`10*$SIZE_T+$LRSAVE`($sp)
665
666 mr $A_flat,r3
667 subi r3,r3,8 ; prepare for ldu
668 subi $out,r4,1 ; prepare for stbu
669 mr $len,r5
670 mr $bsz,r6
671 b .Loop_squeeze
672
673.align 4
674.Loop_squeeze:
675 ldu r0,8(r3)
676 ${UCMP}i $len,8
677 blt .Lsqueeze_tail
678
679 stb r0,1($out)
680 srdi r0,r0,8
681 stb r0,2($out)
682 srdi r0,r0,8
683 stb r0,3($out)
684 srdi r0,r0,8
685 stb r0,4($out)
686 srdi r0,r0,8
687 stb r0,5($out)
688 srdi r0,r0,8
689 stb r0,6($out)
690 srdi r0,r0,8
691 stb r0,7($out)
692 srdi r0,r0,8
693 stbu r0,8($out)
694
695 subic. $len,$len,8
696 beq .Lsqueeze_done
697
698 subic. r6,r6,8
699 bgt .Loop_squeeze
700
701 mr r3,$A_flat
702 bl KeccakF1600
703 subi r3,$A_flat,8 ; prepare for ldu
704 mr r6,$bsz
705 b .Loop_squeeze
706
707.align 4
708.Lsqueeze_tail:
709 mtctr $len
710.Loop_tail:
711 stbu r0,1($out)
712 srdi r0,r0,8
713 bdnz .Loop_tail
714
715.Lsqueeze_done:
716 $POP r0,`10*$SIZE_T+$LRSAVE`($sp)
717 $POP r28,`6*$SIZE_T`($sp)
718 $POP r29,`7*$SIZE_T`($sp)
719 $POP r30,`8*$SIZE_T`($sp)
720 $POP r31,`9*$SIZE_T`($sp)
721 mtlr r0
722 addi $sp,$sp,`10*$SIZE_T`
723 blr
724 .long 0
725 .byte 0,12,4,1,0x80,4,4,0
726 .long 0
727.size SHA3_squeeze,.-SHA3_squeeze
728___
729}
730
731# Ugly hack here, because PPC assembler syntax seems to vary too
732# much from platform to platform...
#
# PICmeup returns the address of the iotas table in r12 without any
# relocation: it saves LR in r0, uses bcl to the next instruction to
# capture that instruction's address in LR, copies it to r12, adds 64-8
# and restores LR. The capture point is 8 bytes into the routine, so r12
# ends up 64 bytes past PICmeup. The code plus its trailer occupies 9*4
# bytes and .space pads the rest, which puts iotas at exactly that
# 64-byte offset (PICmeup itself is 64-byte aligned via .align 6).
#
# iotas holds the 24 64-bit round constants that KeccakF1600_int reads
# one per round, followed by an identification string.
733$code.=<<___;
734.align 6
735PICmeup:
736 mflr r0
737 bcl 20,31,\$+4
738 mflr r12 ; vvvvvv "distance" between . and 1st data entry
739 addi r12,r12,`64-8`
740 mtlr r0
741 blr
742 .long 0
743 .byte 0,12,0x14,0,0,0,0,0
744 .space `64-9*4`
745.type iotas,\@object
746iotas:
747 .quad 0x0000000000000001
748 .quad 0x0000000000008082
749 .quad 0x800000000000808a
750 .quad 0x8000000080008000
751 .quad 0x000000000000808b
752 .quad 0x0000000080000001
753 .quad 0x8000000080008081
754 .quad 0x8000000000008009
755 .quad 0x000000000000008a
756 .quad 0x0000000000000088
757 .quad 0x0000000080008009
758 .quad 0x000000008000000a
759 .quad 0x000000008000808b
760 .quad 0x800000000000008b
761 .quad 0x8000000000008089
762 .quad 0x8000000000008003
763 .quad 0x8000000000008002
764 .quad 0x8000000000000080
765 .quad 0x000000000000800a
766 .quad 0x800000008000000a
767 .quad 0x8000000080008081
768 .quad 0x8000000000008080
769 .quad 0x0000000080000001
770 .quad 0x8000000080008008
771.size iotas,.-iotas
772.asciz "Keccak-1600 absorb and squeeze for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
773___
774
# Final pass: replace every `...` span in $code with the result of
# evaluating its contents as a Perl expression (this resolves the offset
# arithmetic written in the templates), then print the assembly to
# STDOUT - the pipe into $xlate opened earlier - and die if closing that
# pipe reports an error.
775$code =~ s/\`([^\`]*)\`/eval $1/gem;
776print $code;
777close STDOUT or die "error closing STDOUT: $!";
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette