1 | #! /usr/bin/env perl
|
---|
2 | # Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | #
|
---|
4 | # Licensed under the Apache License 2.0 (the "License"). You may not use
|
---|
5 | # this file except in compliance with the License. You can obtain a copy
|
---|
6 | # in the file LICENSE in the source distribution or at
|
---|
7 | # https://www.openssl.org/source/license.html
|
---|
8 | #
|
---|
9 | # ====================================================================
|
---|
10 | # Written by Amitay Isaacs <[email protected]> and Martin Schwenke
|
---|
11 | # <[email protected]> for the OpenSSL project.
|
---|
12 | # ====================================================================
|
---|
13 | #
|
---|
14 | # p521 lower-level primitives for PPC64 using vector instructions.
|
---|
15 | #
|
---|
16 |
|
---|
17 | use strict;
|
---|
18 | use warnings;
|
---|
19 |
|
---|
# Command line: <flavour> [<ignored arguments>...] [<output file>]
#
# The first argument is the perlasm "flavour" and is handed to
# ppc-xlate.pl unchanged.
my $flavour = shift;
my $output = "";
# Discard further arguments until one looks like a file name ("name.ext").
# If none does, fall back to "-".
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.
my ($xlate, $dir);
# When $0 has no directory component the match fails; use an empty prefix
# instead of interpolating an undefined $dir (a warning under "use warnings").
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# The output name is quoted so a path containing spaces reaches the child
# as one argument, and a failure to start the pipe is reported immediately
# rather than surfacing only at the final close.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Accumulates the generated assembly text; printed at the end of the file.
my $code = "";

# General purpose registers used by the generators:
#   $sp     - r1, adjusted by push_vrs/pop_vrs
#   $outp   - r3, base address used when storing the result limbs
#   $savelr - r10, not referenced by the generators in this file
#   $savesp - r12, scratch copy of the caller's $sp in push_vrs/pop_vrs
my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

# Register that each routine clears with vspltisw and then uses as the
# zero addend of the first vmsumudm of every accumulator.
my $vzero = "v32";
|
---|
41 |
|
---|
# Append the opening of a routine to $code: a .globl directive for the
# symbol, an ".align 5" directive, the entry label and one blank line.
#
#   $name - symbol name of the routine being started
sub startproc($)
{
    my $name = shift;

    $code .= join("\n",
        ".globl ${name}",
        ".align 5",
        "${name}:",
        "",
        "");
}
|
---|
53 |
|
---|
# Append the end of a routine to $code: the returning "blr", a .size
# directive measuring from the entry label to the current location, and
# one blank line.
#
#   $name - symbol name passed to the matching startproc() call
sub endproc($)
{
    my $name = shift;

    $code .= "blr\n"
           . ".size ${name},.-${name}\n"
           . "\n";
}
|
---|
64 |
|
---|
65 |
|
---|
# Emit code that opens a stack frame and saves registers $min..$max in it.
#
# The caller's $sp is first copied to $savesp, then "stdu" moves $sp down
# by 16 * (number of registers + 1) bytes.  The backticked size expression
# is left in the text and evaluated by the substitution pass at the end of
# this file.  Each register is stored with "stxv" in a 16-byte slot below
# the old stack pointer: $max at -16, and so on down to $min.
#
#   $min, $max - first and last register number to save, inclusive; they
#                are emitted as bare numbers (presumably VSR numbers -
#                confirm against ppc-xlate.pl)
sub push_vrs($$)
{
    my ($first, $last) = @_;
    my $nregs = $last - $first + 1;

    $code .= "mr $savesp,$sp\n"
           . "stdu $sp,-16*`$nregs+1`($sp)\n"
           . "\n";

    foreach my $vsr ($first .. $last) {
        # Slot index counted downwards from the caller's stack pointer.
        my $slot = $last - $vsr + 1;
        $code .= "stxv $vsr,-16*$slot($savesp)\n";
    }

    $code .= "\n";
}
|
---|
89 |
|
---|
# Emit code that undoes push_vrs($min, $max).
#
# The doubleword at 0($sp) is loaded into $savesp (push_vrs's "stdu" put
# the caller's stack pointer there).  Registers $min..$max are reloaded
# with "lxv" from the same slots push_vrs wrote, and finally $sp is
# restored from $savesp.
#
#   $min, $max - the same register range that was given to push_vrs
sub pop_vrs($$)
{
    my ($first, $last) = @_;

    $code .= "ld $savesp,0($sp)\n";

    foreach my $vsr ($first .. $last) {
        # Same slot numbering as in push_vrs.
        my $slot = $last - $vsr + 1;
        $code .= "lxv $vsr,-16*$slot($savesp)\n";
    }

    $code .= "mr $sp,$savesp\n"
           . "\n";
}
|
---|
109 |
|
---|
# Emit nine "lxsd" loads: element $i of the register list is loaded from
# byte offset 8 * $i relative to $pointer, followed by one blank line.
#
#   $pointer  - name of the general purpose register holding the address
#   $reg_list - reference to an array of (at least) nine register names
sub load_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    foreach my $idx (0 .. 8) {
        my $disp = 8 * $idx;
        my $vsr  = $reg_list->[$idx];
        $code .= "lxsd $vsr,$disp($pointer)\n";
    }

    $code .= "\n";
}
|
---|
125 |
|
---|
# Emit nine "stxv" stores: element $i of the register list is stored at
# byte offset 16 * $i relative to $pointer, followed by one blank line.
#
#   $pointer  - name of the general purpose register holding the address
#   $reg_list - reference to an array of (at least) nine register names
sub store_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    foreach my $idx (0 .. 8) {
        my $disp = 16 * $idx;
        my $vsr  = $reg_list->[$idx];
        $code .= "stxv $vsr,$disp($pointer)\n";
    }

    $code .= "\n";
}
|
---|
141 |
|
---|
# Directives that open the generated file, ahead of the first routine.
$code .= ".machine \"any\"\n"
       . ".text\n"
       . "\n";
|
---|
147 |
|
---|
{
    # Registers shared by the multiply and square generators:
    #   $t1..$t4    - scratch vectors holding pairs of limbs
    #   $zero, $one - GPRs loaded with 0 and 1 to build the shift vector
    #   @out        - the nine accumulators, stored by store_vrs at the end
    my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v44", "v54");
    my ($zero, $one) = ("r8", "r9");
    my @out = map { "v" . $_ } (55 .. 63);

    {
        #
        # p521_felem_mul
        #
        # Writing a[i] for the limbs loaded from $in1p and b[j] for those
        # loaded from $in2p, the instruction stream below accumulates
        #
        #   out[k] = sum(a[i]*b[j], i+j == k) + 2*sum(a[i]*b[j], i+j == k+9)
        #
        # for k = 0..8.  This reading assumes the Power ISA semantics of
        # vmsumudm (sum of the two doubleword products plus the addend) and
        # of xxpermdi ...,0b00 (pairing of the first doubleword of each
        # source).
        # NOTE(review): presumably called from C as (out, in1, in2) in
        # r3/r4/r5 - confirm against the caller.
        #

        my ($in1p, $in2p) = ("r4", "r5");
        my @in1 = map { "v" . $_ } (45 .. 53);
        my @in2 = map { "v" . $_ } (35 .. 43);

        startproc("p521_felem_mul");

        push_vrs(52, 63);

        $code .= "vspltisw $vzero,0\n"
               . "\n";

        load_vrs($in1p, \@in1);
        load_vrs($in2p, \@in2);

        # Non-wrapping products (i + j == k), first pass: every accumulator
        # is started from $vzero using the pair a[0],a[1]; the pair
        # a[2],a[3] and the lone a[2]*b[0] and a[4]*b[0] terms are added on
        # top.
        $code.=<<___;
vmsumudm $out[0],$in1[0],$in2[0],$vzero

xxpermdi $t1,$in1[0],$in1[1],0b00
xxpermdi $t2,$in2[1],$in2[0],0b00
vmsumudm $out[1],$t1,$t2,$vzero

xxpermdi $t2,$in2[2],$in2[1],0b00
vmsumudm $out[2],$t1,$t2,$vzero
vmsumudm $out[2],$in1[2],$in2[0],$out[2]

xxpermdi $t2,$in2[3],$in2[2],0b00
vmsumudm $out[3],$t1,$t2,$vzero
xxpermdi $t3,$in1[2],$in1[3],0b00
xxpermdi $t4,$in2[1],$in2[0],0b00
vmsumudm $out[3],$t3,$t4,$out[3]

xxpermdi $t2,$in2[4],$in2[3],0b00
vmsumudm $out[4],$t1,$t2,$vzero
xxpermdi $t4,$in2[2],$in2[1],0b00
vmsumudm $out[4],$t3,$t4,$out[4]
vmsumudm $out[4],$in1[4],$in2[0],$out[4]

xxpermdi $t2,$in2[5],$in2[4],0b00
vmsumudm $out[5],$t1,$t2,$vzero
xxpermdi $t4,$in2[3],$in2[2],0b00
vmsumudm $out[5],$t3,$t4,$out[5]

xxpermdi $t2,$in2[6],$in2[5],0b00
vmsumudm $out[6],$t1,$t2,$vzero
xxpermdi $t4,$in2[4],$in2[3],0b00
vmsumudm $out[6],$t3,$t4,$out[6]

xxpermdi $t2,$in2[7],$in2[6],0b00
vmsumudm $out[7],$t1,$t2,$vzero
xxpermdi $t4,$in2[5],$in2[4],0b00
vmsumudm $out[7],$t3,$t4,$out[7]

xxpermdi $t2,$in2[8],$in2[7],0b00
vmsumudm $out[8],$t1,$t2,$vzero
xxpermdi $t4,$in2[6],$in2[5],0b00
vmsumudm $out[8],$t3,$t4,$out[8]

___

        # Non-wrapping products, second pass: the terms involving
        # a[4]..a[8], which only reach out[5]..out[8].
        $code.=<<___;
xxpermdi $t1,$in1[4],$in1[5],0b00
xxpermdi $t2,$in2[1],$in2[0],0b00
vmsumudm $out[5],$t1,$t2,$out[5]

xxpermdi $t2,$in2[2],$in2[1],0b00
vmsumudm $out[6],$t1,$t2,$out[6]
vmsumudm $out[6],$in1[6],$in2[0],$out[6]

xxpermdi $t2,$in2[3],$in2[2],0b00
vmsumudm $out[7],$t1,$t2,$out[7]
xxpermdi $t3,$in1[6],$in1[7],0b00
xxpermdi $t4,$in2[1],$in2[0],0b00
vmsumudm $out[7],$t3,$t4,$out[7]

xxpermdi $t2,$in2[4],$in2[3],0b00
vmsumudm $out[8],$t1,$t2,$out[8]
xxpermdi $t4,$in2[2],$in2[1],0b00
vmsumudm $out[8],$t3,$t4,$out[8]
vmsumudm $out[8],$in1[8],$in2[0],$out[8]

___

        # Build the shift-count vector in $t1 from $one and $zero; $t1 is
        # free again at this point.
        $code.=<<___;
li $zero,0
li $one,1
mtvsrdd $t1,$one,$zero
___

        # Shift every b[j] register by $t1, in place; the remaining
        # products use these shifted values.
        $code .= "vsld $in2[$_],$in2[$_],$t1\n" for (0 .. 8);

        # Wrapping products (i + j == k + 9) that use shifted b[5]..b[8].
        $code.=<<___;

vmsumudm $out[7],$in1[8],$in2[8],$out[7]

xxpermdi $t2,$in2[8],$in2[7],0b00
xxpermdi $t1,$in1[7],$in1[8],0b00
vmsumudm $out[6],$t1,$t2,$out[6]

xxpermdi $t1,$in1[6],$in1[7],0b00
vmsumudm $out[5],$t1,$t2,$out[5]
vmsumudm $out[5],$in1[8],$in2[6],$out[5]

xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out[4],$t1,$t2,$out[4]
xxpermdi $t4,$in2[6],$in2[5],0b00
xxpermdi $t3,$in1[7],$in1[8],0b00
vmsumudm $out[4],$t3,$t4,$out[4]

xxpermdi $t1,$in1[4],$in1[5],0b00
vmsumudm $out[3],$t1,$t2,$out[3]
xxpermdi $t3,$in1[6],$in1[7],0b00
vmsumudm $out[3],$t3,$t4,$out[3]
vmsumudm $out[3],$in1[8],$in2[4],$out[3]

xxpermdi $t1,$in1[3],$in1[4],0b00
vmsumudm $out[2],$t1,$t2,$out[2]
xxpermdi $t3,$in1[5],$in1[6],0b00
vmsumudm $out[2],$t3,$t4,$out[2]

xxpermdi $t1,$in1[2],$in1[3],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
xxpermdi $t3,$in1[4],$in1[5],0b00
vmsumudm $out[1],$t3,$t4,$out[1]

xxpermdi $t1,$in1[1],$in1[2],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t3,$in1[3],$in1[4],0b00
vmsumudm $out[0],$t3,$t4,$out[0]

___

        # Wrapping products that use shifted b[1]..b[4]; these only reach
        # out[0]..out[2].
        $code.=<<___;
xxpermdi $t2,$in2[4],$in2[3],0b00
xxpermdi $t1,$in1[7],$in1[8],0b00
vmsumudm $out[2],$t1,$t2,$out[2]

xxpermdi $t1,$in1[6],$in1[7],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
vmsumudm $out[1],$in1[8],$in2[2],$out[1]

xxpermdi $t1,$in1[5],$in1[6],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t4,$in2[2],$in2[1],0b00
xxpermdi $t3,$in1[7],$in1[8],0b00
vmsumudm $out[0],$t3,$t4,$out[0]

___

        store_vrs($outp, \@out);

        pop_vrs(52, 63);

        endproc("p521_felem_mul");
    }

    {
        #
        # p521_felem_square
        #
        # Same accumulation as p521_felem_mul with both operands equal to
        # the limbs a[i] loaded from $inp.  Symmetric cross products are
        # computed once against pre-shifted copies kept in @inx2: shifted
        # once for the non-wrapping terms, and shifted a second time
        # (elements 5..8 only) for the wrapping terms.
        #

        my ($inp) = ("r4");
        my @in = map { "v" . $_ } (45 .. 53);
        my @inx2 = map { "v" . $_ } (35 .. 43);

        startproc("p521_felem_square");

        push_vrs(52, 63);

        $code .= "vspltisw $vzero,0\n"
               . "\n";

        load_vrs($inp, \@in);

        # Build the shift-count vector in $t1 from $one and $zero.
        $code.=<<___;
li $zero,0
li $one,1
mtvsrdd $t1,$one,$zero
___

        # inx2[i] = in[i] shifted by $t1.
        $code .= "vsld $inx2[$_],$in[$_],$t1\n" for (0 .. 8);

        # Non-wrapping terms (i + j == k): squares a[i]*a[i] use @in on both
        # sides, cross terms pair a[i] with inx2[j].
        $code.=<<___;
vmsumudm $out[0],$in[0],$in[0],$vzero

vmsumudm $out[1],$in[0],$inx2[1],$vzero

xxpermdi $t1,$in[0],$in[1],0b00
xxpermdi $t2,$inx2[2],$in[1],0b00
vmsumudm $out[2],$t1,$t2,$vzero

xxpermdi $t2,$inx2[3],$inx2[2],0b00
vmsumudm $out[3],$t1,$t2,$vzero

xxpermdi $t2,$inx2[4],$inx2[3],0b00
vmsumudm $out[4],$t1,$t2,$vzero
vmsumudm $out[4],$in[2],$in[2],$out[4]

xxpermdi $t2,$inx2[5],$inx2[4],0b00
vmsumudm $out[5],$t1,$t2,$vzero
vmsumudm $out[5],$in[2],$inx2[3],$out[5]

xxpermdi $t2,$inx2[6],$inx2[5],0b00
vmsumudm $out[6],$t1,$t2,$vzero
xxpermdi $t3,$in[2],$in[3],0b00
xxpermdi $t4,$inx2[4],$in[3],0b00
vmsumudm $out[6],$t3,$t4,$out[6]

xxpermdi $t2,$inx2[7],$inx2[6],0b00
vmsumudm $out[7],$t1,$t2,$vzero
xxpermdi $t4,$inx2[5],$inx2[4],0b00
vmsumudm $out[7],$t3,$t4,$out[7]

xxpermdi $t2,$inx2[8],$inx2[7],0b00
vmsumudm $out[8],$t1,$t2,$vzero
xxpermdi $t4,$inx2[6],$inx2[5],0b00
vmsumudm $out[8],$t3,$t4,$out[8]
vmsumudm $out[8],$in[4],$in[4],$out[8]

___

        # Wrapping squares a[i]*a[i] with 2*i == k + 9: these need the
        # once-shifted inx2[i], so they are issued before the second shift.
        # $t1 was reused as a scratch pair above, so the shift-count vector
        # is rebuilt afterwards.
        $code.=<<___;
vmsumudm $out[1],$in[5],$inx2[5],$out[1]

vmsumudm $out[3],$in[6],$inx2[6],$out[3]

vmsumudm $out[5],$in[7],$inx2[7],$out[5]

vmsumudm $out[7],$in[8],$inx2[8],$out[7]

mtvsrdd $t1,$one,$zero
___

        # Second shift of inx2[5..8], used by the wrapping cross terms.
        $code .= "vsld $inx2[$_],$inx2[$_],$t1\n" for (5 .. 8);

        # Wrapping cross terms (i + j == k + 9, i != j).
        $code.=<<___;

vmsumudm $out[6],$in[7],$inx2[8],$out[6]

vmsumudm $out[5],$in[6],$inx2[8],$out[5]

xxpermdi $t2,$inx2[8],$inx2[7],0b00
xxpermdi $t1,$in[5],$in[6],0b00
vmsumudm $out[4],$t1,$t2,$out[4]

xxpermdi $t1,$in[4],$in[5],0b00
vmsumudm $out[3],$t1,$t2,$out[3]

xxpermdi $t1,$in[3],$in[4],0b00
vmsumudm $out[2],$t1,$t2,$out[2]
vmsumudm $out[2],$in[5],$inx2[6],$out[2]

xxpermdi $t1,$in[2],$in[3],0b00
vmsumudm $out[1],$t1,$t2,$out[1]
vmsumudm $out[1],$in[4],$inx2[6],$out[1]

xxpermdi $t1,$in[1],$in[2],0b00
vmsumudm $out[0],$t1,$t2,$out[0]
xxpermdi $t2,$inx2[6],$inx2[5],0b00
xxpermdi $t1,$in[3],$in[4],0b00
vmsumudm $out[0],$t1,$t2,$out[0]

___

        store_vrs($outp, \@out);

        pop_vrs(52, 63);

        endproc("p521_felem_square");
    }
}
|
---|
433 |
|
---|
# Replace every `...` placeholder in the generated text with the result of
# evaluating its contents as Perl (push_vrs leaves its frame-size
# arithmetic in this form).
$code =~ s/\`([^\`]*)\`/eval $1/gem;

# STDOUT is the pipe into ppc-xlate.pl opened near the top of the file.
# Closing it is checked so that a failure in the pipe is reported.
print $code;
close(STDOUT) or die "error closing STDOUT: $!";
|
---|