VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/bn/asm/c64xplus-gf2m.pl@ 94081

最後變更 在這個檔案從94081是 91772,由 vboxsync 提交於 3 年 前

openssl-1.1.1l: Applied and adjusted our OpenSSL changes to 1.1.1l. bugref:10126

檔案大小: 4.0 KB
 
#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <[email protected]> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... The subroutine runs in 37 cycles, which is
# 4.5x faster than compiler-generated code. Though comparison is
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.

# Consume command-line arguments up to and including the first one that
# looks like an output file name ("<name>.<ext>"). Arguments in front of
# it are discarded; $output ends up false when no such argument exists.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Send everything printed from here on to that file. The three-argument
# form keeps the file name from being interpreted as part of the mode,
# and a failed open is fatal instead of being silently ignored. When no
# file name was found, STDOUT is left untouched.
if (defined($output) && $output ne "") {
    open(STDOUT,">",$output) or die "can't open $output: $!";
}
28
# Register assignments for the generated assembly. Each variable holds a
# register name that is interpolated into the instruction templates of
# this script. They are deliberately package globals: the mul_1x1_*
# subroutines and the top-level templates read them directly.

# Registers holding the five arguments of the generated function.
($rp,$a1,$a0,$b1,$b0)=qw(A4 B4 A6 B6 A8); # argument vector

# Low/high halfword of the first multiplicand and the four partial
# products computed from each half (A16..A20 and B16..B20).
($Alo,$Alox0,$Alox1,$Alox2,$Alox3)=map { "A$_" } 16..20;
($Ahi,$Ahix0,$Ahix1,$Ahix2,$Ahix3)=map { "B$_" } 16..20;

# The four bytes of the second multiplicand.
($B_0,$B_1,$B_2,$B_3)=qw(B5 A5 A7 B7);

# Staging registers for the operands of the 2nd and 3rd multiplication.
# They alias $Alo and $B_1 on purpose, so no additional registers are used.
($A,$B)=($Alo,$B_1);

# Register that the generated code loads with the byte mask 0xFF.
$xFF="B1";
36
# mul_1x1_upper(A, B)
#
# Appends to the global $code the first ("upper") half of one 1x1 word
# multiplication:
#   - B is split into four bytes ($B_0..$B_3) and A into two halfwords
#     ($Alo, $Ahi);
#   - every halfword is multiplied with every byte by XORMPY, leaving
#     the eight 16x8-bit partial products in $Alox0..$Alox3 and
#     $Ahix0..$Ahix3.
# Combining the partial products into a result is not done here; see
# mul_1x1_merged and mul_1x1_lower.
#
# Arguments are register names (strings). All other registers come from
# the file-level register variables. Nothing is returned.
#
# The template text is emitted verbatim and lines starting with "||"
# continue the preceding line's parallel group, so the order of the
# lines must not be changed.
sub mul_1x1_upper {
my ($A,$B)=@_;
$code.=<<___;
 EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|| AND $B,$xFF,$B_0
|| SHRU $B,24,$B_3
 SHRU $A,16, $Ahi ; smash $A to two halfwords
|| EXTU $A,16,16,$Alo

 XORMPY $Alo,$B_2,$Alox2 ; 16x8 bits multiplication
|| XORMPY $Ahi,$B_2,$Ahix2
|| EXTU $B,16,24,$B_1
 XORMPY $Alo,$B_0,$Alox0
|| XORMPY $Ahi,$B_0,$Ahix0
 XORMPY $Alo,$B_3,$Alox3
|| XORMPY $Ahi,$B_3,$Ahix3
 XORMPY $Alo,$B_1,$Alox1
|| XORMPY $Ahi,$B_1,$Ahix1
___
}
# mul_1x1_merged(OUTlo, OUThi, A, B)
#
# Appends to the global $code one interleaved stage that does two things
# at once:
#   - combines the eight partial products left in $Alox0..3/$Ahix0..3 by
#     the previous stage into the 64-bit result OUThi:OUTlo (the same
#     XOR/shift sequence that mul_1x1_lower emits);
#   - splits the new operands A and B and issues the XORMPY instructions
#     for the next multiplication (the same work as mul_1x1_upper).
#
# Because old and new partial products share registers, the new $Alox0
# product is first written to A1 and only copied into $Alox0 by the last
# instruction, after the old $Alox0 value has been consumed.
#
# Arguments are register names (strings). All other registers come from
# the file-level register variables. Nothing is returned.
#
# The template text is emitted verbatim and lines starting with "||"
# continue the preceding line's parallel group, so the order of the
# lines must not be changed.
sub mul_1x1_merged {
my ($OUTlo,$OUThi,$A,$B)=@_;
$code.=<<___;
 EXTU $B,8,24,$B_2 ; smash $B to 4 bytes
|| AND $B,$xFF,$B_0
|| SHRU $B,24,$B_3
 SHRU $A,16, $Ahi ; smash $A to two halfwords
|| EXTU $A,16,16,$Alo

 XOR $Ahix0,$Alox2,$Ahix0
|| MV $Ahix2,$OUThi
|| XORMPY $Alo,$B_2,$Alox2
 XORMPY $Ahi,$B_2,$Ahix2
|| EXTU $B,16,24,$B_1
|| XORMPY $Alo,$B_0,A1 ; $Alox0
 XOR $Ahix1,$Alox3,$Ahix1
|| SHL $Ahix0,16,$OUTlo
|| SHRU $Ahix0,16,$Ahix0
 XOR $Alox0,$OUTlo,$OUTlo
|| XOR $Ahix0,$OUThi,$OUThi
|| XORMPY $Ahi,$B_0,$Ahix0
|| XORMPY $Alo,$B_3,$Alox3
|| SHL $Alox1,8,$Alox1
|| SHL $Ahix3,8,$Ahix3
 XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix3,$OUThi,$OUThi
|| XORMPY $Ahi,$B_3,$Ahix3
|| SHL $Ahix1,24,$Alox1
|| SHRU $Ahix1,8, $Ahix1
 XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix1,$OUThi,$OUThi
|| XORMPY $Alo,$B_1,$Alox1
|| XORMPY $Ahi,$B_1,$Ahix1
|| MV A1,$Alox0
___
}
# mul_1x1_lower(OUTlo, OUThi)
#
# Appends to the global $code the final ("lower") half of one 1x1 word
# multiplication: the eight partial products left in $Alox0..$Alox3 and
# $Ahix0..$Ahix3 by the preceding stage are shifted to their bit
# positions and XORed together into the 64-bit result OUThi:OUTlo.
# Several of the partial-product registers are overwritten in the process.
#
# Arguments are register names (strings). All other registers come from
# the file-level register variables. Nothing is returned.
#
# The template text is emitted verbatim, including the commented-out
# ";NOP" line. Lines starting with "||" continue the preceding line's
# parallel group, so the order of the lines must not be changed.
sub mul_1x1_lower {
my ($OUTlo,$OUThi)=@_;
$code.=<<___;
 ;NOP
 XOR $Ahix0,$Alox2,$Ahix0
|| MV $Ahix2,$OUThi
 NOP
 XOR $Ahix1,$Alox3,$Ahix1
|| SHL $Ahix0,16,$OUTlo
|| SHRU $Ahix0,16,$Ahix0
 XOR $Alox0,$OUTlo,$OUTlo
|| XOR $Ahix0,$OUThi,$OUThi
|| SHL $Alox1,8,$Alox1
|| SHL $Ahix3,8,$Ahix3
 XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix3,$OUThi,$OUThi
|| SHL $Ahix1,24,$Alox1
|| SHRU $Ahix1,8, $Ahix1
 XOR $Alox1,$OUTlo,$OUTlo
|| XOR $Ahix1,$OUThi,$OUThi
___
}
# Assemble the complete bn_GF2m_mul_2x2 routine in $code and print it.
#
# The 2x2 word product is built from three 1x1 word multiplications
# (Karatsuba-style): a0·b0, a1·b1 and (a0+a1)·(b0+b1), where "+" is XOR.
# The stages are chained so that finishing one product overlaps with
# starting the next:
#   mul_1x1_upper                starts a0·b0
#   mul_1x1_merged(A28,B28,...)  finishes a0·b0 in B28:A28, starts a1·b1
#   mul_1x1_merged(A31,B31,...)  finishes a1·b1 in B31:A31, starts
#                                (a0+a1)·(b0+b1)
#   mul_1x1_lower(A30,B30)       finishes (a0+a1)·(b0+b1) in B30:A30
#
# Each template that follows a subroutine call begins with "||", i.e. it
# continues the parallel group of the last line that the subroutine
# emitted. The order of the calls and templates must therefore not be
# changed.

# Section directive, symbol naming for EABI/non-EABI assemblers, entry
# label, and the load of the 0xFF byte mask.
$code.=<<___;
 .text

 .if .ASSEMBLER_VERSION<7000000
 .asg 0,__TI_EABI__
 .endif
 .if __TI_EABI__
 .asg bn_GF2m_mul_2x2,_bn_GF2m_mul_2x2
 .endif

 .global _bn_GF2m_mul_2x2
_bn_GF2m_mul_2x2:
 .asmfunc
 MVK 0xFF,$xFF
___
 mul_1x1_upper($a0,$b0); # a0·b0
# Stage a1/b1 in the $A/$B registers for the next multiplication.
$code.=<<___;
|| MV $b1,$B
 MV $a1,$A
___
 mul_1x1_merged("A28","B28",$A,$B); # a0·b0/a1·b1
# Stage a0+a1 and b0+b1 for the third multiplication.
$code.=<<___;
|| XOR $b0,$b1,$B
 XOR $a0,$a1,$A
___
 mul_1x1_merged("A31","B31",$A,$B); # a1·b1/(a0+a1)·(b0+b1)
$code.=<<___;
 XOR A28,A31,A29
|| XOR B28,B31,B29 ; a0·b0+a1·b1
___
 mul_1x1_lower("A30","B30"); # (a0+a1)·(b0+b1)
# Fold the middle term into the result and store the four result words
# to rp[0..3]. The BNOP B3 is presumably the return branch, with the
# remaining instructions executing before it takes effect -- confirm
# against the C64x+ instruction set reference.
$code.=<<___;
|| BNOP B3
 XOR A29,A30,A30
|| XOR B29,B30,B30 ; (a0+a1)·(b0+b1)-a0·b0-a1·b1
 XOR B28,A30,A30
|| STW A28,*${rp}[0]
 XOR B30,A31,A31
|| STW A30,*${rp}[1]
 STW A31,*${rp}[2]
 STW B31,*${rp}[3]
 .endasmfunc
___

print $code;
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette