VirtualBox

source: vbox/trunk/src/libs/openssl-3.3.2/crypto/md5/asm/md5-loongarch64.pl@ 108403

最後變更 在這個檔案從108403是 108206,由 vboxsync 提交於 6 週 前

openssl-3.3.2: Exported all files to OSE and removed .scm-settings. bugref:10757

  • 屬性 svn:eol-style 設為 LF
  • 屬性 svn:executable 設為 *
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 11.0 KB
 
1#! /usr/bin/env perl
2# Author: Min Zhou <[email protected]>
3# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
4#
5# Licensed under the OpenSSL license (the "License"). You may not use
6# this file except in compliance with the License. You can obtain a copy
7# in the file LICENSE in the source distribution or at
8# https://www.openssl.org/source/license.html
9
10# Reference to crypto/md5/asm/md5-x86_64.pl
11# MD5 optimized for LoongArch.
12
use strict;
use warnings;

my $code;

# Symbolic names for the LoongArch64 integer registers referenced by the
# generated assembly ($r0..$r21 plus $r22/fp); interpolated into heredocs.
my ($zero,$ra,$tp,$sp,$fp)=map("\$r$_",(0..3,22));
my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$r$_",(4..11));
my ($t0,$t1,$t2,$t3,$t4,$t5,$t6,$t7,$t8,$x)=map("\$r$_",(12..21));

# The output file is the last command-line argument that looks like a
# filename; without one, the generated assembly goes to stdout.
my $output;
for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
# Three-arg open with an error check; the previous two-arg, unchecked
# `open STDOUT,">$output"` would fail silently (and tried to open an
# empty path when no output argument was supplied).
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
# round1_step() does:
# dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s)
# where the round-1 boolean function F(x,y,z) = (x & y) | (~x & z) is
# computed in three instructions as z ^ (x & (y ^ z)).
# Cross-step invariants held in temporaries (set up by the previous step):
# $t1 = y ^ z
# $t2 = dst + X[k_next]
# Arguments:
# $pos - -1: first step of the round (emit the preamble loads);
#         1: last step (emit the $t0/$t1 setup round 2's G() needs);
#         0: any middle step.
# $dst,$x,$y,$z - registers holding the rotating A/B/C/D state words.
# $k_next - message-word index X[] prefetched for the following step.
# $T_i - 32-bit additive constant; $s - left-rotation amount.
sub round1_step
{
 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
 # Split the constant into high 20 bits (lu12i.w) and low 12 bits (ori).
 my $T_i_h = ($T_i & 0xfffff000) >> 12;
 my $T_i_l = $T_i & 0xfff;

# In LoongArch we have to use two instructions of lu12i.w and ori to load a
# 32-bit immediate into a general register. Meanwhile, the instruction lu12i.w
# treats the 20-bit immediate as a signed number. So if the T_i_h is greater
# than or equal to (1<<19), we need provide lu12i.w a corresponding negative
# number whose complement equals to the sign extension of T_i_h.

# The details of the instruction lu12i.w can be found as following:
# https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_lu12i_w_lu32i_d_lu52i_d

 $T_i_h = -((1<<32) - (0xfff00000 | $T_i_h)) if ($T_i_h >= (1<<19));

 # First step only: load X[0] and seed the invariants $t1 and $t2.
 $code .= " ld.w $t0,$a1,0 /* (NEXT STEP) X[0] */\n" if ($pos == -1);
 $code .= " xor $t1,$y,$z /* y ^ z */\n" if ($pos == -1);
 $code .= " add.w $t2,$dst,$t0 /* dst + X[k] */\n" if ($pos == -1);
 $code .= <<EOF;
 lu12i.w $t8,$T_i_h /* load bits [31:12] of constant */
 and $t1,$x,$t1 /* x & ... */
 ori $t8,$t8,$T_i_l /* load bits [11:0] of constant */
 xor $t1,$z,$t1 /* z ^ ... */
 add.w $t7,$t2,$t8 /* dst + X[k] + Const */
 ld.w $t0,$a1,$k_next*4 /* (NEXT STEP) X[$k_next] */
 add.w $dst,$t7,$t1 /* dst += ... */
 add.w $t2,$z,$t0 /* (NEXT STEP) dst + X[$k_next] */
EOF

 # Rotate left by $s, implemented as a rotate right by 32-$s.
 $code .= " rotri.w $dst,$dst,32-$s /* dst <<< s */\n";
 if ($pos != 1) {
 $code .= " xor $t1,$x,$y /* (NEXT STEP) y ^ z */\n";
 } else {
 # Last step: round 2's G() wants $t0 = z and $t1 = ~z instead.
 $code .= " move $t0,$a7 /* (NEXT ROUND) $t0 = z' (copy of z) */\n";
 $code .= " nor $t1,$zero,$a7 /* (NEXT ROUND) $t1 = not z' (copy of not z) */\n";
 }
 $code .= " add.w $dst,$dst,$x /* dst += x */\n";
}
69
# round2_step() does:
# dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s)
# where the round-2 boolean function G(x,y,z) = (x & z) | (y & ~z).
# Cross-step invariants held in temporaries (set up by the previous step,
# or by the last round1_step for the first call):
# $t0 = z' (copy of z for the next step)
# $t1 = not z' (copy of not z for the next step)
# $t2 = dst + X[k_next]
# $pos: 1 for the last step of the round (emits the $t1 = y ^ z setup that
# round 3's H() needs); -1 and 0 behave identically here because the
# preamble was already emitted by round 1's final step.
sub round2_step
{
 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
 # Split the 32-bit constant for the lu12i.w/ori pair; sign-adjust the
 # high part for lu12i.w's signed 20-bit immediate (see round1_step).
 my $T_i_h = ($T_i & 0xfffff000) >> 12;
 my $T_i_l = $T_i & 0xfff;
 $T_i_h = -((1<<32) - (0xfff00000 | $T_i_h)) if ($T_i_h >= (1<<19));

 $code .= <<EOF;
 lu12i.w $t8,$T_i_h /* load bits [31:12] of Constant */
 and $t0,$x,$t0 /* x & z */
 ori $t8,$t8,$T_i_l /* load bits [11:0] of Constant */
 and $t1,$y,$t1 /* y & (not z) */
 add.w $t7,$t2,$t8 /* dst + X[k] + Const */
 or $t1,$t0,$t1 /* (y & (not z)) | (x & z) */
 ld.w $t0,$a1,$k_next*4 /* (NEXT STEP) X[$k_next] */
 add.w $dst,$t7,$t1 /* dst += ... */
 add.w $t2,$z,$t0 /* (NEXT STEP) dst + X[$k_next] */
EOF

 # Rotate left by $s via rotate right by 32-$s.
 $code .= " rotri.w $dst,$dst,32-$s /* dst <<< s */\n";
 if ($pos != 1) {
 $code .= " move $t0,$y /* (NEXT STEP) z' = $y */\n";
 $code .= " nor $t1,$zero,$y /* (NEXT STEP) not z' = not $y */\n";
 } else {
 # Last step: round 3's H() wants $t1 = y ^ z instead.
 $code .= " xor $t1,$a6,$a7 /* (NEXT ROUND) $t1 = y ^ z */\n";
 }
 $code .= " add.w $dst,$dst,$x /* dst += x */\n";
}
103
# round3_step() does:
# dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s)
# where the round-3 boolean function H(x,y,z) = x ^ y ^ z, computed here
# as x ^ ($t1) with the invariant $t1 = y ^ z carried between steps.
# Cross-step invariants (set up by the previous step, or by the last
# round2_step for the first call):
# $t1 = y ^ z
# $t2 = dst + X[k_next]
# $pos: 1 for the last step of the round (emits the $t1 = ~z setup that
# round 4's I() needs); -1 and 0 behave identically here.
sub round3_step
{
 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
 # Split the 32-bit constant for the lu12i.w/ori pair; sign-adjust the
 # high part for lu12i.w's signed 20-bit immediate (see round1_step).
 my $T_i_h = ($T_i & 0xfffff000) >> 12;
 my $T_i_l = $T_i & 0xfff;
 $T_i_h = -((1<<32) - (0xfff00000 | $T_i_h)) if ($T_i_h >= (1<<19));

 $code .= <<EOF;
 lu12i.w $t8,$T_i_h /* load bits [31:12] of Constant */
 xor $t1,$x,$t1 /* x ^ ... */
 ori $t8,$t8,$T_i_l /* load bits [11:0] of Constant */
 add.w $t7,$t2,$t8 /* dst + X[k] + Const */
 ld.w $t0,$a1,$k_next*4 /* (NEXT STEP) X[$k_next] */
 add.w $dst,$t7,$t1 /* dst += ... */
 add.w $t2,$z,$t0 /* (NEXT STEP) dst + X[$k_next] */
EOF

 # Rotate left by $s via rotate right by 32-$s.
 $code .= " rotri.w $dst,$dst,32-$s /* dst <<< s */\n";
 if ($pos != 1) {
 $code .= " xor $t1,$x,$y /* (NEXT STEP) y ^ z */\n";
 } else {
 # Last step: round 4's I() wants $t1 = ~z instead.
 $code .= " nor $t1,$zero,$a7 /* (NEXT ROUND) $t1 = not z */\n";
 }
 $code .= " add.w $dst,$dst,$x /* dst += x */\n";
}
133
# round4_step() does:
# dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s)
# where the round-4 boolean function I(x,y,z) = y ^ (x | ~z), computed
# with the invariant $t1 = ~z carried between steps.
# Cross-step invariants (set up by the previous step, or by the last
# round3_step for the first call):
# $t1 = not z' (copy of not z for the next step)
# $t2 = dst + X[k_next]
# $pos: 1 for the final step of the whole 64-step block; that step also
# folds the saved A and D back in and advances the data pointer for the
# next loop iteration. -1 and 0 behave identically here.
sub round4_step
{
 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
 # Split the 32-bit constant for the lu12i.w/ori pair; sign-adjust the
 # high part for lu12i.w's signed 20-bit immediate (see round1_step).
 my $T_i_h = ($T_i & 0xfffff000) >> 12;
 my $T_i_l = $T_i & 0xfff;
 $T_i_h = -((1<<32) - (0xfff00000 | $T_i_h)) if ($T_i_h >= (1<<19));

 $code .= <<EOF;
 lu12i.w $t8,$T_i_h /* load bits [31:12] of Constant */
 or $t1,$x,$t1 /* x | ... */
 ori $t8,$t8,$T_i_l /* load bits [11:0] of Constant */
 xor $t1,$y,$t1 /* y ^ ... */
 add.w $t7,$t2,$t8 /* dst + X[k] + Const */
EOF

 if ($pos != 1) {
 # Normal step: prefetch X[k_next] and maintain the invariants.
 $code .= " ld.w $t0,$a1,$k_next*4 /* (NEXT STEP) X[$k_next] */\n";
 $code .= " add.w $dst,$t7,$t1 /* dst += ... */\n";
 $code .= " add.w $t2,$z,$t0 /* (NEXT STEP) dst + X[$k_next] */\n";
 $code .= " rotri.w $dst,$dst,32-$s /* dst <<< s */\n";
 $code .= " nor $t1,$zero,$y /* (NEXT STEP) not z' = not $y */\n";
 $code .= " add.w $dst,$dst,$x /* dst += x */\n";
 } else {
 # Final step of the block: interleave the loop-tail work (fold in the
 # saved A/D, advance the data pointer) with the last state update.
 $code .= " add.w $a4,$t3,$a4 /* (NEXT LOOP) add old value of A */\n";
 $code .= " add.w $dst,$t7,$t1 /* dst += ... */\n";
 $code .= " add.w $a7,$t6,$a7 /* (NEXT LOOP) add old value of D */\n";
 $code .= " rotri.w $dst,$dst,32-$s /* dst <<< s */\n";
 $code .= " addi.d $a1,$a1,64 /* (NEXT LOOP) ptr += 64 */\n";
 $code .= " add.w $dst,$dst,$x /* dst += x */\n";
 }
}
169
# Function prologue: bail out on a zero block count, compute the end
# pointer, load the state words A..D from the MD5_CTX, then open the
# per-block loop (snapshot A..D into $t3..$t6 and prefetch the current
# and following 64-byte data blocks).
$code .= <<EOF;
.text

.globl ossl_md5_block_asm_data_order
.type ossl_md5_block_asm_data_order function
ossl_md5_block_asm_data_order:
 # $a0 = arg #1 (ctx, MD5_CTX pointer)
 # $a1 = arg #2 (ptr, data pointer)
 # $a2 = arg #3 (nbr, number of 16-word blocks to process)
 beqz $a2,.Lend # cmp nbr with 0, jmp if nbr == 0

 # ptr is '$a1'
 # end is '$a3'
 slli.d $t0,$a2,6
 add.d $a3,$a1,$t0

 # A is '$a4'
 # B is '$a5'
 # C is '$a6'
 # D is '$a7'
 ld.w $a4,$a0,0 # a4 = ctx->A
 ld.w $a5,$a0,4 # a5 = ctx->B
 ld.w $a6,$a0,8 # a6 = ctx->C
 ld.w $a7,$a0,12 # a7 = ctx->D

# BEGIN of loop over 16-word blocks
.align 6
.Lloop:
 # save old values of A, B, C, D
 move $t3,$a4
 move $t4,$a5
 move $t5,$a6
 move $t6,$a7

 preld 0,$a1,0
 preld 0,$a1,64
EOF
207
# The 64 MD5 steps (RFC 1321). Each round applies its boolean function 16
# times; the A/B/C/D register roles rotate one position per step. $k_next
# is the message word prefetched for the FOLLOWING step, so the last call
# of each round prefetches the first word of the next round.

# Round 1: F(x,y,z), words X[0..15] in order, rotations 7/12/17/22.
round1_step(-1, $a4, $a5, $a6, $a7, '1', 0xd76aa478, '7');
round1_step(0, $a7, $a4, $a5, $a6, '2', 0xe8c7b756, '12');
round1_step(0, $a6, $a7, $a4, $a5, '3', 0x242070db, '17');
round1_step(0, $a5, $a6, $a7, $a4, '4', 0xc1bdceee, '22');
round1_step(0, $a4, $a5, $a6, $a7, '5', 0xf57c0faf, '7');
round1_step(0, $a7, $a4, $a5, $a6, '6', 0x4787c62a, '12');
round1_step(0, $a6, $a7, $a4, $a5, '7', 0xa8304613, '17');
round1_step(0, $a5, $a6, $a7, $a4, '8', 0xfd469501, '22');
round1_step(0, $a4, $a5, $a6, $a7, '9', 0x698098d8, '7');
round1_step(0, $a7, $a4, $a5, $a6, '10', 0x8b44f7af, '12');
round1_step(0, $a6, $a7, $a4, $a5, '11', 0xffff5bb1, '17');
round1_step(0, $a5, $a6, $a7, $a4, '12', 0x895cd7be, '22');
round1_step(0, $a4, $a5, $a6, $a7, '13', 0x6b901122, '7');
round1_step(0, $a7, $a4, $a5, $a6, '14', 0xfd987193, '12');
round1_step(0, $a6, $a7, $a4, $a5, '15', 0xa679438e, '17');
round1_step(1, $a5, $a6, $a7, $a4, '1', 0x49b40821, '22');

# Round 2: G(x,y,z), word index (1 + 5i) mod 16, rotations 5/9/14/20.
round2_step(-1, $a4, $a5, $a6, $a7, '6', 0xf61e2562, '5');
round2_step(0, $a7, $a4, $a5, $a6, '11', 0xc040b340, '9');
round2_step(0, $a6, $a7, $a4, $a5, '0', 0x265e5a51, '14');
round2_step(0, $a5, $a6, $a7, $a4, '5', 0xe9b6c7aa, '20');
round2_step(0, $a4, $a5, $a6, $a7, '10', 0xd62f105d, '5');
round2_step(0, $a7, $a4, $a5, $a6, '15', 0x2441453, '9');
round2_step(0, $a6, $a7, $a4, $a5, '4', 0xd8a1e681, '14');
round2_step(0, $a5, $a6, $a7, $a4, '9', 0xe7d3fbc8, '20');
round2_step(0, $a4, $a5, $a6, $a7, '14', 0x21e1cde6, '5');
round2_step(0, $a7, $a4, $a5, $a6, '3', 0xc33707d6, '9');
round2_step(0, $a6, $a7, $a4, $a5, '8', 0xf4d50d87, '14');
round2_step(0, $a5, $a6, $a7, $a4, '13', 0x455a14ed, '20');
round2_step(0, $a4, $a5, $a6, $a7, '2', 0xa9e3e905, '5');
round2_step(0, $a7, $a4, $a5, $a6, '7', 0xfcefa3f8, '9');
round2_step(0, $a6, $a7, $a4, $a5, '12', 0x676f02d9, '14');
round2_step(1, $a5, $a6, $a7, $a4, '5', 0x8d2a4c8a, '20');

# Round 3: H(x,y,z), word index (5 + 3i) mod 16, rotations 4/11/16/23.
round3_step(-1, $a4, $a5, $a6, $a7, '8', 0xfffa3942, '4');
round3_step(0, $a7, $a4, $a5, $a6, '11', 0x8771f681, '11');
round3_step(0, $a6, $a7, $a4, $a5, '14', 0x6d9d6122, '16');
round3_step(0, $a5, $a6, $a7, $a4, '1', 0xfde5380c, '23');
round3_step(0, $a4, $a5, $a6, $a7, '4', 0xa4beea44, '4');
round3_step(0, $a7, $a4, $a5, $a6, '7', 0x4bdecfa9, '11');
round3_step(0, $a6, $a7, $a4, $a5, '10', 0xf6bb4b60, '16');
round3_step(0, $a5, $a6, $a7, $a4, '13', 0xbebfbc70, '23');
round3_step(0, $a4, $a5, $a6, $a7, '0', 0x289b7ec6, '4');
round3_step(0, $a7, $a4, $a5, $a6, '3', 0xeaa127fa, '11');
round3_step(0, $a6, $a7, $a4, $a5, '6', 0xd4ef3085, '16');
round3_step(0, $a5, $a6, $a7, $a4, '9', 0x4881d05, '23');
round3_step(0, $a4, $a5, $a6, $a7, '12', 0xd9d4d039, '4');
round3_step(0, $a7, $a4, $a5, $a6, '15', 0xe6db99e5, '11');
round3_step(0, $a6, $a7, $a4, $a5, '2', 0x1fa27cf8, '16');
round3_step(1, $a5, $a6, $a7, $a4, '0', 0xc4ac5665, '23');

# Round 4: I(x,y,z), word index (7i) mod 16, rotations 6/10/15/21.
round4_step(-1, $a4, $a5, $a6, $a7, '7', 0xf4292244, '6');
round4_step(0, $a7, $a4, $a5, $a6, '14', 0x432aff97, '10');
round4_step(0, $a6, $a7, $a4, $a5, '5', 0xab9423a7, '15');
round4_step(0, $a5, $a6, $a7, $a4, '12', 0xfc93a039, '21');
round4_step(0, $a4, $a5, $a6, $a7, '3', 0x655b59c3, '6');
round4_step(0, $a7, $a4, $a5, $a6, '10', 0x8f0ccc92, '10');
round4_step(0, $a6, $a7, $a4, $a5, '1', 0xffeff47d, '15');
round4_step(0, $a5, $a6, $a7, $a4, '8', 0x85845dd1, '21');
round4_step(0, $a4, $a5, $a6, $a7, '15', 0x6fa87e4f, '6');
round4_step(0, $a7, $a4, $a5, $a6, '6', 0xfe2ce6e0, '10');
round4_step(0, $a6, $a7, $a4, $a5, '13', 0xa3014314, '15');
round4_step(0, $a5, $a6, $a7, $a4, '4', 0x4e0811a1, '21');
round4_step(0, $a4, $a5, $a6, $a7, '11', 0xf7537e82, '6');
round4_step(0, $a7, $a4, $a5, $a6, '2', 0xbd3af235, '10');
round4_step(0, $a6, $a7, $a4, $a5, '9', 0x2ad7d2bb, '15');
round4_step(1, $a5, $a6, $a7, $a4, '0', 0xeb86d391, '21');
275
# Loop tail and function epilogue: fold the saved B and C back into the
# state (A and D were already folded in by the final round4_step), loop
# while input remains, then store A..D back into the MD5_CTX.
$code .= <<EOF;
 # add old values of B, C
 add.w $a5,$t4,$a5
 add.w $a6,$t5,$a6

 bltu $a1,$a3,.Lloop # jmp if ptr < end

 st.w $a4,$a0,0 # ctx->A = A
 st.w $a5,$a0,4 # ctx->B = B
 st.w $a6,$a0,8 # ctx->C = C
 st.w $a7,$a0,12 # ctx->D = D

.Lend:
 jr $ra
.size ossl_md5_block_asm_data_order,.-ossl_md5_block_asm_data_order
EOF

# Standard perlasm post-processing hook: evaluate any `...` sequences
# embedded in the generated text (none are used above).
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

# Check close on the output handle: buffered write errors (e.g. a full
# disk) only surface at close, and an unchecked close would silently
# leave a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette