|
/*
 * include/asm-generic/xor.h
 *
 * Generic optimized RAID-5 checksumming functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
|
15 |
|
16 #include <asm/processor.h> |
|
17 |
|
/*
 * xor_8regs_2 - XOR the buffer at p2 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 *
 * Works on "lines" of eight longs, XORing memory to memory one word at a
 * time.  A trailing partial line is ignored and a length shorter than one
 * full line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	}
}
|
36 |
|
/*
 * xor_8regs_3 - XOR the buffers at p2 and p3 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Works on "lines" of eight longs, XORing memory to memory one word at a
 * time.  A trailing partial line is ignored and a length shorter than one
 * full line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
|
57 |
|
/*
 * xor_8regs_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Works on "lines" of eight longs, XORing memory to memory one word at a
 * time.  A trailing partial line is ignored and a length shorter than one
 * full line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
|
79 |
|
/*
 * xor_8regs_5 - XOR the buffers at p2..p5 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Works on "lines" of eight longs, XORing memory to memory one word at a
 * time.  A trailing partial line is ignored and a length shorter than one
 * full line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
|
102 |
|
/*
 * xor_32regs_2 - XOR the buffer at p2 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 *
 * Each line of eight longs is loaded from p1 into eight locals, XORed
 * with the matching words of p2, then stored back, keeping the loads and
 * the stores grouped.  A trailing partial line is ignored and a length
 * shorter than one full line is a no-op, so no more than @bytes bytes
 * are ever accessed.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		/* Unsigned, like the data: no signed conversion on the way. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	}
}
|
138 |
|
/*
 * xor_32regs_3 - XOR the buffers at p2 and p3 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Each line of eight longs is loaded from p1 into eight locals, XORed
 * with the matching words of each source in turn, then stored back.
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		/* Unsigned, like the data: no signed conversion on the way. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
|
184 |
|
/*
 * xor_32regs_4 - XOR the buffers at p2, p3 and p4 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Each line of eight longs is loaded from p1 into eight locals, XORed
 * with the matching words of each source in turn, then stored back.
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		/* Unsigned, like the data: no signed conversion on the way. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
|
239 |
|
/*
 * xor_32regs_5 - XOR the buffers at p2..p5 into the buffer at p1.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Each line of eight longs is loaded from p1 into eight locals, XORed
 * with the matching words of each source in turn, then stored back.
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op, so no more than @bytes bytes are ever accessed.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Pre-tested loop: a do/while would run once even for lines == 0. */
	while (lines-- > 0) {
		/* Unsigned, like the data: no signed conversion on the way. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
|
303 |
|
/* XOR one eight-word line at src into the line at dst, memory to memory. */
static inline void
xor_8regs_p_2_line(unsigned long *dst, unsigned long *src)
{
	dst[0] ^= src[0];
	dst[1] ^= src[1];
	dst[2] ^= src[2];
	dst[3] ^= src[3];
	dst[4] ^= src[4];
	dst[5] ^= src[5];
	dst[6] ^= src[6];
	dst[7] ^= src[7];
}

/*
 * xor_8regs_p_2 - XOR the buffer at p2 into the buffer at p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 *
 * Same result as the plain 8regs routine, but before each line except the
 * last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		xor_8regs_p_2_line(p1, p2);
		p1 += 8;
		p2 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_8regs_p_2_line(p1, p2);
}
|
329 |
|
/* XOR one eight-word line of s2 and s3 into the line at dst. */
static inline void
xor_8regs_p_3_line(unsigned long *dst, unsigned long *s2, unsigned long *s3)
{
	dst[0] ^= s2[0] ^ s3[0];
	dst[1] ^= s2[1] ^ s3[1];
	dst[2] ^= s2[2] ^ s3[2];
	dst[3] ^= s2[3] ^ s3[3];
	dst[4] ^= s2[4] ^ s3[4];
	dst[5] ^= s2[5] ^ s3[5];
	dst[6] ^= s2[6] ^ s3[6];
	dst[7] ^= s2[7] ^ s3[7];
}

/*
 * xor_8regs_p_3 - XOR the buffers at p2 and p3 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Same result as the plain 8regs routine, but before each line except the
 * last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		xor_8regs_p_3_line(p1, p2, p3);
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_8regs_p_3_line(p1, p2, p3);
}
|
359 |
|
/* XOR one eight-word line of s2, s3 and s4 into the line at dst. */
static inline void
xor_8regs_p_4_line(unsigned long *dst, unsigned long *s2, unsigned long *s3,
		   unsigned long *s4)
{
	dst[0] ^= s2[0] ^ s3[0] ^ s4[0];
	dst[1] ^= s2[1] ^ s3[1] ^ s4[1];
	dst[2] ^= s2[2] ^ s3[2] ^ s4[2];
	dst[3] ^= s2[3] ^ s3[3] ^ s4[3];
	dst[4] ^= s2[4] ^ s3[4] ^ s4[4];
	dst[5] ^= s2[5] ^ s3[5] ^ s4[5];
	dst[6] ^= s2[6] ^ s3[6] ^ s4[6];
	dst[7] ^= s2[7] ^ s3[7] ^ s4[7];
}

/*
 * xor_8regs_p_4 - XOR the buffers at p2, p3 and p4 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Same result as the plain 8regs routine, but before each line except the
 * last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		xor_8regs_p_4_line(p1, p2, p3, p4);
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_8regs_p_4_line(p1, p2, p3, p4);
}
|
393 |
|
/* XOR one eight-word line of s2, s3, s4 and s5 into the line at dst. */
static inline void
xor_8regs_p_5_line(unsigned long *dst, unsigned long *s2, unsigned long *s3,
		   unsigned long *s4, unsigned long *s5)
{
	dst[0] ^= s2[0] ^ s3[0] ^ s4[0] ^ s5[0];
	dst[1] ^= s2[1] ^ s3[1] ^ s4[1] ^ s5[1];
	dst[2] ^= s2[2] ^ s3[2] ^ s4[2] ^ s5[2];
	dst[3] ^= s2[3] ^ s3[3] ^ s4[3] ^ s5[3];
	dst[4] ^= s2[4] ^ s3[4] ^ s4[4] ^ s5[4];
	dst[5] ^= s2[5] ^ s3[5] ^ s4[5] ^ s5[5];
	dst[6] ^= s2[6] ^ s3[6] ^ s4[6] ^ s5[6];
	dst[7] ^= s2[7] ^ s3[7] ^ s4[7] ^ s5[7];
}

/*
 * xor_8regs_p_5 - XOR the buffers at p2..p5 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Same result as the plain 8regs routine, but before each line except the
 * last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
		xor_8regs_p_5_line(p1, p2, p3, p4, p5);
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_8regs_p_5_line(p1, p2, p3, p4, p5);
}
|
430 |
|
/*
 * XOR one eight-word line at src into the line at dst: load the dst line
 * into locals, fold in src, then store the line back.
 */
static inline void
xor_32regs_p_2_line(unsigned long *dst, unsigned long *src)
{
	/* Unsigned, like the data: no signed conversion on the way. */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	d0 = dst[0];	/* Pull the stuff into registers	*/
	d1 = dst[1];	/*  ... in bursts, if possible.		*/
	d2 = dst[2];
	d3 = dst[3];
	d4 = dst[4];
	d5 = dst[5];
	d6 = dst[6];
	d7 = dst[7];
	d0 ^= src[0];
	d1 ^= src[1];
	d2 ^= src[2];
	d3 ^= src[3];
	d4 ^= src[4];
	d5 ^= src[5];
	d6 ^= src[6];
	d7 ^= src[7];
	dst[0] = d0;	/* Store the result (in bursts)		*/
	dst[1] = d1;
	dst[2] = d2;
	dst[3] = d3;
	dst[4] = d4;
	dst[5] = d5;
	dst[6] = d6;
	dst[7] = d7;
}

/*
 * xor_32regs_p_2 - XOR the buffer at p2 into the buffer at p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 *
 * Same result as the plain 32regs routine, but before each line except
 * the last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		xor_32regs_p_2_line(p1, p2);
		p1 += 8;
		p2 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_32regs_p_2_line(p1, p2);
}
|
475 |
|
/*
 * XOR one eight-word line of s2 and s3 into the line at dst: load the dst
 * line into locals, fold in each source, then store the line back.
 */
static inline void
xor_32regs_p_3_line(unsigned long *dst, unsigned long *s2, unsigned long *s3)
{
	/* Unsigned, like the data: no signed conversion on the way. */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	d0 = dst[0];	/* Pull the stuff into registers	*/
	d1 = dst[1];	/*  ... in bursts, if possible.		*/
	d2 = dst[2];
	d3 = dst[3];
	d4 = dst[4];
	d5 = dst[5];
	d6 = dst[6];
	d7 = dst[7];
	d0 ^= s2[0];
	d1 ^= s2[1];
	d2 ^= s2[2];
	d3 ^= s2[3];
	d4 ^= s2[4];
	d5 ^= s2[5];
	d6 ^= s2[6];
	d7 ^= s2[7];
	d0 ^= s3[0];
	d1 ^= s3[1];
	d2 ^= s3[2];
	d3 ^= s3[3];
	d4 ^= s3[4];
	d5 ^= s3[5];
	d6 ^= s3[6];
	d7 ^= s3[7];
	dst[0] = d0;	/* Store the result (in bursts)		*/
	dst[1] = d1;
	dst[2] = d2;
	dst[3] = d3;
	dst[4] = d4;
	dst[5] = d5;
	dst[6] = d6;
	dst[7] = d7;
}

/*
 * xor_32regs_p_3 - XOR the buffers at p2 and p3 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 *
 * Same result as the plain 32regs routine, but before each line except
 * the last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		xor_32regs_p_3_line(p1, p2, p3);
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_32regs_p_3_line(p1, p2, p3);
}
|
532 |
|
/*
 * XOR one eight-word line of s2, s3 and s4 into the line at dst: load the
 * dst line into locals, fold in each source, then store the line back.
 */
static inline void
xor_32regs_p_4_line(unsigned long *dst, unsigned long *s2, unsigned long *s3,
		    unsigned long *s4)
{
	/* Unsigned, like the data: no signed conversion on the way. */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	d0 = dst[0];	/* Pull the stuff into registers	*/
	d1 = dst[1];	/*  ... in bursts, if possible.		*/
	d2 = dst[2];
	d3 = dst[3];
	d4 = dst[4];
	d5 = dst[5];
	d6 = dst[6];
	d7 = dst[7];
	d0 ^= s2[0];
	d1 ^= s2[1];
	d2 ^= s2[2];
	d3 ^= s2[3];
	d4 ^= s2[4];
	d5 ^= s2[5];
	d6 ^= s2[6];
	d7 ^= s2[7];
	d0 ^= s3[0];
	d1 ^= s3[1];
	d2 ^= s3[2];
	d3 ^= s3[3];
	d4 ^= s3[4];
	d5 ^= s3[5];
	d6 ^= s3[6];
	d7 ^= s3[7];
	d0 ^= s4[0];
	d1 ^= s4[1];
	d2 ^= s4[2];
	d3 ^= s4[3];
	d4 ^= s4[4];
	d5 ^= s4[5];
	d6 ^= s4[6];
	d7 ^= s4[7];
	dst[0] = d0;	/* Store the result (in bursts)		*/
	dst[1] = d1;
	dst[2] = d2;
	dst[3] = d3;
	dst[4] = d4;
	dst[5] = d5;
	dst[6] = d6;
	dst[7] = d7;
}

/*
 * xor_32regs_p_4 - XOR the buffers at p2, p3 and p4 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 *
 * Same result as the plain 32regs routine, but before each line except
 * the last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		xor_32regs_p_4_line(p1, p2, p3, p4);
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_32regs_p_4_line(p1, p2, p3, p4);
}
|
600 |
|
/*
 * XOR one eight-word line of s2..s5 into the line at dst: load the dst
 * line into locals, fold in each source, then store the line back.
 */
static inline void
xor_32regs_p_5_line(unsigned long *dst, unsigned long *s2, unsigned long *s3,
		    unsigned long *s4, unsigned long *s5)
{
	/* Unsigned, like the data: no signed conversion on the way. */
	unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

	d0 = dst[0];	/* Pull the stuff into registers	*/
	d1 = dst[1];	/*  ... in bursts, if possible.		*/
	d2 = dst[2];
	d3 = dst[3];
	d4 = dst[4];
	d5 = dst[5];
	d6 = dst[6];
	d7 = dst[7];
	d0 ^= s2[0];
	d1 ^= s2[1];
	d2 ^= s2[2];
	d3 ^= s2[3];
	d4 ^= s2[4];
	d5 ^= s2[5];
	d6 ^= s2[6];
	d7 ^= s2[7];
	d0 ^= s3[0];
	d1 ^= s3[1];
	d2 ^= s3[2];
	d3 ^= s3[3];
	d4 ^= s3[4];
	d5 ^= s3[5];
	d6 ^= s3[6];
	d7 ^= s3[7];
	d0 ^= s4[0];
	d1 ^= s4[1];
	d2 ^= s4[2];
	d3 ^= s4[3];
	d4 ^= s4[4];
	d5 ^= s4[5];
	d6 ^= s4[6];
	d7 ^= s4[7];
	d0 ^= s5[0];
	d1 ^= s5[1];
	d2 ^= s5[2];
	d3 ^= s5[3];
	d4 ^= s5[4];
	d5 ^= s5[5];
	d6 ^= s5[6];
	d7 ^= s5[7];
	dst[0] = d0;	/* Store the result (in bursts)		*/
	dst[1] = d1;
	dst[2] = d2;
	dst[3] = d3;
	dst[4] = d4;
	dst[5] = d5;
	dst[6] = d6;
	dst[7] = d7;
}

/*
 * xor_32regs_p_5 - XOR the buffers at p2..p5 into p1, prefetching.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first operand (read and written)
 * @p2:    second operand (only read)
 * @p3:    third operand (only read)
 * @p4:    fourth operand (only read)
 * @p5:    fifth operand (only read)
 *
 * Same result as the plain 32regs routine, but before each line except
 * the last one the following line is requested with prefetchw()/prefetch().
 * A trailing partial line is ignored and a length shorter than one full
 * line is a no-op: nothing is prefetched, read or written.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* Without this guard one full line was processed even for 0 bytes. */
	if (lines <= 0)
		return;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	/* All lines but the last: request the successor, then XOR. */
	while (--lines > 0) {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
		xor_32regs_p_5_line(p1, p2, p3, p4, p5);
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}

	/* Last line: it has no successor, so do not prefetch past the end. */
	xor_32regs_p_5_line(p1, p2, p3, p4, p5);
}
|
679 |
|
680 static struct xor_block_template xor_block_8regs = { |
|
681 .name = "8regs", |
|
682 .do_2 = xor_8regs_2, |
|
683 .do_3 = xor_8regs_3, |
|
684 .do_4 = xor_8regs_4, |
|
685 .do_5 = xor_8regs_5, |
|
686 }; |
|
687 |
|
688 static struct xor_block_template xor_block_32regs = { |
|
689 .name = "32regs", |
|
690 .do_2 = xor_32regs_2, |
|
691 .do_3 = xor_32regs_3, |
|
692 .do_4 = xor_32regs_4, |
|
693 .do_5 = xor_32regs_5, |
|
694 }; |
|
695 |
|
696 static struct xor_block_template xor_block_8regs_p = { |
|
697 .name = "8regs_prefetch", |
|
698 .do_2 = xor_8regs_p_2, |
|
699 .do_3 = xor_8regs_p_3, |
|
700 .do_4 = xor_8regs_p_4, |
|
701 .do_5 = xor_8regs_p_5, |
|
702 }; |
|
703 |
|
704 static struct xor_block_template xor_block_32regs_p = { |
|
705 .name = "32regs_prefetch", |
|
706 .do_2 = xor_32regs_p_2, |
|
707 .do_3 = xor_32regs_p_3, |
|
708 .do_4 = xor_32regs_p_4, |
|
709 .do_5 = xor_32regs_p_5, |
|
710 }; |
|
711 |
|
/*
 * Pass each of the four generic templates to xor_speed(), in the order
 * 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.  xor_speed() is defined
 * by the includer; presumably it benchmarks the template -- confirm there.
 * Wrapped in do/while (0) so the macro expands to a single statement.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)