|
/*
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
|
8 #include <linux/linkage.h> |
|
9 #include <asm/dwarf2.h> |
|
10 #include <asm/errno.h> |
|
11 |
|
/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 *	rdi  source
 *	rsi  destination
 *	edx  len (32bit)
 *	ecx  sum (32bit)
 *	r8   src_err_ptr (int)
 *	r9   dst_err_ptr (int)
 *
 * Output
 *	eax  sum (folded to 32bit internally). Undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
|
31 |
|
32 .macro source |
|
33 10: |
|
34 .section __ex_table,"a" |
|
35 .align 8 |
|
36 .quad 10b,.Lbad_source |
|
37 .previous |
|
38 .endm |
|
39 |
|
40 .macro dest |
|
41 20: |
|
42 .section __ex_table,"a" |
|
43 .align 8 |
|
44 .quad 20b,.Lbad_dest |
|
45 .previous |
|
46 .endm |
|
47 |
|
48 .macro ignore L=.Lignore |
|
49 30: |
|
50 .section __ex_table,"a" |
|
51 .align 8 |
|
52 .quad 30b,\L |
|
53 .previous |
|
54 .endm |
|
55 |
|
56 |
|
57 ENTRY(csum_partial_copy_generic) |
|
58 CFI_STARTPROC |
|
59 cmpl $3*64,%edx |
|
60 jle .Lignore |
|
61 |
|
62 .Lignore: |
|
63 subq $7*8,%rsp |
|
64 CFI_ADJUST_CFA_OFFSET 7*8 |
|
65 movq %rbx,2*8(%rsp) |
|
66 CFI_REL_OFFSET rbx, 2*8 |
|
67 movq %r12,3*8(%rsp) |
|
68 CFI_REL_OFFSET r12, 3*8 |
|
69 movq %r14,4*8(%rsp) |
|
70 CFI_REL_OFFSET r14, 4*8 |
|
71 movq %r13,5*8(%rsp) |
|
72 CFI_REL_OFFSET r13, 5*8 |
|
73 movq %rbp,6*8(%rsp) |
|
74 CFI_REL_OFFSET rbp, 6*8 |
|
75 |
|
76 movq %r8,(%rsp) |
|
77 movq %r9,1*8(%rsp) |
|
78 |
|
79 movl %ecx,%eax |
|
80 movl %edx,%ecx |
|
81 |
|
82 xorl %r9d,%r9d |
|
83 movq %rcx,%r12 |
|
84 |
|
85 shrq $6,%r12 |
|
86 jz .Lhandle_tail /* < 64 */ |
|
87 |
|
88 clc |
|
89 |
|
90 /* main loop. clear in 64 byte blocks */ |
|
91 /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
|
92 /* r11: temp3, rdx: temp4, r12 loopcnt */ |
|
93 /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ |
|
94 .p2align 4 |
|
95 .Lloop: |
|
96 source |
|
97 movq (%rdi),%rbx |
|
98 source |
|
99 movq 8(%rdi),%r8 |
|
100 source |
|
101 movq 16(%rdi),%r11 |
|
102 source |
|
103 movq 24(%rdi),%rdx |
|
104 |
|
105 source |
|
106 movq 32(%rdi),%r10 |
|
107 source |
|
108 movq 40(%rdi),%rbp |
|
109 source |
|
110 movq 48(%rdi),%r14 |
|
111 source |
|
112 movq 56(%rdi),%r13 |
|
113 |
|
114 ignore 2f |
|
115 prefetcht0 5*64(%rdi) |
|
116 2: |
|
117 adcq %rbx,%rax |
|
118 adcq %r8,%rax |
|
119 adcq %r11,%rax |
|
120 adcq %rdx,%rax |
|
121 adcq %r10,%rax |
|
122 adcq %rbp,%rax |
|
123 adcq %r14,%rax |
|
124 adcq %r13,%rax |
|
125 |
|
126 decl %r12d |
|
127 |
|
128 dest |
|
129 movq %rbx,(%rsi) |
|
130 dest |
|
131 movq %r8,8(%rsi) |
|
132 dest |
|
133 movq %r11,16(%rsi) |
|
134 dest |
|
135 movq %rdx,24(%rsi) |
|
136 |
|
137 dest |
|
138 movq %r10,32(%rsi) |
|
139 dest |
|
140 movq %rbp,40(%rsi) |
|
141 dest |
|
142 movq %r14,48(%rsi) |
|
143 dest |
|
144 movq %r13,56(%rsi) |
|
145 |
|
146 3: |
|
147 |
|
148 leaq 64(%rdi),%rdi |
|
149 leaq 64(%rsi),%rsi |
|
150 |
|
151 jnz .Lloop |
|
152 |
|
153 adcq %r9,%rax |
|
154 |
|
155 /* do last upto 56 bytes */ |
|
156 .Lhandle_tail: |
|
157 /* ecx: count */ |
|
158 movl %ecx,%r10d |
|
159 andl $63,%ecx |
|
160 shrl $3,%ecx |
|
161 jz .Lfold |
|
162 clc |
|
163 .p2align 4 |
|
164 .Lloop_8: |
|
165 source |
|
166 movq (%rdi),%rbx |
|
167 adcq %rbx,%rax |
|
168 decl %ecx |
|
169 dest |
|
170 movq %rbx,(%rsi) |
|
171 leaq 8(%rsi),%rsi /* preserve carry */ |
|
172 leaq 8(%rdi),%rdi |
|
173 jnz .Lloop_8 |
|
174 adcq %r9,%rax /* add in carry */ |
|
175 |
|
176 .Lfold: |
|
177 /* reduce checksum to 32bits */ |
|
178 movl %eax,%ebx |
|
179 shrq $32,%rax |
|
180 addl %ebx,%eax |
|
181 adcl %r9d,%eax |
|
182 |
|
183 /* do last upto 6 bytes */ |
|
184 .Lhandle_7: |
|
185 movl %r10d,%ecx |
|
186 andl $7,%ecx |
|
187 shrl $1,%ecx |
|
188 jz .Lhandle_1 |
|
189 movl $2,%edx |
|
190 xorl %ebx,%ebx |
|
191 clc |
|
192 .p2align 4 |
|
193 .Lloop_1: |
|
194 source |
|
195 movw (%rdi),%bx |
|
196 adcl %ebx,%eax |
|
197 decl %ecx |
|
198 dest |
|
199 movw %bx,(%rsi) |
|
200 leaq 2(%rdi),%rdi |
|
201 leaq 2(%rsi),%rsi |
|
202 jnz .Lloop_1 |
|
203 adcl %r9d,%eax /* add in carry */ |
|
204 |
|
205 /* handle last odd byte */ |
|
206 .Lhandle_1: |
|
207 testl $1,%r10d |
|
208 jz .Lende |
|
209 xorl %ebx,%ebx |
|
210 source |
|
211 movb (%rdi),%bl |
|
212 dest |
|
213 movb %bl,(%rsi) |
|
214 addl %ebx,%eax |
|
215 adcl %r9d,%eax /* carry */ |
|
216 |
|
217 CFI_REMEMBER_STATE |
|
218 .Lende: |
|
219 movq 2*8(%rsp),%rbx |
|
220 CFI_RESTORE rbx |
|
221 movq 3*8(%rsp),%r12 |
|
222 CFI_RESTORE r12 |
|
223 movq 4*8(%rsp),%r14 |
|
224 CFI_RESTORE r14 |
|
225 movq 5*8(%rsp),%r13 |
|
226 CFI_RESTORE r13 |
|
227 movq 6*8(%rsp),%rbp |
|
228 CFI_RESTORE rbp |
|
229 addq $7*8,%rsp |
|
230 CFI_ADJUST_CFA_OFFSET -7*8 |
|
231 ret |
|
232 CFI_RESTORE_STATE |
|
233 |
|
234 /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
|
235 .Lbad_source: |
|
236 movq (%rsp),%rax |
|
237 testq %rax,%rax |
|
238 jz .Lende |
|
239 movl $-EFAULT,(%rax) |
|
240 jmp .Lende |
|
241 |
|
242 .Lbad_dest: |
|
243 movq 8(%rsp),%rax |
|
244 testq %rax,%rax |
|
245 jz .Lende |
|
246 movl $-EFAULT,(%rax) |
|
247 jmp .Lende |
|
248 CFI_ENDPROC |
|
249 ENDPROC(csum_partial_copy_generic) |