|
1 /* gf128mul.h - GF(2^128) multiplication functions |
|
2 * |
|
3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. |
|
4 * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> |
|
5 * |
|
6 * Based on Dr Brian Gladman's (GPL'd) work published at |
|
7 * http://fp.gladman.plus.com/cryptography_technology/index.htm |
|
8 * See the original copyright notice below. |
|
9 * |
|
10 * This program is free software; you can redistribute it and/or modify it |
|
11 * under the terms of the GNU General Public License as published by the Free |
|
12 * Software Foundation; either version 2 of the License, or (at your option) |
|
13 * any later version. |
|
14 */ |
|
15 /* |
|
16 --------------------------------------------------------------------------- |
|
17 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. |
|
18 |
|
19 LICENSE TERMS |
|
20 |
|
21 The free distribution and use of this software in both source and binary |
|
22 form is allowed (with or without changes) provided that: |
|
23 |
|
24 1. distributions of this source code include the above copyright |
|
25 notice, this list of conditions and the following disclaimer; |
|
26 |
|
27 2. distributions in binary form include the above copyright |
|
28 notice, this list of conditions and the following disclaimer |
|
29 in the documentation and/or other associated materials; |
|
30 |
|
31 3. the copyright holder's name is not used to endorse products |
|
32 built using this software without specific written permission. |
|
33 |
|
34 ALTERNATIVELY, provided that this notice is retained in full, this product |
|
35 may be distributed under the terms of the GNU General Public License (GPL), |
|
36 in which case the provisions of the GPL apply INSTEAD OF those given above. |
|
37 |
|
38 DISCLAIMER |
|
39 |
|
40 This software is provided 'as is' with no explicit or implied warranties |
|
41 in respect of its properties, including, but not limited to, correctness |
|
42 and/or fitness for purpose. |
|
43 --------------------------------------------------------------------------- |
|
44 Issue Date: 31/01/2006 |
|
45 |
|
46 An implementation of field multiplication in Galois Field GF(128) |
|
47 */ |
|
48 |
|
49 #ifndef _CRYPTO_GF128MUL_H |
|
50 #define _CRYPTO_GF128MUL_H |
|
51 |
|
52 #include <crypto/b128ops.h> |
|
53 #include <linux/slab.h> |
|
54 |
|
55 /* Comment by Rik: |
|
56 * |
|
57 * For some background on GF(2^128) see for example: http://- |
|
58 * csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf |
|
59 * |
|
60 * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can |
|
61 * be mapped to computer memory in a variety of ways. Let's examine |
|
62 * three common cases. |
|
63 * |
|
64 * Take a look at the 16 binary octets below in memory order. The msb's |
|
65 * are left and the lsb's are right. char b[16] is an array and b[0] is |
|
66 * the first octet. |
|
67 * |
|
68 * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 |
|
69 * b[0] b[1] b[2] b[3] b[13] b[14] b[15] |
|
70 * |
|
71 * Every bit is a coefficient of some power of X. We can store the bits |
|
72 * in every byte in little-endian order and the bytes themselves also in |
|
73 * little endian order. I will call this lle (little-little-endian). |
|
74 * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks |
|
75 * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. |
|
76 * This format was originally implemented in gf128mul and is used |
|
77 * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). |
|
78 * |
|
79 * Another convention says: store the bits in bigendian order and the |
|
80 * bytes also. This is bbe (big-big-endian). Now the buffer above |
|
81 * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, |
|
82 * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe |
|
83 * is partly implemented. |
|
84 * |
|
85 * Both of the above formats are easy to implement on big-endian |
|
86 * machines. |
|
87 * |
|
88 * EME (which is patent encumbered) uses the ble format (bits are stored |
|
89 * in big endian order and the bytes in little endian). The above buffer |
|
90 * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. |
|
91 * |
|
92 * The common machine word-size is smaller than 128 bits, so to make |
|
93 * an efficient implementation we must split into machine word sizes. |
|
94 * This file uses one 32bit for the moment. Machine endianness comes into |
|
95 * play. The lle format in relation to machine endianness is discussed |
|
96 * below by the original author of gf128mul Dr Brian Gladman. |
|
97 * |
|
98 * Let's look at the bbe and ble format on a little endian machine. |
|
99 * |
|
100 * bbe on a little endian machine u32 x[4]: |
|
101 * |
|
102 * MS x[0] LS MS x[1] LS |
|
103 * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
104 * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 |
|
105 * |
|
106 * MS x[2] LS MS x[3] LS |
|
107 * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
108 * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 |
|
109 * |
|
110 * ble on a little endian machine |
|
111 * |
|
112 * MS x[0] LS MS x[1] LS |
|
113 * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
114 * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 |
|
115 * |
|
116 * MS x[2] LS MS x[3] LS |
|
117 * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
118 * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 |
|
119 * |
|
120 * Multiplications in GF(2^128) are mostly bit-shifts, so you see why |
|
121 * ble (and lbe also) are easier to implement on a little-endian |
|
122 * machine than on a big-endian machine. The converse holds for bbe |
|
123 * and lle. |
|
124 * |
|
125 * Note: to have good alignment, it seems to me that it is sufficient |
|
126 * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize |
|
127 * machines this will automatically aligned to wordsize and on a 64-bit |
|
128 * machine also. |
|
129 */ |
|
130 /* Multiply a GF128 field element by x. Field elements are held in arrays |
|
131 of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower |
|
132 indexed bits placed in the more numerically significant bit positions |
|
133 within bytes. |
|
134 |
|
135 On little endian machines the bit indexes translate into the bit |
|
136 positions within four 32-bit words in the following way |
|
137 |
|
138 MS x[0] LS MS x[1] LS |
|
139 ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
140 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 |
|
141 |
|
142 MS x[2] LS MS x[3] LS |
|
143 ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
144 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 |
|
145 |
|
146 On big endian machines the bit indexes translate into the bit |
|
147 positions within four 32-bit words in the following way |
|
148 |
|
149 MS x[0] LS MS x[1] LS |
|
150 ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
151 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 |
|
152 |
|
153 MS x[2] LS MS x[3] LS |
|
154 ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls |
|
155 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 |
|
156 */ |
|
157 |
|
158 /* A slow generic version of gf_mul, implemented for lle and bbe |
|
159 * It multiplies a and b and puts the result in a */ |
|
160 void gf128mul_lle(be128 *a, const be128 *b); |
|
161 |
|
162 void gf128mul_bbe(be128 *a, const be128 *b); |
|
163 |
|
164 /* multiply by x in ble format, needed by XTS */ |
|
165 void gf128mul_x_ble(be128 *a, const be128 *b); |
|
166 |
|
167 /* 4k table optimization */ |
|
168 |
|
169 struct gf128mul_4k { |
|
170 be128 t[256]; |
|
171 }; |
|
172 |
|
173 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); |
|
174 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); |
|
175 void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); |
|
176 void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); |
|
177 |
|
178 static inline void gf128mul_free_4k(struct gf128mul_4k *t) |
|
179 { |
|
180 kfree(t); |
|
181 } |
|
182 |
|
183 |
|
184 /* 64k table optimization, implemented for lle and bbe */ |
|
185 |
|
186 struct gf128mul_64k { |
|
187 struct gf128mul_4k *t[16]; |
|
188 }; |
|
189 |
|
190 /* first initialize with the constant factor with which you |
|
191 * want to multiply and then call gf128_64k_lle with the other |
|
192 * factor in the first argument, the table in the second and a |
|
193 * scratch register in the third. Afterwards *a = *r. */ |
|
194 struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); |
|
195 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); |
|
196 void gf128mul_free_64k(struct gf128mul_64k *t); |
|
197 void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); |
|
198 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); |
|
199 |
|
200 #endif /* _CRYPTO_GF128MUL_H */ |