random: Take at max 25% from RDRAND
[libgcrypt.git] / cipher / camellia-glue.c
1 /* camellia-glue.c - Glue for the Camellia cipher
2  * Copyright (C) 2007 Free Software Foundation, Inc.
3  *
4  * This file is part of Libgcrypt.
5  *
6  * Libgcrypt is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2.1 of
9  * the License, or (at your option) any later version.
10  *
11  * Libgcrypt is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21
22 /* I put all the libgcrypt-specific stuff in this file to keep the
23    camellia.c/camellia.h files exactly as provided by NTT.  If they
24    update their code, this should make it easier to bring the changes
25    in. - dshaw
26
27    There is one small change which needs to be done: Include the
28    following code at the top of camellia.h: */
29 #if 0
30
31 /* To use Camellia with libraries it is often useful to keep the name
32  * space of the library clean.  The following macro is thus useful:
33  *
34  *     #define CAMELLIA_EXT_SYM_PREFIX foo_
35  *
36  * This prefixes all external symbols with "foo_".
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 #ifdef CAMELLIA_EXT_SYM_PREFIX
42 #define CAMELLIA_PREFIX1(x,y) x ## y
43 #define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
44 #define CAMELLIA_PREFIX(x)    CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
45 #define Camellia_Ekeygen      CAMELLIA_PREFIX(Camellia_Ekeygen)
46 #define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
47 #define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
48 #define camellia_decrypt128   CAMELLIA_PREFIX(camellia_decrypt128)
49 #define camellia_decrypt256   CAMELLIA_PREFIX(camellia_decrypt256)
50 #define camellia_encrypt128   CAMELLIA_PREFIX(camellia_encrypt128)
51 #define camellia_encrypt256   CAMELLIA_PREFIX(camellia_encrypt256)
52 #define camellia_setup128     CAMELLIA_PREFIX(camellia_setup128)
53 #define camellia_setup192     CAMELLIA_PREFIX(camellia_setup192)
54 #define camellia_setup256     CAMELLIA_PREFIX(camellia_setup256)
55 #endif /*CAMELLIA_EXT_SYM_PREFIX*/
56
57 #endif /* Code sample. */
58
59
60 #include <config.h>
61 #include "types.h"
62 #include "g10lib.h"
63 #include "cipher.h"
64 #include "camellia.h"
65 #include "bufhelp.h"
66 #include "cipher-internal.h"
67 #include "cipher-selftest.h"
68
/* Helper macro to force alignment to 16 bytes.  It expands to the GCC
   "aligned" attribute when HAVE_GCC_ATTRIBUTE_ALIGNED is defined and to
   nothing otherwise, in which case no particular alignment is requested.  */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif
75
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code.
   It is defined only if both ENABLE_AESNI_SUPPORT and ENABLE_AVX_SUPPORT
   are set, the target is x86-64, and one of the compatible-assembler
   macros (AMD64 or WIN64 flavour) is defined.  */
#undef USE_AESNI_AVX
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
#  define USE_AESNI_AVX 1
# endif
#endif
84
/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code.
   It is defined only if both ENABLE_AESNI_SUPPORT and ENABLE_AVX2_SUPPORT
   are set, the target is x86-64, and one of the compatible-assembler
   macros (AMD64 or WIN64 flavour) is defined.  */
#undef USE_AESNI_AVX2
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
#  define USE_AESNI_AVX2 1
# endif
#endif
93
/* Per-key cipher context used by all functions in this file.
 *
 * NOTE(review): a pointer to this structure is passed straight to the
 * assembler routines, so they presumably depend on the field layout --
 * confirm against the assembler sources before reordering members.  */
typedef struct
{
  KEY_TABLE_TYPE keytable;      /* Key schedule written by the key setup.  */
  int keybitlength;             /* Key length in bits (key bytes * 8).  */
#ifdef USE_AESNI_AVX
  unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used.  */
#endif /*USE_AESNI_AVX*/
#ifdef USE_AESNI_AVX2
  unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used.  */
#endif /*USE_AESNI_AVX2*/
} CAMELLIA_context;
105
/* Assembly implementations use SystemV ABI, ABI conversion and additional
 * stack to store XMM6-XMM15 needed on Win64.
 *
 * ASM_FUNC_ABI is appended to the prototypes of the assembler routines;
 * ASM_EXTRA_STACK (10 * 16 bytes on Win64, 0 otherwise) is added to the
 * stack burn depth after one of those routines has been called.  Both
 * macros are defined only if USE_AESNI_AVX or USE_AESNI_AVX2 is set. */
#undef ASM_FUNC_ABI
#undef ASM_EXTRA_STACK
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
#  define ASM_EXTRA_STACK (10 * 16)
# else
#  define ASM_FUNC_ABI
#  define ASM_EXTRA_STACK 0
# endif
#endif
119
#ifdef USE_AESNI_AVX
/* Assembler implementations of Camellia using AES-NI and AVX.  The bulk
   routines are called by the glue code once per chunk of 16 blocks; the
   glue code advances its buffer pointers by 16 blocks after each call.
   All routines are declared with ASM_FUNC_ABI.
 */

/* CTR mode: one 16-block chunk from IN to OUT using the counter CTR.  */
extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *ctr) ASM_FUNC_ABI;

/* CBC mode decryption: one 16-block chunk from IN to OUT using IV.  */
extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *iv) ASM_FUNC_ABI;

/* CFB mode decryption: one 16-block chunk from IN to OUT using IV.  */
extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *iv) ASM_FUNC_ABI;

/* OCB mode encryption of one 16-block chunk.  LS holds 16 pointers to
   L values, stored as u64 so that the layout is the same on x32.  */
extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *offset,
                                             unsigned char *checksum,
                                             const u64 Ls[16]) ASM_FUNC_ABI;

/* OCB mode decryption of one 16-block chunk; arguments as for ocb_enc.  */
extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *offset,
                                             unsigned char *checksum,
                                             const u64 Ls[16]) ASM_FUNC_ABI;

/* OCB mode authentication of one 16-block chunk of additional data ABUF.  */
extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
                                             const unsigned char *abuf,
                                             unsigned char *offset,
                                             unsigned char *checksum,
                                             const u64 Ls[16]) ASM_FUNC_ABI;

/* Key setup; called by camellia_setkey in place of Camellia_Ekeygen when
   the AES-NI/AVX implementation is in use.  KEYLEN is in bytes.  */
extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
                                            const unsigned char *key,
                                            unsigned int keylen) ASM_FUNC_ABI;
#endif
163
#ifdef USE_AESNI_AVX2
/* Assembler implementations of Camellia using AES-NI and AVX2.  The
   routines are called by the glue code once per chunk of 32 blocks; the
   glue code advances its buffer pointers by 32 blocks after each call.
   All routines are declared with ASM_FUNC_ABI.
 */

/* CTR mode: one 32-block chunk from IN to OUT using the counter CTR.  */
extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
                                              unsigned char *out,
                                              const unsigned char *in,
                                              unsigned char *ctr) ASM_FUNC_ABI;

/* CBC mode decryption: one 32-block chunk from IN to OUT using IV.  */
extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
                                              unsigned char *out,
                                              const unsigned char *in,
                                              unsigned char *iv) ASM_FUNC_ABI;

/* CFB mode decryption: one 32-block chunk from IN to OUT using IV.  */
extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
                                              unsigned char *out,
                                              const unsigned char *in,
                                              unsigned char *iv) ASM_FUNC_ABI;

/* OCB mode encryption of one 32-block chunk.  LS holds 32 pointers to
   L values, stored as u64 so that the layout is the same on x32.  */
extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx,
                                              unsigned char *out,
                                              const unsigned char *in,
                                              unsigned char *offset,
                                              unsigned char *checksum,
                                              const u64 Ls[32]) ASM_FUNC_ABI;

/* OCB mode decryption of one 32-block chunk; arguments as for ocb_enc.  */
extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx,
                                              unsigned char *out,
                                              const unsigned char *in,
                                              unsigned char *offset,
                                              unsigned char *checksum,
                                              const u64 Ls[32]) ASM_FUNC_ABI;

/* OCB mode authentication of one 32-block chunk of additional data ABUF.  */
extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
                                               const unsigned char *abuf,
                                               unsigned char *offset,
                                               unsigned char *checksum,
                                               const u64 Ls[32]) ASM_FUNC_ABI;
#endif
203
204 static const char *selftest(void);
205
206 static gcry_err_code_t
207 camellia_setkey(void *c, const byte *key, unsigned keylen)
208 {
209   CAMELLIA_context *ctx=c;
210   static int initialized=0;
211   static const char *selftest_failed=NULL;
212 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
213   unsigned int hwf = _gcry_get_hw_features ();
214 #endif
215
216   if(keylen!=16 && keylen!=24 && keylen!=32)
217     return GPG_ERR_INV_KEYLEN;
218
219   if(!initialized)
220     {
221       initialized=1;
222       selftest_failed=selftest();
223       if(selftest_failed)
224         log_error("%s\n",selftest_failed);
225     }
226
227   if(selftest_failed)
228     return GPG_ERR_SELFTEST_FAILED;
229
230 #ifdef USE_AESNI_AVX
231   ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
232 #endif
233 #ifdef USE_AESNI_AVX2
234   ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
235 #endif
236
237   ctx->keybitlength=keylen*8;
238
239   if (0)
240     { }
241 #ifdef USE_AESNI_AVX
242   else if (ctx->use_aesni_avx)
243     _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
244   else
245 #endif
246     {
247       Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
248       _gcry_burn_stack
249         ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
250          +(4+32)*sizeof(u32)+2*sizeof(void*)    /* camellia_setup192 */
251          +0+sizeof(int)+2*sizeof(void*)         /* Camellia_Ekeygen */
252          +3*2*sizeof(void*)                     /* Function calls.  */
253          );
254     }
255
256   return 0;
257 }
258
259 #ifdef USE_ARM_ASM
260
/* Assembly implementations of Camellia, used when USE_ARM_ASM is defined.
   Each processes a single block from INBUF to OUTBUF with the key
   schedule KEYTABLE; KEYBITS is the key length in bits.  Note that the
   argument order differs from Camellia_EncryptBlock/DecryptBlock. */
extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable,
                                               byte *outbuf, const byte *inbuf,
                                               const int keybits);

extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable,
                                               byte *outbuf, const byte *inbuf,
                                               const int keybits);
269
270 static void Camellia_EncryptBlock(const int keyBitLength,
271                                   const unsigned char *plaintext,
272                                   const KEY_TABLE_TYPE keyTable,
273                                   unsigned char *cipherText)
274 {
275   _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
276                                      keyBitLength);
277 }
278
279 static void Camellia_DecryptBlock(const int keyBitLength,
280                                   const unsigned char *cipherText,
281                                   const KEY_TABLE_TYPE keyTable,
282                                   unsigned char *plaintext)
283 {
284   _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
285                                      keyBitLength);
286 }
287
288 static unsigned int
289 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
290 {
291   CAMELLIA_context *ctx = c;
292   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
293 #define CAMELLIA_encrypt_stack_burn_size (15*4)
294   return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
295 }
296
297 static unsigned int
298 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
299 {
300   CAMELLIA_context *ctx=c;
301   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
302 #define CAMELLIA_decrypt_stack_burn_size (15*4)
303   return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
304 }
305
306 #else /*USE_ARM_ASM*/
307
308 static unsigned int
309 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
310 {
311   CAMELLIA_context *ctx=c;
312
313   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
314
315 #define CAMELLIA_encrypt_stack_burn_size \
316   (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
317      +4*sizeof(u32)+4*sizeof(u32) \
318      +2*sizeof(u32*)+4*sizeof(u32) \
319      +2*2*sizeof(void*) /* Function calls.  */ \
320     )
321
322   return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
323 }
324
325 static unsigned int
326 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
327 {
328   CAMELLIA_context *ctx=c;
329
330   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
331
332 #define CAMELLIA_decrypt_stack_burn_size \
333     (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
334      +4*sizeof(u32)+4*sizeof(u32) \
335      +2*sizeof(u32*)+4*sizeof(u32) \
336      +2*2*sizeof(void*) /* Function calls.  */ \
337     )
338
339   return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
340 }
341
342 #endif /*!USE_ARM_ASM*/
343
344 /* Bulk encryption of complete blocks in CTR mode.  This function is only
345    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
346    of size CAMELLIA_BLOCK_SIZE. */
347 void
348 _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
349                        void *outbuf_arg, const void *inbuf_arg,
350                        size_t nblocks)
351 {
352   CAMELLIA_context *ctx = context;
353   unsigned char *outbuf = outbuf_arg;
354   const unsigned char *inbuf = inbuf_arg;
355   unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
356   int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
357   int i;
358
359 #ifdef USE_AESNI_AVX2
360   if (ctx->use_aesni_avx2)
361     {
362       int did_use_aesni_avx2 = 0;
363
364       /* Process data in 32 block chunks. */
365       while (nblocks >= 32)
366         {
367           _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
368
369           nblocks -= 32;
370           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
371           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
372           did_use_aesni_avx2 = 1;
373         }
374
375       if (did_use_aesni_avx2)
376         {
377           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
378                                         2 * sizeof(void *) + ASM_EXTRA_STACK;
379
380           if (burn_stack_depth < avx2_burn_stack_depth)
381             burn_stack_depth = avx2_burn_stack_depth;
382         }
383
384       /* Use generic code to handle smaller chunks... */
385       /* TODO: use caching instead? */
386     }
387 #endif
388
389 #ifdef USE_AESNI_AVX
390   if (ctx->use_aesni_avx)
391     {
392       int did_use_aesni_avx = 0;
393
394       /* Process data in 16 block chunks. */
395       while (nblocks >= 16)
396         {
397           _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
398
399           nblocks -= 16;
400           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
401           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
402           did_use_aesni_avx = 1;
403         }
404
405       if (did_use_aesni_avx)
406         {
407           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
408                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
409
410           if (burn_stack_depth < avx_burn_stack_depth)
411             burn_stack_depth = avx_burn_stack_depth;
412         }
413
414       /* Use generic code to handle smaller chunks... */
415       /* TODO: use caching instead? */
416     }
417 #endif
418
419   for ( ;nblocks; nblocks-- )
420     {
421       /* Encrypt the counter. */
422       Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
423       /* XOR the input with the encrypted counter and store in output.  */
424       buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
425       outbuf += CAMELLIA_BLOCK_SIZE;
426       inbuf  += CAMELLIA_BLOCK_SIZE;
427       /* Increment the counter.  */
428       for (i = CAMELLIA_BLOCK_SIZE; i > 0; i--)
429         {
430           ctr[i-1]++;
431           if (ctr[i-1])
432             break;
433         }
434     }
435
436   wipememory(tmpbuf, sizeof(tmpbuf));
437   _gcry_burn_stack(burn_stack_depth);
438 }
439
440 /* Bulk decryption of complete blocks in CBC mode.  This function is only
441    intended for the bulk encryption feature of cipher.c. */
442 void
443 _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
444                        void *outbuf_arg, const void *inbuf_arg,
445                        size_t nblocks)
446 {
447   CAMELLIA_context *ctx = context;
448   unsigned char *outbuf = outbuf_arg;
449   const unsigned char *inbuf = inbuf_arg;
450   unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
451   int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
452
453 #ifdef USE_AESNI_AVX2
454   if (ctx->use_aesni_avx2)
455     {
456       int did_use_aesni_avx2 = 0;
457
458       /* Process data in 32 block chunks. */
459       while (nblocks >= 32)
460         {
461           _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
462
463           nblocks -= 32;
464           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
465           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
466           did_use_aesni_avx2 = 1;
467         }
468
469       if (did_use_aesni_avx2)
470         {
471           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
472                                         2 * sizeof(void *) + ASM_EXTRA_STACK;;
473
474           if (burn_stack_depth < avx2_burn_stack_depth)
475             burn_stack_depth = avx2_burn_stack_depth;
476         }
477
478       /* Use generic code to handle smaller chunks... */
479     }
480 #endif
481
482 #ifdef USE_AESNI_AVX
483   if (ctx->use_aesni_avx)
484     {
485       int did_use_aesni_avx = 0;
486
487       /* Process data in 16 block chunks. */
488       while (nblocks >= 16)
489         {
490           _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
491
492           nblocks -= 16;
493           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
494           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
495           did_use_aesni_avx = 1;
496         }
497
498       if (did_use_aesni_avx)
499         {
500           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
501                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
502
503           if (burn_stack_depth < avx_burn_stack_depth)
504             burn_stack_depth = avx_burn_stack_depth;
505         }
506
507       /* Use generic code to handle smaller chunks... */
508     }
509 #endif
510
511   for ( ;nblocks; nblocks-- )
512     {
513       /* INBUF is needed later and it may be identical to OUTBUF, so store
514          the intermediate result to SAVEBUF.  */
515       Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
516
517       buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
518       inbuf += CAMELLIA_BLOCK_SIZE;
519       outbuf += CAMELLIA_BLOCK_SIZE;
520     }
521
522   wipememory(savebuf, sizeof(savebuf));
523   _gcry_burn_stack(burn_stack_depth);
524 }
525
526 /* Bulk decryption of complete blocks in CFB mode.  This function is only
527    intended for the bulk encryption feature of cipher.c. */
528 void
529 _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
530                        void *outbuf_arg, const void *inbuf_arg,
531                        size_t nblocks)
532 {
533   CAMELLIA_context *ctx = context;
534   unsigned char *outbuf = outbuf_arg;
535   const unsigned char *inbuf = inbuf_arg;
536   int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
537
538 #ifdef USE_AESNI_AVX2
539   if (ctx->use_aesni_avx2)
540     {
541       int did_use_aesni_avx2 = 0;
542
543       /* Process data in 32 block chunks. */
544       while (nblocks >= 32)
545         {
546           _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
547
548           nblocks -= 32;
549           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
550           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
551           did_use_aesni_avx2 = 1;
552         }
553
554       if (did_use_aesni_avx2)
555         {
556           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
557                                         2 * sizeof(void *) + ASM_EXTRA_STACK;
558
559           if (burn_stack_depth < avx2_burn_stack_depth)
560             burn_stack_depth = avx2_burn_stack_depth;
561         }
562
563       /* Use generic code to handle smaller chunks... */
564     }
565 #endif
566
567 #ifdef USE_AESNI_AVX
568   if (ctx->use_aesni_avx)
569     {
570       int did_use_aesni_avx = 0;
571
572       /* Process data in 16 block chunks. */
573       while (nblocks >= 16)
574         {
575           _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv);
576
577           nblocks -= 16;
578           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
579           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
580           did_use_aesni_avx = 1;
581         }
582
583       if (did_use_aesni_avx)
584         {
585           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
586                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
587
588           if (burn_stack_depth < avx_burn_stack_depth)
589             burn_stack_depth = avx_burn_stack_depth;
590         }
591
592       /* Use generic code to handle smaller chunks... */
593     }
594 #endif
595
596   for ( ;nblocks; nblocks-- )
597     {
598       Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
599       buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
600       outbuf += CAMELLIA_BLOCK_SIZE;
601       inbuf  += CAMELLIA_BLOCK_SIZE;
602     }
603
604   _gcry_burn_stack(burn_stack_depth);
605 }
606
/* Bulk encryption/decryption of complete blocks in OCB mode.
 *
 * Processes as many whole chunks of 32 blocks (AES-NI/AVX2) and then of
 * 16 blocks (AES-NI/AVX) from INBUF_ARG to OUTBUF_ARG as the context
 * allows, encrypting if ENCRYPT is non-zero and decrypting otherwise.
 * There is no generic per-block code here: the return value is the number
 * of blocks that were NOT processed.  Without USE_AESNI_AVX and
 * USE_AESNI_AVX2 nothing is done and NBLOCKS is returned unchanged.
 *
 * The running block count is read from and written back to
 * c->u_mode.ocb.data_nblocks; c->u_iv.iv and c->u_ctr.ctr are passed to
 * the assembler routines as their offset and checksum arguments.  */
size_t
_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                          const void *inbuf_arg, size_t nblocks, int encrypt)
{
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
  CAMELLIA_context *ctx = (void *)&c->context.c;
  unsigned char *outbuf = outbuf_arg;
  const unsigned char *inbuf = inbuf_arg;
  unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
  int burn_stack_depth;
  u64 blkn = c->u_mode.ocb.data_nblocks;

  burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size :
                              CAMELLIA_decrypt_stack_burn_size;
#else
  /* No accelerated implementation compiled in: silence unused warnings.  */
  (void)c;
  (void)outbuf_arg;
  (void)inbuf_arg;
  (void)encrypt;
#endif

#ifdef USE_AESNI_AVX2
  if (ctx->use_aesni_avx2)
    {
      int did_use_aesni_avx2 = 0;
      u64 Ls[32];
      /* Rotation applied to the table indices below; it depends on the
         number of blocks processed so far.  */
      unsigned int n = 32 - (blkn % 32);
      u64 *l;
      int i;

      if (nblocks >= 32)
        {
          /* Fill the table with the fixed pattern L[0], L[1], L[0], L[2],
             L[0], L[1], L[0] repeated every 8 positions; position k of
             the pattern is stored in slot (k + n) % 32.  */
          for (i = 0; i < 32; i += 8)
            {
              /* Use u64 to store pointers for x32 support (assembly function
               * assumes 64-bit pointers). */
              Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
              Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
              Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
              Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
            }

          /* Positions 7, 15 and 23 are fixed as well; position 31 is the
             only one that changes from chunk to chunk, so just remember
             its slot here.  */
          Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
          Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
          Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
          l = &Ls[(31 + n) % 32];

          /* Process data in 32 block chunks. */
          while (nblocks >= 32)
            {
              /* l_tmp will be used only every 65536-th block. */
              blkn += 32;
              /* Look up the entry for the multiple of 32 that lies inside
                 this chunk.  */
              *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);

              if (encrypt)
                _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                                  c->u_ctr.ctr, Ls);
              else
                _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                                  c->u_ctr.ctr, Ls);

              nblocks -= 32;
              outbuf += 32 * CAMELLIA_BLOCK_SIZE;
              inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
              did_use_aesni_avx2 = 1;
            }
        }

      /* Account for the assembler code's stack use only if it ran.  */
      if (did_use_aesni_avx2)
        {
          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
                                      2 * sizeof(void *) + ASM_EXTRA_STACK;

          if (burn_stack_depth < avx2_burn_stack_depth)
            burn_stack_depth = avx2_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#ifdef USE_AESNI_AVX
  if (ctx->use_aesni_avx)
    {
      int did_use_aesni_avx = 0;
      u64 Ls[16];
      /* Rotation applied to the table indices below; BLKN already
         includes any blocks handled by the AVX2 code above.  */
      unsigned int n = 16 - (blkn % 16);
      u64 *l;
      int i;

      if (nblocks >= 16)
        {
          /* Same scheme as above with a 16 entry table: position k of the
             fixed pattern is stored in slot (k + n) % 16.  */
          for (i = 0; i < 16; i += 8)
            {
              /* Use u64 to store pointers for x32 support (assembly function
               * assumes 64-bit pointers). */
              Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
              Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
              Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
              Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
              Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
            }

          /* Position 7 is fixed; position 15 is refreshed for every
             chunk, so just remember its slot here.  */
          Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
          l = &Ls[(15 + n) % 16];

          /* Process data in 16 block chunks. */
          while (nblocks >= 16)
            {
              /* l_tmp will be used only every 65536-th block. */
              blkn += 16;
              /* Look up the entry for the multiple of 16 that lies inside
                 this chunk.  */
              *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);

              if (encrypt)
                _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
                                                c->u_ctr.ctr, Ls);
              else
                _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
                                                c->u_ctr.ctr, Ls);

              nblocks -= 16;
              outbuf += 16 * CAMELLIA_BLOCK_SIZE;
              inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
              did_use_aesni_avx = 1;
            }
        }

      /* Account for the assembler code's stack use only if it ran.  */
      if (did_use_aesni_avx)
        {
          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
                                      2 * sizeof(void *) + ASM_EXTRA_STACK;

          if (burn_stack_depth < avx_burn_stack_depth)
            burn_stack_depth = avx_burn_stack_depth;
        }

      /* Use generic code to handle smaller chunks... */
    }
#endif

#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
  /* Store the updated block count back into the handle.  */
  c->u_mode.ocb.data_nblocks = blkn;

  wipememory(&l_tmp, sizeof(l_tmp));

  if (burn_stack_depth)
    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
#endif

  /* Blocks left over for the caller to process.  */
  return nblocks;
}
764
765 /* Bulk authentication of complete blocks in OCB mode. */
766 size_t
767 _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
768                          size_t nblocks)
769 {
770 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
771   CAMELLIA_context *ctx = (void *)&c->context.c;
772   const unsigned char *abuf = abuf_arg;
773   unsigned char l_tmp[CAMELLIA_BLOCK_SIZE];
774   int burn_stack_depth;
775   u64 blkn = c->u_mode.ocb.aad_nblocks;
776
777   burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
778 #else
779   (void)c;
780   (void)abuf_arg;
781 #endif
782
783 #ifdef USE_AESNI_AVX2
784   if (ctx->use_aesni_avx2)
785     {
786       int did_use_aesni_avx2 = 0;
787       u64 Ls[32];
788       unsigned int n = 32 - (blkn % 32);
789       u64 *l;
790       int i;
791
792       if (nblocks >= 32)
793         {
794           for (i = 0; i < 32; i += 8)
795             {
796               /* Use u64 to store pointers for x32 support (assembly function
797                * assumes 64-bit pointers). */
798               Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
799               Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
800               Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
801               Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
802               Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
803               Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
804               Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
805             }
806
807           Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
808           Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
809           Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
810           l = &Ls[(31 + n) % 32];
811
812           /* Process data in 32 block chunks. */
813           while (nblocks >= 32)
814             {
815               /* l_tmp will be used only every 65536-th block. */
816               blkn += 32;
817               *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 32);
818
819               _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
820                                                  c->u_mode.ocb.aad_offset,
821                                                  c->u_mode.ocb.aad_sum, Ls);
822
823               nblocks -= 32;
824               abuf += 32 * CAMELLIA_BLOCK_SIZE;
825               did_use_aesni_avx2 = 1;
826             }
827         }
828
829       if (did_use_aesni_avx2)
830         {
831           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
832                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
833
834           if (burn_stack_depth < avx2_burn_stack_depth)
835             burn_stack_depth = avx2_burn_stack_depth;
836         }
837
838       /* Use generic code to handle smaller chunks... */
839     }
840 #endif
841
842 #ifdef USE_AESNI_AVX
843   if (ctx->use_aesni_avx)
844     {
845       int did_use_aesni_avx = 0;
846       u64 Ls[16];
847       unsigned int n = 16 - (blkn % 16);
848       u64 *l;
849       int i;
850
851       if (nblocks >= 16)
852         {
853           for (i = 0; i < 16; i += 8)
854             {
855               /* Use u64 to store pointers for x32 support (assembly function
856                * assumes 64-bit pointers). */
857               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
858               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
859               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
860               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
861               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
862               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
863               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
864             }
865
866           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
867           l = &Ls[(15 + n) % 16];
868
869           /* Process data in 16 block chunks. */
870           while (nblocks >= 16)
871             {
872               /* l_tmp will be used only every 65536-th block. */
873               blkn += 16;
874               *l = (uintptr_t)(void *)ocb_get_l(c, l_tmp, blkn - blkn % 16);
875
876               _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
877                                                 c->u_mode.ocb.aad_offset,
878                                                 c->u_mode.ocb.aad_sum, Ls);
879
880               nblocks -= 16;
881               abuf += 16 * CAMELLIA_BLOCK_SIZE;
882               did_use_aesni_avx = 1;
883             }
884         }
885
886       if (did_use_aesni_avx)
887         {
888           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
889                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
890
891           if (burn_stack_depth < avx_burn_stack_depth)
892             burn_stack_depth = avx_burn_stack_depth;
893         }
894
895       /* Use generic code to handle smaller chunks... */
896     }
897 #endif
898
899 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
900   c->u_mode.ocb.aad_nblocks = blkn;
901
902   wipememory(&l_tmp, sizeof(l_tmp));
903
904   if (burn_stack_depth)
905     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
906 #endif
907
908   return nblocks;
909 }
910
911 /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR
912    encryption.  Returns NULL on success. */
913 static const char*
914 selftest_ctr_128 (void)
915 {
916   const int nblocks = 32+16+1;
917   const int blocksize = CAMELLIA_BLOCK_SIZE;
918   const int context_size = sizeof(CAMELLIA_context);
919
920   return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey,
921            &camellia_encrypt, &_gcry_camellia_ctr_enc, nblocks, blocksize,
922            context_size);
923 }
924
925 /* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption.
926    Returns NULL on success. */
927 static const char*
928 selftest_cbc_128 (void)
929 {
930   const int nblocks = 32+16+2;
931   const int blocksize = CAMELLIA_BLOCK_SIZE;
932   const int context_size = sizeof(CAMELLIA_context);
933
934   return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey,
935            &camellia_encrypt, &_gcry_camellia_cbc_dec, nblocks, blocksize,
936            context_size);
937 }
938
939 /* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption.
940    Returns NULL on success. */
941 static const char*
942 selftest_cfb_128 (void)
943 {
944   const int nblocks = 32+16+2;
945   const int blocksize = CAMELLIA_BLOCK_SIZE;
946   const int context_size = sizeof(CAMELLIA_context);
947
948   return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
949            &camellia_encrypt, &_gcry_camellia_cfb_dec, nblocks, blocksize,
950            context_size);
951 }
952
953 static const char *
954 selftest(void)
955 {
956   CAMELLIA_context ctx;
957   byte scratch[16];
958   const char *r;
959
960   /* These test vectors are from RFC-3713 */
961   static const byte plaintext[]=
962     {
963       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
964       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
965     };
966   static const byte key_128[]=
967     {
968       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
969       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
970     };
971   static const byte ciphertext_128[]=
972     {
973       0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
974       0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
975     };
976   static const byte key_192[]=
977     {
978       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
979       0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
980     };
981   static const byte ciphertext_192[]=
982     {
983       0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
984       0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
985     };
986   static const byte key_256[]=
987     {
988       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
989       0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
990       0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
991     };
992   static const byte ciphertext_256[]=
993     {
994       0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
995       0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
996     };
997
998   camellia_setkey(&ctx,key_128,sizeof(key_128));
999   camellia_encrypt(&ctx,scratch,plaintext);
1000   if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
1001     return "CAMELLIA-128 test encryption failed.";
1002   camellia_decrypt(&ctx,scratch,scratch);
1003   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1004     return "CAMELLIA-128 test decryption failed.";
1005
1006   camellia_setkey(&ctx,key_192,sizeof(key_192));
1007   camellia_encrypt(&ctx,scratch,plaintext);
1008   if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
1009     return "CAMELLIA-192 test encryption failed.";
1010   camellia_decrypt(&ctx,scratch,scratch);
1011   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1012     return "CAMELLIA-192 test decryption failed.";
1013
1014   camellia_setkey(&ctx,key_256,sizeof(key_256));
1015   camellia_encrypt(&ctx,scratch,plaintext);
1016   if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
1017     return "CAMELLIA-256 test encryption failed.";
1018   camellia_decrypt(&ctx,scratch,scratch);
1019   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1020     return "CAMELLIA-256 test decryption failed.";
1021
1022   if ( (r = selftest_ctr_128 ()) )
1023     return r;
1024
1025   if ( (r = selftest_cbc_128 ()) )
1026     return r;
1027
1028   if ( (r = selftest_cfb_128 ()) )
1029     return r;
1030
1031   return NULL;
1032 }
1033
1034 /* These oids are from
1035    <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
1036    retrieved May 1, 2007. */
1037
1038 static gcry_cipher_oid_spec_t camellia128_oids[] =
1039   {
1040     {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
1041     {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
1042     {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
1043     {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
1044     { NULL }
1045   };
1046
1047 static gcry_cipher_oid_spec_t camellia192_oids[] =
1048   {
1049     {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
1050     {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
1051     {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
1052     {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
1053     { NULL }
1054   };
1055
1056 static gcry_cipher_oid_spec_t camellia256_oids[] =
1057   {
1058     {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
1059     {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
1060     {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
1061     {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
1062     { NULL }
1063   };
1064
1065 gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
1066   {
1067     GCRY_CIPHER_CAMELLIA128, {0, 0},
1068     "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
1069     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1070   };
1071
1072 gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
1073   {
1074     GCRY_CIPHER_CAMELLIA192, {0, 0},
1075     "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
1076     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1077   };
1078
1079 gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
1080   {
1081     GCRY_CIPHER_CAMELLIA256, {0, 0},
1082     "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
1083     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1084   };