ecc: Fix ec_mulm_25519.
[libgcrypt.git] / cipher / camellia-glue.c
1 /* camellia-glue.c - Glue for the Camellia cipher
2  * Copyright (C) 2007 Free Software Foundation, Inc.
3  *
4  * This file is part of Libgcrypt.
5  *
6  * Libgcrypt is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2.1 of
9  * the License, or (at your option) any later version.
10  *
11  * Libgcrypt is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21
22 /* I put all the libgcrypt-specific stuff in this file to keep the
23    camellia.c/camellia.h files exactly as provided by NTT.  If they
24    update their code, this should make it easier to bring the changes
25    in. - dshaw
26
27    There is one small change which needs to be done: Include the
28    following code at the top of camellia.h: */
29 #if 0
30
31 /* To use Camellia with libraries it is often useful to keep the name
32  * space of the library clean.  The following macro is thus useful:
33  *
34  *     #define CAMELLIA_EXT_SYM_PREFIX foo_
35  *
36  * This prefixes all external symbols with "foo_".
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 #ifdef CAMELLIA_EXT_SYM_PREFIX
42 #define CAMELLIA_PREFIX1(x,y) x ## y
43 #define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
44 #define CAMELLIA_PREFIX(x)    CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
45 #define Camellia_Ekeygen      CAMELLIA_PREFIX(Camellia_Ekeygen)
46 #define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
47 #define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
48 #define camellia_decrypt128   CAMELLIA_PREFIX(camellia_decrypt128)
49 #define camellia_decrypt256   CAMELLIA_PREFIX(camellia_decrypt256)
50 #define camellia_encrypt128   CAMELLIA_PREFIX(camellia_encrypt128)
51 #define camellia_encrypt256   CAMELLIA_PREFIX(camellia_encrypt256)
52 #define camellia_setup128     CAMELLIA_PREFIX(camellia_setup128)
53 #define camellia_setup192     CAMELLIA_PREFIX(camellia_setup192)
54 #define camellia_setup256     CAMELLIA_PREFIX(camellia_setup256)
55 #endif /*CAMELLIA_EXT_SYM_PREFIX*/
56
57 #endif /* Code sample. */
58
59
60 #include <config.h>
61 #include "types.h"
62 #include "g10lib.h"
63 #include "cipher.h"
64 #include "camellia.h"
65 #include "bufhelp.h"
66 #include "cipher-internal.h"
67 #include "cipher-selftest.h"
68
/* ATTR_ALIGNED_16 forces 16 byte alignment of the annotated object.  It
   expands to nothing if the compiler does not provide the GCC style
   "aligned" attribute.  */
#ifndef HAVE_GCC_ATTRIBUTE_ALIGNED
# define ATTR_ALIGNED_16
#else
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#endif
75
/* USE_AESNI_AVX indicates whether to compile with the Intel AES-NI/AVX
   code.  It is only set on x86-64 when AES-NI and AVX support are both
   enabled and a compatible (AMD64 or Win64) GCC platform assembler is
   available.  */
#undef USE_AESNI_AVX
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT) \
    && defined(__x86_64__) \
    && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) \
        || defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AESNI_AVX 1
#endif
84
/* USE_AESNI_AVX2 indicates whether to compile with the Intel AES-NI/AVX2
   code.  It is only set on x86-64 when AES-NI and AVX2 support are both
   enabled and a compatible (AMD64 or Win64) GCC platform assembler is
   available.  */
#undef USE_AESNI_AVX2
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) \
    && defined(__x86_64__) \
    && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) \
        || defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AESNI_AVX2 1
#endif
93
94 typedef struct
95 {
96   KEY_TABLE_TYPE keytable;
97   int keybitlength;
98 #ifdef USE_AESNI_AVX
99   unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used.  */
100 #endif /*USE_AESNI_AVX*/
101 #ifdef USE_AESNI_AVX2
102   unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used.  */
103 #endif /*USE_AESNI_AVX2*/
104 } CAMELLIA_context;
105
/* The assembly implementations follow the SystemV ABI.  On Win64 the
 * calls therefore need an ABI conversion (ASM_FUNC_ABI) and additional
 * stack (ASM_EXTRA_STACK, ten 16 byte slots) to store XMM6-XMM15.  On all
 * other platforms neither is required. */
#undef ASM_FUNC_ABI
#undef ASM_EXTRA_STACK
#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
# ifndef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
#  define ASM_FUNC_ABI
#  define ASM_EXTRA_STACK 0
# else
#  define ASM_FUNC_ABI __attribute__((sysv_abi))
#  define ASM_EXTRA_STACK (10 * 16)
# endif
#endif
119
#ifdef USE_AESNI_AVX
/* Assembler implementations of Camellia using AES-NI and AVX.  The bulk
   functions process 16 blocks at a time.  */

/* CTR mode encryption of 16 blocks.  */
extern void
_gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx, unsigned char *out,
                                 const unsigned char *in,
                                 unsigned char *ctr) ASM_FUNC_ABI;

/* CBC mode decryption of 16 blocks.  */
extern void
_gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx, unsigned char *out,
                                 const unsigned char *in,
                                 unsigned char *iv) ASM_FUNC_ABI;

/* CFB mode decryption of 16 blocks.  */
extern void
_gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx, unsigned char *out,
                                 const unsigned char *in,
                                 unsigned char *iv) ASM_FUNC_ABI;

/* OCB mode encryption of 16 blocks.  LS holds the 16 L-value pointers,
   each stored as a 64 bit integer.  */
extern void
_gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx, unsigned char *out,
                                 const unsigned char *in,
                                 unsigned char *offset,
                                 unsigned char *checksum,
                                 const u64 Ls[16]) ASM_FUNC_ABI;

/* OCB mode decryption of 16 blocks.  */
extern void
_gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx, unsigned char *out,
                                 const unsigned char *in,
                                 unsigned char *offset,
                                 unsigned char *checksum,
                                 const u64 Ls[16]) ASM_FUNC_ABI;

/* OCB mode authentication of 16 blocks of additional data.  */
extern void
_gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
                                  const unsigned char *abuf,
                                  unsigned char *offset,
                                  unsigned char *checksum,
                                  const u64 Ls[16]) ASM_FUNC_ABI;

/* Key setup; KEYLEN is the key length in bytes.  */
extern void
_gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
                                const unsigned char *key,
                                unsigned int keylen) ASM_FUNC_ABI;
#endif
163
#ifdef USE_AESNI_AVX2
/* Assembler implementations of Camellia using AES-NI and AVX2.  The bulk
   functions process 32 blocks at a time.  */

/* CTR mode encryption of 32 blocks.  */
extern void
_gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx, unsigned char *out,
                                  const unsigned char *in,
                                  unsigned char *ctr) ASM_FUNC_ABI;

/* CBC mode decryption of 32 blocks.  */
extern void
_gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx, unsigned char *out,
                                  const unsigned char *in,
                                  unsigned char *iv) ASM_FUNC_ABI;

/* CFB mode decryption of 32 blocks.  */
extern void
_gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx, unsigned char *out,
                                  const unsigned char *in,
                                  unsigned char *iv) ASM_FUNC_ABI;

/* OCB mode encryption of 32 blocks.  LS holds the 32 L-value pointers,
   each stored as a 64 bit integer.  */
extern void
_gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx, unsigned char *out,
                                  const unsigned char *in,
                                  unsigned char *offset,
                                  unsigned char *checksum,
                                  const u64 Ls[32]) ASM_FUNC_ABI;

/* OCB mode decryption of 32 blocks.  */
extern void
_gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx, unsigned char *out,
                                  const unsigned char *in,
                                  unsigned char *offset,
                                  unsigned char *checksum,
                                  const u64 Ls[32]) ASM_FUNC_ABI;

/* OCB mode authentication of 32 blocks of additional data.  */
extern void
_gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
                                   const unsigned char *abuf,
                                   unsigned char *offset,
                                   unsigned char *checksum,
                                   const u64 Ls[32]) ASM_FUNC_ABI;
#endif
203
/* Forward declaration: the self-test is invoked by camellia_setkey and
   returns NULL on success or an error text on failure.  */
static const char *selftest (void);
205
206 static gcry_err_code_t
207 camellia_setkey(void *c, const byte *key, unsigned keylen)
208 {
209   CAMELLIA_context *ctx=c;
210   static int initialized=0;
211   static const char *selftest_failed=NULL;
212 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
213   unsigned int hwf = _gcry_get_hw_features ();
214 #endif
215
216   if(keylen!=16 && keylen!=24 && keylen!=32)
217     return GPG_ERR_INV_KEYLEN;
218
219   if(!initialized)
220     {
221       initialized=1;
222       selftest_failed=selftest();
223       if(selftest_failed)
224         log_error("%s\n",selftest_failed);
225     }
226
227   if(selftest_failed)
228     return GPG_ERR_SELFTEST_FAILED;
229
230 #ifdef USE_AESNI_AVX
231   ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
232 #endif
233 #ifdef USE_AESNI_AVX2
234   ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
235 #endif
236
237   ctx->keybitlength=keylen*8;
238
239   if (0)
240     { }
241 #ifdef USE_AESNI_AVX
242   else if (ctx->use_aesni_avx)
243     _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
244   else
245 #endif
246     {
247       Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
248       _gcry_burn_stack
249         ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
250          +(4+32)*sizeof(u32)+2*sizeof(void*)    /* camellia_setup192 */
251          +0+sizeof(int)+2*sizeof(void*)         /* Camellia_Ekeygen */
252          +3*2*sizeof(void*)                     /* Function calls.  */
253          );
254     }
255
256   return 0;
257 }
258
259 #ifdef USE_ARM_ASM
260
261 /* Assembly implementations of Camellia. */
262 extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable,
263                                                byte *outbuf, const byte *inbuf,
264                                                const int keybits);
265
266 extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable,
267                                                byte *outbuf, const byte *inbuf,
268                                                const int keybits);
269
270 static void Camellia_EncryptBlock(const int keyBitLength,
271                                   const unsigned char *plaintext,
272                                   const KEY_TABLE_TYPE keyTable,
273                                   unsigned char *cipherText)
274 {
275   _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
276                                      keyBitLength);
277 }
278
279 static void Camellia_DecryptBlock(const int keyBitLength,
280                                   const unsigned char *cipherText,
281                                   const KEY_TABLE_TYPE keyTable,
282                                   unsigned char *plaintext)
283 {
284   _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
285                                      keyBitLength);
286 }
287
/* Number of stack bytes to burn after a single block operation of the ARM
   assembly code: nothing on AArch64, 15 words of 4 bytes otherwise.  */
#ifndef __aarch64__
#  define CAMELLIA_encrypt_stack_burn_size (15*4)
#  define CAMELLIA_decrypt_stack_burn_size (15*4)
#else
#  define CAMELLIA_encrypt_stack_burn_size (0)
#  define CAMELLIA_decrypt_stack_burn_size (0)
#endif
295
296 static unsigned int
297 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
298 {
299   CAMELLIA_context *ctx = c;
300   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
301   return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
302 }
303
304 static unsigned int
305 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
306 {
307   CAMELLIA_context *ctx=c;
308   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
309   return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
310 }
311
312 #else /*USE_ARM_ASM*/
313
314 static unsigned int
315 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
316 {
317   CAMELLIA_context *ctx=c;
318
319   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
320
321 #define CAMELLIA_encrypt_stack_burn_size \
322   (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
323      +4*sizeof(u32)+4*sizeof(u32) \
324      +2*sizeof(u32*)+4*sizeof(u32) \
325      +2*2*sizeof(void*) /* Function calls.  */ \
326     )
327
328   return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
329 }
330
331 static unsigned int
332 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
333 {
334   CAMELLIA_context *ctx=c;
335
336   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
337
338 #define CAMELLIA_decrypt_stack_burn_size \
339     (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
340      +4*sizeof(u32)+4*sizeof(u32) \
341      +2*sizeof(u32*)+4*sizeof(u32) \
342      +2*2*sizeof(void*) /* Function calls.  */ \
343     )
344
345   return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
346 }
347
348 #endif /*!USE_ARM_ASM*/
349
350 /* Bulk encryption of complete blocks in CTR mode.  This function is only
351    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
352    of size CAMELLIA_BLOCK_SIZE. */
353 void
354 _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
355                        void *outbuf_arg, const void *inbuf_arg,
356                        size_t nblocks)
357 {
358   CAMELLIA_context *ctx = context;
359   unsigned char *outbuf = outbuf_arg;
360   const unsigned char *inbuf = inbuf_arg;
361   unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
362   int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
363   int i;
364
365 #ifdef USE_AESNI_AVX2
366   if (ctx->use_aesni_avx2)
367     {
368       int did_use_aesni_avx2 = 0;
369
370       /* Process data in 32 block chunks. */
371       while (nblocks >= 32)
372         {
373           _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
374
375           nblocks -= 32;
376           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
377           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
378           did_use_aesni_avx2 = 1;
379         }
380
381       if (did_use_aesni_avx2)
382         {
383           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
384                                         2 * sizeof(void *) + ASM_EXTRA_STACK;
385
386           if (burn_stack_depth < avx2_burn_stack_depth)
387             burn_stack_depth = avx2_burn_stack_depth;
388         }
389
390       /* Use generic code to handle smaller chunks... */
391       /* TODO: use caching instead? */
392     }
393 #endif
394
395 #ifdef USE_AESNI_AVX
396   if (ctx->use_aesni_avx)
397     {
398       int did_use_aesni_avx = 0;
399
400       /* Process data in 16 block chunks. */
401       while (nblocks >= 16)
402         {
403           _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
404
405           nblocks -= 16;
406           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
407           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
408           did_use_aesni_avx = 1;
409         }
410
411       if (did_use_aesni_avx)
412         {
413           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
414                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
415
416           if (burn_stack_depth < avx_burn_stack_depth)
417             burn_stack_depth = avx_burn_stack_depth;
418         }
419
420       /* Use generic code to handle smaller chunks... */
421       /* TODO: use caching instead? */
422     }
423 #endif
424
425   for ( ;nblocks; nblocks-- )
426     {
427       /* Encrypt the counter. */
428       Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
429       /* XOR the input with the encrypted counter and store in output.  */
430       buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
431       outbuf += CAMELLIA_BLOCK_SIZE;
432       inbuf  += CAMELLIA_BLOCK_SIZE;
433       /* Increment the counter.  */
434       for (i = CAMELLIA_BLOCK_SIZE; i > 0; i--)
435         {
436           ctr[i-1]++;
437           if (ctr[i-1])
438             break;
439         }
440     }
441
442   wipememory(tmpbuf, sizeof(tmpbuf));
443   _gcry_burn_stack(burn_stack_depth);
444 }
445
446 /* Bulk decryption of complete blocks in CBC mode.  This function is only
447    intended for the bulk encryption feature of cipher.c. */
448 void
449 _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
450                        void *outbuf_arg, const void *inbuf_arg,
451                        size_t nblocks)
452 {
453   CAMELLIA_context *ctx = context;
454   unsigned char *outbuf = outbuf_arg;
455   const unsigned char *inbuf = inbuf_arg;
456   unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
457   int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
458
459 #ifdef USE_AESNI_AVX2
460   if (ctx->use_aesni_avx2)
461     {
462       int did_use_aesni_avx2 = 0;
463
464       /* Process data in 32 block chunks. */
465       while (nblocks >= 32)
466         {
467           _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
468
469           nblocks -= 32;
470           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
471           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
472           did_use_aesni_avx2 = 1;
473         }
474
475       if (did_use_aesni_avx2)
476         {
477           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
478                                         2 * sizeof(void *) + ASM_EXTRA_STACK;;
479
480           if (burn_stack_depth < avx2_burn_stack_depth)
481             burn_stack_depth = avx2_burn_stack_depth;
482         }
483
484       /* Use generic code to handle smaller chunks... */
485     }
486 #endif
487
488 #ifdef USE_AESNI_AVX
489   if (ctx->use_aesni_avx)
490     {
491       int did_use_aesni_avx = 0;
492
493       /* Process data in 16 block chunks. */
494       while (nblocks >= 16)
495         {
496           _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
497
498           nblocks -= 16;
499           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
500           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
501           did_use_aesni_avx = 1;
502         }
503
504       if (did_use_aesni_avx)
505         {
506           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
507                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
508
509           if (burn_stack_depth < avx_burn_stack_depth)
510             burn_stack_depth = avx_burn_stack_depth;
511         }
512
513       /* Use generic code to handle smaller chunks... */
514     }
515 #endif
516
517   for ( ;nblocks; nblocks-- )
518     {
519       /* INBUF is needed later and it may be identical to OUTBUF, so store
520          the intermediate result to SAVEBUF.  */
521       Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
522
523       buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
524       inbuf += CAMELLIA_BLOCK_SIZE;
525       outbuf += CAMELLIA_BLOCK_SIZE;
526     }
527
528   wipememory(savebuf, sizeof(savebuf));
529   _gcry_burn_stack(burn_stack_depth);
530 }
531
532 /* Bulk decryption of complete blocks in CFB mode.  This function is only
533    intended for the bulk encryption feature of cipher.c. */
534 void
535 _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
536                        void *outbuf_arg, const void *inbuf_arg,
537                        size_t nblocks)
538 {
539   CAMELLIA_context *ctx = context;
540   unsigned char *outbuf = outbuf_arg;
541   const unsigned char *inbuf = inbuf_arg;
542   int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
543
544 #ifdef USE_AESNI_AVX2
545   if (ctx->use_aesni_avx2)
546     {
547       int did_use_aesni_avx2 = 0;
548
549       /* Process data in 32 block chunks. */
550       while (nblocks >= 32)
551         {
552           _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
553
554           nblocks -= 32;
555           outbuf += 32 * CAMELLIA_BLOCK_SIZE;
556           inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
557           did_use_aesni_avx2 = 1;
558         }
559
560       if (did_use_aesni_avx2)
561         {
562           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
563                                         2 * sizeof(void *) + ASM_EXTRA_STACK;
564
565           if (burn_stack_depth < avx2_burn_stack_depth)
566             burn_stack_depth = avx2_burn_stack_depth;
567         }
568
569       /* Use generic code to handle smaller chunks... */
570     }
571 #endif
572
573 #ifdef USE_AESNI_AVX
574   if (ctx->use_aesni_avx)
575     {
576       int did_use_aesni_avx = 0;
577
578       /* Process data in 16 block chunks. */
579       while (nblocks >= 16)
580         {
581           _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv);
582
583           nblocks -= 16;
584           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
585           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
586           did_use_aesni_avx = 1;
587         }
588
589       if (did_use_aesni_avx)
590         {
591           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
592                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
593
594           if (burn_stack_depth < avx_burn_stack_depth)
595             burn_stack_depth = avx_burn_stack_depth;
596         }
597
598       /* Use generic code to handle smaller chunks... */
599     }
600 #endif
601
602   for ( ;nblocks; nblocks-- )
603     {
604       Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
605       buf_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
606       outbuf += CAMELLIA_BLOCK_SIZE;
607       inbuf  += CAMELLIA_BLOCK_SIZE;
608     }
609
610   _gcry_burn_stack(burn_stack_depth);
611 }
612
613 /* Bulk encryption/decryption of complete blocks in OCB mode. */
614 size_t
615 _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
616                           const void *inbuf_arg, size_t nblocks, int encrypt)
617 {
618 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
619   CAMELLIA_context *ctx = (void *)&c->context.c;
620   unsigned char *outbuf = outbuf_arg;
621   const unsigned char *inbuf = inbuf_arg;
622   int burn_stack_depth;
623   u64 blkn = c->u_mode.ocb.data_nblocks;
624
625   burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size :
626                               CAMELLIA_decrypt_stack_burn_size;
627 #else
628   (void)c;
629   (void)outbuf_arg;
630   (void)inbuf_arg;
631   (void)encrypt;
632 #endif
633
634 #ifdef USE_AESNI_AVX2
635   if (ctx->use_aesni_avx2)
636     {
637       int did_use_aesni_avx2 = 0;
638       u64 Ls[32];
639       unsigned int n = 32 - (blkn % 32);
640       u64 *l;
641       int i;
642
643       if (nblocks >= 32)
644         {
645           for (i = 0; i < 32; i += 8)
646             {
647               /* Use u64 to store pointers for x32 support (assembly function
648                * assumes 64-bit pointers). */
649               Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
650               Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
651               Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
652               Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
653               Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
654               Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
655               Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
656             }
657
658           Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
659           Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
660           Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
661           l = &Ls[(31 + n) % 32];
662
663           /* Process data in 32 block chunks. */
664           while (nblocks >= 32)
665             {
666               blkn += 32;
667               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
668
669               if (encrypt)
670                 _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
671                                                   c->u_ctr.ctr, Ls);
672               else
673                 _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
674                                                   c->u_ctr.ctr, Ls);
675
676               nblocks -= 32;
677               outbuf += 32 * CAMELLIA_BLOCK_SIZE;
678               inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
679               did_use_aesni_avx2 = 1;
680             }
681         }
682
683       if (did_use_aesni_avx2)
684         {
685           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
686                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
687
688           if (burn_stack_depth < avx2_burn_stack_depth)
689             burn_stack_depth = avx2_burn_stack_depth;
690         }
691
692       /* Use generic code to handle smaller chunks... */
693     }
694 #endif
695
696 #ifdef USE_AESNI_AVX
697   if (ctx->use_aesni_avx)
698     {
699       int did_use_aesni_avx = 0;
700       u64 Ls[16];
701       unsigned int n = 16 - (blkn % 16);
702       u64 *l;
703       int i;
704
705       if (nblocks >= 16)
706         {
707           for (i = 0; i < 16; i += 8)
708             {
709               /* Use u64 to store pointers for x32 support (assembly function
710                * assumes 64-bit pointers). */
711               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
712               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
713               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
714               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
715               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
716               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
717               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
718             }
719
720           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
721           l = &Ls[(15 + n) % 16];
722
723           /* Process data in 16 block chunks. */
724           while (nblocks >= 16)
725             {
726               blkn += 16;
727               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
728
729               if (encrypt)
730                 _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
731                                                 c->u_ctr.ctr, Ls);
732               else
733                 _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
734                                                 c->u_ctr.ctr, Ls);
735
736               nblocks -= 16;
737               outbuf += 16 * CAMELLIA_BLOCK_SIZE;
738               inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
739               did_use_aesni_avx = 1;
740             }
741         }
742
743       if (did_use_aesni_avx)
744         {
745           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
746                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
747
748           if (burn_stack_depth < avx_burn_stack_depth)
749             burn_stack_depth = avx_burn_stack_depth;
750         }
751
752       /* Use generic code to handle smaller chunks... */
753     }
754 #endif
755
756 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
757   c->u_mode.ocb.data_nblocks = blkn;
758
759   if (burn_stack_depth)
760     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
761 #endif
762
763   return nblocks;
764 }
765
766 /* Bulk authentication of complete blocks in OCB mode. */
767 size_t
768 _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
769                          size_t nblocks)
770 {
771 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
772   CAMELLIA_context *ctx = (void *)&c->context.c;
773   const unsigned char *abuf = abuf_arg;
774   int burn_stack_depth;
775   u64 blkn = c->u_mode.ocb.aad_nblocks;
776
777   burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
778 #else
779   (void)c;
780   (void)abuf_arg;
781 #endif
782
783 #ifdef USE_AESNI_AVX2
784   if (ctx->use_aesni_avx2)
785     {
786       int did_use_aesni_avx2 = 0;
787       u64 Ls[32];
788       unsigned int n = 32 - (blkn % 32);
789       u64 *l;
790       int i;
791
792       if (nblocks >= 32)
793         {
794           for (i = 0; i < 32; i += 8)
795             {
796               /* Use u64 to store pointers for x32 support (assembly function
797                * assumes 64-bit pointers). */
798               Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
799               Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
800               Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
801               Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
802               Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
803               Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
804               Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
805             }
806
807           Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
808           Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
809           Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
810           l = &Ls[(31 + n) % 32];
811
812           /* Process data in 32 block chunks. */
813           while (nblocks >= 32)
814             {
815               blkn += 32;
816               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
817
818               _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
819                                                  c->u_mode.ocb.aad_offset,
820                                                  c->u_mode.ocb.aad_sum, Ls);
821
822               nblocks -= 32;
823               abuf += 32 * CAMELLIA_BLOCK_SIZE;
824               did_use_aesni_avx2 = 1;
825             }
826         }
827
828       if (did_use_aesni_avx2)
829         {
830           int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
831                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
832
833           if (burn_stack_depth < avx2_burn_stack_depth)
834             burn_stack_depth = avx2_burn_stack_depth;
835         }
836
837       /* Use generic code to handle smaller chunks... */
838     }
839 #endif
840
841 #ifdef USE_AESNI_AVX
842   if (ctx->use_aesni_avx)
843     {
844       int did_use_aesni_avx = 0;
845       u64 Ls[16];
846       unsigned int n = 16 - (blkn % 16);
847       u64 *l;
848       int i;
849
850       if (nblocks >= 16)
851         {
852           for (i = 0; i < 16; i += 8)
853             {
854               /* Use u64 to store pointers for x32 support (assembly function
855                * assumes 64-bit pointers). */
856               Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
857               Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
858               Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
859               Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
860               Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
861               Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
862               Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
863             }
864
865           Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
866           l = &Ls[(15 + n) % 16];
867
868           /* Process data in 16 block chunks. */
869           while (nblocks >= 16)
870             {
871               blkn += 16;
872               *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
873
874               _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
875                                                 c->u_mode.ocb.aad_offset,
876                                                 c->u_mode.ocb.aad_sum, Ls);
877
878               nblocks -= 16;
879               abuf += 16 * CAMELLIA_BLOCK_SIZE;
880               did_use_aesni_avx = 1;
881             }
882         }
883
884       if (did_use_aesni_avx)
885         {
886           int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
887                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
888
889           if (burn_stack_depth < avx_burn_stack_depth)
890             burn_stack_depth = avx_burn_stack_depth;
891         }
892
893       /* Use generic code to handle smaller chunks... */
894     }
895 #endif
896
897 #if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
898   c->u_mode.ocb.aad_nblocks = blkn;
899
900   if (burn_stack_depth)
901     _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
902 #endif
903
904   return nblocks;
905 }
906
907 /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR
908    encryption.  Returns NULL on success.

   This is a thin wrapper: it hands the cipher name "CAMELLIA", the
   camellia_setkey and camellia_encrypt functions, the bulk routine
   _gcry_camellia_ctr_enc, the number of blocks, the block size and the
   context size to _gcry_selftest_helper_ctr and returns that helper's
   result unchanged. */
909 static const char*
910 selftest_ctr_128 (void)
911 {
912   const int nblocks = 32+16+1; /* NOTE(review): presumably sized so that a
     32-block chunk, a 16-block chunk and one leftover block are all
     exercised -- confirm against _gcry_camellia_ctr_enc. */
913   const int blocksize = CAMELLIA_BLOCK_SIZE;
914   const int context_size = sizeof(CAMELLIA_context);
915
916   return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey,
917            &camellia_encrypt, &_gcry_camellia_ctr_enc, nblocks, blocksize,
918            context_size);
919 }
920
921 /* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption.
922    Returns NULL on success.

   This is a thin wrapper: it hands the cipher name "CAMELLIA", the
   camellia_setkey and camellia_encrypt functions, the bulk routine
   _gcry_camellia_cbc_dec, the number of blocks, the block size and the
   context size to _gcry_selftest_helper_cbc and returns that helper's
   result unchanged. */
923 static const char*
924 selftest_cbc_128 (void)
925 {
926   const int nblocks = 32+16+2; /* NOTE(review): presumably sized so that a
     32-block chunk, a 16-block chunk and two leftover blocks are all
     exercised -- confirm against _gcry_camellia_cbc_dec. */
927   const int blocksize = CAMELLIA_BLOCK_SIZE;
928   const int context_size = sizeof(CAMELLIA_context);
929
930   return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey,
931            &camellia_encrypt, &_gcry_camellia_cbc_dec, nblocks, blocksize,
932            context_size);
933 }
934
935 /* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption.
936    Returns NULL on success.

   This is a thin wrapper: it hands the cipher name "CAMELLIA", the
   camellia_setkey and camellia_encrypt functions, the bulk routine
   _gcry_camellia_cfb_dec, the number of blocks, the block size and the
   context size to _gcry_selftest_helper_cfb and returns that helper's
   result unchanged. */
937 static const char*
938 selftest_cfb_128 (void)
939 {
940   const int nblocks = 32+16+2; /* NOTE(review): presumably sized so that a
     32-block chunk, a 16-block chunk and two leftover blocks are all
     exercised -- confirm against _gcry_camellia_cfb_dec. */
941   const int blocksize = CAMELLIA_BLOCK_SIZE;
942   const int context_size = sizeof(CAMELLIA_context);
943
944   return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
945            &camellia_encrypt, &_gcry_camellia_cfb_dec, nblocks, blocksize,
946            context_size);
947 }
948
/* Run all Camellia self-tests.

   For each of the three key sizes (128, 192 and 256 bit) the key is set,
   the fixed plaintext block is encrypted and compared with the expected
   ciphertext, and the result is then decrypted in place and compared with
   the plaintext.  Afterwards the CTR, CBC and CFB bulk self-tests are run.

   Returns NULL if every check passes.  Otherwise returns a static string
   naming the first known-answer test that failed, or the non-NULL value
   returned by the first failing bulk self-test.

   The result of camellia_setkey is not inspected here; the ciphertext
   comparison that follows each call is the only check. */
949 static const char *
950 selftest(void)
951 {
952   CAMELLIA_context ctx;
953   byte scratch[16]; /* Output buffer, same size as the 16-byte plaintext. */
954   const char *r;    /* Result of the bulk-mode self-tests. */
955
956   /* These test vectors are from RFC-3713: one plaintext block and, for a
     16-, 24- and 32-byte key, the ciphertext expected for it. */
957   static const byte plaintext[]=
958     {
959       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
960       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
961     };
962   static const byte key_128[]=
963     {
964       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
965       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
966     };
967   static const byte ciphertext_128[]=
968     {
969       0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
970       0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
971     };
972   static const byte key_192[]=
973     {
974       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
975       0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
976     };
977   static const byte ciphertext_192[]=
978     {
979       0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
980       0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
981     };
982   static const byte key_256[]=
983     {
984       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
985       0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
986       0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
987     };
988   static const byte ciphertext_256[]=
989     {
990       0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
991       0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
992     };
993
  /* 128-bit key: encrypt, compare, decrypt in place, compare. */
994   camellia_setkey(&ctx,key_128,sizeof(key_128));
995   camellia_encrypt(&ctx,scratch,plaintext);
996   if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
997     return "CAMELLIA-128 test encryption failed.";
998   camellia_decrypt(&ctx,scratch,scratch);
999   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1000     return "CAMELLIA-128 test decryption failed.";
1001
  /* 192-bit key: same sequence, reusing the same context and buffer. */
1002   camellia_setkey(&ctx,key_192,sizeof(key_192));
1003   camellia_encrypt(&ctx,scratch,plaintext);
1004   if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
1005     return "CAMELLIA-192 test encryption failed.";
1006   camellia_decrypt(&ctx,scratch,scratch);
1007   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1008     return "CAMELLIA-192 test decryption failed.";
1009
  /* 256-bit key: same sequence, reusing the same context and buffer. */
1010   camellia_setkey(&ctx,key_256,sizeof(key_256));
1011   camellia_encrypt(&ctx,scratch,plaintext);
1012   if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
1013     return "CAMELLIA-256 test encryption failed.";
1014   camellia_decrypt(&ctx,scratch,scratch);
1015   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
1016     return "CAMELLIA-256 test decryption failed.";
1017
  /* Bulk-mode self-tests; a non-NULL result is passed straight back to
     the caller. */
1018   if ( (r = selftest_ctr_128 ()) )
1019     return r;
1020
1021   if ( (r = selftest_cbc_128 ()) )
1022     return r;
1023
1024   if ( (r = selftest_cfb_128 ()) )
1025     return r;
1026
1027   return NULL;
1028 }
1029
1030 /* These oids are from
1031    <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
1032    retrieved May 1, 2007. */
1033
/* OID table for Camellia with a 128-bit key.  Each entry pairs a dotted
   OID string with a GCRY_CIPHER_MODE_* constant (CBC, ECB, OFB, CFB); the
   table is terminated by a { NULL } entry.  It is referenced from the
   initializer of _gcry_cipher_spec_camellia128. */
1034 static gcry_cipher_oid_spec_t camellia128_oids[] =
1035   {
1036     {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
1037     {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
1038     {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
1039     {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
1040     { NULL }
1041   };
1042
/* OID table for Camellia with a 192-bit key.  Each entry pairs a dotted
   OID string with a GCRY_CIPHER_MODE_* constant (CBC, ECB, OFB, CFB); the
   table is terminated by a { NULL } entry.  It is referenced from the
   initializer of _gcry_cipher_spec_camellia192. */
1043 static gcry_cipher_oid_spec_t camellia192_oids[] =
1044   {
1045     {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
1046     {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
1047     {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
1048     {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
1049     { NULL }
1050   };
1051
/* OID table for Camellia with a 256-bit key.  Each entry pairs a dotted
   OID string with a GCRY_CIPHER_MODE_* constant (CBC, ECB, OFB, CFB); the
   table is terminated by a { NULL } entry.  It is referenced from the
   initializer of _gcry_cipher_spec_camellia256. */
1052 static gcry_cipher_oid_spec_t camellia256_oids[] =
1053   {
1054     {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
1055     {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
1056     {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
1057     {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
1058     { NULL }
1059   };
1060
/* Cipher descriptor for Camellia with a 128-bit key; it has external
   linkage (no `static').  The initializer is positional: algorithm id
   GCRY_CIPHER_CAMELLIA128, a {0, 0} member, the name "CAMELLIA128", a NULL
   member, the OID table camellia128_oids, CAMELLIA_BLOCK_SIZE, the value
   128, sizeof(CAMELLIA_context), and the setkey, encrypt and decrypt
   functions.
   NOTE(review): {0, 0} is presumably the flags member, NULL the alias
   list and 128 the key length in bits -- confirm against the declaration
   of gcry_cipher_spec_t. */
1061 gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
1062   {
1063     GCRY_CIPHER_CAMELLIA128, {0, 0},
1064     "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
1065     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1066   };
1067
/* Cipher descriptor for Camellia with a 192-bit key; it has external
   linkage (no `static').  The initializer is positional: algorithm id
   GCRY_CIPHER_CAMELLIA192, a {0, 0} member, the name "CAMELLIA192", a NULL
   member, the OID table camellia192_oids, CAMELLIA_BLOCK_SIZE, the value
   192, sizeof(CAMELLIA_context), and the setkey, encrypt and decrypt
   functions.
   NOTE(review): {0, 0} is presumably the flags member, NULL the alias
   list and 192 the key length in bits -- confirm against the declaration
   of gcry_cipher_spec_t. */
1068 gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
1069   {
1070     GCRY_CIPHER_CAMELLIA192, {0, 0},
1071     "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
1072     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1073   };
1074
/* Cipher descriptor for Camellia with a 256-bit key; it has external
   linkage (no `static').  The initializer is positional: algorithm id
   GCRY_CIPHER_CAMELLIA256, a {0, 0} member, the name "CAMELLIA256", a NULL
   member, the OID table camellia256_oids, CAMELLIA_BLOCK_SIZE, the value
   256, sizeof(CAMELLIA_context), and the setkey, encrypt and decrypt
   functions.
   NOTE(review): {0, 0} is presumably the flags member, NULL the alias
   list and 256 the key length in bits -- confirm against the declaration
   of gcry_cipher_spec_t. */
1075 gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
1076   {
1077     GCRY_CIPHER_CAMELLIA256, {0, 0},
1078     "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
1079     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
1080   };