Add AES-NI/AVX accelerated Camellia implementation
[libgcrypt.git] / cipher / camellia-glue.c
1 /* camellia-glue.c - Glue for the Camellia cipher
2  * Copyright (C) 2007 Free Software Foundation, Inc.
3  *
4  * This file is part of Libgcrypt.
5  *
6  * Libgcrypt is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as
8  * published by the Free Software Foundation; either version 2.1 of
9  * the License, or (at your option) any later version.
10  *
11  * Libgcrypt is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19  * 02110-1301, USA.
20  */
21
22 /* I put all the libgcrypt-specific stuff in this file to keep the
23    camellia.c/camellia.h files exactly as provided by NTT.  If they
24    update their code, this should make it easier to bring the changes
25    in. - dshaw
26
27    There is one small change which needs to be done: Include the
28    following code at the top of camellia.h: */
29 #if 0
30
31 /* To use Camellia with libraries it is often useful to keep the name
32  * space of the library clean.  The following macro is thus useful:
33  *
34  *     #define CAMELLIA_EXT_SYM_PREFIX foo_
35  *
36  * This prefixes all external symbols with "foo_".
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 #ifdef CAMELLIA_EXT_SYM_PREFIX
42 #define CAMELLIA_PREFIX1(x,y) x ## y
43 #define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
44 #define CAMELLIA_PREFIX(x)    CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
45 #define Camellia_Ekeygen      CAMELLIA_PREFIX(Camellia_Ekeygen)
46 #define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
47 #define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
48 #define camellia_decrypt128   CAMELLIA_PREFIX(camellia_decrypt128)
49 #define camellia_decrypt256   CAMELLIA_PREFIX(camellia_decrypt256)
50 #define camellia_encrypt128   CAMELLIA_PREFIX(camellia_encrypt128)
51 #define camellia_encrypt256   CAMELLIA_PREFIX(camellia_encrypt256)
52 #define camellia_setup128     CAMELLIA_PREFIX(camellia_setup128)
53 #define camellia_setup192     CAMELLIA_PREFIX(camellia_setup192)
54 #define camellia_setup256     CAMELLIA_PREFIX(camellia_setup256)
55 #endif /*CAMELLIA_EXT_SYM_PREFIX*/
56
57 #endif /* Code sample. */
58
59
60 #include <config.h>
61 #include "types.h"
62 #include "g10lib.h"
63 #include "cipher.h"
64 #include "camellia.h"
65 #include "bufhelp.h"
66
/* Helper macro to force alignment to 16 bytes.  It expands to nothing
   when HAVE_GCC_ATTRIBUTE_ALIGNED is not defined, in which case no
   particular alignment is requested.  */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
#else
# define ATTR_ALIGNED_16
#endif
73
/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code.
   It is only defined for x86-64 builds that have both
   ENABLE_AESNI_SUPPORT and ENABLE_AVX_SUPPORT defined.  */
#undef USE_AESNI_AVX
#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
# if defined(__x86_64__)
#  define USE_AESNI_AVX 1
# endif
#endif
81
/* Per-key cipher context; filled in by camellia_setkey.  */
typedef struct
{
  int keybitlength;             /* Key size in bits (key length in bytes * 8).  */
  KEY_TABLE_TYPE keytable;      /* Key schedule written by Camellia_Ekeygen.  */
#ifdef USE_AESNI_AVX
  int use_aesni_avx;            /* AES-NI/AVX implementation shall be used.  */
#endif /*USE_AESNI_AVX*/
} CAMELLIA_context;
90
#ifdef USE_AESNI_AVX
/* Assembler implementations of Camellia using AES-NI and AVX.  Each call
   processes 16 blocks at the same time.
   NOTE(review): the C callers in this file do not advance CTR/IV
   themselves after a call, so these routines presumably update it in
   place -- confirm against the assembler source.
 */
extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *ctr);

extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
                                             unsigned char *out,
                                             const unsigned char *in,
                                             unsigned char *iv);
#endif
105
106 static const char *selftest(void);
107
108 static gcry_err_code_t
109 camellia_setkey(void *c, const byte *key, unsigned keylen)
110 {
111   CAMELLIA_context *ctx=c;
112   static int initialized=0;
113   static const char *selftest_failed=NULL;
114
115   if(keylen!=16 && keylen!=24 && keylen!=32)
116     return GPG_ERR_INV_KEYLEN;
117
118   if(!initialized)
119     {
120       initialized=1;
121       selftest_failed=selftest();
122       if(selftest_failed)
123         log_error("%s\n",selftest_failed);
124     }
125
126   if(selftest_failed)
127     return GPG_ERR_SELFTEST_FAILED;
128
129   ctx->keybitlength=keylen*8;
130   Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
131   _gcry_burn_stack
132     ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
133      +(4+32)*sizeof(u32)+2*sizeof(void*)    /* camellia_setup192 */
134      +0+sizeof(int)+2*sizeof(void*)         /* Camellia_Ekeygen */
135      +3*2*sizeof(void*)                     /* Function calls.  */
136      );
137
138 #ifdef USE_AESNI_AVX
139   ctx->use_aesni_avx = 0;
140   if ((_gcry_get_hw_features () & HWF_INTEL_AESNI) &&
141       (_gcry_get_hw_features () & HWF_INTEL_AVX))
142     {
143       ctx->use_aesni_avx = 1;
144     }
145 #endif
146
147   return 0;
148 }
149
150 static void
151 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
152 {
153   CAMELLIA_context *ctx=c;
154
155   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
156
157 #define CAMELLIA_encrypt_stack_burn_size \
158   (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
159      +4*sizeof(u32)+4*sizeof(u32) \
160      +2*sizeof(u32*)+4*sizeof(u32) \
161      +2*2*sizeof(void*) /* Function calls.  */ \
162     )
163
164   _gcry_burn_stack(CAMELLIA_encrypt_stack_burn_size);
165 }
166
167 static void
168 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
169 {
170   CAMELLIA_context *ctx=c;
171
172   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
173
174 #define CAMELLIA_decrypt_stack_burn_size \
175     (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
176      +4*sizeof(u32)+4*sizeof(u32) \
177      +2*sizeof(u32*)+4*sizeof(u32) \
178      +2*2*sizeof(void*) /* Function calls.  */ \
179     )
180
181   _gcry_burn_stack(CAMELLIA_decrypt_stack_burn_size);
182 }
183
184 /* Bulk encryption of complete blocks in CTR mode.  This function is only
185    intended for the bulk encryption feature of cipher.c.  CTR is expected to be
186    of size CAMELLIA_BLOCK_SIZE. */
187 void
188 _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
189                        void *outbuf_arg, const void *inbuf_arg,
190                        unsigned int nblocks)
191 {
192   CAMELLIA_context *ctx = context;
193   unsigned char *outbuf = outbuf_arg;
194   const unsigned char *inbuf = inbuf_arg;
195   unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
196   int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
197   int i;
198
199 #ifdef USE_AESNI_AVX
200   if (ctx->use_aesni_avx)
201     {
202       int did_use_aesni_avx = 0;
203
204       /* Process data in 16 block chunks. */
205       while (nblocks >= 16)
206         {
207           _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
208
209           nblocks -= 16;
210           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
211           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
212           did_use_aesni_avx = 1;
213         }
214
215       if (did_use_aesni_avx)
216         {
217           /* clear AVX registers */
218           asm volatile ("vzeroall;\n":::);
219
220           if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
221             burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
222         }
223
224       /* Use generic code to handle smaller chunks... */
225       /* TODO: use caching instead? */
226     }
227 #endif
228
229   for ( ;nblocks; nblocks-- )
230     {
231       /* Encrypt the counter. */
232       Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
233       /* XOR the input with the encrypted counter and store in output.  */
234       buf_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
235       outbuf += CAMELLIA_BLOCK_SIZE;
236       inbuf  += CAMELLIA_BLOCK_SIZE;
237       /* Increment the counter.  */
238       for (i = CAMELLIA_BLOCK_SIZE; i > 0; i--)
239         {
240           ctr[i-1]++;
241           if (ctr[i-1])
242             break;
243         }
244     }
245
246   wipememory(tmpbuf, sizeof(tmpbuf));
247   _gcry_burn_stack(burn_stack_depth);
248 }
249
250 /* Bulk decryption of complete blocks in CBC mode.  This function is only
251    intended for the bulk encryption feature of cipher.c. */
252 void
253 _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
254                        void *outbuf_arg, const void *inbuf_arg,
255                        unsigned int nblocks)
256 {
257   CAMELLIA_context *ctx = context;
258   unsigned char *outbuf = outbuf_arg;
259   const unsigned char *inbuf = inbuf_arg;
260   unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
261   int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
262
263 #ifdef USE_AESNI_AVX
264   if (ctx->use_aesni_avx)
265     {
266       int did_use_aesni_avx = 0;
267
268       /* Process data in 16 block chunks. */
269       while (nblocks >= 16)
270         {
271           _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
272
273           nblocks -= 16;
274           outbuf += 16 * CAMELLIA_BLOCK_SIZE;
275           inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
276           did_use_aesni_avx = 1;
277         }
278
279       if (did_use_aesni_avx)
280         {
281           /* clear AVX registers */
282           asm volatile ("vzeroall;\n":::);
283
284           if (burn_stack_depth < 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *))
285             burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 2 * sizeof(void *);
286         }
287
288       /* Use generic code to handle smaller chunks... */
289     }
290 #endif
291
292   for ( ;nblocks; nblocks-- )
293     {
294       /* We need to save INBUF away because it may be identical to
295          OUTBUF.  */
296       memcpy(savebuf, inbuf, CAMELLIA_BLOCK_SIZE);
297
298       Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, outbuf);
299
300       buf_xor(outbuf, outbuf, iv, CAMELLIA_BLOCK_SIZE);
301       memcpy(iv, savebuf, CAMELLIA_BLOCK_SIZE);
302       inbuf += CAMELLIA_BLOCK_SIZE;
303       outbuf += CAMELLIA_BLOCK_SIZE;
304     }
305
306   wipememory(savebuf, sizeof(savebuf));
307   _gcry_burn_stack(burn_stack_depth);
308 }
309
310 /* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR
311    encryption.  Returns NULL on success. */
312 static const char*
313 selftest_ctr_128 (void)
314 {
315   const int nblocks = 16+1;
316   CAMELLIA_context ctx ATTR_ALIGNED_16;
317   unsigned char plaintext[nblocks*16] ATTR_ALIGNED_16;
318   unsigned char ciphertext[nblocks*16] ATTR_ALIGNED_16;
319   unsigned char plaintext2[nblocks*16] ATTR_ALIGNED_16;
320   unsigned char iv[16] ATTR_ALIGNED_16;
321   unsigned char iv2[16] ATTR_ALIGNED_16;
322   int i, j, diff;
323
324   static const unsigned char key[16] ATTR_ALIGNED_16 = {
325       0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
326       0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
327     };
328   static char error_str[128];
329
330   camellia_setkey (&ctx, key, sizeof (key));
331
332   /* Test single block code path */
333   memset(iv, 0xff, sizeof(iv));
334   for (i = 0; i < 16; i++)
335     plaintext[i] = i;
336
337   /* CTR manually.  */
338   camellia_encrypt (&ctx, ciphertext, iv);
339   for (i = 0; i < 16; i++)
340     ciphertext[i] ^= plaintext[i];
341   for (i = 16; i > 0; i--)
342     {
343       iv[i-1]++;
344       if (iv[i-1])
345         break;
346     }
347
348   memset(iv2, 0xff, sizeof(iv2));
349   _gcry_camellia_ctr_enc (&ctx, iv2, plaintext2, ciphertext, 1);
350
351   if (memcmp(plaintext2, plaintext, 16))
352     return "CAMELLIA-128-CTR test failed (plaintext mismatch)";
353
354   if (memcmp(iv2, iv, 16))
355     return "CAMELLIA-128-CTR test failed (IV mismatch)";
356
357   /* Test parallelized code paths */
358   for (diff = 0; diff < nblocks; diff++) {
359     memset(iv, 0xff, sizeof(iv));
360     iv[15] -= diff;
361
362     for (i = 0; i < sizeof(plaintext); i++)
363       plaintext[i] = i;
364
365     /* Create CTR ciphertext manually.  */
366     for (i = 0; i < sizeof(plaintext); i+=16)
367       {
368         camellia_encrypt (&ctx, &ciphertext[i], iv);
369         for (j = 0; j < 16; j++)
370           ciphertext[i+j] ^= plaintext[i+j];
371         for (j = 16; j > 0; j--)
372           {
373             iv[j-1]++;
374             if (iv[j-1])
375               break;
376           }
377       }
378
379     /* Decrypt using bulk CTR and compare result.  */
380     memset(iv2, 0xff, sizeof(iv2));
381     iv2[15] -= diff;
382
383     _gcry_camellia_ctr_enc (&ctx, iv2, plaintext2, ciphertext,
384                             sizeof(ciphertext) / CAMELLIA_BLOCK_SIZE);
385
386     if (memcmp(plaintext2, plaintext, sizeof(plaintext)))
387       {
388         snprintf(error_str, sizeof(error_str),
389                  "CAMELLIA-128-CTR test failed (plaintext mismatch, diff: %d)",
390                  diff);
391         return error_str;
392       }
393     if (memcmp(iv2, iv, sizeof(iv)))
394       {
395         snprintf(error_str, sizeof(error_str),
396                  "CAMELLIA-128-CTR test failed (IV mismatch, diff: %d)",
397                  diff);
398         return error_str;
399       }
400   }
401
402   return NULL;
403 }
404
405 static const char *
406 selftest(void)
407 {
408   CAMELLIA_context ctx;
409   byte scratch[16];
410   const char *r;
411
412   /* These test vectors are from RFC-3713 */
413   const byte plaintext[]=
414     {
415       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
416       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
417     };
418   const byte key_128[]=
419     {
420       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
421       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
422     };
423   const byte ciphertext_128[]=
424     {
425       0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
426       0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
427     };
428   const byte key_192[]=
429     {
430       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
431       0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
432     };
433   const byte ciphertext_192[]=
434     {
435       0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
436       0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
437     };
438   const byte key_256[]=
439     {
440       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
441       0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
442       0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
443     };
444   const byte ciphertext_256[]=
445     {
446       0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
447       0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
448     };
449
450   camellia_setkey(&ctx,key_128,sizeof(key_128));
451   camellia_encrypt(&ctx,scratch,plaintext);
452   if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
453     return "CAMELLIA-128 test encryption failed.";
454   camellia_decrypt(&ctx,scratch,scratch);
455   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
456     return "CAMELLIA-128 test decryption failed.";
457
458   camellia_setkey(&ctx,key_192,sizeof(key_192));
459   camellia_encrypt(&ctx,scratch,plaintext);
460   if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
461     return "CAMELLIA-192 test encryption failed.";
462   camellia_decrypt(&ctx,scratch,scratch);
463   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
464     return "CAMELLIA-192 test decryption failed.";
465
466   camellia_setkey(&ctx,key_256,sizeof(key_256));
467   camellia_encrypt(&ctx,scratch,plaintext);
468   if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
469     return "CAMELLIA-256 test encryption failed.";
470   camellia_decrypt(&ctx,scratch,scratch);
471   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
472     return "CAMELLIA-256 test decryption failed.";
473
474   if ( (r = selftest_ctr_128 ()) )
475     return r;
476
477   return NULL;
478 }
479
/* These oids are from
   <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
   retrieved May 1, 2007. */

/* OIDs for Camellia with a 128 bit key, each paired with the cipher
   mode it denotes.  The list ends with a NULL entry.  */
static gcry_cipher_oid_spec_t camellia128_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };
492
/* OIDs for Camellia with a 192 bit key, each paired with the cipher
   mode it denotes.  The list ends with a NULL entry.  */
static gcry_cipher_oid_spec_t camellia192_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };
501
/* OIDs for Camellia with a 256 bit key, each paired with the cipher
   mode it denotes.  The list ends with a NULL entry.  */
static gcry_cipher_oid_spec_t camellia256_oids[] =
  {
    {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
    {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
    {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
    {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
    { NULL }
  };
510
/* Cipher description for CAMELLIA128: name, OID table, block size, the
   value 128 (presumably the key length in bits -- field layout is
   defined by gcry_cipher_spec_t, declared elsewhere), context size and
   the setkey/encrypt/decrypt handlers of this file.  */
gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
  {
    "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };
516
/* Cipher description for CAMELLIA192: name, OID table, block size, the
   value 192 (presumably the key length in bits -- field layout is
   defined by gcry_cipher_spec_t, declared elsewhere), context size and
   the setkey/encrypt/decrypt handlers of this file.  */
gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
  {
    "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };
522
/* Cipher description for CAMELLIA256: name, OID table, block size, the
   value 256 (presumably the key length in bits -- field layout is
   defined by gcry_cipher_spec_t, declared elsewhere), context size and
   the setkey/encrypt/decrypt handlers of this file.  */
gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
  {
    "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
  };