AES: set up cipher object bulk routines with optimized versions
author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Tue, 19 Jun 2018 15:34:33 +0000 (18:34 +0300)
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Tue, 19 Jun 2018 16:29:25 +0000 (19:29 +0300)
* cipher/rijndael-aesni.c
(_gcry_aes_aesni_prepare_decryption): Rename...
(do_aesni_prepare_decryption): .. to this.
(_gcry_aes_aesni_prepare_decryption): New.
(_gcry_aes_aesni_cfb_enc, _gcry_aes_aesni_cbc_enc)
(_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec)
(_gcry_aes_aesni_cbc_dec): Reorder parameters to match bulk
operations.
(_gcry_aes_aesni_cbc_dec, aesni_ocb_dec)
(_gcry_aes_aesni_xts_dec): Check and prepare decryption.
(_gcry_aes_aesni_ocb_crypt, _gcry_aes_aesni_ocb_auth): Change return
type to size_t.
* cipher/rijndael-armv8-ce.c
(_gcry_aes_armv8_ce_cfb_enc, _gcry_aes_armv8_ce_cbc_enc)
(_gcry_aes_armv8_ce_ctr_enc, _gcry_aes_armv8_ce_cfb_dec)
(_gcry_aes_armv8_ce_cbc_dec): Reorder parameters to match bulk
operations.
(_gcry_aes_armv8_ce_cbc_dec, _gcry_aes_armv8_ce_ocb_crypt)
(_gcry_aes_armv8_ce_xts_crypt): Check and prepare decryption.
(_gcry_aes_armv8_ce_ocb_crypt, _gcry_aes_armv8_ce_ocb_auth): Change
return type to size_t.
* cipher/rijndael-ssse3-amd64.c
(_gcry_aes_ssse3_prepare_decryption): Rename...
(do_ssse3_prepare_decryption): .. to this.
(_gcry_aes_ssse3_prepare_decryption): New.
(_gcry_aes_ssse3_cfb_enc, _gcry_aes_ssse3_cbc_enc)
(_gcry_aes_ssse3_ctr_enc, _gcry_aes_ssse3_cfb_dec)
(_gcry_aes_ssse3_cbc_dec): Reorder parameters to match bulk
operations.
(_gcry_aes_ssse3_cbc_dec, ssse3_ocb_dec): Check and prepare decryption.
(_gcry_aes_ssse3_ocb_crypt, _gcry_aes_ssse3_ocb_auth): Change return
type to size_t.
* cipher/rijndael.c
(_gcry_aes_aesni_cfb_enc, _gcry_aes_aesni_cbc_enc)
(_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec)
(_gcry_aes_aesni_cbc_dec, _gcry_aes_aesni_ocb_crypt)
(_gcry_aes_aesni_ocb_auth, _gcry_aes_aesni_xts_crypt)
(_gcry_aes_ssse3_cfb_enc, _gcry_aes_ssse3_cbc_enc)
(_gcry_aes_ssse3_ctr_enc, _gcry_aes_ssse3_cfb_dec)
(_gcry_aes_ssse3_cbc_dec, _gcry_aes_ssse3_ocb_crypt)
(_gcry_aes_ssse3_ocb_auth, _gcry_aes_ssse3_xts_crypt)
(_gcry_aes_armv8_ce_cfb_enc, _gcry_aes_armv8_ce_cbc_enc)
(_gcry_aes_armv8_ce_ctr_enc, _gcry_aes_armv8_ce_cfb_dec)
(_gcry_aes_armv8_ce_cbc_dec, _gcry_aes_armv8_ce_ocb_crypt)
(_gcry_aes_armv8_ce_ocb_auth, _gcry_aes_armv8_ce_xts_crypt): Change
prototypes to match bulk operations.
(do_setkey): Set up bulk operations with optimized implementations.
(_gcry_aes_cfb_enc, _gcry_aes_cbc_enc, _gcry_aes_ctr_enc)
(_gcry_aes_cfb_dec, _gcry_aes_cbc_dec, _gcry_aes_ocb_crypt)
(_gcry_aes_ocb_auth, _gcry_aes_xts_crypt): Update
usage to match new prototypes, avoid prefetch and decryption
preparation on optimized code paths.
--

Replace the bulk operation functions of the cipher object with faster
versions for reduced per-call overhead.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
cipher/rijndael-aesni.c
cipher/rijndael-armv8-ce.c
cipher/rijndael-ssse3-amd64.c
cipher/rijndael.c

index 50a0745..e7e61ca 100644 (file)
@@ -371,8 +371,8 @@ _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 
 
 /* Make a decryption key from an encryption key. */
-void
-_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+static inline void
+do_aesni_prepare_decryption (RIJNDAEL_context *ctx)
 {
   /* The AES-NI decrypt instructions use the Equivalent Inverse
      Cipher, thus we can't use the the standard decrypt key
@@ -382,8 +382,6 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
   int rr;
   int r;
 
-  aesni_prepare();
-
 #define DO_AESNI_AESIMC() \
   asm volatile ("movdqa %[ekey], %%xmm1\n\t" \
                 /*"aesimc %%xmm1, %%xmm1\n\t"*/ \
@@ -419,7 +417,13 @@ _gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
   dkey[r] = ekey[0];
 
 #undef DO_AESNI_AESIMC
+}
 
+void
+_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  aesni_prepare();
+  do_aesni_prepare_decryption (ctx);
   aesni_cleanup();
 }
 
@@ -1696,8 +1700,8 @@ _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
 
 
 void
-_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   aesni_prepare ();
@@ -1732,8 +1736,8 @@ _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks, int cbc_mac)
 {
   aesni_prepare_2_6_variable;
@@ -1778,8 +1782,8 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *ctr,
+_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
@@ -1851,8 +1855,8 @@ _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
 
 
 void
-_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                         const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
                          size_t nblocks)
 {
   aesni_prepare_2_6_variable;
@@ -2006,15 +2010,21 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   aesni_prepare_2_6_variable;
 
   aesni_prepare ();
   aesni_prepare_2_6();
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   asm volatile
     ("movdqu %[iv], %%xmm5\n\t"        /* use xmm5 as fast IV storage */
      : /* No output */
@@ -2477,6 +2487,12 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   aesni_prepare ();
   aesni_prepare_2_6 ();
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   /* Preload Offset and Checksum */
   asm volatile ("movdqu %[iv], %%xmm5\n\t"
                 "movdqu %[ctr], %%xmm6\n\t"
@@ -2761,7 +2777,7 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 }
 
 
-void
+size_t
 _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
                           const void *inbuf_arg, size_t nblocks, int encrypt)
 {
@@ -2769,10 +2785,12 @@ _gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
     aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
   else
     aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+  return 0;
 }
 
 
-void
+size_t
 _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
@@ -3004,6 +3022,8 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
 
   aesni_cleanup ();
   aesni_cleanup_2_6 ();
+
+  return 0;
 }
 
 
@@ -3159,6 +3179,12 @@ _gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
   aesni_prepare ();
   aesni_prepare_2_6 ();
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   /* Preload Tweak */
   asm volatile ("movdqu %[tweak], %%xmm5\n\t"
                "movdqa %[gfmul], %%xmm6\n\t"
index 6af7108..6e46830 100644 (file)
@@ -284,8 +284,8 @@ _gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
 }
 
 void
-_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks, int cbc_mac)
 {
   const void *keysched = ctx->keyschenc32;
@@ -296,19 +296,25 @@ _gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 void
-_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschdec32;
   unsigned int nrounds = ctx->rounds;
 
+  if ( !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
 }
 
 void
-_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -318,8 +324,8 @@ _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 void
-_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -329,8 +335,8 @@ _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 }
 
 void
-_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                            const unsigned char *inbuf, unsigned char *iv,
+_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
                             size_t nblocks)
 {
   const void *keysched = ctx->keyschenc32;
@@ -339,7 +345,7 @@ _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
   _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
 }
 
-void
+size_t
 _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
                               const void *inbuf_arg, size_t nblocks,
                               int encrypt)
@@ -353,13 +359,21 @@ _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned int nrounds = ctx->rounds;
   u64 blkn = c->u_mode.ocb.data_nblocks;
 
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   c->u_mode.ocb.data_nblocks = blkn + nblocks;
 
   crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
            c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
 }
 
-void
+size_t
 _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
                              size_t nblocks)
 {
@@ -374,6 +388,8 @@ _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
   _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
                              c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0],
                              nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
 }
 
 void
@@ -386,6 +402,12 @@ _gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
                                     : _gcry_aes_xts_dec_armv8_ce;
   unsigned int nrounds = ctx->rounds;
 
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
   crypt_fn(keysched, outbuf, inbuf, tweak, nblocks, nrounds);
 }
 
index 98660ec..07a64a4 100644 (file)
@@ -175,11 +175,11 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
 
 
 /* Make a decryption key from an encryption key. */
-void
-_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+static inline void
+do_ssse3_prepare_decryption (RIJNDAEL_context *ctx,
+                             byte ssse3_state[SSSE3_STATE_SIZE])
 {
   unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
-  byte ssse3_state[SSSE3_STATE_SIZE];
 
   vpaes_ssse3_prepare();
 
@@ -190,6 +190,14 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
   vpaes_ssse3_cleanup();
 }
 
+void
+_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  do_ssse3_prepare_decryption(ctx, ssse3_state);
+}
+
 
 /* Encrypt one block using the Intel SSSE3 instructions.  Block is input
 * and output through SSE register xmm0. */
@@ -232,9 +240,9 @@ _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
 
 
 void
-_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -271,9 +279,9 @@ _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks, int cbc_mac)
+_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks, int cbc_mac)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -316,9 +324,9 @@ _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *ctr,
-                        size_t nblocks)
+_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
     { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
@@ -384,7 +392,7 @@ _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 unsigned int
 _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
-                        const unsigned char *src)
+                         const unsigned char *src)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -405,9 +413,9 @@ _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
 
 
 void
-_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
@@ -445,13 +453,19 @@ _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
 
 
 void
-_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
-                        const unsigned char *inbuf, unsigned char *iv,
-                        size_t nblocks)
+_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
 {
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
   vpaes_ssse3_prepare_dec ();
 
   asm volatile ("movdqu %[iv], %%xmm7\n\t"     /* use xmm7 as fast IV storage */
@@ -563,6 +577,12 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
   unsigned int nrounds = ctx->rounds;
   byte ssse3_state[SSSE3_STATE_SIZE];
 
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
   vpaes_ssse3_prepare_dec ();
 
   /* Preload Offset and Checksum */
@@ -616,7 +636,7 @@ ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
 }
 
 
-void
+size_t
 _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
                           const void *inbuf_arg, size_t nblocks, int encrypt)
 {
@@ -624,10 +644,12 @@ _gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
     ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
   else
     ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+  return 0;
 }
 
 
-void
+size_t
 _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                           size_t nblocks)
 {
@@ -683,6 +705,8 @@ _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
                 : "memory" );
 
   vpaes_ssse3_cleanup ();
+
+  return 0;
 }
 
 #endif /* USE_SSSE3 */
index f9666d0..d3fcb76 100644 (file)
@@ -77,37 +77,29 @@ extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx,
 extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx,
                                              unsigned char *dst,
                                              const unsigned char *src);
-extern void _gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks,
-                                     int cbc_mac);
-extern void _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                       const void *inbuf_arg, size_t nblocks,
-                                       int encrypt);
-extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
-                                      size_t nblocks);
-extern void _gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx,
-                                      unsigned char *tweak,
-                                      unsigned char *outbuf,
-                                      const unsigned char *inbuf,
-                                      size_t nblocks, int encrypt);
+extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks, int cbc_mac);
+extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak,
+                                       void *outbuf_arg, const void *inbuf_arg,
+                                       size_t nblocks, int encrypt);
 #endif
 
 #ifdef USE_SSSE3
@@ -121,32 +113,27 @@ extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx,
 extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx,
                                              unsigned char *dst,
                                              const unsigned char *src);
-extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks,
+extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks,
                                      int cbc_mac);
-extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx,
-                                     unsigned char *outbuf,
-                                     const unsigned char *inbuf,
-                                     unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                       const void *inbuf_arg, size_t nblocks,
-                                       int encrypt);
-extern void _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
-                                      size_t nblocks);
+extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                        size_t nblocks);
 #endif
 
 #ifdef USE_PADLOCK
@@ -185,36 +172,30 @@ extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx,
                                                unsigned char *dst,
                                                const unsigned char *src);
 
-extern void _gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cbc_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks,
+extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks,
                                         int cbc_mac);
-extern void _gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *ctr, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned char *iv, size_t nblocks);
-extern void _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
-                                          const void *inbuf_arg, size_t nblocks,
-                                          int encrypt);
-extern void _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
-                                         const void *abuf_arg, size_t nblocks);
-extern void _gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx,
-                                          unsigned char *tweak,
-                                          unsigned char *outbuf,
-                                          const unsigned char *inbuf,
+extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void *inbuf_arg,
+                                        size_t nblocks);
+extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                            const void *inbuf_arg, size_t nblocks,
+                                            int encrypt);
+extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
+                                           const void *abuf_arg, size_t nblocks);
+extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
                                           size_t nblocks, int encrypt);
 #endif /*USE_ARM_ASM*/
 
@@ -270,7 +251,8 @@ static void prefetch_dec(void)
 \f
 /* Perform the key setup.  */
 static gcry_err_code_t
-do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
+do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
+           gcry_cipher_hd_t hd)
 {
   static int initialized = 0;
   static const char *selftest_failed = 0;
@@ -350,6 +332,17 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_aesni = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_aesni_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_aesni_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_aesni_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_aesni_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_aesni_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_aesni_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_aesni_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_aesni_xts_crypt;
+        }
     }
 #endif
 #ifdef USE_PADLOCK
@@ -371,6 +364,16 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_ssse3 = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_ssse3_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_ssse3_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_ssse3_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_ssse3_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_ssse3_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_ssse3_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_ssse3_ocb_auth;
+        }
     }
 #endif
 #ifdef USE_ARM_CE
@@ -381,6 +384,17 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
       ctx->prefetch_enc_fn = NULL;
       ctx->prefetch_dec_fn = NULL;
       ctx->use_arm_ce = 1;
+      if (hd)
+        {
+          hd->bulk.cfb_enc = _gcry_aes_armv8_ce_cfb_enc;
+          hd->bulk.cfb_dec = _gcry_aes_armv8_ce_cfb_dec;
+          hd->bulk.cbc_enc = _gcry_aes_armv8_ce_cbc_enc;
+          hd->bulk.cbc_dec = _gcry_aes_armv8_ce_cbc_dec;
+          hd->bulk.ctr_enc = _gcry_aes_armv8_ce_ctr_enc;
+          hd->bulk.ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt;
+          hd->bulk.ocb_auth = _gcry_aes_armv8_ce_ocb_auth;
+          hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
+        }
     }
 #endif
   else
@@ -517,8 +531,7 @@ rijndael_setkey (void *context, const byte *key, const unsigned keylen,
                  gcry_cipher_hd_t hd)
 {
   RIJNDAEL_context *ctx = context;
-  (void)hd;
-  return do_setkey (ctx, key, keylen);
+  return do_setkey (ctx, key, keylen, hd);
 }
 
 
@@ -783,36 +796,36 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cfb_enc (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* Encrypt the IV. */
@@ -844,36 +857,36 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
   unsigned char *last_iv;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_aesni_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       last_iv = iv;
 
       for ( ;nblocks; nblocks-- )
@@ -913,30 +926,27 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
   unsigned int burn_depth = 0;
   int i;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
@@ -944,6 +954,9 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* Encrypt the counter. */
@@ -1161,36 +1174,36 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cfb_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
     {
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           burn_depth = encrypt_fn (ctx, iv, iv);
@@ -1219,32 +1232,27 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  check_decryption_preparation (ctx);
-
-  if (ctx->prefetch_dec_fn)
-    ctx->prefetch_dec_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_aesni_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_ssse3_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_cbc_dec (ctx, outbuf, inbuf, iv, nblocks);
-      burn_depth = 0;
+      _gcry_aes_armv8_ce_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
@@ -1252,6 +1260,11 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
       unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
       rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
 
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           /* INBUF is needed later and it may be identical to OUTBUF, so store
@@ -1283,40 +1296,24 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
   const unsigned char *inbuf = inbuf_arg;
   unsigned int burn_depth = 0;
 
-  if (encrypt)
-    {
-      if (ctx->prefetch_enc_fn)
-        ctx->prefetch_enc_fn();
-    }
-  else
-    {
-      check_decryption_preparation (ctx);
-
-      if (ctx->prefetch_dec_fn)
-        ctx->prefetch_dec_fn();
-    }
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_ssse3_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
     }
 #endif /*USE_ARM_CE*/
   else if (encrypt)
@@ -1324,6 +1321,9 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
@@ -1349,6 +1349,11 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
 
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
@@ -1385,30 +1390,24 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
   const unsigned char *abuf = abuf_arg;
   unsigned int burn_depth = 0;
 
-  if (ctx->prefetch_enc_fn)
-    ctx->prefetch_enc_fn();
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
-      _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_AESNI*/
 #ifdef USE_SSSE3
   else if (ctx->use_ssse3)
     {
-      _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_ssse3_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_SSSE3*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
-      _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks);
-      burn_depth = 0;
+      return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks);
     }
 #endif /*USE_ARM_CE*/
   else
@@ -1416,6 +1415,9 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
       union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
       rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.aad_nblocks;
@@ -1454,41 +1456,41 @@ _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
   rijndael_cryptfn_t crypt_fn;
   u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;
 
-  if (encrypt)
-    {
-      if (ctx->prefetch_enc_fn)
-        ctx->prefetch_enc_fn();
-
-      crypt_fn = ctx->encrypt_fn;
-    }
-  else
-    {
-      check_decryption_preparation (ctx);
-
-      if (ctx->prefetch_dec_fn)
-        ctx->prefetch_dec_fn();
-
-      crypt_fn = ctx->decrypt_fn;
-    }
-
   if (0)
     ;
 #ifdef USE_AESNI
   else if (ctx->use_aesni)
     {
       _gcry_aes_aesni_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return;
     }
 #endif /*USE_AESNI*/
 #ifdef USE_ARM_CE
   else if (ctx->use_arm_ce)
     {
       _gcry_aes_armv8_ce_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
-      burn_depth = 0;
+      return;
     }
 #endif /*USE_ARM_CE*/
   else
     {
+      if (encrypt)
+        {
+          if (ctx->prefetch_enc_fn)
+            ctx->prefetch_enc_fn();
+
+          crypt_fn = ctx->encrypt_fn;
+        }
+      else
+        {
+          check_decryption_preparation (ctx);
+
+          if (ctx->prefetch_dec_fn)
+            ctx->prefetch_dec_fn();
+
+          crypt_fn = ctx->decrypt_fn;
+        }
+
       tweak_next_lo = buf_get_le64 (tweak + 0);
       tweak_next_hi = buf_get_le64 (tweak + 8);