Add x86_64 support for AES-NI
authorJussi Kivilinna <jussi.kivilinna@mbnet.fi>
Fri, 16 Nov 2012 08:44:54 +0000 (10:44 +0200)
committerWerner Koch <wk@gnupg.org>
Wed, 21 Nov 2012 10:52:04 +0000 (11:52 +0100)
* cipher/rijndael.c [ENABLE_AESNI_SUPPORT]: Enable USE_AESNI on x86-64.
(do_setkey) [USE_AESNI_is_disabled_here]: Use %[key] and %[ksch]
directly as registers instead of using temporary register %%esi.
[USE_AESNI] (do_aesni_enc_aligned, do_aesni_dec_aligned, do_aesni_cfb,
do_aesni_ctr, do_aesni_ctr_4): Use %[key] directly as register instead
of using temporary register %%esi.
[USE_AESNI] (do_aesni_cfb, do_aesni_ctr, do_aesni_ctr_4): Change %[key]
from generic "g" type to register "r".
* src/hwfeatures.c (_gcry_detect_hw_features) [__x86_64__]: Do not
clear AES-NI feature flag.
--

AES-NI assembler uses %%esi for key-material pointer register. However
%[key] can be marked as "r" (register) and automatically be 64bit on
x86-64 and be 32bit on i386.

So use %[key] for pointer register instead of %esi and that way make
same AES-NI code work on both x86-64 and i386.

[v2]
 - Add GNU style changelog
 - Fixed do_setkey changes, use %[ksch] for output instead of %[key]
 - Changed [key] assembler arguments from "g" to "r" to force use of
   registers in all cases (when tested v1, "g" did work as indented
   and %[key] mapped to register on i386 and x86-64, but that might
   not happen always).

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
cipher/rijndael.c
src/hwfeatures.c

index b9ee8ad..3418c99 100644 (file)
@@ -75,7 +75,7 @@
    gcc 3.  However, to be on the safe side we require at least gcc 4.  */
 #undef USE_AESNI
 #ifdef ENABLE_AESNI_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
+# if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) && __GNUC__ >= 4
 #  define USE_AESNI 1
 # endif
 #endif /* ENABLE_AESNI_SUPPORT */
@@ -297,40 +297,38 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
          than using the standard key schedule.  We disable it for
          now and don't put any effort into implementing this for
          AES-192 and AES-256.  */
-      asm volatile ("movl   %[key], %%esi\n\t"
-                    "movdqu (%%esi), %%xmm1\n\t"     /* xmm1 := key   */
-                    "movl   %[ksch], %%esi\n\t"
-                    "movdqa %%xmm1, (%%esi)\n\t"     /* ksch[0] := xmm1  */
+      asm volatile ("movdqu (%[key]), %%xmm1\n\t"     /* xmm1 := key   */
+                    "movdqa %%xmm1, (%[ksch])\n\t"     /* ksch[0] := xmm1  */
                     "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1  */
+                    "movdqa %%xmm1, 0x10(%[ksch])\n\t" /* ksch[1] := xmm1  */
                     "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1  */
+                    "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1  */
                     "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1  */
+                    "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1  */
                     "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1  */
+                    "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1  */
                     "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1  */
+                    "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1  */
                     "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1  */
+                    "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1  */
                     "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1  */
+                    "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1  */
                     "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1  */
+                    "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1  */
                     "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1  */
+                    "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1  */
                     "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t"
                     "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1  */
+                    "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1  */
                     "jmp .Lleave%=\n"
 
                     ".Lexpand128_%=:\n\t"
@@ -350,8 +348,8 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
                     "pxor %%xmm2, %%xmm2\n\t"
                     "pxor %%xmm3, %%xmm3\n"
                     :
-                    : [key] "g" (key), [ksch] "g" (ctx->keyschenc)
-                    : "%esi", "cc", "memory" );
+                    : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+                    : "cc", "memory" );
     }
 #endif /*USE_AESNI*/
   else
@@ -722,40 +720,39 @@ do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
      aligned but that is a special case.  We should better implement
      CFB direct in asm.  */
   asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
+                "movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0] */
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
                 "cmp $10, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
                 "cmp $12, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
 
                 ".Lenclast%=:\n\t"
                 aesenclast_xmm1_xmm0
@@ -764,7 +761,7 @@ do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
                 : [src] "m" (*a),
                   [key] "r" (ctx->keyschenc),
                   [rounds] "r" (ctx->rounds)
-                : "%esi", "cc", "memory");
+                : "cc", "memory");
 #undef aesenc_xmm1_xmm0
 #undef aesenclast_xmm1_xmm0
 }
@@ -777,40 +774,39 @@ do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
 #define aesdec_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
 #define aesdeclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
   asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
-                "movl   %[key], %%esi\n\t"
-                "movdqa (%%esi), %%xmm1\n\t"
+                "movdqa (%[key]), %%xmm1\n\t"
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
                 "cmp $10, %[rounds]\n\t"
                 "jz .Ldeclast%=\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
                 "cmp $12, %[rounds]\n\t"
                 "jz .Ldeclast%=\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
                 aesdec_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
 
                 ".Ldeclast%=:\n\t"
                 aesdeclast_xmm1_xmm0
@@ -819,7 +815,7 @@ do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
                 : [src] "m" (*a),
                   [key] "r" (ctx->keyschdec),
                   [rounds] "r" (ctx->rounds)
-                : "%esi", "cc", "memory");
+                : "cc", "memory");
 #undef aesdec_xmm1_xmm0
 #undef aesdeclast_xmm1_xmm0
 }
@@ -836,40 +832,39 @@ do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
 #define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
 #define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
   asm volatile ("movdqa %[iv], %%xmm0\n\t"      /* xmm0 := IV     */
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
+                "movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0] */
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
                 "cmp $10, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
                 "cmp $12, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
 
                 ".Lenclast%=:\n\t"
                 aesenclast_xmm1_xmm0
@@ -886,10 +881,10 @@ do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
                 "movdqu %%xmm0, %[dst]\n"        /* Store output.   */
                 : [iv] "+m" (*iv), [dst] "=m" (*b)
                 : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
+                  [key] "r" (ctx->keyschenc),
                   [rounds] "g" (ctx->rounds),
                   [decrypt] "m" (decrypt_flag)
-                : "%esi", "cc", "memory");
+                : "cc", "memory");
 #undef aesenc_xmm1_xmm0
 #undef aesenclast_xmm1_xmm0
 }
@@ -915,40 +910,39 @@ do_aesni_ctr (const RIJNDAEL_context *ctx,
                 "pshufb %[mask], %%xmm2\n\t"
                 "movdqa %%xmm2, %[ctr]\n"       /* Update CTR.         */
 
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
+                "movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0]    */
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
                 "cmp $10, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
                 "cmp $12, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
 
                 ".Lenclast%=:\n\t"
                 aesenclast_xmm1_xmm0
@@ -958,7 +952,7 @@ do_aesni_ctr (const RIJNDAEL_context *ctx,
 
                 : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                 : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
+                  [key] "r" (ctx->keyschenc),
                   [rounds] "g" (ctx->rounds),
                   [mask] "m" (*be_mask)
                 : "%esi", "cc", "memory");
@@ -1012,82 +1006,81 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
                 "pshufb %[mask], %%xmm5\n\t"    /* xmm5 := be(xmm5) */
                 "movdqa %%xmm5, %[ctr]\n"       /* Update CTR.      */
 
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
+                "movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0]    */
                 "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
                 "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
                 "cmp $10, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
                 "cmp $12, %[rounds]\n\t"
                 "jz .Lenclast%=\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
                 aesenc_xmm1_xmm0
                 aesenc_xmm1_xmm2
                 aesenc_xmm1_xmm3
                 aesenc_xmm1_xmm4
-                "movdqa 0xe0(%%esi), %%xmm1\n"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
 
                 ".Lenclast%=:\n\t"
                 aesenclast_xmm1_xmm0
@@ -1113,7 +1106,7 @@ do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
 
                 : [ctr] "+m" (*ctr), [dst] "=m" (*b)
                 : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
+                  [key] "r" (ctx->keyschenc),
                   [rounds] "g" (ctx->rounds),
                   [mask] "m" (*be_mask)
                 : "%esi", "cc", "memory");
index 606f3e7..89d7685 100644 (file)
@@ -292,9 +292,6 @@ _gcry_detect_hw_features (unsigned int disabled_features)
 # ifdef __GNUC__
   {
     detect_x86_64_gnuc ();
-    /* We don't have AESNI support for 64 bit yet.  Thus we should not
-       announce it.  */
-    hw_features &= ~HWF_INTEL_AESNI;
   }
 # endif
 #endif