Enable AMD64 Salsa20 implementation on WIN64
author Jussi Kivilinna <jussi.kivilinna@iki.fi>
Thu, 14 May 2015 09:37:21 +0000 (12:37 +0300)
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>
Thu, 14 May 2015 11:10:28 +0000 (14:10 +0300)
* cipher/salsa20-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/salsa20.c (USE_AMD64): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_AMD64] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(_gcry_salsa20_amd64_keysetup, _gcry_salsa20_amd64_ivsetup)
(_gcry_salsa20_amd64_encrypt_blocks): Add ASM_FUNC_ABI.
[USE_AMD64] (salsa20_core): Add ASM_EXTRA_STACK.
(salsa20_do_encrypt_stream) [USE_AMD64]: Add ASM_EXTRA_STACK.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
cipher/salsa20-amd64.S
cipher/salsa20.c

index 7046dbb..470c32a 100644 (file)
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SALSA20)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
 .align 8
 .globl _gcry_salsa20_amd64_keysetup
-.type  _gcry_salsa20_amd64_keysetup,@function;
+ELF(.type  _gcry_salsa20_amd64_keysetup,@function;)
 _gcry_salsa20_amd64_keysetup:
        movl   0(%rsi),%r8d
        movl   4(%rsi),%r9d
@@ -83,7 +90,7 @@ _gcry_salsa20_amd64_keysetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_ivsetup
-.type  _gcry_salsa20_amd64_ivsetup,@function;
+ELF(.type  _gcry_salsa20_amd64_ivsetup,@function;)
 _gcry_salsa20_amd64_ivsetup:
        movl   0(%rsi),%r8d
        movl   4(%rsi),%esi
@@ -97,7 +104,7 @@ _gcry_salsa20_amd64_ivsetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_encrypt_blocks
-.type  _gcry_salsa20_amd64_encrypt_blocks,@function;
+ELF(.type  _gcry_salsa20_amd64_encrypt_blocks,@function;)
 _gcry_salsa20_amd64_encrypt_blocks:
        /*
         * Modifications to original implementation:
@@ -918,7 +925,7 @@ _gcry_salsa20_amd64_encrypt_blocks:
        add  $64,%rdi
        add  $64,%rsi
        jmp .L_bytes_are_64_128_or_192
-.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
 
 #endif /*defined(USE_SALSA20)*/
 #endif /*__x86_64*/
index d75fe51..fa3d23b 100644 (file)
@@ -43,7 +43,8 @@
 
 /* USE_AMD64 indicates whether to compile with AMD64 code. */
 #undef USE_AMD64
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64 1
 #endif
 
@@ -118,12 +119,25 @@ static const char *selftest (void);
 
 
 #ifdef USE_AMD64
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
 /* AMD64 assembly implementations of Salsa20. */
-void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
-void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+                                 ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+                                ASM_FUNC_ABI;
 unsigned int
 _gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
-                                   size_t len, int rounds);
+                                   size_t len, int rounds) ASM_FUNC_ABI;
 
 static void
 salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
@@ -141,7 +155,8 @@ static unsigned int
 salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
 {
   memset(dst, 0, SALSA20_BLOCK_SIZE);
-  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
+  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+         + ASM_EXTRA_STACK;
 }
 
 #else /* USE_AMD64 */
@@ -418,6 +433,7 @@ salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
       size_t nblocks = length / SALSA20_BLOCK_SIZE;
       burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
                                                 nblocks, rounds);
+      burn += ASM_EXTRA_STACK;
       length -= SALSA20_BLOCK_SIZE * nblocks;
       outbuf += SALSA20_BLOCK_SIZE * nblocks;
       inbuf  += SALSA20_BLOCK_SIZE * nblocks;