Enable AMD64 SHA1 implementations for WIN64
author Jussi Kivilinna <jussi.kivilinna@iki.fi>
Sat, 2 May 2015 09:57:07 +0000 (12:57 +0300)
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>
Sat, 2 May 2015 09:57:07 +0000 (12:57 +0300)
* cipher/sha1-avx-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/sha1-avx-bmi2-amd64.S: Ditto.
* cipher/sha1-ssse3-amd64.S: Ditto.
* cipher/sha1.c (USE_SSSE3, USE_AVX, USE_BMI2): Enable
when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_SSSE3 || USE_AVX || USE_BMI2] (ASM_FUNC_ABI)
(ASM_EXTRA_STACK): New.
(_gcry_sha1_transform_amd64_ssse3, _gcry_sha1_transform_amd64_avx)
(_gcry_sha1_transform_amd64_avx_bmi2): Add ASM_FUNC_ABI to
prototypes.
(transform): Add ASM_EXTRA_STACK to stack burn value.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
cipher/sha1-avx-amd64.S
cipher/sha1-avx-bmi2-amd64.S
cipher/sha1-ssse3-amd64.S
cipher/sha1.c

index 6bec389..062a45b 100644 (file)
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
 
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
  */
 .text
 .globl _gcry_sha1_transform_amd64_avx
-.type _gcry_sha1_transform_amd64_avx,@function
+ELF(.type _gcry_sha1_transform_amd64_avx,@function)
 .align 16
 _gcry_sha1_transform_amd64_avx:
   /* input:
index cd5af5b..22bcbb3 100644 (file)
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
 
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
  */
 .text
 .globl _gcry_sha1_transform_amd64_avx_bmi2
-.type _gcry_sha1_transform_amd64_avx_bmi2,@function
+ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
 .align 16
 _gcry_sha1_transform_amd64_avx_bmi2:
   /* input:
index 226988d..98a19e6 100644 (file)
@@ -29,7 +29,8 @@
 #ifdef __x86_64__
 #include <config.h>
 
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
 
 #ifdef __PIC__
 #endif
 
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
 /* Context structure */
 
 #define state_h0 0
  */
 .text
 .globl _gcry_sha1_transform_amd64_ssse3
-.type _gcry_sha1_transform_amd64_ssse3,@function
+ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
 .align 16
 _gcry_sha1_transform_amd64_ssse3:
   /* input:
index 6ccf0e8..eb42883 100644 (file)
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 /* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
 #undef USE_BMI2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_BMI2 1
 #endif
 
@@ -287,22 +291,37 @@ transform_blk (void *ctx, const unsigned char *data)
 }
 
 
+/* Assembly implementations use SystemV ABI; on Win64 this requires ABI
+ * conversion and additional stack to store XMM6-XMM15. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_SSSE3
 unsigned int
 _gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
-                                  size_t nblks);
+                                  size_t nblks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int
 _gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
-                                 size_t nblks);
+                                 size_t nblks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_BMI2
 unsigned int
 _gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
-                                     size_t nblks);
+                                     size_t nblks) ASM_FUNC_ABI;
 #endif
 
 
@@ -315,17 +334,17 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
 #ifdef USE_BMI2
   if (hd->use_bmi2)
     return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_AVX
   if (hd->use_avx)
     return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_SSSE3
   if (hd->use_ssse3)
     return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 #ifdef USE_NEON
   if (hd->use_neon)
@@ -340,6 +359,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   *  the prologue of this function. Therefore ASM_EXTRA_STACK needs to be
+   *  added here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }