Enable AMD64 SHA512 implementations for WIN64
[libgcrypt.git] / cipher / sha512.c
index 5a6af80..029f8f0 100644 (file)
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 
 /* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX2 1
 #endif
 
@@ -543,6 +547,21 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
 }
 
 
+/* AMD64 assembly implementations use the SystemV ABI; on Win64, ABI conversion
+ * and additional stack to store XMM6-XMM15 are needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_ARM_NEON_ASM
 void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
                                        const unsigned char *data,
@@ -551,17 +570,20 @@ void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
 
 #ifdef USE_SSSE3
 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
-                                               void *state, size_t num_blks);
+                                                void *state,
+                                                size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
-                                             void *state, size_t num_blks);
+                                              void *state,
+                                              size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
-                                              void *state, size_t num_blks);
+                                               void *state,
+                                               size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 
@@ -574,19 +596,19 @@ transform (void *context, const unsigned char *data, size_t nblks)
 #ifdef USE_AVX2
   if (ctx->use_avx2)
     return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_AVX
   if (ctx->use_avx)
     return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_SSSE3
   if (ctx->use_ssse3)
     return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_ARM_NEON_ASM
@@ -607,6 +629,14 @@ transform (void *context, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   *  the prologue of this function. Therefore ASM_EXTRA_STACK needs to be
+   *  added here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }