Enable AMD64 Blowfish implementation on WIN64
authorJussi Kivilinna <jussi.kivilinna@iki.fi>
Sun, 3 May 2015 14:28:40 +0000 (17:28 +0300)
committerJussi Kivilinna <jussi.kivilinna@iki.fi>
Thu, 14 May 2015 10:41:05 +0000 (13:41 +0300)
* cipher/blowfish-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/blowfish.c (USE_AMD64_ASM): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New.
(do_encrypt, do_encrypt_block, do_decrypt_block)
[HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly
function through 'call_sysv_fn'.
(blowfish_amd64_ctr_enc, blowfish_amd64_cbc_dec)
(blowfish_amd64_cfb_dec): New wrapper functions for bulk
assembly functions.
..

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
cipher/blowfish-amd64.S
cipher/blowfish.c

index 87b676f..21b63fc 100644 (file)
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(USE_BLOWFISH) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(USE_BLOWFISH) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
        movq RX0,               (RIO);
 
 .align 8
-.type   __blowfish_enc_blk1,@function;
+ELF(.type   __blowfish_enc_blk1,@function;)
 
 __blowfish_enc_blk1:
        /* input:
@@ -145,11 +153,11 @@ __blowfish_enc_blk1:
        movq %r11, %rbp;
 
        ret;
-.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_do_encrypt
-.type   _gcry_blowfish_amd64_do_encrypt,@function;
+ELF(.type   _gcry_blowfish_amd64_do_encrypt,@function;)
 
 _gcry_blowfish_amd64_do_encrypt:
        /* input:
@@ -171,11 +179,11 @@ _gcry_blowfish_amd64_do_encrypt:
        movl RX0d, (RX2);
 
        ret;
-.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_encrypt_block
-.type   _gcry_blowfish_amd64_encrypt_block,@function;
+ELF(.type   _gcry_blowfish_amd64_encrypt_block,@function;)
 
 _gcry_blowfish_amd64_encrypt_block:
        /* input:
@@ -195,11 +203,11 @@ _gcry_blowfish_amd64_encrypt_block:
        write_block();
 
        ret;
-.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;
+ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_decrypt_block
-.type   _gcry_blowfish_amd64_decrypt_block,@function;
+ELF(.type   _gcry_blowfish_amd64_decrypt_block,@function;)
 
 _gcry_blowfish_amd64_decrypt_block:
        /* input:
@@ -231,7 +239,7 @@ _gcry_blowfish_amd64_decrypt_block:
        movq %r11, %rbp;
 
        ret;
-.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;
+ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
 
 /**********************************************************************
   4-way blowfish, four blocks parallel
@@ -319,7 +327,7 @@ _gcry_blowfish_amd64_decrypt_block:
        bswapq                  RX3;
 
 .align 8
-.type   __blowfish_enc_blk4,@function;
+ELF(.type   __blowfish_enc_blk4,@function;)
 
 __blowfish_enc_blk4:
        /* input:
@@ -343,10 +351,10 @@ __blowfish_enc_blk4:
        outbswap_block4();
 
        ret;
-.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
 
 .align 8
-.type   __blowfish_dec_blk4,@function;
+ELF(.type   __blowfish_dec_blk4,@function;)
 
 __blowfish_dec_blk4:
        /* input:
@@ -372,11 +380,11 @@ __blowfish_dec_blk4:
        outbswap_block4();
 
        ret;
-.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_ctr_enc
-.type   _gcry_blowfish_amd64_ctr_enc,@function;
+ELF(.type   _gcry_blowfish_amd64_ctr_enc,@function;)
 _gcry_blowfish_amd64_ctr_enc:
        /* input:
         *      %rdi: ctx, CTX
@@ -429,11 +437,11 @@ _gcry_blowfish_amd64_ctr_enc:
        popq %rbp;
 
        ret;
-.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_cbc_dec
-.type   _gcry_blowfish_amd64_cbc_dec,@function;
+ELF(.type   _gcry_blowfish_amd64_cbc_dec,@function;)
 _gcry_blowfish_amd64_cbc_dec:
        /* input:
         *      %rdi: ctx, CTX
@@ -477,11 +485,11 @@ _gcry_blowfish_amd64_cbc_dec:
        popq %rbp;
 
        ret;
-.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
 
 .align 8
 .globl  _gcry_blowfish_amd64_cfb_dec
-.type   _gcry_blowfish_amd64_cfb_dec,@function;
+ELF(.type   _gcry_blowfish_amd64_cfb_dec,@function;)
 _gcry_blowfish_amd64_cfb_dec:
        /* input:
         *      %rdi: ctx, CTX
@@ -527,7 +535,7 @@ _gcry_blowfish_amd64_cfb_dec:
        popq %rbx;
        popq %rbp;
        ret;
-.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
 
 #endif /*defined(USE_BLOWFISH)*/
 #endif /*__x86_64*/
index ae470d8..a3fc26c 100644 (file)
@@ -45,7 +45,8 @@
 
 /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
 #undef USE_AMD64_ASM
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     (BLOWFISH_ROUNDS == 16)
 # define USE_AMD64_ASM 1
 #endif
@@ -280,22 +281,87 @@ extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
 extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
                                         const byte *in, byte *iv);
 
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+static inline void
+call_sysv_fn (const void *fn, const void *arg1, const void *arg2,
+              const void *arg3, const void *arg4)
+{
+  /* Call SystemV ABI function without storing non-volatile XMM registers,
+   * as target function does not use vector instruction sets. */
+  asm volatile ("callq *%0\n\t"
+                : "+a" (fn),
+                  "+D" (arg1),
+                  "+S" (arg2),
+                  "+d" (arg3),
+                  "+c" (arg4)
+                :
+                : "cc", "memory", "r8", "r9", "r10", "r11");
+}
+#endif
+
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL);
+#else
   _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+#endif
 }
 
 static void
 do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf,
+                NULL);
+#else
   _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+#endif
 }
 
 static void
 do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf,
+                NULL);
+#else
   _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+#endif
+}
+
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *ctr)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr);
+#else
+  _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
+#endif
+}
+
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv);
+#else
+  _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+#endif
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+  call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv);
+#else
+  _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+#endif
 }
 
 static unsigned int
@@ -605,7 +671,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+        blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -674,7 +740,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+        blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;
@@ -734,7 +800,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
     /* Process data in 4 block chunks. */
     while (nblocks >= 4)
       {
-        _gcry_blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+        blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
 
         nblocks -= 4;
         outbuf += 4 * BLOWFISH_BLOCKSIZE;