aarch64/assembly: only use the lower 32 bits of int parameters
author: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Sat, 24 Mar 2018 15:49:16 +0000 (17:49 +0200)
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Wed, 28 Mar 2018 17:40:37 +0000 (20:40 +0300)
* cipher/camellia-aarch64.S (_gcry_camellia_arm_encrypt_block)
(_gcry_camellia_arm_decrypt_block): Make comment section about input
registers match usage.
* cipher/rijndael-armv8-aarch64-ce.S (_gcry_aes_ocb_auth_armv8_ce): Use
'w12' and 'w7' instead of 'x12' and 'x7'.
(_gcry_aes_xts_enc_armv8_ce, _gcry_aes_xts_dec_armv8_ce): Fix function
prototype in comments.
* mpi/aarch64/mpih-add1.S: Use 32-bit registers for 32-bit mpi_size_t
parameters.
* mpi/aarch64/mpih-mul1.S: Ditto.
* mpi/aarch64/mpih-mul2.S: Ditto.
* mpi/aarch64/mpih-mul3.S: Ditto.
* mpi/aarch64/mpih-sub1.S: Ditto.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
cipher/camellia-aarch64.S
cipher/rijndael-armv8-aarch64-ce.S
mpi/aarch64/mpih-add1.S
mpi/aarch64/mpih-mul1.S
mpi/aarch64/mpih-mul2.S
mpi/aarch64/mpih-mul3.S
mpi/aarch64/mpih-sub1.S

index c3cc463..b0e9a03 100644 (file)
@@ -206,7 +206,7 @@ _gcry_camellia_arm_encrypt_block:
         *      x0: keytable
         *      x1: dst
         *      x2: src
-        *      x3: keybitlen
+        *      w3: keybitlen
         */
 
        adr RTAB1,  _gcry_camellia_arm_tables;
@@ -252,7 +252,7 @@ _gcry_camellia_arm_decrypt_block:
         *      x0: keytable
         *      x1: dst
         *      x2: src
-        *      x3: keybitlen
+        *      w3: keybitlen
         */
 
        adr RTAB1,  _gcry_camellia_arm_tables;
index 5859557..f0012c2 100644 (file)
@@ -1157,8 +1157,8 @@ _gcry_aes_ocb_auth_armv8_ce:
    *    w6: nrounds => w7
    *    w7: blkn => w12
    */
-  mov x12, x7
-  mov x7, x6
+  mov w12, w7
+  mov w7, w6
   mov x6, x5
   mov x5, x4
   mov x4, x3
@@ -1280,7 +1280,9 @@ ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;)
  * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
  *                                  unsigned char *outbuf,
  *                                  const unsigned char *inbuf,
- *                                  unsigned char *tweak, unsigned int nrounds);
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
  */
 
 .align 3
@@ -1417,7 +1419,9 @@ ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;)
  * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
  *                                  unsigned char *outbuf,
  *                                  const unsigned char *inbuf,
- *                                  unsigned char *tweak, unsigned int nrounds);
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
  */
 
 .align 3
index 4ead1c2..3370320 100644 (file)
@@ -29,7 +29,7 @@
  *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,       x0
  *                mpi_ptr_t s1_ptr,            x1
  *                mpi_ptr_t s2_ptr,            x2
- *                mpi_size_t size)             x3
+ *                mpi_size_t size)             w3
  */
 
 .text
 .globl _gcry_mpih_add_n
 ELF(.type  _gcry_mpih_add_n,%function)
 _gcry_mpih_add_n:
-       and     x5, x3, #3;
+       and     w5, w3, #3;
        adds    xzr, xzr, xzr; /* clear carry flag */
 
-       cbz     x5, .Large_loop;
+       cbz     w5, .Large_loop;
 
 .Loop:
        ldr     x4, [x1], #8;
-       sub     x3, x3, #1;
+       sub     w3, w3, #1;
        ldr     x11, [x2], #8;
-       and     x5, x3, #3;
+       and     w5, w3, #3;
        adcs    x4, x4, x11;
        str     x4, [x0], #8;
-       cbz     x3, .Lend;
-       cbnz    x5, .Loop;
+       cbz     w3, .Lend;
+       cbnz    w5, .Loop;
 
 .Large_loop:
        ldp     x4, x6, [x1], #16;
        ldp     x5, x7, [x2], #16;
        ldp     x8, x10, [x1], #16;
        ldp     x9, x11, [x2], #16;
-       sub     x3, x3, #4;
+       sub     w3, w3, #4;
        adcs    x4, x4, x5;
        adcs    x6, x6, x7;
        adcs    x8, x8, x9;
        adcs    x10, x10, x11;
        stp     x4, x6, [x0], #16;
        stp     x8, x10, [x0], #16;
-       cbnz    x3, .Large_loop;
+       cbnz    w3, .Large_loop;
 
 .Lend:
        adc     x0, xzr, xzr;
index 8a86269..8830845 100644 (file)
@@ -28,7 +28,7 @@
  * mpi_limb_t
  * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,                x0
  *               mpi_ptr_t s1_ptr,             x1
- *               mpi_size_t s1_size,           x2
+ *               mpi_size_t s1_size,           w2
  *               mpi_limb_t s2_limb)           x3
  */
 
 .globl _gcry_mpih_mul_1
 ELF(.type  _gcry_mpih_mul_1,%function)
 _gcry_mpih_mul_1:
-       and     x5, x2, #3;
+       and     w5, w2, #3;
        mov     x4, xzr;
 
-       cbz     x5, .Large_loop;
+       cbz     w5, .Large_loop;
 
 .Loop:
        ldr     x5, [x1], #8;
-       sub     x2, x2, #1;
+       sub     w2, w2, #1;
        mul     x9, x5, x3;
        umulh   x10, x5, x3;
-       and     x5, x2, #3;
+       and     w5, w2, #3;
        adds    x4, x4, x9;
        str     x4, [x0], #8;
        adc     x4, x10, xzr;
 
-       cbz     x2, .Lend;
-       cbnz    x5, .Loop;
+       cbz     w2, .Lend;
+       cbnz    w5, .Loop;
 
 .Large_loop:
        ldp     x5, x6, [x1];
-       sub     x2, x2, #4;
+       sub     w2, w2, #4;
 
        mul     x9, x5, x3;
        ldp     x7, x8, [x1, #16];
@@ -89,7 +89,7 @@ _gcry_mpih_mul_1:
        str     x4, [x0], #8;
        adc     x4, x16, xzr;
 
-       cbnz    x2, .Large_loop;
+       cbnz    w2, .Large_loop;
 
 .Lend:
        mov     x0, x4;
index c7c08e5..5d73699 100644 (file)
@@ -28,7 +28,7 @@
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,     x0
  *                  mpi_ptr_t s1_ptr,          x1
- *                  mpi_size_t s1_size,        x2
+ *                  mpi_size_t s1_size,        w2
  *                  mpi_limb_t s2_limb)        x3
  */
 
 .globl _gcry_mpih_addmul_1
 ELF(.type  _gcry_mpih_addmul_1,%function)
 _gcry_mpih_addmul_1:
-       and     x5, x2, #3;
+       and     w5, w2, #3;
        mov     x6, xzr;
        mov     x7, xzr;
 
-       cbz     x5, .Large_loop;
+       cbz     w5, .Large_loop;
 
 .Loop:
        ldr     x5, [x1], #8;
@@ -49,21 +49,21 @@ _gcry_mpih_addmul_1:
        mul     x12, x5, x3;
        ldr     x4, [x0];
        umulh   x13, x5, x3;
-       sub     x2, x2, #1;
+       sub     w2, w2, #1;
 
        adds    x12, x12, x4;
-       and     x5, x2, #3;
+       and     w5, w2, #3;
        adc     x13, x13, x7;
        adds    x12, x12, x6;
        str     x12, [x0], #8;
        adc     x6, x7, x13;
 
-       cbz     x2, .Lend;
-       cbnz    x5, .Loop;
+       cbz     w2, .Lend;
+       cbnz    w5, .Loop;
 
 .Large_loop:
        ldp     x5, x9, [x1], #16;
-       sub     x2, x2, #4;
+       sub     w2, w2, #4;
        ldp     x4, x8, [x0];
 
        mul     x12, x5, x3;
@@ -101,7 +101,7 @@ _gcry_mpih_addmul_1:
        str     x14, [x0], #8;
        adc     x6, x7, x15;
 
-       cbnz    x2, .Large_loop;
+       cbnz    w2, .Large_loop;
 
 .Lend:
        mov     x0, x6;
index ccc961e..f785e5e 100644 (file)
@@ -28,7 +28,7 @@
  * mpi_limb_t
  * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,     x0
  *                  mpi_ptr_t s1_ptr,          x1
- *                  mpi_size_t s1_size,        x2
+ *                  mpi_size_t s1_size,        w2
  *                  mpi_limb_t s2_limb)        x3
  */
 
@@ -37,9 +37,9 @@
 .globl _gcry_mpih_submul_1
 ELF(.type  _gcry_mpih_submul_1,%function)
 _gcry_mpih_submul_1:
-       and     x5, x2, #3;
+       and     w5, w2, #3;
        mov     x7, xzr;
-       cbz     x5, .Large_loop;
+       cbz     w5, .Large_loop;
 
        subs    xzr, xzr, xzr;
 
@@ -47,26 +47,26 @@ _gcry_mpih_submul_1:
        ldr     x4, [x1], #8;
        cinc    x7, x7, cc;
        ldr     x5, [x0];
-       sub     x2, x2, #1;
+       sub     w2, w2, #1;
 
        mul     x6, x4, x3;
        subs    x5, x5, x7;
        umulh   x4, x4, x3;
-       and     x10, x2, #3;
+       and     w10, w2, #3;
 
        cset    x7, cc;
        subs    x5, x5, x6;
        add     x7, x7, x4;
        str     x5, [x0], #8;
 
-       cbz     x2, .Loop_end;
-       cbnz    x10, .Loop;
+       cbz     w2, .Loop_end;
+       cbnz    w10, .Loop;
 
        cinc    x7, x7, cc;
 
 .Large_loop:
        ldp     x4, x8, [x1], #16;
-       sub     x2, x2, #4;
+       sub     w2, w2, #4;
        ldp     x5, x9, [x0];
 
        mul     x6, x4, x3;
@@ -111,7 +111,7 @@ _gcry_mpih_submul_1:
        str     x9, [x0], #8;
        cinc    x7, x7, cc;
 
-       cbnz    x2, .Large_loop;
+       cbnz    w2, .Large_loop;
 
        mov     x0, x7;
        ret;
index 4a66373..45a7b04 100644 (file)
@@ -29,7 +29,7 @@
  *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,       x0
  *                mpi_ptr_t s1_ptr,            x1
  *                mpi_ptr_t s2_ptr,            x2
- *                mpi_size_t size)             x3
+ *                mpi_size_t size)             w3
  */
 
 .text
 .globl _gcry_mpih_sub_n
 ELF(.type  _gcry_mpih_sub_n,%function)
 _gcry_mpih_sub_n:
-       and     x5, x3, #3;
+       and     w5, w3, #3;
        subs    xzr, xzr, xzr; /* prepare carry flag for sub */
 
-       cbz     x5, .Large_loop;
+       cbz     w5, .Large_loop;
 
 .Loop:
        ldr     x4, [x1], #8;
-       sub     x3, x3, #1;
+       sub     w3, w3, #1;
        ldr     x11, [x2], #8;
-       and     x5, x3, #3;
+       and     w5, w3, #3;
        sbcs    x4, x4, x11;
        str     x4, [x0], #8;
-       cbz     x3, .Lend;
-       cbnz    x5, .Loop;
+       cbz     w3, .Lend;
+       cbnz    w5, .Loop;
 
 .Large_loop:
        ldp     x4, x6, [x1], #16;
        ldp     x5, x7, [x2], #16;
        ldp     x8, x10, [x1], #16;
        ldp     x9, x11, [x2], #16;
-       sub     x3, x3, #4;
+       sub     w3, w3, #4;
        sbcs    x4, x4, x5;
        sbcs    x6, x6, x7;
        sbcs    x8, x8, x9;
        sbcs    x10, x10, x11;
        stp     x4, x6, [x0], #16;
        stp     x8, x10, [x0], #16;
-       cbnz    x3, .Large_loop;
+       cbnz    w3, .Large_loop;
 
 .Lend:
        cset    x0, cc;