Fix strict-aliasing problems in rijndael.c
author Werner Koch <wk@gnupg.org>
Fri, 18 Feb 2011 13:30:13 +0000 (14:30 +0100)
committer Werner Koch <wk@gnupg.org>
Fri, 18 Feb 2011 13:30:13 +0000 (14:30 +0100)
We used to use -fno-strict-aliasing but only if configured in
maintainer-mode.  Thus with gcc-4.4 we could run into problems.  The
fix is to define a new type with the may_alias attribute and use this
for the casting stuff in do_encrypt_aligned and do_decrypt_aligned.

ChangeLog
cipher/ChangeLog
cipher/rijndael.c
configure.ac

index f192d20..01c1213 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2011-02-18  Werner Koch  <wk@g10code.com>
+
+       * configure.ac [GCC]: Remove the use of -fno-strict-aliasing.
+
 2011-02-11  Werner Koch  <wk@g10code.com>
 
        * configure.ac: Add option --disbale-aesni-support.
index 85dd43f..d10ce07 100644 (file)
@@ -1,3 +1,9 @@
+2011-02-18  Werner Koch  <wk@g10code.com>
+
+       * rijndael.c (u32_a_t): New.
+       (do_encrypt_aligned, do_decrypt_aligned): Use the new type to
+       avoid problems with strict aliasing rules.
+
 2011-02-16  Werner Koch  <wk@g10code.com>
 
        * rijndael.c (do_aesni_cfb) [USE_AESNI]: New.
index 4c49847..fb97274 100644 (file)
   typedef int m128i_t __attribute__ ((__vector_size__ (16)));
 #endif /*USE_AESNI*/
 
+/* Define a u32 variant for the sake of gcc 4.4's strict aliasing.  */
+#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
+typedef u32           __attribute__ ((__may_alias__)) u32_a_t;
+#else
+typedef u32           u32_a_t;
+#endif
+
+
 static const char *selftest(void);
 
 
@@ -505,57 +513,57 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
     byte temp[4][4];
   } u;
 
-  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
-  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
-  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
-  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
-  *((u32*)(b    ))   = (*((u32*)T1[u.temp[0][0]])
-                        ^ *((u32*)T2[u.temp[1][1]])
-                        ^ *((u32*)T3[u.temp[2][2]])
-                        ^ *((u32*)T4[u.temp[3][3]]));
-  *((u32*)(b + 4))   = (*((u32*)T1[u.temp[1][0]])
-                        ^ *((u32*)T2[u.temp[2][1]])
-                        ^ *((u32*)T3[u.temp[3][2]])
-                        ^ *((u32*)T4[u.temp[0][3]]));
-  *((u32*)(b + 8))   = (*((u32*)T1[u.temp[2][0]])
-                        ^ *((u32*)T2[u.temp[3][1]])
-                        ^ *((u32*)T3[u.temp[0][2]])
-                        ^ *((u32*)T4[u.temp[1][3]]));
-  *((u32*)(b +12))   = (*((u32*)T1[u.temp[3][0]])
-                        ^ *((u32*)T2[u.temp[0][1]])
-                        ^ *((u32*)T3[u.temp[1][2]])
-                        ^ *((u32*)T4[u.temp[2][3]]));
+  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[0][0]);
+  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]);
+  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]);
+  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]);
+  *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
+                        ^ *((u32_a_t*)T2[u.temp[1][1]])
+                        ^ *((u32_a_t*)T3[u.temp[2][2]])
+                        ^ *((u32_a_t*)T4[u.temp[3][3]]));
+  *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
+                        ^ *((u32_a_t*)T2[u.temp[2][1]])
+                        ^ *((u32_a_t*)T3[u.temp[3][2]])
+                        ^ *((u32_a_t*)T4[u.temp[0][3]]));
+  *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
+                        ^ *((u32_a_t*)T2[u.temp[3][1]])
+                        ^ *((u32_a_t*)T3[u.temp[0][2]])
+                        ^ *((u32_a_t*)T4[u.temp[1][3]]));
+  *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
+                        ^ *((u32_a_t*)T2[u.temp[0][1]])
+                        ^ *((u32_a_t*)T3[u.temp[1][2]])
+                        ^ *((u32_a_t*)T4[u.temp[2][3]]));
 
   for (r = 1; r < rounds-1; r++)
     {
-      *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-      *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-      *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-      *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
-
-      *((u32*)(b    ))   = (*((u32*)T1[u.temp[0][0]])
-                            ^ *((u32*)T2[u.temp[1][1]])
-                            ^ *((u32*)T3[u.temp[2][2]])
-                            ^ *((u32*)T4[u.temp[3][3]]));
-      *((u32*)(b + 4))   = (*((u32*)T1[u.temp[1][0]])
-                            ^ *((u32*)T2[u.temp[2][1]])
-                            ^ *((u32*)T3[u.temp[3][2]])
-                            ^ *((u32*)T4[u.temp[0][3]]));
-      *((u32*)(b + 8))   = (*((u32*)T1[u.temp[2][0]])
-                            ^ *((u32*)T2[u.temp[3][1]])
-                            ^ *((u32*)T3[u.temp[0][2]])
-                            ^ *((u32*)T4[u.temp[1][3]]));
-      *((u32*)(b +12))   = (*((u32*)T1[u.temp[3][0]])
-                            ^ *((u32*)T2[u.temp[0][1]])
-                            ^ *((u32*)T3[u.temp[1][2]])
-                            ^ *((u32*)T4[u.temp[2][3]]));
+      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
+      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
+      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
+      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
+
+      *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
+                            ^ *((u32_a_t*)T2[u.temp[1][1]])
+                            ^ *((u32_a_t*)T3[u.temp[2][2]])
+                            ^ *((u32_a_t*)T4[u.temp[3][3]]));
+      *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
+                            ^ *((u32_a_t*)T2[u.temp[2][1]])
+                            ^ *((u32_a_t*)T3[u.temp[3][2]])
+                            ^ *((u32_a_t*)T4[u.temp[0][3]]));
+      *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
+                            ^ *((u32_a_t*)T2[u.temp[3][1]])
+                            ^ *((u32_a_t*)T3[u.temp[0][2]])
+                            ^ *((u32_a_t*)T4[u.temp[1][3]]));
+      *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
+                            ^ *((u32_a_t*)T2[u.temp[0][1]])
+                            ^ *((u32_a_t*)T3[u.temp[1][2]])
+                            ^ *((u32_a_t*)T4[u.temp[2][3]]));
     }
 
   /* Last round is special. */
-  *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[rounds-1][0]);
-  *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[rounds-1][1]);
-  *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[rounds-1][2]);
-  *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[rounds-1][3]);
+  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[rounds-1][0]);
+  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]);
+  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]);
+  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]);
   b[ 0] = T1[u.temp[0][0]][1];
   b[ 1] = T1[u.temp[1][1]][1];
   b[ 2] = T1[u.temp[2][2]][1];
@@ -572,10 +580,10 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
   b[13] = T1[u.temp[0][1]][1];
   b[14] = T1[u.temp[1][2]][1];
   b[15] = T1[u.temp[2][3]][1];
-  *((u32*)(b   )) ^= *((u32*)rk[rounds][0]);
-  *((u32*)(b+ 4)) ^= *((u32*)rk[rounds][1]);
-  *((u32*)(b+ 8)) ^= *((u32*)rk[rounds][2]);
-  *((u32*)(b+12)) ^= *((u32*)rk[rounds][3]);
+  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[rounds][0]);
+  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]);
+  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
+  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
 #undef rk
 }
 
@@ -1026,57 +1034,57 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
   } u;
 
 
-  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[rounds][0]);
-  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[rounds][1]);
-  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[rounds][2]);
-  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[rounds][3]);
-
-  *((u32*)(b   ))    = (*((u32*)T5[u.temp[0][0]])
-                        ^ *((u32*)T6[u.temp[3][1]])
-                        ^ *((u32*)T7[u.temp[2][2]])
-                        ^ *((u32*)T8[u.temp[1][3]]));
-  *((u32*)(b+ 4))    = (*((u32*)T5[u.temp[1][0]])
-                        ^ *((u32*)T6[u.temp[0][1]])
-                        ^ *((u32*)T7[u.temp[3][2]])
-                        ^ *((u32*)T8[u.temp[2][3]]));
-  *((u32*)(b+ 8))    = (*((u32*)T5[u.temp[2][0]])
-                        ^ *((u32*)T6[u.temp[1][1]])
-                        ^ *((u32*)T7[u.temp[0][2]])
-                        ^ *((u32*)T8[u.temp[3][3]]));
-  *((u32*)(b+12))    = (*((u32*)T5[u.temp[3][0]])
-                        ^ *((u32*)T6[u.temp[2][1]])
-                        ^ *((u32*)T7[u.temp[1][2]])
-                        ^ *((u32*)T8[u.temp[0][3]]));
+  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[rounds][0]);
+  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]);
+  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]);
+  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]);
+
+  *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
+                        ^ *((u32_a_t*)T6[u.temp[3][1]])
+                        ^ *((u32_a_t*)T7[u.temp[2][2]])
+                        ^ *((u32_a_t*)T8[u.temp[1][3]]));
+  *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
+                        ^ *((u32_a_t*)T6[u.temp[0][1]])
+                        ^ *((u32_a_t*)T7[u.temp[3][2]])
+                        ^ *((u32_a_t*)T8[u.temp[2][3]]));
+  *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
+                        ^ *((u32_a_t*)T6[u.temp[1][1]])
+                        ^ *((u32_a_t*)T7[u.temp[0][2]])
+                        ^ *((u32_a_t*)T8[u.temp[3][3]]));
+  *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
+                        ^ *((u32_a_t*)T6[u.temp[2][1]])
+                        ^ *((u32_a_t*)T7[u.temp[1][2]])
+                        ^ *((u32_a_t*)T8[u.temp[0][3]]));
 
   for (r = rounds-1; r > 1; r--)
     {
-      *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-      *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-      *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-      *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
-      *((u32*)(b   ))    = (*((u32*)T5[u.temp[0][0]])
-                            ^ *((u32*)T6[u.temp[3][1]])
-                            ^ *((u32*)T7[u.temp[2][2]])
-                            ^ *((u32*)T8[u.temp[1][3]]));
-      *((u32*)(b+ 4))    = (*((u32*)T5[u.temp[1][0]])
-                            ^ *((u32*)T6[u.temp[0][1]])
-                            ^ *((u32*)T7[u.temp[3][2]])
-                            ^ *((u32*)T8[u.temp[2][3]]));
-      *((u32*)(b+ 8))    = (*((u32*)T5[u.temp[2][0]])
-                            ^ *((u32*)T6[u.temp[1][1]])
-                            ^ *((u32*)T7[u.temp[0][2]])
-                            ^ *((u32*)T8[u.temp[3][3]]));
-      *((u32*)(b+12))    = (*((u32*)T5[u.temp[3][0]])
-                            ^ *((u32*)T6[u.temp[2][1]])
-                            ^ *((u32*)T7[u.temp[1][2]])
-                            ^ *((u32*)T8[u.temp[0][3]]));
+      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
+      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
+      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
+      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
+      *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
+                            ^ *((u32_a_t*)T6[u.temp[3][1]])
+                            ^ *((u32_a_t*)T7[u.temp[2][2]])
+                            ^ *((u32_a_t*)T8[u.temp[1][3]]));
+      *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
+                            ^ *((u32_a_t*)T6[u.temp[0][1]])
+                            ^ *((u32_a_t*)T7[u.temp[3][2]])
+                            ^ *((u32_a_t*)T8[u.temp[2][3]]));
+      *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
+                            ^ *((u32_a_t*)T6[u.temp[1][1]])
+                            ^ *((u32_a_t*)T7[u.temp[0][2]])
+                            ^ *((u32_a_t*)T8[u.temp[3][3]]));
+      *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
+                            ^ *((u32_a_t*)T6[u.temp[2][1]])
+                            ^ *((u32_a_t*)T7[u.temp[1][2]])
+                            ^ *((u32_a_t*)T8[u.temp[0][3]]));
     }
 
   /* Last round is special. */
-  *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[1][0]);
-  *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[1][1]);
-  *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[1][2]);
-  *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[1][3]);
+  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[1][0]);
+  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]);
+  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]);
+  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]);
   b[ 0] = S5[u.temp[0][0]];
   b[ 1] = S5[u.temp[3][1]];
   b[ 2] = S5[u.temp[2][2]];
@@ -1093,10 +1101,10 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
   b[13] = S5[u.temp[2][1]];
   b[14] = S5[u.temp[1][2]];
   b[15] = S5[u.temp[0][3]];
-  *((u32*)(b   )) ^= *((u32*)rk[0][0]);
-  *((u32*)(b+ 4)) ^= *((u32*)rk[0][1]);
-  *((u32*)(b+ 8)) ^= *((u32*)rk[0][2]);
-  *((u32*)(b+12)) ^= *((u32*)rk[0][3]);
+  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[0][0]);
+  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]);
+  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
+  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
 #undef rk
 }
 
index 64692b6..013ff3a 100644 (file)
@@ -951,17 +951,6 @@ if test "$GCC" = yes; then
         if test x"$_gcc_wopt" = xyes ; then
           CFLAGS="$CFLAGS -Wpointer-arith"
         fi
-
-        AC_MSG_CHECKING([if gcc supports -fno-strict-aliasing])
-        _gcc_cflags_save=$CFLAGS
-        CFLAGS="-fno-strict-aliasing"
-        AC_COMPILE_IFELSE(AC_LANG_PROGRAM([]),_gcc_wopt=yes,_gcc_wopt=no)
-        AC_MSG_RESULT($_gcc_wopt)
-        CFLAGS=$_gcc_cflags_save;
-        if test x"$_gcc_wopt" = xyes ; then
-          CFLAGS="$CFLAGS -fno-strict-aliasing"
-        fi
-
     fi
 
 fi
@@ -1228,6 +1217,8 @@ echo "
         Enabled pubkey algorithms: $enabled_pubkey_ciphers
         Random number generator:   $random
         Using linux capabilities:  $use_capabilities
+        Try using Padlock crypto:  $padlocksupport
+        Try using AES-NI crypto:   $aesnisupport
 "
 
 if test "$print_egd_notice" = "yes"; then