asm volatile
("pushl %%ebx\n\t" /* Save GOT register. */
+ "movl %1, %%ebx\n\t"
"cpuid\n\t"
"movl %%ebx, %1\n\t"
"popl %%ebx\n\t" /* Restore GOT register. */
- : "=a" (regs[0]), "=r" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
- : "0" (in)
+ : "=a" (regs[0]), "=D" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (in), "1" (0), "2" (0), "3" (0)
: "cc"
);
if (edx)
*edx = regs[3];
}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
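+/* Read XCR0 with XGETBV (ECX=0); bits 1 and 2 indicate that the OS saves
+ * the XMM and YMM register state on context switches. */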
+static unsigned int
+get_xgetbv(void)
+{
+ unsigned int t_eax, t_edx;
+
+ asm volatile
+ ("xgetbv\n\t"
+ : "=a" (t_eax), "=d" (t_edx)
+ : "c" (0)
+ );
+
+ return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
#endif /* i386 && GNUC */
asm volatile
("cpuid\n\t"
: "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
- : "0" (in)
+ : "0" (in), "1" (0), "2" (0), "3" (0)
: "cc"
);
if (edx)
*edx = regs[3];
}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
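+/* Same as the i386 version above: read XCR0 via XGETBV (ECX=0). */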
+static unsigned int
+get_xgetbv(void)
+{
+ unsigned int t_eax, t_edx;
+
+ asm volatile
+ ("xgetbv\n\t"
+ : "=a" (t_eax), "=d" (t_edx)
+ : "c" (0)
+ );
+
+ return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
#endif /* x86-64 && GNUC */
{
char vendor_id[12+1];
unsigned int features;
+ unsigned int os_supports_avx_avx2_registers = 0; /* Set when XCR0 shows OS-enabled XMM and YMM state. */
+ unsigned int max_cpuid_level; /* Highest standard CPUID level (CPUID:0 EAX). */
unsigned int result = 0;
+ (void)os_supports_avx_avx2_registers; /* Silence unused-variable warning when AVX/AVX2 support is compiled out. */
+
if (!is_cpuid_available())
return 0;
- get_cpuid(0, NULL,
+ get_cpuid(0, &max_cpuid_level,
(unsigned int *)&vendor_id[0],
(unsigned int *)&vendor_id[8],
(unsigned int *)&vendor_id[4]);
else if (!strcmp (vendor_id, "GenuineIntel"))
{
/* This is an Intel CPU. */
+ result |= HWF_INTEL_CPU;
}
else if (!strcmp (vendor_id, "AuthenticAMD"))
{
/* Get CPU info and Intel feature flags (ECX). */
get_cpuid(1, NULL, NULL, &features, NULL);
+#ifdef ENABLE_PCLMUL_SUPPORT
+ /* Test bit 1 for PCLMUL. */
+ if (features & 0x00000002)
+ result |= HWF_INTEL_PCLMUL;
+#endif
+ /* Test bit 9 for SSSE3. */
+ if (features & 0x00000200)
+ result |= HWF_INTEL_SSSE3;
#ifdef ENABLE_AESNI_SUPPORT
/* Test bit 25 for AES-NI. */
if (features & 0x02000000)
result |= HWF_INTEL_AESNI;
#endif /*ENABLE_AESNI_SUPPORT*/
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+ /* Test bit 27 for OSXSAVE (required for AVX/AVX2). */
+ if (features & 0x08000000)
+ {
+ /* Check that OS has enabled both XMM and YMM state support. */
+ if ((get_xgetbv() & 0x6) == 0x6) /* XCR0 bit 1 (XMM) and bit 2 (YMM) */
+ os_supports_avx_avx2_registers = 1;
+ }
+#endif
+#ifdef ENABLE_AVX_SUPPORT
+ /* Test bit 28 for AVX. */
+ if (features & 0x10000000)
+ if (os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_AVX;
+#endif /*ENABLE_AVX_SUPPORT*/
#ifdef ENABLE_DRNG_SUPPORT
/* Test bit 30 for RDRAND. */
if (features & 0x40000000)
result |= HWF_INTEL_RDRAND;
#endif /*ENABLE_DRNG_SUPPORT*/
+ /* Check additional Intel feature flags. Early Intel P5 processors report
+ * a too-high max_cpuid_level, so don't check level 7 if the processor does
+ * not support SSE3 (cpuid:7 contains only features for newer processors).
+ * Source: http://www.sandpile.org/x86/cpuid.htm */
+ if (max_cpuid_level >= 7 && (features & 0x00000001)) /* bit 0: SSE3 */
+ {
+ /* CPUID:7 EBX contains further Intel feature flags. */
+ get_cpuid(7, NULL, &features, NULL, NULL);
+
+ /* Test bit 8 for BMI2. */
+ if (features & 0x00000100)
+ result |= HWF_INTEL_BMI2;
+
+#ifdef ENABLE_AVX2_SUPPORT
+ /* Test bit 5 for AVX2. */
+ if (features & 0x00000020)
+ if (os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_AVX2;
+#endif /*ENABLE_AVX2_SUPPORT*/
+ }
+
return result;
}
#endif /* HAS_X86_CPUID */