Mailing List Archive

[PATCH] Enable VIA Padlock on x86_64 platforms
Convert existing 32bits asm to 64bits:
- *l -> *q (long->quad)
- e** registers -> r** registers
- don't mess with ebx GOT register

Tested with make check on VIA Nano X2 L4350
---
Note: make check actually fails, but it's not a regression from 32 bits,
where that test fails with the exact same output.

% ./tests/basic
aes-ctr, encrypt mismatch entry 0:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 0:1
expected: 98 06 f6 6b 79 70 fd ff 86 17 18 7b b9 ff fd ff
computed: 2b 2d 2c db 8c 5b d3 ee d1 57 79 37 9d 09 10 ed
aes-ctr, encrypt mismatch entry 0:2
expected: 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 0:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
aes-ctr, encrypt mismatch entry 3:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 4:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:0
expected: 87 4d 61 91 b6 20 e3 26 1b ef 68 64 99 0d b6 ce
computed: 68 b1 c5 2b a6 00 3d e7 be 60 08 ad 1b f8 7c c0
aes-ctr, encrypt mismatch entry 7:2
expected: ff 5a e4 df 3e db d5 d3 5e 5b 4f 09 02 0d b0 3e ab
computed: ff 39 1e cc a8 f9 d0 62 bf 4c 60 4b 82 1d 8e 32 90
aes-ctr, encrypt mismatch entry 7:3
expected: 1e 03 1d da 2f be 03 d1 79 21 70 a0 f3 00 9c ee
computed: fb 7d c4 ce 68 9e 5d dc 74 32 1a 71 35 8a cf 49
encrypt mismatch (algo 7, mode 6)

cipher/rijndael.c | 21 +++++++++---
random/rndhw.c | 13 ++++++-
src/hwfeatures.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c2e0a77 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -63,7 +63,7 @@
code. */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( ( defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 ) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -663,17 +663,28 @@ do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,

memcpy (a, ax, 16);

+ int blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : /* No output */
+ : "S" (a), "D" (b), "d" (cword), "b" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
+ );
+#else
asm volatile
("pushfl\n\t" /* Force key reload. */
"popfl\n\t"
"xchg %3, %%ebx\n\t" /* Load key. */
- "movl $1, %%ecx\n\t" /* Init counter for just one block. */
- ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
"xchg %3, %%ebx\n" /* Restore GOT register. */
: /* No output */
- : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
- : "%ecx", "cc", "memory"
+ : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
);
+#endif

memcpy (bx, b, 16);

diff --git a/random/rndhw.c b/random/rndhw.c
index 82faab4..c933cf9 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -27,7 +27,7 @@

#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -55,6 +55,16 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
nbytes = 0;
while (nbytes < 64)
{
+#ifdef __x86_64__
+ asm volatile
+ ("movq %1, %%rdi\n\t" /* Set buffer. */
+ "xorq %%rdx, %%rdx\n\t" /* Request up to 8 bytes. */
+ ".byte 0x0f, 0xa7, 0xc0\n\t" /* XSTORE RNG. */
+ : "=a" (status)
+ : "g" (p)
+ : "%rdx", "%rdi", "cc"
+ );
+#else
asm volatile
("movl %1, %%edi\n\t" /* Set buffer. */
"xorl %%edx, %%edx\n\t" /* Request up to 8 bytes. */
@@ -63,6 +73,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
: "g" (p)
: "%edx", "%edi", "cc"
);
+#endif
if ((status & (1<<6)) /* RNG still enabled. */
&& !(status & (1<<13)) /* von Neumann corrector is enabled. */
&& !(status & (1<<14)) /* String filter is disabled. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index c356798..73db917 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -40,6 +40,99 @@ _gcry_get_hw_features (void)
}


+#if defined (__x86_64__) && defined (__GNUC__)
+static void
+detect_x86_64_gnuc (void)
+{
+ /* The code here is only useful for the PadLock engine thus we don't
+ build it if that support has been disabled. */
+ char vendor_id[12+1];
+
+ asm volatile
+ ("xorl %%eax, %%eax\n\t" /* 0 -> EAX. */
+ "cpuid\n\t" /* Get vendor ID. */
+ "movl %%ebx, (%0)\n\t" /* EBX,EDX,ECX -> VENDOR_ID. */
+ "movl %%edx, 4(%0)\n\t"
+ "movl %%ecx, 8(%0)\n\t"
+ :
+ : "S" (&vendor_id[0])
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ vendor_id[12] = 0;
+
+ if (0)
+ ; /* Just to make "else if" and ifdef macros look pretty. */
+#ifdef ENABLE_PADLOCK_SUPPORT
+ else if (!strcmp (vendor_id, "CentaurHauls"))
+ {
+ /* This is a VIA CPU. Check what PadLock features we have. */
+ asm volatile
+ ("movl $0xC0000000, %%eax\n\t" /* Check for extended centaur */
+ "cpuid\n\t" /* feature flags. */
+ "cmpl $0xC0000001, %%eax\n\t"
+ "jb .Lready%=\n\t" /* EAX < 0xC0000000 => no padlock. */
+
+ "movl $0xC0000001, %%eax\n\t" /* Ask for the extended */
+ "cpuid\n\t" /* feature flags. */
+
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x0C, %%eax\n\t" /* Test bits 2 and 3 to see whether */
+ "cmpl $0x0C, %%eax\n\t" /* the RNG exists and is enabled. */
+ "jnz .Lno_rng%=\n\t"
+ "orl $1, %0\n" /* Set our HWF_PADLOCK_RNG bit. */
+
+ ".Lno_rng%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC0, %%eax\n\t" /* Test bits 6 and 7 to see whether */
+ "cmpl $0xC0, %%eax\n\t" /* the ACE exists and is enabled. */
+ "jnz .Lno_ace%=\n\t"
+ "orl $2, %0\n" /* Set our HWF_PADLOCK_AES bit. */
+
+ ".Lno_ace%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC00, %%eax\n\t" /* Test bits 10, 11 to see whether */
+ "cmpl $0xC00, %%eax\n\t" /* the PHE exists and is enabled. */
+ "jnz .Lno_phe%=\n\t"
+ "orl $4, %0\n" /* Set our HWF_PADLOCK_SHA bit. */
+
+ ".Lno_phe%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x3000, %%eax\n\t" /* Test bits 12, 13 to see whether */
+ "cmpl $0x3000, %%eax\n\t" /* MONTMUL exists and is enabled. */
+ "jnz .Lready%=\n\t"
+ "orl $8, %0\n" /* Set our HWF_PADLOCK_MMUL bit. */
+
+ ".Lready%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%edx", "cc"
+ );
+ }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+ else if (!strcmp (vendor_id, "GenuineIntel"))
+ {
+ /* This is an Intel CPU. */
+ asm volatile
+ ("movl $1, %%eax\n\t" /* Get CPU info and feature flags. */
+ "cpuid\n"
+ "testl $0x02000000, %%ecx\n\t" /* Test bit 25. */
+ "jz .Lno_aes%=\n\t" /* No AES support. */
+ "orl $256, %0\n" /* Set our HWF_INTEL_AES bit. */
+
+ ".Lno_aes%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ }
+ else if (!strcmp (vendor_id, "AuthenticAMD"))
+ {
+ /* This is an AMD CPU. */
+
+ }
+}
+#endif /* __x86_64__ && __GNUC__ */
+
#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
static void
detect_ia32_gnuc (void)
@@ -186,6 +279,10 @@ _gcry_detect_hw_features (unsigned int disabled_features)
#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
#ifdef __GNUC__
#endif
+#elif defined (__x86_64__)
+#ifdef __GNUC__
+ detect_x86_64_gnuc ();
+#endif
#endif

hw_features &= ~disabled_features;
--
1.7.9.1

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Re: [PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
On Wed, 11 Apr 2012 06:20, funman@videolan.org said:

> Note: make check actually fails, but it's not a regression from 32 bits,
> where that test fails with the exact same output.

Does that mean, there is a regression in Libgcrypt if a newer 32 bit VIA
CPU is used? On what OS platform?


Salam-Shalom,

Werner

--
Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz.


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Re: [PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
Le 2012-04-11 04:18, Werner Koch a écrit :
> On Wed, 11 Apr 2012 06:20, funman@videolan.org said:
>
>> Note: make check actually fails, but it's not a regression from 32 bits,
>> where that test fails with the exact same output.
>
> Does that mean, there is a regression in Libgcrypt if a newer 32 bit VIA
> CPU is used?

I would not know, I don't own another (older) VIA CPU.

> On what OS platform?

Linux x86_64 (Ubuntu 12.04)

I noticed the test fails only with GCRY_CIPHER_MODE_CTR.

After starting to understand what this mode is, I noticed it also failed
on 32 bits and just gave up on it, hoping that someone (the padlock code
author?) could explain it.

Do you happen to have a VIA CPU on which libgcrypt was fully tested around?

If it fails with a recent version of libgcrypt too I could bisect the bug.

> Salam-Shalom,
>
> Werner

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Re: [PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
On Wed, 11 Apr 2012 13:03, funman@videolan.org said:

> After starting to understand what is this mode, I noticed it also failed
> on 32 bits and just gave up on it, hoping that someone (the padlock code
> author?) could explain it.

That's me. It can't be specific to the padlock code because the
_gcry_aes_ctr_enc diverts only for AESNI; the default code path is used
by all other CPUs.

> Do you happen to have a VIA CPU on which libgcrypt was fully tested around?

I don't think so, back then I used Niko's settop box remotely. The GCC
compile farm has no VIA cpu.


Shalom-Salam,

Werner

--
Die Gedanken sind frei. Ausnahmen regelt ein Bundesgesetz.


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Re: [PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
Le 2012-04-12 06:23, Werner Koch a écrit :
> On Wed, 11 Apr 2012 13:03, funman@videolan.org said:
>
>> After starting to understand what is this mode, I noticed it also failed
>> on 32 bits and just gave up on it, hoping that someone (the padlock code
>> author?) could explain it.
>
> That's me. It can't be specific to the padlock code because the
> _gcry_aes_ctr_enc diverts only for AESNI; the default code path is used
> by all other CPUs.

I tried the test suite on your commit introducing PADLOCK code and it
worked, so I spent some time bisecting:

2674140cdfdc59ce5ad0238177da1542f5df6e00 is the first bad commit
commit 2674140cdfdc59ce5ad0238177da1542f5df6e00
Author: Werner Koch <wk@gnupg.org>
Date: Tue Feb 22 16:08:13 2011 +0100

Use AES-NI insns for CTR mode.

That really boosts the performance of CTR.

:100644 100644 ffbdc840c4f74f2866b3179aab232b93ef28dc8a
979ad403e920229d48417e0e41cf3ebf5e9dede0 M NEWS
:040000 040000 c7ae1843e27728fcb5e7e54d771b68e665fbd822
af64d30472d52e93bad24ca16b2f45205e90340a M cipher
:100644 100644 a28ea8383ce4b86b7f19aa649143361457d7a640
013ff3af4279e36ed3021e7f6d4b679eabc77bfe M configure.ac
:040000 040000 23af7cde7dcf6806caa4a907dc6bdb0addaa910f
e027fe7032c7752a94369e34d6736c6734410ceb M src


I could fix the test suite with the following diff:

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c7320a4 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -282,7 +282,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key,
const unsigned keylen)
if (0)
;
#ifdef USE_PADLOCK
- else if (ctx->use_padlock)
+ else if (0 && ctx->use_padlock)
{
/* Nothing to do as we support only hardware key generation for
now. */


I believe the correct fix would be to use the padlock for all cipher modes.

Is there a reason why it's not used for CTR?


With this one-liner, the test suite now passes in both 32bits and 64bits
with the patch I previously sent, so I'm resending that one again with a
Signed-off-by.

Btw, thanks for relaxing the requirement on a copyright assignment, I
was already preparing my defense that this code was not mine but instead
a mere copy of 32bits asm code with non-copyrightable very small and
obvious changes.

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
[PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
Convert existing 32bits asm to 64bits:
- *l -> *q (long->quad)
- e** registers -> r** registers
- don't mess with ebx GOT register

Tested with make check on VIA Nano X2 L4350

Signed-off-by: Rafaël Carré <funman@videolan.org>
---
cipher/rijndael.c | 21 +++++++++---
random/rndhw.c | 13 ++++++-
src/hwfeatures.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c2e0a77 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -63,7 +63,7 @@
code. */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( ( defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 ) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -663,17 +663,28 @@ do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,

memcpy (a, ax, 16);

+ int blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : /* No output */
+ : "S" (a), "D" (b), "d" (cword), "b" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
+ );
+#else
asm volatile
("pushfl\n\t" /* Force key reload. */
"popfl\n\t"
"xchg %3, %%ebx\n\t" /* Load key. */
- "movl $1, %%ecx\n\t" /* Init counter for just one block. */
- ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
"xchg %3, %%ebx\n" /* Restore GOT register. */
: /* No output */
- : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
- : "%ecx", "cc", "memory"
+ : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
);
+#endif

memcpy (bx, b, 16);

diff --git a/random/rndhw.c b/random/rndhw.c
index 82faab4..c933cf9 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -27,7 +27,7 @@

#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -55,6 +55,16 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
nbytes = 0;
while (nbytes < 64)
{
+#ifdef __x86_64__
+ asm volatile
+ ("movq %1, %%rdi\n\t" /* Set buffer. */
+ "xorq %%rdx, %%rdx\n\t" /* Request up to 8 bytes. */
+ ".byte 0x0f, 0xa7, 0xc0\n\t" /* XSTORE RNG. */
+ : "=a" (status)
+ : "g" (p)
+ : "%rdx", "%rdi", "cc"
+ );
+#else
asm volatile
("movl %1, %%edi\n\t" /* Set buffer. */
"xorl %%edx, %%edx\n\t" /* Request up to 8 bytes. */
@@ -63,6 +73,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
: "g" (p)
: "%edx", "%edi", "cc"
);
+#endif
if ((status & (1<<6)) /* RNG still enabled. */
&& !(status & (1<<13)) /* von Neumann corrector is enabled. */
&& !(status & (1<<14)) /* String filter is disabled. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index c356798..73db917 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -40,6 +40,99 @@ _gcry_get_hw_features (void)
}


+#if defined (__x86_64__) && defined (__GNUC__)
+static void
+detect_x86_64_gnuc (void)
+{
+ /* The code here is only useful for the PadLock engine thus we don't
+ build it if that support has been disabled. */
+ char vendor_id[12+1];
+
+ asm volatile
+ ("xorl %%eax, %%eax\n\t" /* 0 -> EAX. */
+ "cpuid\n\t" /* Get vendor ID. */
+ "movl %%ebx, (%0)\n\t" /* EBX,EDX,ECX -> VENDOR_ID. */
+ "movl %%edx, 4(%0)\n\t"
+ "movl %%ecx, 8(%0)\n\t"
+ :
+ : "S" (&vendor_id[0])
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ vendor_id[12] = 0;
+
+ if (0)
+ ; /* Just to make "else if" and ifdef macros look pretty. */
+#ifdef ENABLE_PADLOCK_SUPPORT
+ else if (!strcmp (vendor_id, "CentaurHauls"))
+ {
+ /* This is a VIA CPU. Check what PadLock features we have. */
+ asm volatile
+ ("movl $0xC0000000, %%eax\n\t" /* Check for extended centaur */
+ "cpuid\n\t" /* feature flags. */
+ "cmpl $0xC0000001, %%eax\n\t"
+ "jb .Lready%=\n\t" /* EAX < 0xC0000000 => no padlock. */
+
+ "movl $0xC0000001, %%eax\n\t" /* Ask for the extended */
+ "cpuid\n\t" /* feature flags. */
+
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x0C, %%eax\n\t" /* Test bits 2 and 3 to see whether */
+ "cmpl $0x0C, %%eax\n\t" /* the RNG exists and is enabled. */
+ "jnz .Lno_rng%=\n\t"
+ "orl $1, %0\n" /* Set our HWF_PADLOCK_RNG bit. */
+
+ ".Lno_rng%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC0, %%eax\n\t" /* Test bits 6 and 7 to see whether */
+ "cmpl $0xC0, %%eax\n\t" /* the ACE exists and is enabled. */
+ "jnz .Lno_ace%=\n\t"
+ "orl $2, %0\n" /* Set our HWF_PADLOCK_AES bit. */
+
+ ".Lno_ace%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC00, %%eax\n\t" /* Test bits 10, 11 to see whether */
+ "cmpl $0xC00, %%eax\n\t" /* the PHE exists and is enabled. */
+ "jnz .Lno_phe%=\n\t"
+ "orl $4, %0\n" /* Set our HWF_PADLOCK_SHA bit. */
+
+ ".Lno_phe%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x3000, %%eax\n\t" /* Test bits 12, 13 to see whether */
+ "cmpl $0x3000, %%eax\n\t" /* MONTMUL exists and is enabled. */
+ "jnz .Lready%=\n\t"
+ "orl $8, %0\n" /* Set our HWF_PADLOCK_MMUL bit. */
+
+ ".Lready%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%edx", "cc"
+ );
+ }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+ else if (!strcmp (vendor_id, "GenuineIntel"))
+ {
+ /* This is an Intel CPU. */
+ asm volatile
+ ("movl $1, %%eax\n\t" /* Get CPU info and feature flags. */
+ "cpuid\n"
+ "testl $0x02000000, %%ecx\n\t" /* Test bit 25. */
+ "jz .Lno_aes%=\n\t" /* No AES support. */
+ "orl $256, %0\n" /* Set our HWF_INTEL_AES bit. */
+
+ ".Lno_aes%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ }
+ else if (!strcmp (vendor_id, "AuthenticAMD"))
+ {
+ /* This is an AMD CPU. */
+
+ }
+}
+#endif /* __x86_64__ && __GNUC__ */
+
#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
static void
detect_ia32_gnuc (void)
@@ -186,6 +279,10 @@ _gcry_detect_hw_features (unsigned int disabled_features)
#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
#ifdef __GNUC__
#endif
+#elif defined (__x86_64__)
+#ifdef __GNUC__
+ detect_x86_64_gnuc ();
+#endif
#endif

hw_features &= ~disabled_features;
--
1.7.9.5

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
[PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
* cipher/rijndael.c: Duplicate x86 assembly and convert to x86_64.
* random/rndhw.c: Likewise.
* src/hwfeatures.c: Likewise.
--
Changes made to the x86 assembly:
- *l -> *q (long -> quad)
- e** registers -> r** registers (use widest registers available)
- don't mess with ebx GOT register

Tested with make check on VIA Nano X2 L4350

Signed-off-by: Rafaël Carré <funman@videolan.org>
---
cipher/rijndael.c | 21 +++++++++---
random/rndhw.c | 13 ++++++-
src/hwfeatures.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 125 insertions(+), 6 deletions(-)

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2df8ea9..c2e0a77 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -63,7 +63,7 @@
code. */
#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( ( defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 ) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK 1
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -663,17 +663,28 @@ do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,

memcpy (a, ax, 16);

+ int blocks = 1; /* Init counter for just one block. */
+#ifdef __x86_64__
+ asm volatile
+ ("pushfq\n\t" /* Force key reload. */
+ "popfq\n\t"
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+ : /* No output */
+ : "S" (a), "D" (b), "d" (cword), "b" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
+ );
+#else
asm volatile
("pushfl\n\t" /* Force key reload. */
"popfl\n\t"
"xchg %3, %%ebx\n\t" /* Load key. */
- "movl $1, %%ecx\n\t" /* Init counter for just one block. */
- ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
+ ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
"xchg %3, %%ebx\n" /* Restore GOT register. */
: /* No output */
- : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
- : "%ecx", "cc", "memory"
+ : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey), "c" (blocks)
+ : "cc", "memory"
);
+#endif

memcpy (bx, b, 16);

diff --git a/random/rndhw.c b/random/rndhw.c
index 82faab4..c933cf9 100644
--- a/random/rndhw.c
+++ b/random/rndhw.c
@@ -27,7 +27,7 @@

#undef USE_PADLOCK
#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# if ( (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__) ) && defined (__GNUC__)
# define USE_PADLOCK
# endif
#endif /*ENABLE_PADLOCK_SUPPORT*/
@@ -55,6 +55,16 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
nbytes = 0;
while (nbytes < 64)
{
+#ifdef __x86_64__
+ asm volatile
+ ("movq %1, %%rdi\n\t" /* Set buffer. */
+ "xorq %%rdx, %%rdx\n\t" /* Request up to 8 bytes. */
+ ".byte 0x0f, 0xa7, 0xc0\n\t" /* XSTORE RNG. */
+ : "=a" (status)
+ : "g" (p)
+ : "%rdx", "%rdi", "cc"
+ );
+#else
asm volatile
("movl %1, %%edi\n\t" /* Set buffer. */
"xorl %%edx, %%edx\n\t" /* Request up to 8 bytes. */
@@ -63,6 +73,7 @@ poll_padlock (void (*add)(const void*, size_t, enum random_origins),
: "g" (p)
: "%edx", "%edi", "cc"
);
+#endif
if ((status & (1<<6)) /* RNG still enabled. */
&& !(status & (1<<13)) /* von Neumann corrector is enabled. */
&& !(status & (1<<14)) /* String filter is disabled. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index c356798..73db917 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -40,6 +40,99 @@ _gcry_get_hw_features (void)
}


+#if defined (__x86_64__) && defined (__GNUC__)
+static void
+detect_x86_64_gnuc (void)
+{
+ /* The code here is only useful for the PadLock engine thus we don't
+ build it if that support has been disabled. */
+ char vendor_id[12+1];
+
+ asm volatile
+ ("xorl %%eax, %%eax\n\t" /* 0 -> EAX. */
+ "cpuid\n\t" /* Get vendor ID. */
+ "movl %%ebx, (%0)\n\t" /* EBX,EDX,ECX -> VENDOR_ID. */
+ "movl %%edx, 4(%0)\n\t"
+ "movl %%ecx, 8(%0)\n\t"
+ :
+ : "S" (&vendor_id[0])
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ vendor_id[12] = 0;
+
+ if (0)
+ ; /* Just to make "else if" and ifdef macros look pretty. */
+#ifdef ENABLE_PADLOCK_SUPPORT
+ else if (!strcmp (vendor_id, "CentaurHauls"))
+ {
+ /* This is a VIA CPU. Check what PadLock features we have. */
+ asm volatile
+ ("movl $0xC0000000, %%eax\n\t" /* Check for extended centaur */
+ "cpuid\n\t" /* feature flags. */
+ "cmpl $0xC0000001, %%eax\n\t"
+ "jb .Lready%=\n\t" /* EAX < 0xC0000000 => no padlock. */
+
+ "movl $0xC0000001, %%eax\n\t" /* Ask for the extended */
+ "cpuid\n\t" /* feature flags. */
+
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x0C, %%eax\n\t" /* Test bits 2 and 3 to see whether */
+ "cmpl $0x0C, %%eax\n\t" /* the RNG exists and is enabled. */
+ "jnz .Lno_rng%=\n\t"
+ "orl $1, %0\n" /* Set our HWF_PADLOCK_RNG bit. */
+
+ ".Lno_rng%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC0, %%eax\n\t" /* Test bits 6 and 7 to see whether */
+ "cmpl $0xC0, %%eax\n\t" /* the ACE exists and is enabled. */
+ "jnz .Lno_ace%=\n\t"
+ "orl $2, %0\n" /* Set our HWF_PADLOCK_AES bit. */
+
+ ".Lno_ace%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0xC00, %%eax\n\t" /* Test bits 10, 11 to see whether */
+ "cmpl $0xC00, %%eax\n\t" /* the PHE exists and is enabled. */
+ "jnz .Lno_phe%=\n\t"
+ "orl $4, %0\n" /* Set our HWF_PADLOCK_SHA bit. */
+
+ ".Lno_phe%=:\n\t"
+ "movl %%edx, %%eax\n\t" /* Take copy of feature flags. */
+ "andl $0x3000, %%eax\n\t" /* Test bits 12, 13 to see whether */
+ "cmpl $0x3000, %%eax\n\t" /* MONTMUL exists and is enabled. */
+ "jnz .Lready%=\n\t"
+ "orl $8, %0\n" /* Set our HWF_PADLOCK_MMUL bit. */
+
+ ".Lready%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%edx", "cc"
+ );
+ }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+ else if (!strcmp (vendor_id, "GenuineIntel"))
+ {
+ /* This is an Intel CPU. */
+ asm volatile
+ ("movl $1, %%eax\n\t" /* Get CPU info and feature flags. */
+ "cpuid\n"
+ "testl $0x02000000, %%ecx\n\t" /* Test bit 25. */
+ "jz .Lno_aes%=\n\t" /* No AES support. */
+ "orl $256, %0\n" /* Set our HWF_INTEL_AES bit. */
+
+ ".Lno_aes%=:\n"
+ : "+r" (hw_features)
+ :
+ : "%eax", "%ecx", "%edx", "cc"
+ );
+ }
+ else if (!strcmp (vendor_id, "AuthenticAMD"))
+ {
+ /* This is an AMD CPU. */
+
+ }
+}
+#endif /* __x86_64__ && __GNUC__ */
+
#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
static void
detect_ia32_gnuc (void)
@@ -186,6 +279,10 @@ _gcry_detect_hw_features (unsigned int disabled_features)
#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
#ifdef __GNUC__
#endif
+#elif defined (__x86_64__)
+#ifdef __GNUC__
+ detect_x86_64_gnuc ();
+#endif
#endif

hw_features &= ~disabled_features;
--
1.7.9.5

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel
Re: [PATCH] Enable VIA Padlock on x86_64 platforms [ In reply to ]
Le 20/04/2012 10:39, Werner Koch a écrit :
> Hi!
>
> If you want me to consider your patches, you should follow the new
> instructions in doc/HACKING and send the DCO file to this list. My
> apologies in advance, in case you already did.

Just did that :p

> --
> Because the tear off line is the first line of the body, not even the
> summary line will be copied to the ChangeLog. Using only the tear off
> line without any text is often useful for commits like "Typo and
> grammar fixes."
> #+end_example

This doesn't seem to work:

make dist
tar zfx libgcrypt-1.6.0-git9eb9fc3.tar.gz
make gen-ChangeLog
vim libgcrypt-1.6.0-gitc56149d/ChangeLog
/^ --

The commit log is copied verbatim:

2011-12-16 Werner Koch <wk@gnupg.org>

Add alignment tests for the cipher tests.
* tests/basic.c (check_one_cipher): Factor most code out to
check_one_cipher_core. Call that core function several times using
different alignment settings.
(check_one_cipher_core): New. Add extra args to allow alignment
testing.
--

As reported in bug#1384 Serpent fails on a sparc64. One problem with
the test code is that due to the bus error the error message won't be
printed.

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel