Further MD5 optimizations

flocal-proof
Inga 🏳‍🌈 8 years ago
parent 268f5ef1ef
commit 581572fa4e
  1. 13
      README.md
  2. 73
      dotnet/WhiteRabbit/MD5Digest.cs
  3. 19
      dotnet/WhiteRabbit/Phrase.cs

@ -45,11 +45,11 @@ Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sa
* If only phrases of at most 4 words are allowed, then it takes **1.5 seconds** to find and check all 7433016 anagrams; **all hashes are solved in first 0.2 seconds**.
* If phrases of 5 words are allowed as well, then it takes 3.5 minutes to find and check all 1348876896 anagrams; all hashes are solved in first 5 seconds.
* If phrases of 5 words are allowed as well, then it takes 3.5 minutes to find and check all 1348876896 anagrams; all hashes are solved in less than 5 seconds.
* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 5 seconds, "easiest" in 30 seconds, and "hard" in 75 seconds.
* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 4.5 seconds, "easiest" in 28 seconds, and "hard" in 70 seconds.
* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 29 seconds, "easiest" in 3.5 minutes, and "hard" in 9.5 minutes.
* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 27 seconds, "easiest" in less than 3.5 minutes, and "hard" in 9.5 minutes.
Note that all measurements were done on a Release build; Debug build is significantly slower.
@ -58,7 +58,7 @@ For comparison, certain other solutions available on GitHub seem to require 3 ho
Conditional compilation symbols
===============================
* Define `SINGLE_THREADED` to use standard enumerables instead of ParallelEnumerable.
* Define `SINGLE_THREADED` to use standard enumerables instead of ParallelEnumerable (useful for profiling).
* Define `DEBUG`, or build in debug mode, to get the total number of anagrams (not optimized, memory-hogging).
@ -104,8 +104,9 @@ As we have ordered the words by weight, when we're looping over the dictionary,
9. Another possible optimization with such an ordering is employing binary search.
There is no need to process all the words that are too large to be useful at this moment; we can start with the first word whose weight does not exceed the distance between the current partial sum and the target.
10. And then, all that remains are implementation optimizations: precomputing weights, optimizing memory usage and loops, using byte arrays instead of strings, etc.
10. And then, all that remains are implementation optimizations: precomputing weights, optimizing memory usage and loops, using byte arrays instead of strings, etc. Some of the optimizations that hurt code readability:
* Words are stored as byte arrays (one byte per character, as we're working with ASCII), with a trailing space (to make concatenating words into an anagram easier);
* Anagrams are stored in a way optimized for MD5 - as an MD5 message (i.e. with a trailing "128" (0x80) byte, as an array of 8 uints, with the last uint set to the anagram length * 8, i.e. its length in bits). For example, "poultry outwits ants" is stored as a fixed 32-byte memory area containing "poultry outwits ants" + 0x80 + (0x00)x7 + (uint)0xA0 (20 characters * 8 bits = 160).
11. Filtering the original dictionary (e.g. throwing away all single-letter words) does not really improve the performance, thanks to the optimizations mentioned in notes 7-9.
This solution finds all anagrams, including those with single-letter words.

@ -15,27 +15,18 @@
{
public static unsafe uint[] Compute(Phrase input)
{
var xUints = stackalloc uint[8]; // it seems that alignment helps
*(long*)xUints = *(long*)input.Buffer;
*(long*)(xUints + 2) = *(long*)(input.Buffer + 8);
*(long*)(xUints + 4) = *(long*)(input.Buffer + 16);
*(long*)(xUints + 6) = *(long*)(input.Buffer + 24);
((byte*)xUints)[31] = 0;
((byte*)xUints)[input.Buffer[31]] = 128;
xUints[7] = (uint)(input.Buffer[31] << 3);
uint a = 0x67452301;
uint b = 0xefcdab89;
uint c = 0x98badcfe;
uint d = 0x10325476;
a = LeftRotate(xUints[0] + 0xd76aa478 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(xUints[1] + 0xe8c7b756 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
c = LeftRotate(xUints[2] + 0x242070db + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
b = LeftRotate(xUints[3] + 0xc1bdceee + b + ((c & d) | (~c & a)), 22, 32 - 22) + c;
a = LeftRotate(xUints[4] + 0xf57c0faf + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(xUints[5] + 0x4787c62a + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
c = LeftRotate(xUints[6] + 0xa8304613 + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
a = LeftRotate(input.Buffer[0] + 0xd76aa478 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(input.Buffer[1] + 0xe8c7b756 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
c = LeftRotate(input.Buffer[2] + 0x242070db + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
b = LeftRotate(input.Buffer[3] + 0xc1bdceee + b + ((c & d) | (~c & a)), 22, 32 - 22) + c;
a = LeftRotate(input.Buffer[4] + 0xf57c0faf + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(input.Buffer[5] + 0x4787c62a + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
c = LeftRotate(input.Buffer[6] + 0xa8304613 + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
b = LeftRotate(0xfd469501 + b + ((c & d) | (~c & a)), 22, 32 - 22) + c;
a = LeftRotate(0x698098d8 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(0x8b44f7af + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
@ -43,58 +34,58 @@
b = LeftRotate(0x895cd7be + b + ((c & d) | (~c & a)), 22, 32 - 22) + c;
a = LeftRotate(0x6b901122 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b;
d = LeftRotate(0xfd987193 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a;
c = LeftRotate(xUints[7] + 0xa679438e + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
c = LeftRotate(input.Buffer[7] + 0xa679438e + c + ((d & a) | (~d & b)), 17, 32 - 17) + d;
b = LeftRotate(0x49b40821 + b + ((c & d) | (~c & a)), 22, 32 - 22) + c;
a = LeftRotate(xUints[1] + 0xf61e2562 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
d = LeftRotate(xUints[6] + 0xc040b340 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
a = LeftRotate(input.Buffer[1] + 0xf61e2562 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
d = LeftRotate(input.Buffer[6] + 0xc040b340 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
c = LeftRotate(0x265e5a51 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d;
b = LeftRotate(xUints[0] + 0xe9b6c7aa + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
a = LeftRotate(xUints[5] + 0xd62f105d + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
b = LeftRotate(input.Buffer[0] + 0xe9b6c7aa + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
a = LeftRotate(input.Buffer[5] + 0xd62f105d + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
d = LeftRotate(0x2441453 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
c = LeftRotate(0xd8a1e681 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d;
b = LeftRotate(xUints[4] + 0xe7d3fbc8 + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
b = LeftRotate(input.Buffer[4] + 0xe7d3fbc8 + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
a = LeftRotate(0x21e1cde6 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
d = LeftRotate(xUints[7] + 0xc33707d6 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
c = LeftRotate(xUints[3] + 0xf4d50d87 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d;
d = LeftRotate(input.Buffer[7] + 0xc33707d6 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
c = LeftRotate(input.Buffer[3] + 0xf4d50d87 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d;
b = LeftRotate(0x455a14ed + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
a = LeftRotate(0xa9e3e905 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b;
d = LeftRotate(xUints[2] + 0xfcefa3f8 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
d = LeftRotate(input.Buffer[2] + 0xfcefa3f8 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a;
c = LeftRotate(0x676f02d9 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d;
b = LeftRotate(0x8d2a4c8a + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c;
a = LeftRotate(xUints[5] + 0xfffa3942 + a + (b ^ c ^ d), 4, 32 - 4) + b;
a = LeftRotate(input.Buffer[5] + 0xfffa3942 + a + (b ^ c ^ d), 4, 32 - 4) + b;
d = LeftRotate(0x8771f681 + d + (a ^ b ^ c), 11, 32 - 11) + a;
c = LeftRotate(0x6d9d6122 + c + (d ^ a ^ b), 16, 32 - 16) + d;
b = LeftRotate(xUints[7] + 0xfde5380c + b + (c ^ d ^ a), 23, 32 - 23) + c;
a = LeftRotate(xUints[1] + 0xa4beea44 + a + (b ^ c ^ d), 4, 32 - 4) + b;
d = LeftRotate(xUints[4] + 0x4bdecfa9 + d + (a ^ b ^ c), 11, 32 - 11) + a;
b = LeftRotate(input.Buffer[7] + 0xfde5380c + b + (c ^ d ^ a), 23, 32 - 23) + c;
a = LeftRotate(input.Buffer[1] + 0xa4beea44 + a + (b ^ c ^ d), 4, 32 - 4) + b;
d = LeftRotate(input.Buffer[4] + 0x4bdecfa9 + d + (a ^ b ^ c), 11, 32 - 11) + a;
c = LeftRotate(0xf6bb4b60 + c + (d ^ a ^ b), 16, 32 - 16) + d;
b = LeftRotate(0xbebfbc70 + b + (c ^ d ^ a), 23, 32 - 23) + c;
a = LeftRotate(0x289b7ec6 + a + (b ^ c ^ d), 4, 32 - 4) + b;
d = LeftRotate(xUints[0] + 0xeaa127fa + d + (a ^ b ^ c), 11, 32 - 11) + a;
c = LeftRotate(xUints[3] + 0xd4ef3085 + c + (d ^ a ^ b), 16, 32 - 16) + d;
b = LeftRotate(xUints[6] + 0x4881d05 + b + (c ^ d ^ a), 23, 32 - 23) + c;
d = LeftRotate(input.Buffer[0] + 0xeaa127fa + d + (a ^ b ^ c), 11, 32 - 11) + a;
c = LeftRotate(input.Buffer[3] + 0xd4ef3085 + c + (d ^ a ^ b), 16, 32 - 16) + d;
b = LeftRotate(input.Buffer[6] + 0x4881d05 + b + (c ^ d ^ a), 23, 32 - 23) + c;
a = LeftRotate(0xd9d4d039 + a + (b ^ c ^ d), 4, 32 - 4) + b;
d = LeftRotate(0xe6db99e5 + d + (a ^ b ^ c), 11, 32 - 11) + a;
c = LeftRotate(0x1fa27cf8 + c + (d ^ a ^ b), 16, 32 - 16) + d;
b = LeftRotate(xUints[2] + 0xc4ac5665 + b + (c ^ d ^ a), 23, 32 - 23) + c;
b = LeftRotate(input.Buffer[2] + 0xc4ac5665 + b + (c ^ d ^ a), 23, 32 - 23) + c;
a = LeftRotate(xUints[0] + 0xf4292244 + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
a = LeftRotate(input.Buffer[0] + 0xf4292244 + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
d = LeftRotate(0x432aff97 + d + (b ^ (a | ~c)), 10, 32 - 10) + a;
c = LeftRotate(xUints[7] + 0xab9423a7 + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
b = LeftRotate(xUints[5] + 0xfc93a039 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
c = LeftRotate(input.Buffer[7] + 0xab9423a7 + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
b = LeftRotate(input.Buffer[5] + 0xfc93a039 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
a = LeftRotate(0x655b59c3 + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
d = LeftRotate(xUints[3] + 0x8f0ccc92 + d + (b ^ (a | ~c)), 10, 32 - 10) + a;
d = LeftRotate(input.Buffer[3] + 0x8f0ccc92 + d + (b ^ (a | ~c)), 10, 32 - 10) + a;
c = LeftRotate(0xffeff47d + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
b = LeftRotate(xUints[1] + 0x85845dd1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
b = LeftRotate(input.Buffer[1] + 0x85845dd1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
a = LeftRotate(0x6fa87e4f + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
d = LeftRotate(0xfe2ce6e0 + d + (b ^ (a | ~c)), 10, 32 - 10) + a;
c = LeftRotate(xUints[6] + 0xa3014314 + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
c = LeftRotate(input.Buffer[6] + 0xa3014314 + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
b = LeftRotate(0x4e0811a1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
a = LeftRotate(xUints[4] + 0xf7537e82 + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
a = LeftRotate(input.Buffer[4] + 0xf7537e82 + a + (c ^ (b | ~d)), 6, 32 - 6) + b;
d = LeftRotate(0xbd3af235 + d + (b ^ (a | ~c)), 10, 32 - 10) + a;
c = LeftRotate(xUints[2] + 0x2ad7d2bb + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
c = LeftRotate(input.Buffer[2] + 0x2ad7d2bb + c + (a ^ (d | ~b)), 15, 32 - 15) + d;
b = LeftRotate(0xeb86d391 + b + (d ^ (c | ~a)), 21, 32 - 21) + c;
return new[]

@ -1,20 +1,22 @@
namespace WhiteRabbit
{
// Anagram representation optimized for MD5
internal unsafe struct Phrase
{
public fixed byte Buffer[32];
public fixed uint Buffer[8];
public Phrase(byte[][] words, int numberOfCharacters)
{
fixed (byte* bufferPointer = this.Buffer)
fixed (uint* bufferPointer = this.Buffer)
{
var length = numberOfCharacters + words.Length - 1;
byte* end = bufferPointer + length;
byte[] currentWord = words[0];
var j = 0;
var wordIndex = 0;
for (var currentPointer = bufferPointer; currentPointer < end; currentPointer++)
var currentPointer = (byte*)bufferPointer;
byte* lastPointer = currentPointer + length;
for (; currentPointer < lastPointer; currentPointer++)
{
if (j >= currentWord.Length)
{
@ -26,20 +28,21 @@
*currentPointer = currentWord[j];
j++;
}
*currentPointer = 128;
bufferPointer[31] = (byte)length;
bufferPointer[7] = (uint)(length << 3);
}
}
public byte[] GetBytes()
{
fixed(byte* bufferPointer = this.Buffer)
fixed(uint* bufferPointer = this.Buffer)
{
var length = bufferPointer[31];
var length = bufferPointer[7] >> 3;
var result = new byte[length];
for (var i = 0; i < length; i++)
{
result[i] = bufferPointer[i];
result[i] = ((byte*)bufferPointer)[i];
}
return result;

Loading…
Cancel
Save