diff --git a/README.md b/README.md index 97e62f8..1cc67f2 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ Usage info WhiteRabbit.exe < wordlist ``` +**Note that this code only works correctly on little-endian x64 systems, due to heavy optimizations of MD5 computation!** + Performance =========== @@ -43,12 +45,11 @@ Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sa * If only phrases of at most 4 words are allowed, then it takes **less than 2 seconds** to find and check all 7433016 anagrams; all hashes are solved in first 0.3 seconds. -* If phrases of 5 words are allowed as well, then it takes around 5 minutes to find and check all 1348876896 anagrams; all hashes are solved in first 7.5 seconds. -Most of time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further. +* If phrases of 5 words are allowed as well, then it takes around 4 minutes to find and check all 1348876896 anagrams; all hashes are solved in first 6.5 seconds. -* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 8 seconds, "easiest" in 48 seconds, and "hard" in less than 2 minutes. +* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 6 seconds, "easiest" in 38 seconds, and "hard" in 1.5 minutes. -* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 46 seconds, "easiest" in less than 6 minutes, and "hard" in around 15 minutes. +* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 39 seconds, "easiest" in less than 5 minutes, and "hard" in 13 minutes. Note that all measurements were done on a Release build; Debug build is significantly slower. 
@@ -57,8 +58,6 @@ For comparison, certain other solutions available on GitHub seem to require 3 ho Conditional compilation symbols =============================== -* Define `BIG_ENDIAN` if you plan to run this on big-endian PC; this will disable certain MD5 optimizations that only produce correct result on little-endian PCs. - * Define `SINGLE_THREADED` to use standard enumerables instead of ParallelEnumerable. * Define `DEBUG`, or build in debug mode, to get the total number of anagrams (not optimized, memory-hogging). diff --git a/WhiteRabbit/MD5Digest.cs b/WhiteRabbit/MD5Digest.cs index 1dcd407..d1c19b5 100644 --- a/WhiteRabbit/MD5Digest.cs +++ b/WhiteRabbit/MD5Digest.cs @@ -1,6 +1,8 @@ namespace WhiteRabbit { using System; + using System.Linq; + using System.Reflection; /** * Code taken from BouncyCastle and optimized for specific constraints (e.g. input is always larger than 4 bytes and smaller than 52 bytes). @@ -11,45 +13,29 @@ */ internal static class MD5Digest { - public static uint[] Compute(byte[] input) + public static unsafe uint[] Compute(Phrase input) { - var length = input.Length; - - var xUints = new uint[8]; // it seems that alignment helps -#if BIG_ENDIAN - xUints[0] = LE_To_UInt32(xBytes, 4 * 0); - xUints[1] = LE_To_UInt32(xBytes, 4 * 1); - xUints[2] = LE_To_UInt32(xBytes, 4 * 2); - xUints[3] = LE_To_UInt32(xBytes, 4 * 3); - xUints[4] = LE_To_UInt32(xBytes, 4 * 4); - xUints[5] = LE_To_UInt32(xBytes, 4 * 5); - xUints[6] = LE_To_UInt32(xBytes, 4 * 6); -#else - Buffer.BlockCopy(input, 0, xUints, 0, length); -#endif - xUints[length >> 2] |= (uint)128 << (8 * (length & 3)); - - var x0 = xUints[0]; - var x1 = xUints[1]; - var x2 = xUints[2]; - var x3 = xUints[3]; - var x4 = xUints[4]; - var x5 = xUints[5]; - var x6 = xUints[6]; - var x14 = (uint)(length << 3); + var xUints = stackalloc uint[8]; // it seems that alignment helps + *(long*)xUints = *(long*)input.Buffer; + *(long*)(xUints + 2) = *(long*)(input.Buffer + 8); + *(long*)(xUints + 4) = 
*(long*)(input.Buffer + 16); + *(long*)(xUints + 6) = *(long*)(input.Buffer + 24); + ((byte*)xUints)[31] = 0; + ((byte*)xUints)[input.Buffer[31]] = 128; + xUints[7] = (uint)(input.Buffer[31] << 3); uint a = 0x67452301; uint b = 0xefcdab89; uint c = 0x98badcfe; uint d = 0x10325476; - a = LeftRotate(x0 + 0xd76aa478 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; - d = LeftRotate(x1 + 0xe8c7b756 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; - c = LeftRotate(x2 + 0x242070db + c + ((d & a) | (~d & b)), 17, 32 - 17) + d; - b = LeftRotate(x3 + 0xc1bdceee + b + ((c & d) | (~c & a)), 22, 32 - 22) + c; - a = LeftRotate(x4 + 0xf57c0faf + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; - d = LeftRotate(x5 + 0x4787c62a + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; - c = LeftRotate(x6 + 0xa8304613 + c + ((d & a) | (~d & b)), 17, 32 - 17) + d; + a = LeftRotate(xUints[0] + 0xd76aa478 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; + d = LeftRotate(xUints[1] + 0xe8c7b756 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; + c = LeftRotate(xUints[2] + 0x242070db + c + ((d & a) | (~d & b)), 17, 32 - 17) + d; + b = LeftRotate(xUints[3] + 0xc1bdceee + b + ((c & d) | (~c & a)), 22, 32 - 22) + c; + a = LeftRotate(xUints[4] + 0xf57c0faf + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; + d = LeftRotate(xUints[5] + 0x4787c62a + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; + c = LeftRotate(xUints[6] + 0xa8304613 + c + ((d & a) | (~d & b)), 17, 32 - 17) + d; b = LeftRotate(0xfd469501 + b + ((c & d) | (~c & a)), 22, 32 - 22) + c; a = LeftRotate(0x698098d8 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; d = LeftRotate(0x8b44f7af + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; @@ -57,58 +43,58 @@ b = LeftRotate(0x895cd7be + b + ((c & d) | (~c & a)), 22, 32 - 22) + c; a = LeftRotate(0x6b901122 + a + ((b & c) | (~b & d)), 7, 32 - 7) + b; d = LeftRotate(0xfd987193 + d + ((a & b) | (~a & c)), 12, 32 - 12) + a; - c = LeftRotate(x14 + 0xa679438e + c + ((d & a) | (~d & b)), 17, 32 - 17) + d; + c = LeftRotate(xUints[7] + 0xa679438e + 
c + ((d & a) | (~d & b)), 17, 32 - 17) + d; b = LeftRotate(0x49b40821 + b + ((c & d) | (~c & a)), 22, 32 - 22) + c; - a = LeftRotate(x1 + 0xf61e2562 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; - d = LeftRotate(x6 + 0xc040b340 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; + a = LeftRotate(xUints[1] + 0xf61e2562 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; + d = LeftRotate(xUints[6] + 0xc040b340 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; c = LeftRotate(0x265e5a51 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d; - b = LeftRotate(x0 + 0xe9b6c7aa + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; - a = LeftRotate(x5 + 0xd62f105d + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; + b = LeftRotate(xUints[0] + 0xe9b6c7aa + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; + a = LeftRotate(xUints[5] + 0xd62f105d + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; d = LeftRotate(0x2441453 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; c = LeftRotate(0xd8a1e681 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d; - b = LeftRotate(x4 + 0xe7d3fbc8 + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; + b = LeftRotate(xUints[4] + 0xe7d3fbc8 + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; a = LeftRotate(0x21e1cde6 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; - d = LeftRotate(x14 + 0xc33707d6 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; - c = LeftRotate(x3 + 0xf4d50d87 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d; + d = LeftRotate(xUints[7] + 0xc33707d6 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; + c = LeftRotate(xUints[3] + 0xf4d50d87 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d; b = LeftRotate(0x455a14ed + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; a = LeftRotate(0xa9e3e905 + a + ((b & d) | (c & ~d)), 5, 32 - 5) + b; - d = LeftRotate(x2 + 0xfcefa3f8 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; + d = LeftRotate(xUints[2] + 0xfcefa3f8 + d + ((a & c) | (b & ~c)), 9, 32 - 9) + a; c = LeftRotate(0x676f02d9 + c + ((d & b) | (a & ~b)), 14, 32 - 14) + d; b = LeftRotate(0x8d2a4c8a + b + ((c & a) | (d & ~a)), 20, 32 - 20) + c; - a = 
LeftRotate(x5 + 0xfffa3942 + a + (b ^ c ^ d), 4, 32 - 4) + b; + a = LeftRotate(xUints[5] + 0xfffa3942 + a + (b ^ c ^ d), 4, 32 - 4) + b; d = LeftRotate(0x8771f681 + d + (a ^ b ^ c), 11, 32 - 11) + a; c = LeftRotate(0x6d9d6122 + c + (d ^ a ^ b), 16, 32 - 16) + d; - b = LeftRotate(x14 + 0xfde5380c + b + (c ^ d ^ a), 23, 32 - 23) + c; - a = LeftRotate(x1 + 0xa4beea44 + a + (b ^ c ^ d), 4, 32 - 4) + b; - d = LeftRotate(x4 + 0x4bdecfa9 + d + (a ^ b ^ c), 11, 32 - 11) + a; + b = LeftRotate(xUints[7] + 0xfde5380c + b + (c ^ d ^ a), 23, 32 - 23) + c; + a = LeftRotate(xUints[1] + 0xa4beea44 + a + (b ^ c ^ d), 4, 32 - 4) + b; + d = LeftRotate(xUints[4] + 0x4bdecfa9 + d + (a ^ b ^ c), 11, 32 - 11) + a; c = LeftRotate(0xf6bb4b60 + c + (d ^ a ^ b), 16, 32 - 16) + d; b = LeftRotate(0xbebfbc70 + b + (c ^ d ^ a), 23, 32 - 23) + c; a = LeftRotate(0x289b7ec6 + a + (b ^ c ^ d), 4, 32 - 4) + b; - d = LeftRotate(x0 + 0xeaa127fa + d + (a ^ b ^ c), 11, 32 - 11) + a; - c = LeftRotate(x3 + 0xd4ef3085 + c + (d ^ a ^ b), 16, 32 - 16) + d; - b = LeftRotate(x6 + 0x4881d05 + b + (c ^ d ^ a), 23, 32 - 23) + c; + d = LeftRotate(xUints[0] + 0xeaa127fa + d + (a ^ b ^ c), 11, 32 - 11) + a; + c = LeftRotate(xUints[3] + 0xd4ef3085 + c + (d ^ a ^ b), 16, 32 - 16) + d; + b = LeftRotate(xUints[6] + 0x4881d05 + b + (c ^ d ^ a), 23, 32 - 23) + c; a = LeftRotate(0xd9d4d039 + a + (b ^ c ^ d), 4, 32 - 4) + b; d = LeftRotate(0xe6db99e5 + d + (a ^ b ^ c), 11, 32 - 11) + a; c = LeftRotate(0x1fa27cf8 + c + (d ^ a ^ b), 16, 32 - 16) + d; - b = LeftRotate(x2 + 0xc4ac5665 + b + (c ^ d ^ a), 23, 32 - 23) + c; + b = LeftRotate(xUints[2] + 0xc4ac5665 + b + (c ^ d ^ a), 23, 32 - 23) + c; - a = LeftRotate(x0 + 0xf4292244 + a + (c ^ (b | ~d)), 6, 32 - 6) + b; + a = LeftRotate(xUints[0] + 0xf4292244 + a + (c ^ (b | ~d)), 6, 32 - 6) + b; d = LeftRotate(0x432aff97 + d + (b ^ (a | ~c)), 10, 32 - 10) + a; - c = LeftRotate(x14 + 0xab9423a7 + c + (a ^ (d | ~b)), 15, 32 - 15) + d; - b = LeftRotate(x5 + 0xfc93a039 + b + (d ^ (c | 
~a)), 21, 32 - 21) + c; + c = LeftRotate(xUints[7] + 0xab9423a7 + c + (a ^ (d | ~b)), 15, 32 - 15) + d; + b = LeftRotate(xUints[5] + 0xfc93a039 + b + (d ^ (c | ~a)), 21, 32 - 21) + c; a = LeftRotate(0x655b59c3 + a + (c ^ (b | ~d)), 6, 32 - 6) + b; - d = LeftRotate(x3 + 0x8f0ccc92 + d + (b ^ (a | ~c)), 10, 32 - 10) + a; + d = LeftRotate(xUints[3] + 0x8f0ccc92 + d + (b ^ (a | ~c)), 10, 32 - 10) + a; c = LeftRotate(0xffeff47d + c + (a ^ (d | ~b)), 15, 32 - 15) + d; - b = LeftRotate(x1 + 0x85845dd1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c; + b = LeftRotate(xUints[1] + 0x85845dd1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c; a = LeftRotate(0x6fa87e4f + a + (c ^ (b | ~d)), 6, 32 - 6) + b; d = LeftRotate(0xfe2ce6e0 + d + (b ^ (a | ~c)), 10, 32 - 10) + a; - c = LeftRotate(x6 + 0xa3014314 + c + (a ^ (d | ~b)), 15, 32 - 15) + d; + c = LeftRotate(xUints[6] + 0xa3014314 + c + (a ^ (d | ~b)), 15, 32 - 15) + d; b = LeftRotate(0x4e0811a1 + b + (d ^ (c | ~a)), 21, 32 - 21) + c; - a = LeftRotate(x4 + 0xf7537e82 + a + (c ^ (b | ~d)), 6, 32 - 6) + b; + a = LeftRotate(xUints[4] + 0xf7537e82 + a + (c ^ (b | ~d)), 6, 32 - 6) + b; d = LeftRotate(0xbd3af235 + d + (b ^ (a | ~c)), 10, 32 - 10) + a; - c = LeftRotate(x2 + 0x2ad7d2bb + c + (a ^ (d | ~b)), 15, 32 - 15) + d; + c = LeftRotate(xUints[2] + 0x2ad7d2bb + c + (a ^ (d | ~b)), 15, 32 - 15) + d; b = LeftRotate(0xeb86d391 + b + (d ^ (c | ~a)), 21, 32 - 21) + c; return new[] @@ -120,15 +106,7 @@ }; } - private static uint LE_To_UInt32(byte[] bs, int off) - { - return (uint)bs[off] - | (uint)bs[off + 1] << 8 - | (uint)bs[off + 2] << 16 - | (uint)bs[off + 3] << 24; - } - - private static uint LeftRotate(uint x, int left, int right) + private static uint LeftRotate(uint x, int left, int right) { return (x << left) | (x >> right); } diff --git a/WhiteRabbit/Phrase.cs b/WhiteRabbit/Phrase.cs new file mode 100644 index 0000000..6ec3ada --- /dev/null +++ b/WhiteRabbit/Phrase.cs @@ -0,0 +1,54 @@ +namespace WhiteRabbit +{ + internal unsafe struct Phrase + 
{ + private const byte SPACE = 32; + + public fixed byte Buffer[32]; + + public Phrase(byte[][] words, int numberOfCharacters) + { + fixed (byte* bufferPointer = this.Buffer) + { + var length = numberOfCharacters + words.Length - 1; + + byte* end = bufferPointer + length; + byte[] currentWord = words[0]; + var j = 0; + var wordIndex = 0; + for (var currentPointer = bufferPointer; currentPointer < end; currentPointer++) + { + if (j >= currentWord.Length) + { + *currentPointer = SPACE; + j = 0; + wordIndex++; + currentWord = words[wordIndex]; + } + else + { + *currentPointer = currentWord[j]; + j++; + } + } + + bufferPointer[31] = (byte)length; + } + } + + public byte[] GetBytes() + { + fixed(byte* bufferPointer = this.Buffer) + { + var length = bufferPointer[31]; + var result = new byte[length]; + for (var i = 0; i < length; i++) + { + result[i] = bufferPointer[i]; + } + + return result; + } + } + } +} diff --git a/WhiteRabbit/Program.cs b/WhiteRabbit/Program.cs index bdb50c9..58cc288 100644 --- a/WhiteRabbit/Program.cs +++ b/WhiteRabbit/Program.cs @@ -47,19 +47,19 @@ .ForAll(phraseBytes => { Debug.Assert( - sourceChars == ToOrderedChars(Encoding.ASCII.GetString(phraseBytes)), - $"StringsProcessor produced incorrect anagram: {Encoding.ASCII.GetString(phraseBytes)}"); + sourceChars == ToOrderedChars(ToString(phraseBytes)), + $"StringsProcessor produced incorrect anagram: {ToString(phraseBytes)}"); var hashVector = ComputeHashVector(phraseBytes); if (Array.IndexOf(expectedHashesAsVectors, hashVector) >= 0) { - var phrase = Encoding.ASCII.GetString(phraseBytes); + var phrase = ToString(phraseBytes); var hash = VectorToHexadecimalString(hashVector); Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}"); } #if DEBUG - anagramsBag.Add(Encoding.ASCII.GetString(phraseBytes)); + anagramsBag.Add(ToString(phraseBytes)); #endif }); @@ -94,7 +94,7 @@ } // Bouncy Castle is used instead of standard .NET methods for performance reasons - 
private static Vector ComputeHashVector(byte[] input) + private static Vector ComputeHashVector(Phrase input) { return new Vector(MD5Digest.Compute(input)); } @@ -113,6 +113,11 @@ return hex.Substring(6, 2) + hex.Substring(4, 2) + hex.Substring(2, 2) + hex.Substring(0, 2); } + private static string ToString(Phrase phrase) + { + return Encoding.ASCII.GetString(phrase.GetBytes()); + } + private static IEnumerable ReadInput() { string line; diff --git a/WhiteRabbit/StringsProcessor.cs b/WhiteRabbit/StringsProcessor.cs index dc50d6c..a3cd74f 100644 --- a/WhiteRabbit/StringsProcessor.cs +++ b/WhiteRabbit/StringsProcessor.cs @@ -46,7 +46,7 @@ #if SINGLE_THREADED public IEnumerable GeneratePhrases() #else - public ParallelQuery GeneratePhrases() + public ParallelQuery GeneratePhrases() #endif { // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum @@ -71,24 +71,9 @@ return words; } - private byte[] ConvertWordsToPhrase(byte[][] words) + private unsafe Phrase ConvertWordsToPhrase(byte[][] words) { - var result = new byte[this.NumberOfCharacters + words.Length - 1]; - - byte[] currentWord = words[0]; - Buffer.BlockCopy(currentWord, 0, result, 0, currentWord.Length); - var position = currentWord.Length; - for (var i = 1; i < words.Length; i++) - { - result[position] = SPACE; - position++; - - currentWord = words[i]; - Buffer.BlockCopy(currentWord, 0, result, position, currentWord.Length); - position += currentWord.Length; - } - - return result; + return new Phrase(words, this.NumberOfCharacters); } } } diff --git a/WhiteRabbit/WhiteRabbit.csproj b/WhiteRabbit/WhiteRabbit.csproj index df68df9..39ab4e4 100644 --- a/WhiteRabbit/WhiteRabbit.csproj +++ b/WhiteRabbit/WhiteRabbit.csproj @@ -12,6 +12,7 @@ v4.6 512 true + true AnyCPU @@ -59,6 +60,7 @@ +