diff --git a/README.md b/README.md index 5cdf82d..1bc9c82 100644 --- a/README.md +++ b/README.md @@ -43,13 +43,13 @@ Anagrams generation is not parallelized, as even single-threaded performance for Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sandy Bridge @2.8GHz is as follows (excluding initialization time of 0.2 seconds): -* If only phrases of at most 4 words are allowed, then it takes **1.1 seconds** to find and check all 7,433,016 anagrams; **all hashes are solved in first 0.2 seconds**. +* If only phrases of at most 4 words are allowed, then it takes **0.9 seconds** to find and check all 7,433,016 anagrams; **all hashes are solved in the first 0.15 seconds**. -* If phrases of 5 words are allowed as well, then it takes 2:45 minutes to find and check all 1,348,876,896 anagrams; all hashes are solved in first 4 seconds. +* If phrases of 5 words are allowed as well, then it takes around 100 seconds to find and check all 1,348,876,896 anagrams; all hashes are solved in the first 2.5 seconds. -* If phrases of 6 words are allowed as well, then it takes less than 2 hours to find and check all 58,837,302,096 anagrams; "more difficult" hash is solved in 3.5 seconds, "easiest" in 21 seconds, and "hard" in 54 seconds. +* If phrases of 6 words are allowed as well, then it takes around 75 minutes to find and check all 58,837,302,096 anagrams; "more difficult" hash is solved in 2.5 seconds, "easiest" in 14 seconds, and "hard" in 35 seconds. -* If phrases of 7 words are allowed as well, then it takes 75 seconds to count all 1,108,328,708,976 anagrams, and around 40 hours to find and check all these anagrams; "more difficult" hash is solved in 20 seconds, "easiest" in less than 2.5 minutes, and "hard" in 6:45 minutes. 
+* If phrases of 7 words are allowed as well, then it takes 75 seconds to count all 1,108,328,708,976 anagrams, and around 40 hours (speculatively) to find and check all these anagrams; "more difficult" hash is solved in 13 seconds, "easiest" in 1.5 minutes, and "hard" in 4.5 minutes. Note that all measurements were done on a Release build; Debug build is significantly slower. diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp index c7cac52..bf810bc 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp @@ -7,12 +7,12 @@ void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned int * input, unsigned int* output) { - md5(input + 0 * 8, output + 0 * 4); - md5(input + 1 * 8, output + 1 * 4); - md5(input + 2 * 8, output + 2 * 4); - md5(input + 3 * 8, output + 3 * 4); - md5(input + 4 * 8, output + 4 * 4); - md5(input + 5 * 8, output + 5 * 4); - md5(input + 6 * 8, output + 6 * 4); - md5(input + 7 * 8, output + 7 * 4); + md5(input + 0 * 8, output + 0); + md5(input + 1 * 8, output + 1); + md5(input + 2 * 8, output + 2); + md5(input + 3 * 8, output + 3); + md5(input + 4 * 8, output + 4); + md5(input + 5 * 8, output + 5); + md5(input + 6 * 8, output + 6); + md5(input + 7 * 8, output + 7); } diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index b766526..5ff1d4f 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -80,7 +80,6 @@ inline MD5Word Step4(MD5Word a, MD5Word b, MD5Word c, MD5Word d, MD5Word k) void md5(unsigned int * input, unsigned int * output) { - MD5Word a = 0x67452301; MD5Word b = 0xefcdab89; MD5Word c = 0x98badcfe; @@ -150,13 +149,7 @@ void md5(unsigned int * input, unsigned int * output) c = Step4<15>(c, d, a, b, 0xa3014314, input[6]); b = Step4<21>(b, 
c, d, a, 0x4e0811a1); a = Step4<6>(a, b, c, d, 0xf7537e82, input[4]); - d = Step4<10>(d, a, b, c, 0xbd3af235); - c = Step4<15>(c, d, a, b, 0x2ad7d2bb, input[2]); - b = Step4<21>(b, c, d, a, 0xeb86d391); output[0] = 0x67452301 + a; - output[1] = 0xefcdab89 + b; - output[2] = 0x98badcfe + c; - output[3] = 0x10325476 + d; } #pragma managed diff --git a/dotnet/WhiteRabbit/MD5Digest.cs b/dotnet/WhiteRabbit/MD5Digest.cs index df27888..2ff6e02 100644 --- a/dotnet/WhiteRabbit/MD5Digest.cs +++ b/dotnet/WhiteRabbit/MD5Digest.cs @@ -4,30 +4,18 @@ using WhiteRabbitUnmanagedBridge; namespace WhiteRabbit { - /** - * Code taken from BouncyCastle and optimized for specific constraints (e.g. input is always larger than 4 bytes and smaller than 52 bytes). - * Further optimization: input could be assumed to be smaller than 27 bytes (original phrase contains 18 letters, so that allows anagrams of 9 words) - * base implementation of MD4 family style digest as outlined in - * "Handbook of Applied Cryptography", pages 344 - 347. - * implementation of MD5 as outlined in "Handbook of Applied Cryptography", pages 346 - 347. 
- */ internal static class MD5Digest { + // It only returns first component of MD5 hash [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe Vector[] Compute(PhraseSet input) + public static unsafe uint[] Compute(PhraseSet input) { - var rawResult = new uint[4 * Constants.PhrasesPerSet]; - fixed (uint* resultPointer = rawResult) + var result = new uint[Constants.PhrasesPerSet]; + fixed (uint* resultPointer = result) { MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer); } - var result = new Vector[Constants.PhrasesPerSet]; - for (var i = 0; i < Constants.PhrasesPerSet; i++) - { - result[i] = new Vector(rawResult, 4 * i); - } - return result; } } diff --git a/dotnet/WhiteRabbit/Program.cs b/dotnet/WhiteRabbit/Program.cs index fb9f5d7..bc8d603 100644 --- a/dotnet/WhiteRabbit/Program.cs +++ b/dotnet/WhiteRabbit/Program.cs @@ -7,6 +7,7 @@ using System.Diagnostics; using System.Linq; using System.Numerics; + using System.Security.Cryptography; using System.Text; /// @@ -49,6 +50,8 @@ .Select(hash => new Vector(HexadecimalStringToUnsignedIntArray(hash))) .ToArray(); + var expectedHashesFirstComponents = expectedHashesAsVectors.Select(vector => vector[0]).ToArray(); + var processor = new StringsProcessor( Encoding.ASCII.GetBytes(sourcePhrase), maxWordsInPhrase, @@ -64,19 +67,19 @@ stopwatch.Restart(); processor.GeneratePhrases() - .ForAll(phraseBytes => + .ForAll(phraseSet => { - var hashVectors = MD5Digest.Compute(phraseBytes); + var hashesFirstComponents = MD5Digest.Compute(phraseSet); for (var i = 0; i < Constants.PhrasesPerSet; i++) { Debug.Assert( - sourceChars == ToOrderedChars(ToString(phraseBytes, i)), - $"StringsProcessor produced incorrect anagram: {ToString(phraseBytes, i)}"); + sourceChars == ToOrderedChars(ToString(phraseSet, i)), + $"StringsProcessor produced incorrect anagram: {ToString(phraseSet, i)}"); - if (Array.IndexOf(expectedHashesAsVectors, hashVectors[i]) >= 0) + if (Array.IndexOf(expectedHashesFirstComponents, 
hashesFirstComponents[i]) >= 0) { - var phrase = ToString(phraseBytes, i); - var hash = VectorToHexadecimalString(hashVectors[i]); + var phrase = ToString(phraseSet, i); + var hash = ComputeFullMD5(phrase); Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}"); } } @@ -96,13 +99,15 @@ .ToArray(); } - private static string VectorToHexadecimalString(Vector hash) + // We can afford to spend some time here; this code will only run for matched phrases (and for one in several billion non-matched) + private static string ComputeFullMD5(string phrase) { - var components = Enumerable.Range(0, 4) - .Select(i => hash[i].ToString("x8")) - .Select(ChangeEndianness); - - return string.Concat(components); + var phraseBytes = Encoding.ASCII.GetBytes(phrase); + using (var hashAlgorithm = new MD5CryptoServiceProvider()) + { + var resultBytes = hashAlgorithm.ComputeHash(phraseBytes); + return string.Concat(resultBytes.Select(b => b.ToString("x2"))); + } } private static string ChangeEndianness(string hex)