Microoptimization: one part of MD5 is enough for search

unmanaged
Inga 🏳‍🌈 8 years ago
parent 4702fba26b
commit bcd6a1d053
  1. 8
      README.md
  2. 16
      dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp
  3. 7
      dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp
  4. 20
      dotnet/WhiteRabbit/MD5Digest.cs
  5. 31
      dotnet/WhiteRabbit/Program.cs

@ -43,13 +43,13 @@ Anagrams generation is not parallelized, as even single-threaded performance for
Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sandy Bridge @2.8GHz is as follows (excluding initialization time of 0.2 seconds): Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sandy Bridge @2.8GHz is as follows (excluding initialization time of 0.2 seconds):
* If only phrases of at most 4 words are allowed, then it takes **1.1 seconds** to find and check all 7,433,016 anagrams; **all hashes are solved in the first 0.2 seconds**. * If only phrases of at most 4 words are allowed, then it takes **0.9 seconds** to find and check all 7,433,016 anagrams; **all hashes are solved in the first 0.15 seconds**.
* If phrases of 5 words are allowed as well, then it takes 2:45 minutes to find and check all 1,348,876,896 anagrams; all hashes are solved in the first 4 seconds. * If phrases of 5 words are allowed as well, then it takes around 100 seconds to find and check all 1,348,876,896 anagrams; all hashes are solved in the first 2.5 seconds.
* If phrases of 6 words are allowed as well, then it takes less than 2 hours to find and check all 58,837,302,096 anagrams; "more difficult" hash is solved in 3.5 seconds, "easiest" in 21 seconds, and "hard" in 54 seconds. * If phrases of 6 words are allowed as well, then it takes around 75 minutes to find and check all 58,837,302,096 anagrams; "more difficult" hash is solved in 2.5 seconds, "easiest" in 14 seconds, and "hard" in 35 seconds.
* If phrases of 7 words are allowed as well, then it takes 75 seconds to count all 1,108,328,708,976 anagrams, and around 40 hours to find and check all these anagrams; "more difficult" hash is solved in 20 seconds, "easiest" in less than 2.5 minutes, and "hard" in 6:45 minutes. * If phrases of 7 words are allowed as well, then it takes 75 seconds to count all 1,108,328,708,976 anagrams, and around 40 hours (speculatively) to find and check all these anagrams; "more difficult" hash is solved in 13 seconds, "easiest" in 1.5 minutes, and "hard" in 4.5 minutes.
Note that all measurements were done on a Release build; Debug build is significantly slower. Note that all measurements were done on a Release build; Debug build is significantly slower.

@ -7,12 +7,12 @@
void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned int * input, unsigned int* output) void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned int * input, unsigned int* output)
{ {
md5(input + 0 * 8, output + 0 * 4); md5(input + 0 * 8, output + 0);
md5(input + 1 * 8, output + 1 * 4); md5(input + 1 * 8, output + 1);
md5(input + 2 * 8, output + 2 * 4); md5(input + 2 * 8, output + 2);
md5(input + 3 * 8, output + 3 * 4); md5(input + 3 * 8, output + 3);
md5(input + 4 * 8, output + 4 * 4); md5(input + 4 * 8, output + 4);
md5(input + 5 * 8, output + 5 * 4); md5(input + 5 * 8, output + 5);
md5(input + 6 * 8, output + 6 * 4); md5(input + 6 * 8, output + 6);
md5(input + 7 * 8, output + 7 * 4); md5(input + 7 * 8, output + 7);
} }

@ -80,7 +80,6 @@ inline MD5Word Step4(MD5Word a, MD5Word b, MD5Word c, MD5Word d, MD5Word k)
void md5(unsigned int * input, unsigned int * output) void md5(unsigned int * input, unsigned int * output)
{ {
MD5Word a = 0x67452301; MD5Word a = 0x67452301;
MD5Word b = 0xefcdab89; MD5Word b = 0xefcdab89;
MD5Word c = 0x98badcfe; MD5Word c = 0x98badcfe;
@ -150,13 +149,7 @@ void md5(unsigned int * input, unsigned int * output)
c = Step4<15>(c, d, a, b, 0xa3014314, input[6]); c = Step4<15>(c, d, a, b, 0xa3014314, input[6]);
b = Step4<21>(b, c, d, a, 0x4e0811a1); b = Step4<21>(b, c, d, a, 0x4e0811a1);
a = Step4<6>(a, b, c, d, 0xf7537e82, input[4]); a = Step4<6>(a, b, c, d, 0xf7537e82, input[4]);
d = Step4<10>(d, a, b, c, 0xbd3af235);
c = Step4<15>(c, d, a, b, 0x2ad7d2bb, input[2]);
b = Step4<21>(b, c, d, a, 0xeb86d391);
output[0] = 0x67452301 + a; output[0] = 0x67452301 + a;
output[1] = 0xefcdab89 + b;
output[2] = 0x98badcfe + c;
output[3] = 0x10325476 + d;
} }
#pragma managed #pragma managed

@ -4,30 +4,18 @@ using WhiteRabbitUnmanagedBridge;
namespace WhiteRabbit namespace WhiteRabbit
{ {
/**
* Code taken from BouncyCastle and optimized for specific constraints (e.g. input is always larger than 4 bytes and smaller than 52 bytes).
* Further optimization: input could be assumed to be smaller than 27 bytes (original phrase contains 18 letters, so that allows anagrams of 9 words)
* base implementation of MD4 family style digest as outlined in
* "Handbook of Applied Cryptography", pages 344 - 347.
* implementation of MD5 as outlined in "Handbook of Applied Cryptography", pages 346 - 347.
*/
internal static class MD5Digest internal static class MD5Digest
{ {
// Returns only the first 32-bit component of the MD5 hash for each phrase in the set
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector<uint>[] Compute(PhraseSet input) public static unsafe uint[] Compute(PhraseSet input)
{ {
var rawResult = new uint[4 * Constants.PhrasesPerSet]; var result = new uint[Constants.PhrasesPerSet];
fixed (uint* resultPointer = rawResult) fixed (uint* resultPointer = result)
{ {
MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer); MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer);
} }
var result = new Vector<uint>[Constants.PhrasesPerSet];
for (var i = 0; i < Constants.PhrasesPerSet; i++)
{
result[i] = new Vector<uint>(rawResult, 4 * i);
}
return result; return result;
} }
} }

@ -7,6 +7,7 @@
using System.Diagnostics; using System.Diagnostics;
using System.Linq; using System.Linq;
using System.Numerics; using System.Numerics;
using System.Security.Cryptography;
using System.Text; using System.Text;
/// <summary> /// <summary>
@ -49,6 +50,8 @@
.Select(hash => new Vector<uint>(HexadecimalStringToUnsignedIntArray(hash))) .Select(hash => new Vector<uint>(HexadecimalStringToUnsignedIntArray(hash)))
.ToArray(); .ToArray();
var expectedHashesFirstComponents = expectedHashesAsVectors.Select(vector => vector[0]).ToArray();
var processor = new StringsProcessor( var processor = new StringsProcessor(
Encoding.ASCII.GetBytes(sourcePhrase), Encoding.ASCII.GetBytes(sourcePhrase),
maxWordsInPhrase, maxWordsInPhrase,
@ -64,19 +67,19 @@
stopwatch.Restart(); stopwatch.Restart();
processor.GeneratePhrases() processor.GeneratePhrases()
.ForAll(phraseBytes => .ForAll(phraseSet =>
{ {
var hashVectors = MD5Digest.Compute(phraseBytes); var hashesFirstComponents = MD5Digest.Compute(phraseSet);
for (var i = 0; i < Constants.PhrasesPerSet; i++) for (var i = 0; i < Constants.PhrasesPerSet; i++)
{ {
Debug.Assert( Debug.Assert(
sourceChars == ToOrderedChars(ToString(phraseBytes, i)), sourceChars == ToOrderedChars(ToString(phraseSet, i)),
$"StringsProcessor produced incorrect anagram: {ToString(phraseBytes, i)}"); $"StringsProcessor produced incorrect anagram: {ToString(phraseSet, i)}");
if (Array.IndexOf(expectedHashesAsVectors, hashVectors[i]) >= 0) if (Array.IndexOf(expectedHashesFirstComponents, hashesFirstComponents[i]) >= 0)
{ {
var phrase = ToString(phraseBytes, i); var phrase = ToString(phraseSet, i);
var hash = VectorToHexadecimalString(hashVectors[i]); var hash = ComputeFullMD5(phrase);
Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}"); Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
} }
} }
@ -96,13 +99,15 @@
.ToArray(); .ToArray();
} }
private static string VectorToHexadecimalString(Vector<uint> hash) // This may be slow: it only runs for phrases whose first hash component matches (true matches, plus about one in several billion non-matching phrases)
private static string ComputeFullMD5(string phrase)
{ {
var components = Enumerable.Range(0, 4) var phraseBytes = Encoding.ASCII.GetBytes(phrase);
.Select(i => hash[i].ToString("x8")) using (var hashAlgorithm = new MD5CryptoServiceProvider())
.Select(ChangeEndianness); {
var resultBytes = hashAlgorithm.ComputeHash(phraseBytes);
return string.Concat(components); return string.Concat(resultBytes.Select(b => b.ToString("x2")));
}
} }
private static string ChangeEndianness(string hex) private static string ChangeEndianness(string hex)

Loading…
Cancel
Save