Further optimizations

feature-optimized-md5
Inga 🏳‍🌈 7 years ago
parent 91f543aa84
commit 760c1b5b13
  1. 10
      README.md
  2. 2
      WhiteRabbit/PrecomputedPermutationsGenerator.cs
  3. 2
      WhiteRabbit/Program.cs
  4. 52
      WhiteRabbit/StringsProcessor.cs
  5. 5
      WhiteRabbit/VectorsConverter.cs
  6. 18
      WhiteRabbit/VectorsProcessor.cs

@ -13,7 +13,7 @@ WhiteRabbit.exe < wordlist
Performance Performance
=========== ===========
Memory usage is minimal (for that kind of task), around 10-30MB. Memory usage is minimal (for that kind of task), around 10-20MB.
It is also somewhat optimized for likely intended phrases, as anagrams consisting of longer words are generated first. It is also somewhat optimized for likely intended phrases, as anagrams consisting of longer words are generated first.
That's why the given hashes are solved much sooner than it takes to check all anagrams. That's why the given hashes are solved much sooner than it takes to check all anagrams.
@ -22,13 +22,13 @@ Anagrams generation is not parallelized, as even single-threaded performance for
Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sandy Bridge @2.8GHz is as follows: Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sandy Bridge @2.8GHz is as follows:
* If only phrases of at most 4 words are allowed, then it takes less than 5.5 seconds to find and check all 7433016 anagrams; all hashes are solved in the first 0.7 seconds. * If only phrases of at most 4 words are allowed, then it takes less than 4.5 seconds to find and check all 7433016 anagrams; all hashes are solved in the first 0.6 seconds.
* If phrases of 5 words are allowed as well, then it takes around 17 minutes to find and check all anagrams; all hashes are solved in the first 25 seconds. Most of the time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further. * If phrases of 5 words are allowed as well, then it takes around 13 minutes to find and check all 1348876896 anagrams; all hashes are solved in the first 20 seconds. Most of the time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further.
* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 30 seconds, "easiest" in 3 minutes, and "hard" in 6 minutes. * If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 20 seconds, "easiest" in 2.5 minutes, and "hard" in 6 minutes.
* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 3 minutes. * If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 2.5 minutes.
Note that all measurements were done on a Release build; Debug build is significantly slower. Note that all measurements were done on a Release build; Debug build is significantly slower.

@ -15,8 +15,6 @@
PermutationsGenerator.HamiltonianPermutations(5).ToArray(), PermutationsGenerator.HamiltonianPermutations(5).ToArray(),
PermutationsGenerator.HamiltonianPermutations(6).ToArray(), PermutationsGenerator.HamiltonianPermutations(6).ToArray(),
PermutationsGenerator.HamiltonianPermutations(7).ToArray(), PermutationsGenerator.HamiltonianPermutations(7).ToArray(),
PermutationsGenerator.HamiltonianPermutations(8).ToArray(),
PermutationsGenerator.HamiltonianPermutations(9).ToArray(),
}; };
public static IEnumerable<PermutationsGenerator.Permutation> HamiltonianPermutations(int n) public static IEnumerable<PermutationsGenerator.Permutation> HamiltonianPermutations(int n)

@ -46,7 +46,7 @@
processor.GeneratePhrases() processor.GeneratePhrases()
.Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) }) .Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) })
.Where(tuple => expectedHashesAsVectors.Contains(tuple.hashVector)) .Where(tuple => Array.IndexOf(expectedHashesAsVectors, tuple.hashVector) >= 0)
.Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) }) .Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) })
.ForAll(phraseInfo => Console.WriteLine($"Found phrase for {phraseInfo.hash}: {phraseInfo.phrase}; time from start is {stopwatch.Elapsed}")); .ForAll(phraseInfo => Console.WriteLine($"Found phrase for {phraseInfo.hash}: {phraseInfo.phrase}; time from start is {stopwatch.Elapsed}"));

@ -16,18 +16,16 @@
// Dictionary of vectors to array of words represented by this vector // Dictionary of vectors to array of words represented by this vector
this.VectorsToWords = words this.VectorsToWords = words
.Distinct(new ByteArrayEqualityComparer())
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null) .Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value }) .Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector) .GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray()); .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).ToArray());
this.VectorsProcessor = new VectorsProcessor( this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value, this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount, maxWordsCount,
this.VectorsToWords.Keys, this.VectorsToWords.Keys);
this.VectorsConverter.GetString);
} }
private VectorsConverter VectorsConverter { get; } private VectorsConverter VectorsConverter { get; }
@ -44,12 +42,10 @@
var sums = this.VectorsProcessor.GenerateSequences(); var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words... // converting sequences of vectors to the sequences of words...
var anagramsWords = sums return sums
.Select(sum => ImmutableStack.Create(sum.Select(vector => this.VectorsToWords[vector]).ToArray())) .Select(ConvertVectorsToWords)
.SelectMany(Flatten) .SelectMany(FlattenWords)
.Select(stack => stack.ToArray()); .Select(ConvertWordsToPhrase);
return anagramsWords.Select(WordsToPhrase);
} }
// Converts e.g. pair of variants [[a, b, c], [d, e]] into all possible pairs: [[a, d], [a, e], [b, d], [b, e], [c, d], [c, e]] // Converts e.g. pair of variants [[a, b, c], [d, e]] into all possible pairs: [[a, d], [a, e], [b, d], [b, e], [c, d], [c, e]]
@ -65,19 +61,41 @@
return Flatten(newStack).SelectMany(remainder => wordVariants.Select(word => remainder.Push(word))); return Flatten(newStack).SelectMany(remainder => wordVariants.Select(word => remainder.Push(word)));
} }
private byte[] WordsToPhrase(byte[][] words) private Tuple<int, ImmutableStack<byte[][]>> ConvertVectorsToWords(Vector<byte>[] vectors)
{ {
var result = new byte[this.NumberOfCharacters + words.Length - 1]; var length = vectors.Length;
var words = new byte[length][][];
for (var i = 0; i < length; i++)
{
words[i] = this.VectorsToWords[vectors[i]];
}
return Tuple.Create(length, ImmutableStack.Create(words));
}
Buffer.BlockCopy(words[0], 0, result, 0, words[0].Length); private IEnumerable<Tuple<int, ImmutableStack<byte[]>>> FlattenWords(Tuple<int, ImmutableStack<byte[][]>> wordVariants)
var position = words[0].Length; {
for (var i = 1; i < words.Length; i++) var item1 = wordVariants.Item1;
return Flatten(wordVariants.Item2).Select(words => Tuple.Create(item1, words));
}
private byte[] ConvertWordsToPhrase(Tuple<int, ImmutableStack<byte[]>> words)
{
var wordCount = words.Item1;
var result = new byte[this.NumberOfCharacters + wordCount - 1];
byte[] currentWord;
var currentStack = words.Item2.Pop(out currentWord);
Buffer.BlockCopy(currentWord, 0, result, 0, currentWord.Length);
var position = currentWord.Length;
while (!currentStack.IsEmpty)
{ {
result[position] = 32; result[position] = 32;
position++; position++;
Buffer.BlockCopy(words[i], 0, result, position, words[i].Length); currentStack = currentStack.Pop(out currentWord);
position += words[i].Length; Buffer.BlockCopy(currentWord, 0, result, position, currentWord.Length);
position += currentWord.Length;
} }
return result; return result;

@ -43,10 +43,5 @@
return new Vector<byte>(arr); return new Vector<byte>(arr);
} }
public string GetString(Vector<byte> vector)
{
return new string(Enumerable.Range(0, this.IntToChar.Length).SelectMany(i => Enumerable.Repeat((char)this.IntToChar[i], vector[i])).ToArray());
}
} }
} }

@ -17,7 +17,7 @@
PrecomputedPermutationsGenerator.HamiltonianPermutations(0); PrecomputedPermutationsGenerator.HamiltonianPermutations(0);
} }
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> dictionary, Func<Vector<byte>, string> vectorToString) public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> dictionary)
{ {
if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > MaxComponentValue)) if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > MaxComponentValue))
{ {
@ -27,7 +27,6 @@
this.Target = target; this.Target = target;
this.MaxVectorsCount = maxVectorsCount; this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString;
this.Dictionary = ImmutableArray.Create(FilterVectors(dictionary, target).ToArray()); this.Dictionary = ImmutableArray.Create(FilterVectors(dictionary, target).ToArray());
} }
@ -37,10 +36,6 @@
private ImmutableArray<VectorInfo> Dictionary { get; } private ImmutableArray<VectorInfo> Dictionary { get; }
private Func<Vector<byte>, string> VectorToString { get; }
private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum // Produces all sequences of vectors with the target sum
public ParallelQuery<Vector<byte>[]> GenerateSequences() public ParallelQuery<Vector<byte>[]> GenerateSequences()
{ {
@ -165,9 +160,16 @@
private static IEnumerable<T[]> GeneratePermutations<T>(T[] original) private static IEnumerable<T[]> GeneratePermutations<T>(T[] original)
{ {
foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(original.Length)) var length = original.Length;
foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(length))
{ {
yield return permutation.Select(i => original[i]).ToArray(); var result = new T[length];
for (var i = 0; i < length; i++)
{
result[i] = original[permutation[i]];
}
yield return result;
} }
} }

Loading…
Cancel
Save