diff --git a/README.md b/README.md index 3349d8c..6020463 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,12 @@ WhiteRabbit.exe < wordlist Performance =========== +Memory usage is minimal (for this kind of task), around 10-30MB. + This solution is partially optimized for multi-threading. It is also somewhat optimized for likely intended phrases, as anagrams consisting of longer words are generated first. +That's why the given hashes are solved much sooner than it takes to check all anagrams. Single-threaded performance on Sandy Bridge @2.8GHz is as follows: @@ -29,6 +32,10 @@ Anagrams generation is not parallelized, as even single-threaded performance for Multi-threaded performance is as follows: -* If only phrases of at most 4 words are allowed, then it takes 20 seconds to find and check all anagrams; all hashes are solved in first 1.5 seconds +* If only phrases of at most 4 words are allowed, then it takes 20 seconds to find and check all anagrams; all hashes are solved in first 1 second. * If phrases of 5 words are allowed as well, then it takes around half an hour to find and check all anagrams; all hashes are solved in first 25 seconds. Around 50% of time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further. + +* If phrases of 6 words are allowed as well, then the "more difficult" hash is solved in 50 seconds, the "easiest" one in 3.5 minutes, and the "hard" one in 6 minutes. + +* If phrases of 7 words are allowed as well, then the "more difficult" hash is solved in 6 minutes. 
diff --git a/WhiteRabbit/Program.cs b/WhiteRabbit/Program.cs index 526555a..aa07566 100644 --- a/WhiteRabbit/Program.cs +++ b/WhiteRabbit/Program.cs @@ -25,7 +25,6 @@ var stopwatch = new Stopwatch(); stopwatch.Start(); - var processor = new StringsProcessor(Encoding.ASCII.GetBytes(SourcePhrase), MaxWordsInPhrase); var expectedHashes = new[] { "e4820b45d2277f3844eac66c903e84be", @@ -35,7 +34,11 @@ var expectedHashesAsVectors = expectedHashes.Select(hash => new Vector(HexadecimalStringToByteArray(hash))).ToArray(); - processor.GeneratePhrases(ReadInput()) + var processor = new StringsProcessor(Encoding.ASCII.GetBytes(SourcePhrase), MaxWordsInPhrase, ReadInput()); + + Console.WriteLine($"Initialization complete; time spent: {stopwatch.Elapsed}"); + + processor.GeneratePhrases() .Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) }) .Where(tuple => expectedHashesAsVectors.Contains(tuple.hashVector)) .Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) }) @@ -54,6 +57,7 @@ .ToArray(); } + // Bouncy Castle is used instead of standard .NET methods for performance reasons private static Vector ComputeHashVector(byte[] input) { var digest = new MD5Digest(); @@ -75,8 +79,6 @@ { yield return Encoding.ASCII.GetBytes(line); } - - //System.Threading.Thread.Sleep(10000); } } } diff --git a/WhiteRabbit/StringsProcessor.cs b/WhiteRabbit/StringsProcessor.cs index 474e614..884c8bf 100644 --- a/WhiteRabbit/StringsProcessor.cs +++ b/WhiteRabbit/StringsProcessor.cs @@ -4,40 +4,45 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Linq; + using System.Numerics; internal class StringsProcessor { - public StringsProcessor(byte[] sourceString, int maxWordsCount) + public StringsProcessor(byte[] sourceString, int maxWordsCount, IEnumerable words) { var filteredSource = sourceString.Where(ch => ch != 32).ToArray(); this.VectorsConverter = new 
VectorsConverter(filteredSource); + + // Dictionary of vectors to array of words represented by this vector + this.VectorsToWords = words + .Distinct(new ByteArrayEqualityComparer()) + .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) + .Where(tuple => tuple.vector != null) + .Select(tuple => new { tuple.word, vector = tuple.vector.Value }) + .GroupBy(tuple => tuple.vector) + .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray()); + this.VectorsProcessor = new VectorsProcessor( this.VectorsConverter.GetVector(filteredSource).Value, maxWordsCount, + this.VectorsToWords.Keys, this.VectorsConverter.GetString); } private VectorsConverter VectorsConverter { get; } + private Dictionary, byte[][]> VectorsToWords { get; } + private VectorsProcessor VectorsProcessor { get; } - public ParallelQuery GeneratePhrases(IEnumerable words) + public ParallelQuery GeneratePhrases() { - // Dictionary of vectors to array of words represented by this vector - var formattedWords = words - .Distinct(new ByteArrayEqualityComparer()) - .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) - .Where(tuple => tuple.vector != null) - .Select(tuple => new { tuple.word, vector = tuple.vector.Value }) - .GroupBy(tuple => tuple.vector) - .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray()); - // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum - var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys); + var sums = this.VectorsProcessor.GenerateSequences(); // converting sequences of vectors to the sequences of words... 
var anagramsWords = sums - .Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray())) + .Select(sum => ImmutableStack.Create(sum.Select(vector => this.VectorsToWords[vector]).ToArray())) .SelectMany(this.Flatten) .Select(stack => stack.ToArray()); diff --git a/WhiteRabbit/VectorsProcessor.cs b/WhiteRabbit/VectorsProcessor.cs index f7f738b..4a7eaa2 100644 --- a/WhiteRabbit/VectorsProcessor.cs +++ b/WhiteRabbit/VectorsProcessor.cs @@ -9,11 +9,18 @@ internal class VectorsProcessor { - public VectorsProcessor(Vector target, int maxVectorsCount, Func, string> vectorToString) + // Ensure that permutations are precomputed prior to main run, so that processing times will be correct + static VectorsProcessor() + { + PrecomputedPermutationsGenerator.HamiltonianPermutations(0); + } + + public VectorsProcessor(Vector target, int maxVectorsCount, IEnumerable> dictionary, Func, string> vectorToString) { this.Target = target; this.MaxVectorsCount = maxVectorsCount; this.VectorToString = vectorToString; + this.Dictionary = ImmutableStack.Create>(FilterVectors(dictionary, target).ToArray()); } /// @@ -28,16 +35,16 @@ private int MaxVectorsCount { get; } + private ImmutableStack> Dictionary { get; } + private Func, string> VectorToString { get; } private long Iterations { get; set; } = 0; // Produces all sequences of vectors with the target sum - public ParallelQuery[]> GenerateSequences(IEnumerable> vectors) + public ParallelQuery[]> GenerateSequences() { - var filteredVectors = this.FilterVectors(vectors); - var dictionary = ImmutableStack.Create(filteredVectors.ToArray()); - var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create>(), dictionary) + var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create>(), this.Dictionary) .AsParallel(); var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations); @@ -50,22 +57,22 @@ // And total number of quintuplets becomes 
reasonable 1412M. // Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s"). // This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1]. - private int GetVectorWeight(Vector vector) + private static int GetVectorWeight(Vector vector, Vector target) { var weight = 0; - for (var i = 0; this.Target[i] != 0; i++) + for (var i = 0; target[i] != 0; i++) { - weight += (720 * vector[i]) / this.Target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6) + weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6) } return weight; } - private IEnumerable> FilterVectors(IEnumerable> vectors) + private static IEnumerable> FilterVectors(IEnumerable> vectors, Vector target) { return vectors - .Where(vector => ((this.Target - vector) & Negative) == Vector.Zero) - .OrderBy(GetVectorWeight); + .Where(vector => ((target - vector) & Negative) == Vector.Zero) + .OrderBy(vector => GetVectorWeight(vector, target)); } [Conditional("DEBUG")]