Code cleanup; additional information

feature-optimized-md5
Inga 🏳‍🌈 7 years ago
parent c66ab408ff
commit 937ce45af2
  1. 9
      README.md
  2. 10
      WhiteRabbit/Program.cs
  3. 31
      WhiteRabbit/StringsProcessor.cs
  4. 29
      WhiteRabbit/VectorsProcessor.cs

@ -13,9 +13,12 @@ WhiteRabbit.exe < wordlist
Performance
===========
Memory usage is minimal (for that kind of task), around 10-30MB.
This solution is partially optimized for multi-threading.
It is also somewhat optimized for likely intended phrases, as anagrams consisting of longer words are generated first.
That's why the given hashes are solved much sooner than it takes to check all anagrams.
Single-threaded performance on Sandy Bridge @2.8GHz is as follows:
@ -29,6 +32,10 @@ Anagrams generation is not parallelized, as even single-threaded performance for
Multi-threaded performance is as follows:
* If only phrases of at most 4 words are allowed, then it takes 20 seconds to find and check all anagrams; all hashes are solved in first 1.5 seconds
* If only phrases of at most 4 words are allowed, then it takes 20 seconds to find and check all anagrams; all hashes are solved in first 1 second.
* If phrases of 5 words are allowed as well, then it takes around half an hour to find and check all anagrams; all hashes are solved in first 25 seconds. Around 50% of time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further.
* If phrases of 6 words are allowed as well, then "more difficult" hash is solved in 50 seconds, "easiest" in 3.5 minutes, and "hard" in 6 minutes.
* If phrases of 7 words are allowed as well, then "more difficult" hash is solved in 6 minutes.

@ -25,7 +25,6 @@
var stopwatch = new Stopwatch();
stopwatch.Start();
var processor = new StringsProcessor(Encoding.ASCII.GetBytes(SourcePhrase), MaxWordsInPhrase);
var expectedHashes = new[]
{
"e4820b45d2277f3844eac66c903e84be",
@ -35,7 +34,11 @@
var expectedHashesAsVectors = expectedHashes.Select(hash => new Vector<byte>(HexadecimalStringToByteArray(hash))).ToArray();
processor.GeneratePhrases(ReadInput())
var processor = new StringsProcessor(Encoding.ASCII.GetBytes(SourcePhrase), MaxWordsInPhrase, ReadInput());
Console.WriteLine($"Initialization complete; time spent: {stopwatch.Elapsed}");
processor.GeneratePhrases()
.Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) })
.Where(tuple => expectedHashesAsVectors.Contains(tuple.hashVector))
.Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) })
@ -54,6 +57,7 @@
.ToArray();
}
// Bouncy Castle is used instead of standard .NET methods for performance reasons
private static Vector<byte> ComputeHashVector(byte[] input)
{
var digest = new MD5Digest();
@ -75,8 +79,6 @@
{
yield return Encoding.ASCII.GetBytes(line);
}
//System.Threading.Thread.Sleep(10000);
}
}
}

@ -4,40 +4,45 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Numerics;
internal class StringsProcessor
{
public StringsProcessor(byte[] sourceString, int maxWordsCount)
public StringsProcessor(byte[] sourceString, int maxWordsCount, IEnumerable<byte[]> words)
{
var filteredSource = sourceString.Where(ch => ch != 32).ToArray();
this.VectorsConverter = new VectorsConverter(filteredSource);
// Dictionary of vectors to array of words represented by this vector
this.VectorsToWords = words
.Distinct(new ByteArrayEqualityComparer())
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount,
this.VectorsToWords.Keys,
this.VectorsConverter.GetString);
}
private VectorsConverter VectorsConverter { get; }
private Dictionary<Vector<byte>, byte[][]> VectorsToWords { get; }
private VectorsProcessor VectorsProcessor { get; }
public ParallelQuery<byte[]> GeneratePhrases(IEnumerable<byte[]> words)
public ParallelQuery<byte[]> GeneratePhrases()
{
// Dictionary of vectors to array of words represented by this vector
var formattedWords = words
.Distinct(new ByteArrayEqualityComparer())
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys);
var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words...
var anagramsWords = sums
.Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray()))
.Select(sum => ImmutableStack.Create(sum.Select(vector => this.VectorsToWords[vector]).ToArray()))
.SelectMany(this.Flatten)
.Select(stack => stack.ToArray());

@ -9,11 +9,18 @@
internal class VectorsProcessor
{
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, Func<Vector<byte>, string> vectorToString)
// Ensure that permutations are precomputed prior to main run, so that processing times will be correct
static VectorsProcessor()
{
PrecomputedPermutationsGenerator.HamiltonianPermutations(0);
}
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> dictionary, Func<Vector<byte>, string> vectorToString)
{
this.Target = target;
this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString;
this.Dictionary = ImmutableStack.Create<Vector<byte>>(FilterVectors(dictionary, target).ToArray());
}
/// <summary>
@ -28,16 +35,16 @@
private int MaxVectorsCount { get; }
private ImmutableStack<Vector<byte>> Dictionary { get; }
private Func<Vector<byte>, string> VectorToString { get; }
private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum
public ParallelQuery<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors)
public ParallelQuery<Vector<byte>[]> GenerateSequences()
{
var filteredVectors = this.FilterVectors(vectors);
var dictionary = ImmutableStack.Create(filteredVectors.ToArray());
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary)
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), this.Dictionary)
.AsParallel();
var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations);
@ -50,22 +57,22 @@
// And total number of quintuplets becomes reasonable 1412M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
private int GetVectorWeight(Vector<byte> vector)
private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
{
var weight = 0;
for (var i = 0; this.Target[i] != 0; i++)
for (var i = 0; target[i] != 0; i++)
{
weight += (720 * vector[i]) / this.Target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
}
return weight;
}
private IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors)
private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target)
{
return vectors
.Where(vector => ((this.Target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(GetVectorWeight);
.Where(vector => ((target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(vector => GetVectorWeight(vector, target));
}
[Conditional("DEBUG")]

Loading…
Cancel
Save