Switched to Parallel LINQ

plinq
Inga 🏳‍🌈 8 years ago
parent 4964fb7673
commit f9151c329d
  1. 14
      README.md
  2. 10
      WhiteRabbit/Program.cs
  3. 2
      WhiteRabbit/StringsProcessor.cs
  4. 5
      WhiteRabbit/VectorsProcessor.cs

@ -13,12 +13,20 @@ WhiteRabbit.exe < wordlist
Performance Performance
=========== ===========
This solution is not optimized for multi-threading. This solution is partially optimized for multi-threading.
Nevertheless, the performance on Sandy Bridge @2.8GHz is as follows: Single-threaded performance on Sandy Bridge @2.8GHz is as follows:
* If only phrases of at most 3 words are allowed, then it takes 2.5 seconds to find and check all anagrams; all relevant hashes are solved in first 0.4 seconds; * If only phrases of at most 3 words are allowed, then it takes 2.5 seconds to find and check all anagrams; all relevant hashes are solved in first 0.4 seconds;
* If phrases of 4 words are allowed as well, then it takes 40 seconds to find and check all anagrams; all hashes are solved in first 3 seconds; * If phrases of 4 words are allowed as well, then it takes 40 seconds to find and check all anagrams; all hashes are solved in first 3 seconds;
For comparison, certain other solutions available on GitHub seem to require 3 hours to find all 3-word anagrams (i.e. this solution is faster by a factor of 4000 in 3-word case). For comparison, certain other solutions available on GitHub seem to require 3 hours to find all 3-word anagrams (i.e. this solution is faster by a factor of 4000 in 3-word case).
Anagram generation is not parallelized, as even single-threaded performance for 4-word anagrams is high enough; and 5-word (or longer) anagrams are frequent enough that most of the time is spent on computing hashes, with full CPU load.
Multi-threaded performance is as follows:
* If only phrases of at most 4 words are allowed, then it takes 20 seconds to find and check all anagrams; all hashes are solved in the first 1.5 seconds;
* If phrases of 5 words are allowed as well, then all hashes are solved in the first 29 seconds. Around 50% of the time is spent on MD5 computations for correct anagrams, so there is not a lot to optimize further.

@ -35,15 +35,11 @@
var expectedHashesAsVectors = expectedHashes.Select(hash => new Vector<byte>(HexadecimalStringToByteArray(hash))).ToArray(); var expectedHashesAsVectors = expectedHashes.Select(hash => new Vector<byte>(HexadecimalStringToByteArray(hash))).ToArray();
var result = processor.GeneratePhrases(ReadInput()) processor.GeneratePhrases(ReadInput())
.Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) }) .Select(phraseBytes => new { phraseBytes, hashVector = ComputeHashVector(phraseBytes) })
.Where(tuple => expectedHashesAsVectors.Contains(tuple.hashVector)) .Where(tuple => expectedHashesAsVectors.Contains(tuple.hashVector))
.Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) }); .Select(tuple => new { phrase = Encoding.ASCII.GetString(tuple.phraseBytes), hash = VectorToHexadecimalString(tuple.hashVector) })
.ForAll(phraseInfo => Console.WriteLine($"Found phrase for {phraseInfo.hash}: {phraseInfo.phrase} (spent {stopwatch.Elapsed})"));
foreach (var phraseInfo in result)
{
Console.WriteLine($"Found phrase for {phraseInfo.hash}: {phraseInfo.phrase} (spent {stopwatch.Elapsed})");
}
stopwatch.Stop(); stopwatch.Stop();
Console.WriteLine($"Total time spent: {stopwatch.Elapsed}"); Console.WriteLine($"Total time spent: {stopwatch.Elapsed}");

@ -21,7 +21,7 @@
private VectorsProcessor VectorsProcessor { get; } private VectorsProcessor VectorsProcessor { get; }
public IEnumerable<byte[]> GeneratePhrases(IEnumerable<byte[]> words) public ParallelQuery<byte[]> GeneratePhrases(IEnumerable<byte[]> words)
{ {
// Dictionary of vectors to array of words represented by this vector // Dictionary of vectors to array of words represented by this vector
var formattedWords = words var formattedWords = words

@ -33,11 +33,12 @@
private long Iterations { get; set; } = 0; private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum // Produces all sequences of vectors with the target sum
public IEnumerable<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors) public ParallelQuery<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors)
{ {
var filteredVectors = this.FilterVectors(vectors); var filteredVectors = this.FilterVectors(vectors);
var dictionary = ImmutableStack.Create(filteredVectors.ToArray()); var dictionary = ImmutableStack.Create(filteredVectors.ToArray());
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary); var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary)
.AsParallel();
var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations); var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations);
return allSequences; return allSequences;

Loading…
Cancel
Save