Compare commits

...

3 Commits

  1. 87
      WhiteRabbit/DataflowBlockHelpers.cs
  2. 67
      WhiteRabbit/Program.cs
  3. 64
      WhiteRabbit/StringsProcessor.cs
  4. 48
      WhiteRabbit/VectorsProcessor.cs
  5. 5
      WhiteRabbit/WhiteRabbit.csproj
  6. 1
      WhiteRabbit/packages.config

@ -0,0 +1,87 @@
namespace WhiteRabbit
{
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
/// <summary>
/// Extension helpers for composing TPL Dataflow blocks into linear pipelines
/// in which completion (and faults) flow from every block to the next one.
/// </summary>
internal static class DataflowBlockHelpers
{
    // Options shared by every block created in this class; the bounded capacity
    // caps how many messages each block may buffer.
    private static ExecutionDataflowBlockOptions ExecutionOptions { get; } = new ExecutionDataflowBlockOptions
    {
        BoundedCapacity = 100000,
    };

    // Link options used by LinkForever: when the source completes or faults,
    // the same outcome is forwarded to the linked target.
    private static DataflowLinkOptions LinkOptions { get; } = new DataflowLinkOptions
    {
        PropagateCompletion = true,
    };

    /// <summary>
    /// Creates an identity block that forwards every element unchanged.
    /// </summary>
    /// <typeparam name="T">Type of the elements.</typeparam>
    /// <returns>A new pass-through propagator block.</returns>
    public static IPropagatorBlock<T, T> Id<T>()
    {
        return new TransformBlock<T, T>(element => element, ExecutionOptions);
    }

    /// <summary>
    /// Synchronously pushes every element of <paramref name="enumerable"/> into
    /// <paramref name="target"/> through an intermediate buffering block, then
    /// completes that block so that completion reaches the target.
    /// </summary>
    /// <typeparam name="T">Type of the elements.</typeparam>
    /// <param name="enumerable">Elements to send, enumerated lazily.</param>
    /// <param name="target">Block that receives the elements.</param>
    /// <exception cref="AggregateException">
    /// Enumerating or sending failed; the intermediate block is faulted first, so the
    /// target observes the failure instead of waiting for a completion that never comes.
    /// </exception>
    public static void WriteToTargetBlock<T>(this IEnumerable<T> enumerable, ITargetBlock<T> target)
    {
        var block = new TransformBlock<T, T>(line => line, ExecutionOptions);
        block.LinkForever(target);

        try
        {
            // This is a blocking API by contract; the awaits inside do not capture the
            // caller's context, so blocking here cannot deadlock on it.
            WriteToTargetBlockAsync(enumerable, block).Wait();
        }
        catch (Exception exception)
        {
            // Without this the pipeline would be left neither completed nor faulted.
            ((IDataflowBlock)block).Fault(exception);
            throw;
        }

        block.Complete();
    }

    /// <summary>
    /// Appends a one-to-many transformation step to <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Pipeline built so far.</param>
    /// <param name="mapper">Maps one intermediate element to any number of outputs.</param>
    /// <returns>A single block representing the extended pipeline.</returns>
    public static IPropagatorBlock<TInput, TOutput> PipeMany<TInput, TIntermediate, TOutput>(this IPropagatorBlock<TInput, TIntermediate> source, Func<TIntermediate, IEnumerable<TOutput>> mapper)
    {
        return source.Pipe(new TransformManyBlock<TIntermediate, TOutput>(mapper, ExecutionOptions));
    }

    /// <summary>
    /// Appends a one-to-one transformation step to <paramref name="source"/>.
    /// </summary>
    /// <param name="source">Pipeline built so far.</param>
    /// <param name="mapper">Maps one intermediate element to one output.</param>
    /// <returns>A single block representing the extended pipeline.</returns>
    public static IPropagatorBlock<TInput, TOutput> Pipe<TInput, TIntermediate, TOutput>(this IPropagatorBlock<TInput, TIntermediate> source, Func<TIntermediate, TOutput> mapper)
    {
        return source.Pipe(new TransformBlock<TIntermediate, TOutput>(mapper, ExecutionOptions));
    }

    /// <summary>
    /// Links <paramref name="source"/> to <paramref name="target"/> and wraps both into one
    /// propagator that accepts input at the head and exposes the output of the tail.
    /// </summary>
    /// <param name="source">Head of the pipeline.</param>
    /// <param name="target">Block to append.</param>
    /// <returns>The encapsulated pair of blocks.</returns>
    public static IPropagatorBlock<TInput, TOutput> Pipe<TInput, TIntermediate, TOutput>(this IPropagatorBlock<TInput, TIntermediate> source, IPropagatorBlock<TIntermediate, TOutput> target)
    {
        source.LinkForever(target);
        return DataflowBlock.Encapsulate(source, target);
    }

    /// <summary>
    /// Appends a one-to-one transformation step to a source-only block.
    /// </summary>
    /// <param name="source">Block producing the input elements.</param>
    /// <param name="mapper">Maps one input element to one output.</param>
    /// <returns>The source side of the newly linked transformation block.</returns>
    public static ISourceBlock<TOutput> Pipe<TInput, TOutput>(this ISourceBlock<TInput> source, Func<TInput, TOutput> mapper)
    {
        return source.Pipe(new TransformBlock<TInput, TOutput>(mapper, ExecutionOptions));
    }

    /// <summary>
    /// Links a source-only block to <paramref name="target"/>.
    /// </summary>
    /// <param name="source">Block producing the input elements.</param>
    /// <param name="target">Block to append.</param>
    /// <returns><paramref name="target"/>, so that further steps can be chained.</returns>
    public static ISourceBlock<TOutput> Pipe<TInput, TOutput>(this ISourceBlock<TInput> source, IPropagatorBlock<TInput, TOutput> target)
    {
        source.LinkForever(target);
        return target;
    }

    /// <summary>
    /// Terminates a pipeline with an action that is invoked for every element.
    /// </summary>
    /// <param name="source">Block producing the elements.</param>
    /// <param name="action">Callback invoked per element.</param>
    /// <returns>The completion task of the terminating action block.</returns>
    public static Task LinkForever<TOutput>(this ISourceBlock<TOutput> source, Action<TOutput> action)
    {
        return source.LinkForever(new ActionBlock<TOutput>(action, ExecutionOptions));
    }

    /// <summary>
    /// Links <paramref name="source"/> to <paramref name="target"/> so that the target is
    /// completed when the source completes and faulted when the source faults.
    /// </summary>
    /// <param name="source">Block producing the elements.</param>
    /// <param name="target">Block receiving the elements.</param>
    /// <returns>The completion task of <paramref name="target"/>.</returns>
    public static Task LinkForever<TOutput>(this ISourceBlock<TOutput> source, ITargetBlock<TOutput> target)
    {
        // The library's own propagation replaces a hand-written continuation that
        // ran on whatever task scheduler happened to be current.
        source.LinkTo(target, LinkOptions);
        return target.Completion;
    }

    // Sends the elements one by one, waiting whenever the target is at capacity.
    // NOTE(review): if a block further downstream faults while this block is full,
    // SendAsync may never finish - confirm whether callers need protection from that.
    private static async Task WriteToTargetBlockAsync<T>(IEnumerable<T> enumerable, ITargetBlock<T> target)
    {
        foreach (var element in enumerable)
        {
            var accepted = await target.SendAsync(element).ConfigureAwait(false);
            if (!accepted)
            {
                // The target declined the element for good (it is completed or faulted),
                // so enumerating the remaining elements would be wasted work.
                break;
            }
        }
    }
}
}

@ -18,10 +18,6 @@
/// </summary>
public static void Main()
{
var stopwatch = new Stopwatch();
stopwatch.Start();
var processor = new StringsProcessor("poultry outwits ants", 4);
var expectedHashes = new[]
{
"e4820b45d2277f3844eac66c903e84be",
@ -31,16 +27,50 @@
var expectedHashesAsVectors = new HashSet<Vector<byte>>(expectedHashes.Select(hash => new Vector<byte>(StringToByteArray(hash))));
foreach (var result in AddHashes(processor.GeneratePhrases(ReadInput())))
var stopwatch = new Stopwatch();
stopwatch.Start();
using (var hasher = MD5.Create())
{
if (expectedHashesAsVectors.Contains(result.Item2))
{
Console.WriteLine($"Found phrase: {result.Item1} (spent {stopwatch.Elapsed})");
}
}
stopwatch.Stop();
Console.WriteLine($"Total time spent: {stopwatch.Elapsed}");
var processor = new StringsProcessor("poultry outwits ants", 4, ReadInput());
var startBlock = DataflowBlockHelpers.Id<Vector<byte>[]>();
var task = startBlock
.Pipe(processor.CreateUnorderedSequencesToPhrasesTransform())
.Pipe(phrase =>
{
//Console.WriteLine("Found phrase: " + phrase);
var hash = new Vector<byte>(hasher.ComputeHash(Encoding.ASCII.GetBytes(phrase)));
return new PhraseWithHash(phrase, hash);
})
.PipeMany(phraseWithHash =>
{
//Console.WriteLine($"Found phrase with hash: " + phraseWithHash.Phrase);
if (!expectedHashesAsVectors.Contains(phraseWithHash.Hash))
{
return Enumerable.Empty<PhraseWithHash>();
}
return new PhraseWithHash[]
{
phraseWithHash,
};
})
.LinkForever(phraseWithHash =>
{
Console.WriteLine($"Found phrase for hash {phraseWithHash.Hash}: {phraseWithHash.Phrase} (spent {stopwatch.Elapsed})");
});
Console.WriteLine($"Initialization complete: time spent: {stopwatch.Elapsed}");
processor.PostUnorderedSequences(startBlock);
task.Wait();
Console.WriteLine($"Total time spent: {stopwatch.Elapsed}");
}
}
// Code taken from http://stackoverflow.com/a/321404/831314
@ -72,5 +102,18 @@
yield return line;
}
}
/// <summary>
/// Read-only pair of a phrase and the hash vector associated with it.
/// </summary>
private class PhraseWithHash
{
    /// <summary>Gets the phrase text.</summary>
    public string Phrase { get; }

    /// <summary>Gets the hash supplied together with the phrase.</summary>
    public Vector<byte> Hash { get; }

    /// <summary>
    /// Stores both values as given; no validation or copying is performed.
    /// </summary>
    /// <param name="phrase">Phrase text.</param>
    /// <param name="hash">Hash belonging to <paramref name="phrase"/>.</param>
    public PhraseWithHash(string phrase, Vector<byte> hash)
    {
        Hash = hash;
        Phrase = phrase;
    }
}
}
}

@ -3,27 +3,17 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Numerics;
using System.Threading.Tasks.Dataflow;
internal class StringsProcessor
{
public StringsProcessor(string sourceString, int maxWordsCount)
public StringsProcessor(string sourceString, int maxWordsCount, IEnumerable<string> words)
{
var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
this.VectorsConverter = new VectorsConverter(filteredSource);
this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount,
this.VectorsConverter.GetString);
}
private VectorsConverter VectorsConverter { get; }
private VectorsProcessor VectorsProcessor { get; }
public IEnumerable<string> GeneratePhrases(IEnumerable<string> words)
{
// Dictionary of vectors to array of words represented by this vector
var formattedWords = words
this.VectorsToWords = words
.Distinct()
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
@ -31,16 +21,31 @@
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys);
this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount,
this.VectorsToWords.Keys,
this.VectorsConverter.GetString);
}
// converting sequences of vectors to the sequences of words...
var anagramsWords = sums
.Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray()))
.SelectMany(this.Flatten)
.Select(stack => stack.ToArray());
private VectorsProcessor VectorsProcessor { get; }
return anagramsWords.Select(list => string.Join(" ", list));
private VectorsConverter VectorsConverter { get; }
private Dictionary<Vector<byte>, string[]> VectorsToWords { get; }
/// <summary>
/// Writes every unordered sequence produced by the vectors processor into
/// <paramref name="target"/>.
/// </summary>
/// <param name="target">Block that receives the unordered sequences.</param>
public void PostUnorderedSequences(ITargetBlock<Vector<byte>[]> target)
{
    var unorderedSequences = this.VectorsProcessor.GenerateUnorderedSequences();
    unorderedSequences.WriteToTargetBlock(target);
}
/// <summary>
/// Builds the pipeline that turns unordered vector sequences into phrases:
/// unordered sequence -> ordered sequences -> word variants -> flat words -> phrase.
/// </summary>
/// <returns>A propagator accepting unordered sequences and emitting phrases.</returns>
public IPropagatorBlock<Vector<byte>[], string> CreateUnorderedSequencesToPhrasesTransform()
{
    var unorderedSequences = DataflowBlockHelpers.Id<Vector<byte>[]>();
    var orderedSequences = unorderedSequences.PipeMany(this.VectorsProcessor.UnorderedSequenceToOrderedSequences);
    var wordVariants = orderedSequences.Pipe(this.OrderedSequenceToWordVariants);
    var flatWords = wordVariants.PipeMany(this.WordVariantsToFlatWords);
    return flatWords.Pipe(this.FlatWordsToPhrase);
}
// Converts e.g. pair of variants [[a, b, c], [d, e]] into all possible pairs: [[a, d], [a, e], [b, d], [b, e], [c, d], [c, e]]
@ -55,5 +60,20 @@
var newStack = phrase.Pop(out wordVariants);
return this.Flatten(newStack).SelectMany(remainder => wordVariants.Select(word => remainder.Push(word)));
}
/// <summary>
/// Replaces every vector of the sequence with the array of words registered for it
/// and collects the arrays into an immutable stack.
/// </summary>
/// <param name="sum">Sequence of vectors; each one must be a key of the words dictionary.</param>
/// <returns>Stack holding one array of word variants per vector.</returns>
private ImmutableStack<string[]> OrderedSequenceToWordVariants(Vector<byte>[] sum)
{
    var variantsPerVector =
        from vector in sum
        select this.VectorsToWords[vector];

    return ImmutableStack.CreateRange(variantsPerVector);
}
/// <summary>
/// Pipeline-step wrapper that forwards to <c>Flatten</c>.
/// </summary>
/// <param name="wordVariants">Stack of word-variant arrays.</param>
/// <returns>Whatever <c>Flatten</c> yields for <paramref name="wordVariants"/>.</returns>
private IEnumerable<ImmutableStack<string>> WordVariantsToFlatWords(ImmutableStack<string[]> wordVariants)
    => this.Flatten(wordVariants);
/// <summary>
/// Joins the words into one phrase separated by single spaces, in the stack's
/// enumeration order.
/// </summary>
/// <param name="words">Words of the phrase.</param>
/// <returns>The space-separated phrase.</returns>
private string FlatWordsToPhrase(ImmutableStack<string> words)
    => string.Join(" ", words);
}
}

@ -6,14 +6,18 @@
using System.Diagnostics;
using System.Linq;
using System.Numerics;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;
internal class VectorsProcessor
{
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, Func<Vector<byte>, string> vectorToString)
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> vectors, Func<Vector<byte>, string> vectorToString)
{
this.Target = target;
this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString;
var filteredVectors = FilterVectors(vectors, target);
this.Vectors = ImmutableStack.Create(filteredVectors.ToArray());
}
/// <summary>
@ -28,19 +32,23 @@
private int MaxVectorsCount { get; }
private ImmutableStack<Vector<byte>> Vectors { get; }
private Func<Vector<byte>, string> VectorToString { get; }
private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum
public IEnumerable<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors)
public IEnumerable<Vector<byte>[]> UnorderedSequenceToOrderedSequences(Vector<byte>[] sequence)
{
var filteredVectors = this.FilterVectors(vectors);
var dictionary = ImmutableStack.Create(filteredVectors.ToArray());
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary);
var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations);
foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(sequence.Length))
{
yield return permutation.Select(i => sequence[i]).ToArray();
}
}
return allSequences;
/// <summary>
/// Starts the search from the target vector, an empty stack and the stored vectors,
/// delegating to the three-argument overload.
/// </summary>
/// <returns>The sequences produced by the three-argument overload.</returns>
public IEnumerable<Vector<byte>[]> GenerateUnorderedSequences()
{
    var emptyStack = ImmutableStack.Create<Vector<byte>>();
    return this.GenerateUnorderedSequences(this.Target, emptyStack, this.Vectors);
}
// We want words with more letters (and among these, words with more "rare" letters) to appear first, to reduce the searching time somewhat.
@ -48,25 +56,25 @@
// Total number of quadruplets is reduced from 1468M to mere 311M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
private int GetVectorWeight(Vector<byte> vector)
private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
{
var weight = 0;
for (var i = 0; this.Target[i] != 0; i++)
for (var i = 0; target[i] != 0; i++)
{
weight += (720 * vector[i]) / this.Target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
}
return weight;
}
private IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors)
private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target)
{
return vectors
.Where(vector => ((this.Target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(GetVectorWeight);
.Where(vector => ((target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(vector => GetVectorWeight(vector, target));
}
[Conditional("DEBUG")]
[Conditional("XDEBUG")]
private void DebugState(ImmutableStack<Vector<byte>> partialSumStack, Vector<byte> currentVector)
{
this.Iterations++;
@ -127,13 +135,5 @@
}
}
}
/// <summary>
/// Lazily yields one reordered copy of <paramref name="original"/> for every index
/// permutation supplied by the precomputed permutations generator.
/// </summary>
/// <param name="original">Array whose elements are rearranged.</param>
/// <returns>One new array per permutation.</returns>
private IEnumerable<T[]> GeneratePermutations<T>(T[] original)
{
    foreach (var indices in PrecomputedPermutationsGenerator.HamiltonianPermutations(original.Length))
    {
        var reordered = (from index in indices select original[index]).ToArray();
        yield return reordered;
    }
}
}
}

@ -45,6 +45,10 @@
<HintPath>..\packages\System.Numerics.Vectors.4.3.0\lib\net46\System.Numerics.Vectors.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Threading.Tasks.Dataflow, Version=4.5.24.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>..\packages\Microsoft.Tpl.Dataflow.4.5.24\lib\portable-net45+win8+wpa81\System.Threading.Tasks.Dataflow.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
@ -53,6 +57,7 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="DataflowBlockHelpers.cs" />
<Compile Include="PrecomputedPermutationsGenerator.cs" />
<Compile Include="PermutationsGenerator.cs" />
<Compile Include="StringsProcessor.cs" />

@ -1,5 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Tpl.Dataflow" version="4.5.24" targetFramework="net46" />
<package id="System.Collections.Immutable" version="1.3.1" targetFramework="net46" />
<package id="System.Numerics.Vectors" version="4.3.0" targetFramework="net46" />
</packages>
Loading…
Cancel
Save