Refactoring: words list moved to constructors

dataflow
Inga 🏳‍🌈 8 years ago
parent e153e20cc6
commit e4a1cc0d23
  1. 4
      WhiteRabbit/Program.cs
  2. 31
      WhiteRabbit/StringsProcessor.cs
  3. 25
      WhiteRabbit/VectorsProcessor.cs

@ -21,7 +21,7 @@
var stopwatch = new Stopwatch();
stopwatch.Start();
var processor = new StringsProcessor("poultry outwits ants", 4);
var processor = new StringsProcessor("poultry outwits ants", 4, ReadInput());
var expectedHashes = new[]
{
"e4820b45d2277f3844eac66c903e84be",
@ -31,7 +31,7 @@
var expectedHashesAsVectors = new HashSet<Vector<byte>>(expectedHashes.Select(hash => new Vector<byte>(StringToByteArray(hash))));
foreach (var result in AddHashes(processor.GeneratePhrases(ReadInput())))
foreach (var result in AddHashes(processor.GeneratePhrases()))
{
if (expectedHashesAsVectors.Contains(result.Item2))
{

@ -3,16 +3,28 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Numerics;
internal class StringsProcessor
{
/// <summary>
/// Builds the processor for a source phrase: strips spaces from the phrase, creates the
/// vectors converter for it, indexes the supplied words by their vector, and creates the
/// vectors processor that will search over those vectors.
/// </summary>
/// <param name="sourceString">Source phrase; space characters are removed before use.</param>
/// <param name="maxWordsCount">Passed through to <c>VectorsProcessor</c> as its maximum vectors count.</param>
/// <param name="words">Candidate words; duplicates are dropped and words without a vector are skipped.</param>
// NOTE(review): this view shows both a two-parameter and a three-parameter signature line;
// the body below reads `words`, so it matches the three-parameter form.
public StringsProcessor(string sourceString, int maxWordsCount)
public StringsProcessor(string sourceString, int maxWordsCount, IEnumerable<string> words)
{
// Only the non-space characters of the source phrase take part in the conversion.
var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
this.VectorsConverter = new VectorsConverter(filteredSource);
// Dictionary of vectors to array of words represented by this vector.
// GetVector yields a nullable vector; words for which it is null are dropped
// (presumably words that cannot be built from the source letters - TODO confirm in VectorsConverter).
this.VectorsToWords = words
.Distinct()
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
// The target is the vector of the whole filtered source; `.Value` assumes GetVector
// never returns null for the very string the converter was built from - TODO confirm.
this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount,
this.VectorsToWords.Keys,
this.VectorsConverter.GetString);
}
@ -20,23 +32,16 @@
private VectorsProcessor VectorsProcessor { get; }
public IEnumerable<string> GeneratePhrases(IEnumerable<string> words)
{
// Dictionary of vectors to array of words represented by this vector
var formattedWords = words
.Distinct()
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
private Dictionary<Vector<byte>, string[]> VectorsToWords { get; }
public IEnumerable<string> GeneratePhrases()
{
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys);
var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words...
var anagramsWords = sums
.Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray()))
.Select(sum => ImmutableStack.Create(sum.Select(vector => this.VectorsToWords[vector]).ToArray()))
.SelectMany(this.Flatten)
.Select(stack => stack.ToArray());

@ -9,11 +9,14 @@
internal class VectorsProcessor
{
/// <summary>
/// Stores the search parameters and pre-filters the supplied vectors against the target,
/// keeping the filtered, ordered result as an immutable stack in <c>Vectors</c>.
/// </summary>
/// <param name="target">Target vector; stored in <c>Target</c> and used to filter <paramref name="vectors"/>.</param>
/// <param name="maxVectorsCount">Stored in <c>MaxVectorsCount</c>.</param>
/// <param name="vectors">Candidate vectors; passed through <c>FilterVectors</c> before being stored.</param>
/// <param name="vectorToString">Stored in <c>VectorToString</c>; converts a vector to a string.</param>
// NOTE(review): this view shows both a three-parameter and a four-parameter signature line;
// the body below reads `vectors`, so it matches the four-parameter form.
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, Func<Vector<byte>, string> vectorToString)
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> vectors, Func<Vector<byte>, string> vectorToString)
{
this.Target = target;
this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString;
// Filtering and ordering happen once here; the result is materialized into an array
// and wrapped in an immutable stack.
var filteredVectors = FilterVectors(vectors, target);
this.Vectors = ImmutableStack.Create(filteredVectors.ToArray());
}
/// <summary>
@ -28,16 +31,16 @@
private int MaxVectorsCount { get; }
private ImmutableStack<Vector<byte>> Vectors { get; }
private Func<Vector<byte>, string> VectorToString { get; }
private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum
public IEnumerable<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors)
public IEnumerable<Vector<byte>[]> GenerateSequences()
{
var filteredVectors = this.FilterVectors(vectors);
var dictionary = ImmutableStack.Create(filteredVectors.ToArray());
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary);
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), this.Vectors);
var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations);
return allSequences;
@ -48,22 +51,22 @@
// Total number of quadruplets is reduced from 1468M to a mere 311M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
/// <summary>
/// Computes an integer weight for a vector relative to the target: the sum over components
/// of <c>720 * vector[i] / target[i]</c>, using integer division.
/// </summary>
/// <returns>The accumulated weight.</returns>
// NOTE(review): this view shows paired lines for an instance form (reading this.Target)
// and a static form (reading the `target` parameter); both compute the same expression.
private int GetVectorWeight(Vector<byte> vector)
private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
{
var weight = 0;
// The loop stops at the first zero component of the target and has no other bound,
// so it assumes the target contains a zero component before the end of the vector - TODO confirm.
for (var i = 0; this.Target[i] != 0; i++)
for (var i = 0; target[i] != 0; i++)
{
weight += (720 * vector[i]) / this.Target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
}
return weight;
}
/// <summary>
/// Keeps only the vectors for which <c>(target - vector) &amp; Negative</c> is the zero vector,
/// and orders the survivors by ascending <c>GetVectorWeight</c>.
/// </summary>
/// <returns>A lazily evaluated sequence of the filtered, ordered vectors.</returns>
// NOTE(review): this view shows paired lines for an instance form (reading this.Target)
// and a static form (reading the `target` parameter); both apply the same filter and ordering.
private IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors)
private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target)
{
return vectors
// `Negative` is defined outside this view; presumably a mask that flags components which
// went below zero in the byte subtraction, i.e. the vector exceeds the target - TODO confirm.
.Where(vector => ((this.Target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(GetVectorWeight);
.Where(vector => ((target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(vector => GetVectorWeight(vector, target));
}
[Conditional("DEBUG")]

Loading…
Cancel
Save