Refactoring: words list moved to constructors

dataflow
Inga 🏳‍🌈 8 years ago
parent e153e20cc6
commit e4a1cc0d23
  1. WhiteRabbit/Program.cs (4 changes)
  2. WhiteRabbit/StringsProcessor.cs (31 changes)
  3. WhiteRabbit/VectorsProcessor.cs (25 changes)

@ -21,7 +21,7 @@
var stopwatch = new Stopwatch(); var stopwatch = new Stopwatch();
stopwatch.Start(); stopwatch.Start();
var processor = new StringsProcessor("poultry outwits ants", 4); var processor = new StringsProcessor("poultry outwits ants", 4, ReadInput());
var expectedHashes = new[] var expectedHashes = new[]
{ {
"e4820b45d2277f3844eac66c903e84be", "e4820b45d2277f3844eac66c903e84be",
@ -31,7 +31,7 @@
var expectedHashesAsVectors = new HashSet<Vector<byte>>(expectedHashes.Select(hash => new Vector<byte>(StringToByteArray(hash)))); var expectedHashesAsVectors = new HashSet<Vector<byte>>(expectedHashes.Select(hash => new Vector<byte>(StringToByteArray(hash))));
foreach (var result in AddHashes(processor.GeneratePhrases(ReadInput()))) foreach (var result in AddHashes(processor.GeneratePhrases()))
{ {
if (expectedHashesAsVectors.Contains(result.Item2)) if (expectedHashesAsVectors.Contains(result.Item2))
{ {

@ -3,16 +3,28 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.Immutable; using System.Collections.Immutable;
using System.Linq; using System.Linq;
using System.Numerics;
internal class StringsProcessor internal class StringsProcessor
{ {
public StringsProcessor(string sourceString, int maxWordsCount) public StringsProcessor(string sourceString, int maxWordsCount, IEnumerable<string> words)
{ {
var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray()); var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
this.VectorsConverter = new VectorsConverter(filteredSource); this.VectorsConverter = new VectorsConverter(filteredSource);
// Dictionary of vectors to array of words represented by this vector
this.VectorsToWords = words
.Distinct()
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
this.VectorsProcessor = new VectorsProcessor( this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value, this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount, maxWordsCount,
this.VectorsToWords.Keys,
this.VectorsConverter.GetString); this.VectorsConverter.GetString);
} }
@ -20,23 +32,16 @@
private VectorsProcessor VectorsProcessor { get; } private VectorsProcessor VectorsProcessor { get; }
public IEnumerable<string> GeneratePhrases(IEnumerable<string> words) private Dictionary<Vector<byte>, string[]> VectorsToWords { get; }
{
// Dictionary of vectors to array of words represented by this vector
var formattedWords = words
.Distinct()
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
.ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());
public IEnumerable<string> GeneratePhrases()
{
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys); var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words... // converting sequences of vectors to the sequences of words...
var anagramsWords = sums var anagramsWords = sums
.Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray())) .Select(sum => ImmutableStack.Create(sum.Select(vector => this.VectorsToWords[vector]).ToArray()))
.SelectMany(this.Flatten) .SelectMany(this.Flatten)
.Select(stack => stack.ToArray()); .Select(stack => stack.ToArray());

@ -9,11 +9,14 @@
internal class VectorsProcessor internal class VectorsProcessor
{ {
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, Func<Vector<byte>, string> vectorToString) public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> vectors, Func<Vector<byte>, string> vectorToString)
{ {
this.Target = target; this.Target = target;
this.MaxVectorsCount = maxVectorsCount; this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString; this.VectorToString = vectorToString;
var filteredVectors = FilterVectors(vectors, target);
this.Vectors = ImmutableStack.Create(filteredVectors.ToArray());
} }
/// <summary> /// <summary>
@ -28,16 +31,16 @@
private int MaxVectorsCount { get; } private int MaxVectorsCount { get; }
private ImmutableStack<Vector<byte>> Vectors { get; }
private Func<Vector<byte>, string> VectorToString { get; } private Func<Vector<byte>, string> VectorToString { get; }
private long Iterations { get; set; } = 0; private long Iterations { get; set; } = 0;
// Produces all sequences of vectors with the target sum // Produces all sequences of vectors with the target sum
public IEnumerable<Vector<byte>[]> GenerateSequences(IEnumerable<Vector<byte>> vectors) public IEnumerable<Vector<byte>[]> GenerateSequences()
{ {
var filteredVectors = this.FilterVectors(vectors); var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), this.Vectors);
var dictionary = ImmutableStack.Create(filteredVectors.ToArray());
var unorderedSequences = this.GenerateUnorderedSequences(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary);
var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations); var allSequences = unorderedSequences.SelectMany(this.GeneratePermutations);
return allSequences; return allSequences;
@ -48,22 +51,22 @@
// Total number of quadruplets is reduced from 1468M to mere 311M. // Total number of quadruplets is reduced from 1468M to mere 311M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s"). // Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1]. // This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
private int GetVectorWeight(Vector<byte> vector) private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
{ {
var weight = 0; var weight = 0;
for (var i = 0; this.Target[i] != 0; i++) for (var i = 0; target[i] != 0; i++)
{ {
weight += (720 * vector[i]) / this.Target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6) weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
} }
return weight; return weight;
} }
private IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors) private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target)
{ {
return vectors return vectors
.Where(vector => ((this.Target - vector) & Negative) == Vector<byte>.Zero) .Where(vector => ((target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(GetVectorWeight); .OrderBy(vector => GetVectorWeight(vector, target));
} }
[Conditional("DEBUG")] [Conditional("DEBUG")]

Loading…
Cancel
Save