Refactored to use vectors

rx
Inga 🏳‍🌈 8 years ago
parent 30a55106cf
commit 2ad1385a20
  1. 127
      WhiteRabbit/Processor.cs
  2. 41
      WhiteRabbit/VectorsConverter.cs
  3. 6
      WhiteRabbit/WhiteRabbit.csproj
  4. 1
      WhiteRabbit/packages.config

@ -4,6 +4,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.Immutable; using System.Collections.Immutable;
using System.Linq; using System.Linq;
using System.Numerics;
internal class Processor internal class Processor
{ {
@ -12,24 +13,17 @@
public Processor(string sourceString, int maxWordsCount) public Processor(string sourceString, int maxWordsCount)
{ {
var rawNumberOfOccurrences = sourceString.Where(ch => ch != ' ').GroupBy(ch => ch).ToDictionary(group => group.Key, group => group.Count()); var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
this.IntToChar = rawNumberOfOccurrences.Select(kvp => kvp.Key).OrderBy(ch => ch).ToArray(); this.VectorsConverter = new VectorsConverter(filteredSource);
if (this.IntToChar.Length != DifferentChars) this.Target = this.VectorsConverter.GetVector(filteredSource).Value;
{
throw new ArgumentException("Unsupported phrase", nameof(sourceString));
}
this.CharToInt = Enumerable.Range(0, DifferentChars).ToDictionary(i => this.IntToChar[i], i => i);
this.NumberOfOccurrences = Enumerable.Range(0, DifferentChars).Select(i => this.IntToChar[i]).Select(ch => rawNumberOfOccurrences.ContainsKey(ch) ? rawNumberOfOccurrences[ch] : 0).ToArray();
this.MaxWordsCount = maxWordsCount; this.MaxWordsCount = maxWordsCount;
} }
private Dictionary<char, int> CharToInt { get; } private static Vector<byte> Negative { get; } = new Vector<byte>(Enumerable.Repeat((byte)128, 16).ToArray());
private char[] IntToChar { get; }
private int[] NumberOfOccurrences { get; } private VectorsConverter VectorsConverter { get; }
private int TotalCharsNumber { get; } private Vector<byte> Target { get; }
private int MaxWordsCount { get; } private int MaxWordsCount { get; }
@ -37,102 +31,84 @@
public IEnumerable<string> GeneratePhrases(IEnumerable<string> words) public IEnumerable<string> GeneratePhrases(IEnumerable<string> words)
{ {
var filtered = FilterWords(words); var formattedWordsList = FormatWords(words);
var formattedWordsList = FormatWords(filtered);
var formattedWords = formattedWordsList.ToDictionary(tuple => tuple.Item1, tuple => tuple.Item2); var formattedWords = formattedWordsList.ToDictionary(tuple => tuple.Item1, tuple => tuple.Item2);
var dictionary = ImmutableStack.Create(formattedWordsList.Select(tuple => tuple.Item1).ToArray()); var dictionary = ImmutableStack.Create(formattedWordsList.Select(tuple => tuple.Item1).ToArray());
var anagrams = GenerateOrderedPhrases(this.NumberOfOccurrences, ImmutableStack.Create<int[]>(), dictionary); var anagrams = GenerateOrderedPhrases(this.Target, ImmutableStack.Create<Vector<byte>>(), dictionary);
var anagramsWords = anagrams var anagramsWithPermutations = anagrams.SelectMany(GeneratePermutations);
var anagramsWords = anagramsWithPermutations
.Select(list => ImmutableStack.Create(list.Select(wordArray => formattedWords[wordArray]).ToArray())) .Select(list => ImmutableStack.Create(list.Select(wordArray => formattedWords[wordArray]).ToArray()))
.SelectMany(Flatten) .SelectMany(Flatten)
.Select(stack => stack.ToArray()); .Select(stack => stack.ToArray());
return anagramsWords.SelectMany(GeneratePermutations).Select(list => string.Join(" ", list)); return anagramsWords.Select(list => string.Join(" ", list));
} }
private IEnumerable<string> FilterWords(IEnumerable<string> words) private List<Tuple<Vector<byte>, string[]>> FormatWords(IEnumerable<string> words)
{ {
return words return words
.Where(word => word.All(this.CharToInt.ContainsKey))
.OrderBy(word => word)
.Distinct() .Distinct()
.Where(word => word.GroupBy(ch => this.CharToInt[ch]).All(group => group.Count() <= this.NumberOfOccurrences[group.Key])); .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.Where(tuple => ((this.Target - tuple.vector) & Negative) == Vector<byte>.Zero)
.GroupBy(tuple => tuple.vector)
.Select(group => Tuple.Create(group.Key, group.Select(tuple => tuple.word).ToArray()))
.OrderByDescending(tuple => this.VectorsConverter.GetString(tuple.Item1)) //so that letters that are more rare will come first
.ToList();
} }
private int GetWordWeight(int[] word) // This method takes most of the time, so everything related to it must be optimized
private IEnumerable<Vector<byte>[]> GenerateOrderedPhrases(Vector<byte> currentState, ImmutableStack<Vector<byte>> phraseStack, ImmutableStack<Vector<byte>> dictionaryStack)
{ {
var result = 0; var count = phraseStack.Count() + 1;
var orderedChars = Enumerable.Range(0, DifferentChars) if (count < this.MaxWordsCount)
.Select(i => new { Index = i, Count = this.NumberOfOccurrences[i] })
.OrderBy(charInfo => charInfo.Count)
.ThenBy(charInfo => charInfo.Index);
foreach (var charInfo in orderedChars)
{ {
result += word[charInfo.Index]; var remainder = dictionaryStack;
result *= charInfo.Count + 1; while (!remainder.IsEmpty)
} {
Vector<byte> currentWord;
var nextRemainder = remainder.Pop(out currentWord);
return result; this.Iterations++;
if (this.Iterations % 1000000 == 0)
{
Console.WriteLine($"Iteration #{this.Iterations}: {string.Join(" ", phraseStack.Push(currentWord).Reverse().Select(word => this.VectorsConverter.GetString(word)))}");
} }
private List<Tuple<int[], string[]>> FormatWords(IEnumerable<string> filteredWords) var newState = currentState - currentWord;
if (newState == Vector<byte>.Zero)
{ {
return filteredWords yield return phraseStack.Push(currentWord).Reverse().ToArray();
.GroupBy(word => new string(word.OrderBy(ch => ch).ToArray()))
.Select(group => Tuple.Create(Enumerable.Range(0, DifferentChars).Select(i => group.Key.Count(ch => ch == IntToChar[i])).ToArray(), group.ToArray()))
.OrderBy(tuple => GetWordWeight(tuple.Item1)) //so that letters that are more rare will come first
.ToList();
} }
else if ((newState & Negative) == Vector<byte>.Zero)
private int[] GetStatus(int[] originalState, int[] newWord, out int status)
{ {
var tmpArray = new int[DifferentChars]; foreach (var result in GenerateOrderedPhrases(newState, phraseStack.Push(currentWord), remainder))
tmpArray[0] = originalState[0] - newWord[0]; {
tmpArray[1] = originalState[1] - newWord[1]; yield return result;
tmpArray[2] = originalState[2] - newWord[2]; }
tmpArray[3] = originalState[3] - newWord[3];
tmpArray[4] = originalState[4] - newWord[4];
tmpArray[5] = originalState[5] - newWord[5];
tmpArray[6] = originalState[6] - newWord[6];
tmpArray[7] = originalState[7] - newWord[7];
tmpArray[8] = originalState[8] - newWord[8];
tmpArray[9] = originalState[9] - newWord[9];
tmpArray[10] = originalState[10] - newWord[10];
tmpArray[11] = originalState[11] - newWord[11];
// Negative if at least one element is negative; zero if all elements are zero; positive if all elements are non-negative and at least one element is positive
status = tmpArray[0] | tmpArray[1] | tmpArray[2] | tmpArray[3] | tmpArray[4] | tmpArray[5] | tmpArray[6] | tmpArray[7] | tmpArray[8] | tmpArray[9] | tmpArray[10] | tmpArray[11];
return tmpArray;
} }
// This method takes most of the time, so everything related to it must be optimized remainder = nextRemainder;
private IEnumerable<int[][]> GenerateOrderedPhrases(int[] currentState, ImmutableStack<int[]> phraseStack, ImmutableStack<int[]> dictionaryStack) }
}
else if (count == this.MaxWordsCount)
{ {
var remainder = dictionaryStack; var remainder = dictionaryStack;
var count = phraseStack.Count() + 1;
while (!remainder.IsEmpty) while (!remainder.IsEmpty)
{ {
int[] currentWord; Vector<byte> currentWord;
var nextRemainder = remainder.Pop(out currentWord); var nextRemainder = remainder.Pop(out currentWord);
this.Iterations++; this.Iterations++;
if (this.Iterations % 1000000 == 0) if (this.Iterations % 1000000 == 0)
{ {
Console.WriteLine($"Iteration #{this.Iterations}: {string.Join(" ", phraseStack.Push(currentWord).Reverse().Select(word => new string(Enumerable.Range(0, DifferentChars).SelectMany(i => Enumerable.Repeat(IntToChar[i], word[i])).ToArray())))}"); Console.WriteLine($"Iteration #{this.Iterations}: {string.Join(" ", phraseStack.Push(currentWord).Reverse().Select(word => this.VectorsConverter.GetString(word)))}");
} }
int status; var newState = currentState - currentWord;
var state = GetStatus(currentState, currentWord, out status); if (newState == Vector<byte>.Zero)
if (status > 0 && count < this.MaxWordsCount)
{
foreach (var result in GenerateOrderedPhrases(state, phraseStack.Push(currentWord), remainder))
{
yield return result;
}
}
else if (status == 0)
{ {
yield return phraseStack.Push(currentWord).Reverse().ToArray(); yield return phraseStack.Push(currentWord).Reverse().ToArray();
} }
@ -140,8 +116,9 @@
remainder = nextRemainder; remainder = nextRemainder;
} }
} }
}
private IEnumerable<string[]> GeneratePermutations(string[] original) private IEnumerable<T[]> GeneratePermutations<T>(T[] original)
{ {
foreach (var permutation in PermutationsGenerator.HamiltonianPermutations(original.Length)) foreach (var permutation in PermutationsGenerator.HamiltonianPermutations(original.Length))
{ {

@ -0,0 +1,41 @@
namespace WhiteRabbit
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Numerics;
    using System.Text;

    /// <summary>
    /// Converts strings to and from per-character occurrence counts packed into a <see cref="Vector{T}"/> of bytes.
    /// Each distinct character of the source string is assigned one vector lane.
    /// </summary>
    internal class VectorsConverter
    {
        // Processor tests the top bit (mask 128) of every lane to detect a subtraction that went negative,
        // so a lane can only hold counts that leave that bit clear.
        private const byte MaxOccurrencesPerChar = 127;

        /// <summary>
        /// Builds the character-to-lane mapping from the characters present in <paramref name="sourceString"/>.
        /// </summary>
        /// <param name="sourceString">The phrase whose distinct characters define the alphabet.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sourceString"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The phrase contains more distinct characters than a <see cref="Vector{T}"/> of bytes has lanes.
        /// </exception>
        public VectorsConverter(string sourceString)
        {
            if (sourceString == null)
            {
                throw new ArgumentNullException(nameof(sourceString));
            }

            var rawNumberOfOccurrences = sourceString.GroupBy(ch => ch).ToDictionary(group => group.Key, group => group.Count());

            // Lanes are ordered by ascending frequency in the source, ties broken by character.
            this.IntToChar = rawNumberOfOccurrences.OrderBy(kvp => kvp.Value).ThenBy(kvp => kvp.Key).Select(kvp => kvp.Key).ToArray();

            // Fail early with a descriptive error instead of an IndexOutOfRangeException inside GetVector.
            if (this.IntToChar.Length > Vector<byte>.Count)
            {
                throw new ArgumentException("Unsupported phrase: too many distinct characters", nameof(sourceString));
            }

            this.CharToInt = Enumerable.Range(0, this.IntToChar.Length).ToDictionary(i => this.IntToChar[i], i => i);
        }

        private Dictionary<char, int> CharToInt { get; }

        private char[] IntToChar { get; }

        /// <summary>
        /// Counts the occurrences of every character of <paramref name="word"/> into a vector.
        /// </summary>
        /// <param name="word">The word to convert.</param>
        /// <returns>
        /// The occurrence vector, or <c>null</c> when the word contains a character that is not in the
        /// source alphabet or repeats a character more times than a lane can represent.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
        public Vector<byte>? GetVector(string word)
        {
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }

            // The Vector<byte>(byte[]) constructor needs at least Vector<byte>.Count elements,
            // and that count is hardware dependent (not always 16).
            var counts = new byte[Vector<byte>.Count];
            foreach (var ch in word)
            {
                int lane;
                if (!this.CharToInt.TryGetValue(ch, out lane))
                {
                    return null;
                }

                // Refuse to wrap around or to set the bit that callers interpret as "negative".
                if (counts[lane] == MaxOccurrencesPerChar)
                {
                    return null;
                }

                counts[lane]++;
            }

            return new Vector<byte>(counts);
        }

        /// <summary>
        /// Renders a vector as a string in which every known character is repeated as many times
        /// as its lane says, in lane order.
        /// </summary>
        /// <param name="vector">The occurrence vector to render.</param>
        /// <returns>The characters described by <paramref name="vector"/>.</returns>
        public string GetString(Vector<byte> vector)
        {
            var builder = new StringBuilder();
            for (var i = 0; i < this.IntToChar.Length; i++)
            {
                builder.Append(this.IntToChar[i], (int)vector[i]);
            }

            return builder.ToString();
        }
    }
}

@ -40,6 +40,11 @@
<Private>True</Private> <Private>True</Private>
</Reference> </Reference>
<Reference Include="System.Core" /> <Reference Include="System.Core" />
<Reference Include="System.Numerics" />
<Reference Include="System.Numerics.Vectors, Version=4.1.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<HintPath>..\packages\System.Numerics.Vectors.4.3.0\lib\net46\System.Numerics.Vectors.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System.Xml.Linq" /> <Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" /> <Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" /> <Reference Include="Microsoft.CSharp" />
@ -52,6 +57,7 @@
<Compile Include="Processor.cs" /> <Compile Include="Processor.cs" />
<Compile Include="Program.cs" /> <Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" /> <Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="VectorsConverter.cs" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="App.config" /> <None Include="App.config" />

@ -1,4 +1,5 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<packages> <packages>
<package id="System.Collections.Immutable" version="1.3.1" targetFramework="net46" /> <package id="System.Collections.Immutable" version="1.3.1" targetFramework="net46" />
<package id="System.Numerics.Vectors" version="4.3.0" targetFramework="net46" />
</packages> </packages>
Loading…
Cancel
Save