Refactored vector-to-words conversion to lower-level code

master
Inga 🏳‍🌈 8 years ago
parent 0090bce443
commit cbb7ccb59b
  1. 10
      dotnet/WhiteRabbit/PhraseSet.cs
  2. 34
      dotnet/WhiteRabbit/StringsProcessor.cs
  3. 9
      dotnet/WhiteRabbit/Word.cs

@ -7,20 +7,20 @@
{ {
public long[] Buffer; public long[] Buffer;
public unsafe PhraseSet(Word[] words, ulong[] permutations, int offset, int numberOfCharacters) public unsafe PhraseSet(Word[] allWords, int[] wordIndexes, ulong[] permutations, int permutationOffset, int numberOfCharacters)
{ {
Debug.Assert(numberOfCharacters + words.Length - 1 < 27); Debug.Assert(numberOfCharacters + wordIndexes.Length - 1 < 27);
this.Buffer = new long[4 * Constants.PhrasesPerSet]; this.Buffer = new long[4 * Constants.PhrasesPerSet];
fixed (long* bufferPointer = this.Buffer) fixed (long* bufferPointer = this.Buffer)
{ {
long* longBuffer = (long*)bufferPointer; long* longBuffer = (long*)bufferPointer;
int numberOfWords = words.Length; int numberOfWords = wordIndexes.Length;
fixed (ulong* permutationsPointer = permutations) fixed (ulong* permutationsPointer = permutations)
{ {
var currentPermutationPointer = permutationsPointer + offset; var currentPermutationPointer = permutationsPointer + permutationOffset;
for (var i = 0; i < Constants.PhrasesPerSet; i++, currentPermutationPointer++) for (var i = 0; i < Constants.PhrasesPerSet; i++, currentPermutationPointer++)
{ {
var permutation = *currentPermutationPointer; var permutation = *currentPermutationPointer;
@ -32,7 +32,7 @@
var cumulativeWordOffsetX4 = 0; var cumulativeWordOffsetX4 = 0;
for (var j = 0; j < numberOfWords; j++) for (var j = 0; j < numberOfWords; j++)
{ {
var currentWord = words[permutation & 15]; var currentWord = allWords[wordIndexes[permutation & 15]];
permutation = permutation >> 4; permutation = permutation >> 4;
longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0]; longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0];
longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1]; longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1];

@ -20,18 +20,26 @@
this.NumberOfCharacters = filteredSource.Length; this.NumberOfCharacters = filteredSource.Length;
this.VectorsConverter = new VectorsConverter(filteredSource); this.VectorsConverter = new VectorsConverter(filteredSource);
// Dictionary of vectors to array of words represented by this vector var allWordsAndVectors = words
var vectorsToWords = words
.Where(word => word != null && word.Length > 0) .Where(word => word != null && word.Length > 0)
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null) .Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value }) .Select(tuple => tuple.word)
.Distinct(new ByteArrayEqualityComparer())
.Select(word => word)
.ToArray();
// Maps each vector to the indexes (within allWordsAndVectors) of the words represented by this vector
var vectorsToWords = allWordsAndVectors
.Select((word, index) => new { word, index, vector = this.VectorsConverter.GetVector(word).Value })
.GroupBy(tuple => tuple.vector) .GroupBy(tuple => tuple.vector)
.Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).Select(word => new Word(word)).ToArray() }) .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.index).ToArray() })
.ToList(); .ToList();
this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray(); this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray();
this.AllWords = allWordsAndVectors.Select(word => new Word(word)).ToArray();
this.VectorsProcessor = new VectorsProcessor( this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value, this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount, maxWordsCount,
@ -40,10 +48,12 @@
private VectorsConverter VectorsConverter { get; } private VectorsConverter VectorsConverter { get; }
private Word[] AllWords { get; }
/// <summary> /// <summary>
/// WordsDictionary[vectorIndex] = [word1, word2, ...] /// WordsDictionary[vectorIndex] = [word1index, word2index, ...]
/// </summary> /// </summary>
private Word[][] WordsDictionary { get; } private int[][] WordsDictionary { get; }
private VectorsProcessor VectorsProcessor { get; } private VectorsProcessor VectorsProcessor { get; }
@ -61,7 +71,7 @@
// converting sequences of vectors to the sequences of words... // converting sequences of vectors to the sequences of words...
return from sum in sums return from sum in sums
let filter = ComputeFilter(sum) let filter = ComputeFilter(sum)
let wordsVariants = this.ConvertVectorsToWords(sum) let wordsVariants = this.ConvertVectorsToWordIndexes(sum)
from wordsArray in Flattener.Flatten(wordsVariants) from wordsArray in Flattener.Flatten(wordsVariants)
from phraseSet in this.ConvertWordsToPhrases(wordsArray, filter) from phraseSet in this.ConvertWordsToPhrases(wordsArray, filter)
select phraseSet; select phraseSet;
@ -88,10 +98,10 @@
return result; return result;
} }
private Word[][] ConvertVectorsToWords(int[] vectors) private int[][] ConvertVectorsToWordIndexes(int[] vectors)
{ {
var length = vectors.Length; var length = vectors.Length;
var words = new Word[length][]; var words = new int[length][];
for (var i = 0; i < length; i++) for (var i = 0; i < length; i++)
{ {
words[i] = this.WordsDictionary[vectors[i]]; words[i] = this.WordsDictionary[vectors[i]];
@ -111,13 +121,13 @@
return Tuple.Create(vectors.Length, result); return Tuple.Create(vectors.Length, result);
} }
private IEnumerable<PhraseSet> ConvertWordsToPhrases(Word[] words, uint filter) private IEnumerable<PhraseSet> ConvertWordsToPhrases(int[] wordIndexes, uint filter)
{ {
var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length, filter); var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(wordIndexes.Length, filter);
var permutationsLength = permutations.Length; var permutationsLength = permutations.Length;
for (var i = 0; i < permutationsLength; i += Constants.PhrasesPerSet) for (var i = 0; i < permutationsLength; i += Constants.PhrasesPerSet)
{ {
yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters); yield return new PhraseSet(this.AllWords, wordIndexes, permutations, i, this.NumberOfCharacters);
} }
} }
} }

@ -8,6 +8,13 @@
public int LengthX4 { get; } public int LengthX4 { get; }
/// <summary>
/// Creates a word with no content: zero-length original bytes,
/// a zero-filled buffer array of 128 elements, and a length of zero.
/// </summary>
private Word()
{
    this.LengthX4 = 0;

    // NOTE(review): 128 presumably matches the buffer size allocated by the public constructor — confirm.
    this.Buffers = new long[128];

    this.Original = new byte[0];
}
public unsafe Word(byte[] word) public unsafe Word(byte[] word)
{ {
var tmpWord = new byte[word.Length + 1]; var tmpWord = new byte[word.Length + 1];
@ -37,5 +44,7 @@
this.Buffers = buffers; this.Buffers = buffers;
this.LengthX4 = tmpWord.Length * 4; this.LengthX4 = tmpWord.Length * 4;
} }
/// <summary>
/// A <see cref="Word"/> instance created once, via the private parameterless constructor.
/// </summary>
private static Word Empty { get; } = new Word();
} }
} }

Loading…
Cancel
Save