diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs
index 964f62c..1bad330 100644
--- a/dotnet/WhiteRabbit/PhraseSet.cs
+++ b/dotnet/WhiteRabbit/PhraseSet.cs
@@ -7,20 +7,20 @@
{
public long[] Buffer;
- public unsafe PhraseSet(Word[] words, ulong[] permutations, int offset, int numberOfCharacters)
+ public unsafe PhraseSet(Word[] allWords, int[] wordIndexes, ulong[] permutations, int permutationOffset, int numberOfCharacters)
{
- Debug.Assert(numberOfCharacters + words.Length - 1 < 27);
+ Debug.Assert(numberOfCharacters + wordIndexes.Length - 1 < 27);
this.Buffer = new long[4 * Constants.PhrasesPerSet];
fixed (long* bufferPointer = this.Buffer)
{
long* longBuffer = (long*)bufferPointer;
- int numberOfWords = words.Length;
+ int numberOfWords = wordIndexes.Length;
fixed (ulong* permutationsPointer = permutations)
{
- var currentPermutationPointer = permutationsPointer + offset;
+ var currentPermutationPointer = permutationsPointer + permutationOffset;
for (var i = 0; i < Constants.PhrasesPerSet; i++, currentPermutationPointer++)
{
var permutation = *currentPermutationPointer;
@@ -32,7 +32,7 @@
var cumulativeWordOffsetX4 = 0;
for (var j = 0; j < numberOfWords; j++)
{
- var currentWord = words[permutation & 15];
+ var currentWord = allWords[wordIndexes[permutation & 15]];
permutation = permutation >> 4;
longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0];
longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1];
diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs
index da244c0..fd82e10 100644
--- a/dotnet/WhiteRabbit/StringsProcessor.cs
+++ b/dotnet/WhiteRabbit/StringsProcessor.cs
@@ -20,18 +20,26 @@
this.NumberOfCharacters = filteredSource.Length;
this.VectorsConverter = new VectorsConverter(filteredSource);
- // Dictionary of vectors to array of words represented by this vector
- var vectorsToWords = words
+ var allWordsAndVectors = words
.Where(word => word != null && word.Length > 0)
.Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
- .Select(tuple => new { tuple.word, vector = tuple.vector.Value })
+ .Select(tuple => tuple.word)
+ .Distinct(new ByteArrayEqualityComparer())
+ .Select(word => word)
+ .ToArray();
+
+ // Dictionary of vectors to arrays of indexes (into AllWords) of the words represented by this vector
+ var vectorsToWords = allWordsAndVectors
+ .Select((word, index) => new { word, index, vector = this.VectorsConverter.GetVector(word).Value })
.GroupBy(tuple => tuple.vector)
- .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).Select(word => new Word(word)).ToArray() })
+ .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.index).ToArray() })
.ToList();
this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray();
+ this.AllWords = allWordsAndVectors.Select(word => new Word(word)).ToArray();
+
this.VectorsProcessor = new VectorsProcessor(
this.VectorsConverter.GetVector(filteredSource).Value,
maxWordsCount,
@@ -40,10 +48,12 @@
private VectorsConverter VectorsConverter { get; }
+ private Word[] AllWords { get; }
+
/// <summary>
- /// WordsDictionary[vectorIndex] = [word1, word2, ...]
+ /// WordsDictionary[vectorIndex] = [word1index, word2index, ...]
/// </summary>
- private Word[][] WordsDictionary { get; }
+ private int[][] WordsDictionary { get; }
private VectorsProcessor VectorsProcessor { get; }
@@ -61,7 +71,7 @@
// converting sequences of vectors to the sequences of words...
return from sum in sums
let filter = ComputeFilter(sum)
- let wordsVariants = this.ConvertVectorsToWords(sum)
+ let wordsVariants = this.ConvertVectorsToWordIndexes(sum)
from wordsArray in Flattener.Flatten(wordsVariants)
from phraseSet in this.ConvertWordsToPhrases(wordsArray, filter)
select phraseSet;
@@ -88,10 +98,10 @@
return result;
}
- private Word[][] ConvertVectorsToWords(int[] vectors)
+ private int[][] ConvertVectorsToWordIndexes(int[] vectors)
{
var length = vectors.Length;
- var words = new Word[length][];
+ var words = new int[length][];
for (var i = 0; i < length; i++)
{
words[i] = this.WordsDictionary[vectors[i]];
@@ -111,13 +121,13 @@
return Tuple.Create(vectors.Length, result);
}
- private IEnumerable ConvertWordsToPhrases(Word[] words, uint filter)
+ private IEnumerable ConvertWordsToPhrases(int[] wordIndexes, uint filter)
{
- var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length, filter);
+ var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(wordIndexes.Length, filter);
var permutationsLength = permutations.Length;
for (var i = 0; i < permutationsLength; i += Constants.PhrasesPerSet)
{
- yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters);
+ yield return new PhraseSet(this.AllWords, wordIndexes, permutations, i, this.NumberOfCharacters);
}
}
}
diff --git a/dotnet/WhiteRabbit/Word.cs b/dotnet/WhiteRabbit/Word.cs
index 0ccf3b1..9ddf46d 100644
--- a/dotnet/WhiteRabbit/Word.cs
+++ b/dotnet/WhiteRabbit/Word.cs
@@ -8,6 +8,13 @@
public int LengthX4 { get; }
+ private Word()
+ {
+ this.Original = new byte[0];
+ this.Buffers = new long[128];
+ this.LengthX4 = 0;
+ }
+
public unsafe Word(byte[] word)
{
var tmpWord = new byte[word.Length + 1];
@@ -37,5 +44,7 @@
this.Buffers = buffers;
this.LengthX4 = tmpWord.Length * 4;
}
+
+ private static Word Empty { get; } = new Word();
}
}