From cbb7ccb59b340089e33c9ffd4d0f6489844ed423 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Wed, 26 Apr 2017 16:50:47 +0300 Subject: [PATCH] Refactored vector-to-words conversion to lower-level code --- dotnet/WhiteRabbit/PhraseSet.cs | 10 ++++---- dotnet/WhiteRabbit/StringsProcessor.cs | 34 +++++++++++++++++--------- dotnet/WhiteRabbit/Word.cs | 9 +++++++ 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs index 964f62c..1bad330 100644 --- a/dotnet/WhiteRabbit/PhraseSet.cs +++ b/dotnet/WhiteRabbit/PhraseSet.cs @@ -7,20 +7,20 @@ { public long[] Buffer; - public unsafe PhraseSet(Word[] words, ulong[] permutations, int offset, int numberOfCharacters) + public unsafe PhraseSet(Word[] allWords, int[] wordIndexes, ulong[] permutations, int permutationOffset, int numberOfCharacters) { - Debug.Assert(numberOfCharacters + words.Length - 1 < 27); + Debug.Assert(numberOfCharacters + wordIndexes.Length - 1 < 27); this.Buffer = new long[4 * Constants.PhrasesPerSet]; fixed (long* bufferPointer = this.Buffer) { long* longBuffer = (long*)bufferPointer; - int numberOfWords = words.Length; + int numberOfWords = wordIndexes.Length; fixed (ulong* permutationsPointer = permutations) { - var currentPermutationPointer = permutationsPointer + offset; + var currentPermutationPointer = permutationsPointer + permutationOffset; for (var i = 0; i < Constants.PhrasesPerSet; i++, currentPermutationPointer++) { var permutation = *currentPermutationPointer; @@ -32,7 +32,7 @@ var cumulativeWordOffsetX4 = 0; for (var j = 0; j < numberOfWords; j++) { - var currentWord = words[permutation & 15]; + var currentWord = allWords[wordIndexes[permutation & 15]]; permutation = permutation >> 4; longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0]; longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1]; diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs index da244c0..fd82e10 100644 --- a/dotnet/WhiteRabbit/StringsProcessor.cs +++ b/dotnet/WhiteRabbit/StringsProcessor.cs @@ -20,18 +20,26 @@ this.NumberOfCharacters = filteredSource.Length; this.VectorsConverter = new VectorsConverter(filteredSource); - // Dictionary of vectors to array of words represented by this vector - var vectorsToWords = words + var allWordsAndVectors = words .Where(word => word != null && word.Length > 0) .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) .Where(tuple => tuple.vector != null) - .Select(tuple => new { tuple.word, vector = tuple.vector.Value }) + .Select(tuple => tuple.word) + .Distinct(new ByteArrayEqualityComparer()) + .Select(word => word) + .ToArray(); + + // Dictionary of vectors to array of words represented by this vector + var vectorsToWords = allWordsAndVectors + .Select((word, index) => new { word, index, vector = this.VectorsConverter.GetVector(word).Value }) .GroupBy(tuple => tuple.vector) - .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).Select(word => new Word(word)).ToArray() }) + .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.index).ToArray() }) .ToList(); this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray(); + this.AllWords = allWordsAndVectors.Select(word => new Word(word)).ToArray(); + this.VectorsProcessor = new VectorsProcessor( this.VectorsConverter.GetVector(filteredSource).Value, maxWordsCount, @@ -40,10 +48,12 @@ private VectorsConverter VectorsConverter { get; } + private Word[] AllWords { get; } + /// - /// WordsDictionary[vectorIndex] = [word1, word2, ...] + /// WordsDictionary[vectorIndex] = [word1index, word2index, ...] /// - private Word[][] WordsDictionary { get; } + private int[][] WordsDictionary { get; } private VectorsProcessor VectorsProcessor { get; } @@ -61,7 +71,7 @@ // converting sequences of vectors to the sequences of words... return from sum in sums let filter = ComputeFilter(sum) - let wordsVariants = this.ConvertVectorsToWords(sum) + let wordsVariants = this.ConvertVectorsToWordIndexes(sum) from wordsArray in Flattener.Flatten(wordsVariants) from phraseSet in this.ConvertWordsToPhrases(wordsArray, filter) select phraseSet; @@ -88,10 +98,10 @@ return result; } - private Word[][] ConvertVectorsToWords(int[] vectors) + private int[][] ConvertVectorsToWordIndexes(int[] vectors) { var length = vectors.Length; - var words = new Word[length][]; + var words = new int[length][]; for (var i = 0; i < length; i++) { words[i] = this.WordsDictionary[vectors[i]]; @@ -111,13 +121,13 @@ return Tuple.Create(vectors.Length, result); } - private IEnumerable ConvertWordsToPhrases(Word[] words, uint filter) + private IEnumerable ConvertWordsToPhrases(int[] wordIndexes, uint filter) { - var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length, filter); + var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(wordIndexes.Length, filter); var permutationsLength = permutations.Length; for (var i = 0; i < permutationsLength; i += Constants.PhrasesPerSet) { - yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters); + yield return new PhraseSet(this.AllWords, wordIndexes, permutations, i, this.NumberOfCharacters); } } } diff --git a/dotnet/WhiteRabbit/Word.cs b/dotnet/WhiteRabbit/Word.cs index 0ccf3b1..9ddf46d 100644 --- a/dotnet/WhiteRabbit/Word.cs +++ b/dotnet/WhiteRabbit/Word.cs @@ -8,6 +8,13 @@ public int LengthX4 { get; } + private Word() + { + this.Original = new byte[0]; + this.Buffers = new long[128]; + this.LengthX4 = 0; + } + public unsafe Word(byte[] word) { var tmpWord = new byte[word.Length + 1]; @@ -37,5 +44,7 @@ this.Buffers = buffers; this.LengthX4 = tmpWord.Length * 4; } + + private static Word Empty { get; } = new Word(); } }