From 35c12f649dcce33eea490d91457c4d1aab851b39 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Fri, 7 Apr 2017 12:22:47 +0300 Subject: [PATCH] PhraseSet initialization optimization --- README.md | 8 ++-- dotnet/WhiteRabbit/App.config | 8 ++-- dotnet/WhiteRabbit/MD5Digest.cs | 2 +- dotnet/WhiteRabbit/PhraseSet.cs | 65 +++++++++++++++----------- dotnet/WhiteRabbit/StringsProcessor.cs | 12 ++--- dotnet/WhiteRabbit/WhiteRabbit.csproj | 4 +- dotnet/WhiteRabbit/Word.cs | 41 ++++++++++++++++ 7 files changed, 97 insertions(+), 43 deletions(-) create mode 100644 dotnet/WhiteRabbit/Word.cs diff --git a/README.md b/README.md index 6e95d33..5ff8624 100644 --- a/README.md +++ b/README.md @@ -46,10 +46,10 @@ Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sa Number of words|Time to check all anagrams no longer than that|Time to solve "easy" hash|Time to solve "more difficult" hash|Time to solve "hard" hash|Number of anagrams no longer than that (see note below) ---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|------------------------------------------------------- 3|Fractions of a second||||4560 -4|0.6s|||0.1s|7,433,016 -5|60s|||1.5s|1,348,876,896 -6|45 minutes|||21s|58,837,302,096 -7|10 hours (?)|1.5 minutes|8s|4.5 minutes|1,108,328,708,976 +4|0.55s|||0.1s|7,433,016 +5|46s|||1.1s|1,348,876,896 +6|34 minutes|||15s|58,837,302,096 +7|11 hours (?)|45s|6.5s|2 minutes|1,108,328,708,976 8|||||12,089,249,231,856 9|||||88,977,349,731,696 10|||||482,627,715,786,096 diff --git a/dotnet/WhiteRabbit/App.config b/dotnet/WhiteRabbit/App.config index 6677d73..a1dcaba 100644 --- a/dotnet/WhiteRabbit/App.config +++ b/dotnet/WhiteRabbit/App.config @@ -1,11 +1,11 @@ - + - + - - + + diff --git a/dotnet/WhiteRabbit/MD5Digest.cs b/dotnet/WhiteRabbit/MD5Digest.cs index 154210d..85959d3 100644 --- a/dotnet/WhiteRabbit/MD5Digest.cs +++ b/dotnet/WhiteRabbit/MD5Digest.cs @@ -12,7 +12,7 @@ var result = new uint[Constants.PhrasesPerSet]; fixed (uint* resultPointer = result) { - MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer); + MD5Unmanaged.ComputeMD5((uint*)input.Buffer, resultPointer); } return result; diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs index 6bed70a..667bfcf 100644 --- a/dotnet/WhiteRabbit/PhraseSet.cs +++ b/dotnet/WhiteRabbit/PhraseSet.cs @@ -1,52 +1,63 @@ namespace WhiteRabbit { + using System.Diagnostics; + // Anagram representation optimized for MD5 internal unsafe struct PhraseSet { - public fixed uint Buffer[8 * Constants.PhrasesPerSet]; + public fixed long Buffer[4 * Constants.PhrasesPerSet]; - public PhraseSet(byte[][] words, int[][] permutations, int offset, int numberOfCharacters) + public PhraseSet(Word[] words, int[][] permutations, int offset, int numberOfCharacters) { - fixed (uint* bufferPointer = this.Buffer) - { - var length = numberOfCharacters + words.Length - 1; + Debug.Assert(numberOfCharacters + words.Length - 1 < 27); + fixed (long* bufferPointer = this.Buffer) + { + long* longBuffer = (long*)bufferPointer; + int numberOfWords = words.Length; for (var i = 0; i < Constants.PhrasesPerSet; i++) { var permutation = permutations[offset + i]; - var startPointer = bufferPointer + i * 8; - byte[] currentWord = words[permutation[0]]; - var j = 0; - var wordIndex = 0; - var currentPointer = (byte*)startPointer; - byte* lastPointer = currentPointer + length; - for (; currentPointer < lastPointer; currentPointer++) + var cumulativeWordOffsetX4 = 0; + for (var j = 0; j < numberOfWords; j++) { - if (j >= currentWord.Length) - { - j = 0; - wordIndex++; - currentWord = words[permutation[wordIndex]]; - } - - *currentPointer = currentWord[j]; - j++; + var currentWord = words[permutation[j]]; + longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0]; + longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1]; + longBuffer[2] ^= currentWord.Buffers[cumulativeWordOffsetX4 + 2]; + longBuffer[3] ^= currentWord.Buffers[cumulativeWordOffsetX4 + 3]; + cumulativeWordOffsetX4 += currentWord.LengthX4; } - *currentPointer = 128; - startPointer[7] = (uint)(length << 3); + longBuffer += 4; + } + + var length = numberOfCharacters + numberOfWords - 1; + byte* byteBuffer = ((byte*)bufferPointer) + length; + for (var i = 0; i < Constants.PhrasesPerSet; i++) + { + *byteBuffer = 128; + byteBuffer += 32; + } + + var lengthInBits = (uint)(length << 3); + uint* uintBuffer = ((uint*)bufferPointer) + 7; + for (var i = 0; i < Constants.PhrasesPerSet; i++) + { + *uintBuffer = lengthInBits; + uintBuffer += 8; } } } public byte[] GetBytes(int number) { - System.Diagnostics.Debug.Assert(number < Constants.PhrasesPerSet); + Debug.Assert(number < Constants.PhrasesPerSet); - fixed(uint* bufferPointer = this.Buffer) + fixed(long* bufferPointer = this.Buffer) { - var phrasePointer = bufferPointer + 8 * number; - var length = phrasePointer[7] >> 3; + var phrasePointer = bufferPointer + 4 * number; + var length = ((uint*)phrasePointer)[7] >> 3; var result = new byte[length]; for (var i = 0; i < length; i++) { diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs index 0793f36..cb5cbff 100644 --- a/dotnet/WhiteRabbit/StringsProcessor.cs +++ b/dotnet/WhiteRabbit/StringsProcessor.cs @@ -23,11 +23,11 @@ // Dictionary of vectors to array of words represented by this vector var vectorsToWords = words .Where(word => word != null && word.Length > 0) - .Select(word => new { word = word.Concat(new byte[] { SPACE }).ToArray(), vector = this.VectorsConverter.GetVector(word) }) + .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) .Where(tuple => tuple.vector != null) .Select(tuple => new { tuple.word, vector = tuple.vector.Value }) .GroupBy(tuple => tuple.vector) - .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).ToArray() }) + .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).Select(word => new Word(word)).ToArray() }) .ToList(); this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray(); @@ -43,7 +43,7 @@ /// /// WordsDictionary[vectorIndex] = [word1, word2, ...] /// - private byte[][][] WordsDictionary { get; } + private Word[][] WordsDictionary { get; } private VectorsProcessor VectorsProcessor { get; } @@ -72,10 +72,10 @@ .Sum(tuple => tuple.Item2 * PrecomputedPermutationsGenerator.GetPermutationsNumber(tuple.Item1)); } - private byte[][][] ConvertVectorsToWords(int[] vectors) + private Word[][] ConvertVectorsToWords(int[] vectors) { var length = vectors.Length; - var words = new byte[length][][]; + var words = new Word[length][]; for (var i = 0; i < length; i++) { words[i] = this.WordsDictionary[vectors[i]]; @@ -95,7 +95,7 @@ return Tuple.Create(vectors.Length, result); } - private IEnumerable ConvertWordsToPhrases(byte[][] words) + private IEnumerable ConvertWordsToPhrases(Word[] words) { var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length); var permutationsLength = permutations.Length; diff --git a/dotnet/WhiteRabbit/WhiteRabbit.csproj b/dotnet/WhiteRabbit/WhiteRabbit.csproj index e7e21c3..5549f68 100644 --- a/dotnet/WhiteRabbit/WhiteRabbit.csproj +++ b/dotnet/WhiteRabbit/WhiteRabbit.csproj @@ -9,10 +9,11 @@ Properties WhiteRabbit WhiteRabbit - v4.6 + v4.6.1 512 true true + x64 @@ -69,6 +70,7 @@ + diff --git a/dotnet/WhiteRabbit/Word.cs b/dotnet/WhiteRabbit/Word.cs new file mode 100644 index 0000000..0ccf3b1 --- /dev/null +++ b/dotnet/WhiteRabbit/Word.cs @@ -0,0 +1,41 @@ +namespace WhiteRabbit +{ + class Word + { + public byte[] Original; + + public long[] Buffers { get; } + + public int LengthX4 { get; } + + public unsafe Word(byte[] word) + { + var tmpWord = new byte[word.Length + 1]; + tmpWord[word.Length] = (byte)' '; + for (var i = 0; i < word.Length; i++) + { + tmpWord[i] = word[i]; + } + + this.Original = tmpWord; + + var buffers = new long[128]; + fixed (long* buffersPointer = buffers) + { + for (var i = 0; i < 32; i++) + { + var bytePointer = (byte*)(buffersPointer + 4 * i); + var endPointer = bytePointer + 32; + var currentPointer = bytePointer + i; + for (var j = 0; j < tmpWord.Length && currentPointer < endPointer; j++, currentPointer++) + { + *currentPointer = tmpWord[j]; + } + } + } + + this.Buffers = buffers; + this.LengthX4 = tmpWord.Length * 4; + } + } +}