From 35c12f649dcce33eea490d91457c4d1aab851b39 Mon Sep 17 00:00:00 2001
From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com>
Date: Fri, 7 Apr 2017 12:22:47 +0300
Subject: [PATCH] PhraseSet initialization optimization
---
README.md | 8 ++--
dotnet/WhiteRabbit/App.config | 8 ++--
dotnet/WhiteRabbit/MD5Digest.cs | 2 +-
dotnet/WhiteRabbit/PhraseSet.cs | 65 +++++++++++++++-----------
dotnet/WhiteRabbit/StringsProcessor.cs | 12 ++---
dotnet/WhiteRabbit/WhiteRabbit.csproj | 4 +-
dotnet/WhiteRabbit/Word.cs | 41 ++++++++++++++++
7 files changed, 97 insertions(+), 43 deletions(-)
create mode 100644 dotnet/WhiteRabbit/Word.cs
diff --git a/README.md b/README.md
index 6e95d33..5ff8624 100644
--- a/README.md
+++ b/README.md
@@ -46,10 +46,10 @@ Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on quad-core Sa
Number of words|Time to check all anagrams no longer than that|Time to solve "easy" hash|Time to solve "more difficult" hash|Time to solve "hard" hash|Number of anagrams no longer than that (see note below)
---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|-------------------------------------------------------
3|Fractions of a second||||4560
-4|0.6s|||0.1s|7,433,016
-5|60s|||1.5s|1,348,876,896
-6|45 minutes|||21s|58,837,302,096
-7|10 hours (?)|1.5 minutes|8s|4.5 minutes|1,108,328,708,976
+4|0.55s|||0.1s|7,433,016
+5|46s|||1.1s|1,348,876,896
+6|34 minutes|||15s|58,837,302,096
+7|11 hours (?)|45s|6.5s|2 minutes|1,108,328,708,976
8|||||12,089,249,231,856
9|||||88,977,349,731,696
10|||||482,627,715,786,096
diff --git a/dotnet/WhiteRabbit/App.config b/dotnet/WhiteRabbit/App.config
index 6677d73..a1dcaba 100644
--- a/dotnet/WhiteRabbit/App.config
+++ b/dotnet/WhiteRabbit/App.config
@@ -1,11 +1,11 @@
-
+
-
+
-
-
+
+
diff --git a/dotnet/WhiteRabbit/MD5Digest.cs b/dotnet/WhiteRabbit/MD5Digest.cs
index 154210d..85959d3 100644
--- a/dotnet/WhiteRabbit/MD5Digest.cs
+++ b/dotnet/WhiteRabbit/MD5Digest.cs
@@ -12,7 +12,7 @@
var result = new uint[Constants.PhrasesPerSet];
fixed (uint* resultPointer = result)
{
- MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer);
+ MD5Unmanaged.ComputeMD5((uint*)input.Buffer, resultPointer);
}
return result;
diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs
index 6bed70a..667bfcf 100644
--- a/dotnet/WhiteRabbit/PhraseSet.cs
+++ b/dotnet/WhiteRabbit/PhraseSet.cs
@@ -1,52 +1,63 @@
namespace WhiteRabbit
{
+ using System.Diagnostics;
+
// Anagram representation optimized for MD5
internal unsafe struct PhraseSet
{
- public fixed uint Buffer[8 * Constants.PhrasesPerSet];
+ public fixed long Buffer[4 * Constants.PhrasesPerSet];
- public PhraseSet(byte[][] words, int[][] permutations, int offset, int numberOfCharacters)
+ public PhraseSet(Word[] words, int[][] permutations, int offset, int numberOfCharacters) // fills one 32-byte slot per permutation, taken from permutations[offset .. offset + PhrasesPerSet - 1]
{
- fixed (uint* bufferPointer = this.Buffer)
- {
- var length = numberOfCharacters + words.Length - 1;
+ Debug.Assert(numberOfCharacters + words.Length - 1 < 27); // the 0x80 byte written at index `length` must stay below byte 28, where the bit length is stored
+ fixed (long* bufferPointer = this.Buffer)
+ {
+ long* longBuffer = (long*)bufferPointer;
+ int numberOfWords = words.Length;
for (var i = 0; i < Constants.PhrasesPerSet; i++)
{
var permutation = permutations[offset + i];
- var startPointer = bufferPointer + i * 8;
- byte[] currentWord = words[permutation[0]];
- var j = 0;
- var wordIndex = 0;
- var currentPointer = (byte*)startPointer;
- byte* lastPointer = currentPointer + length;
- for (; currentPointer < lastPointer; currentPointer++)
+ var cumulativeWordOffsetX4 = 0; // 4 * (number of bytes already occupied in this slot); indexes the matching pre-shifted group in Word.Buffers
+ for (var j = 0; j < numberOfWords; j++)
{
- if (j >= currentWord.Length)
- {
- j = 0;
- wordIndex++;
- currentWord = words[permutation[wordIndex]];
- }
-
- *currentPointer = currentWord[j];
- j++;
+ var currentWord = words[permutation[j]];
+ longBuffer[0] |= currentWord.Buffers[cumulativeWordOffsetX4 + 0]; // OR in the word's 4-long group for this byte offset; NOTE(review): relies on Buffer starting zeroed - confirm
+ longBuffer[1] |= currentWord.Buffers[cumulativeWordOffsetX4 + 1];
+ longBuffer[2] |= currentWord.Buffers[cumulativeWordOffsetX4 + 2]; // same operator for all four longs: words occupy disjoint byte ranges, so OR merges them
+ longBuffer[3] |= currentWord.Buffers[cumulativeWordOffsetX4 + 3];
+ cumulativeWordOffsetX4 += currentWord.LengthX4;
}
- *currentPointer = 128;
- startPointer[7] = (uint)(length << 3);
+ longBuffer += 4; // advance to the next 32-byte slot
+ }
+ // The byte at index `length` holds the last word's trailing space; overwrite it with 128 (0x80) in every slot.
+ var length = numberOfCharacters + numberOfWords - 1;
+ byte* byteBuffer = ((byte*)bufferPointer) + length;
+ for (var i = 0; i < Constants.PhrasesPerSet; i++)
+ {
+ *byteBuffer = 128;
+ byteBuffer += 32;
+ }
+ // Store the phrase length in bits in the last uint (index 7, bytes 28..31) of every slot.
+ var lengthInBits = (uint)(length << 3);
+ uint* uintBuffer = ((uint*)bufferPointer) + 7;
+ for (var i = 0; i < Constants.PhrasesPerSet; i++)
+ {
+ *uintBuffer = lengthInBits;
+ uintBuffer += 8;
}
}
}
public byte[] GetBytes(int number)
{
- System.Diagnostics.Debug.Assert(number < Constants.PhrasesPerSet);
+ Debug.Assert(number < Constants.PhrasesPerSet);
- fixed(uint* bufferPointer = this.Buffer)
+ fixed(long* bufferPointer = this.Buffer)
{
- var phrasePointer = bufferPointer + 8 * number;
- var length = phrasePointer[7] >> 3;
+ var phrasePointer = bufferPointer + 4 * number;
+ var length = ((uint*)phrasePointer)[7] >> 3;
var result = new byte[length];
for (var i = 0; i < length; i++)
{
diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs
index 0793f36..cb5cbff 100644
--- a/dotnet/WhiteRabbit/StringsProcessor.cs
+++ b/dotnet/WhiteRabbit/StringsProcessor.cs
@@ -23,11 +23,11 @@
// Dictionary of vectors to array of words represented by this vector
var vectorsToWords = words
.Where(word => word != null && word.Length > 0)
- .Select(word => new { word = word.Concat(new byte[] { SPACE }).ToArray(), vector = this.VectorsConverter.GetVector(word) })
+ .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
.Where(tuple => tuple.vector != null)
.Select(tuple => new { tuple.word, vector = tuple.vector.Value })
.GroupBy(tuple => tuple.vector)
- .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).ToArray() })
+ .Select(group => new { vector = group.Key, words = group.Select(tuple => tuple.word).Distinct(new ByteArrayEqualityComparer()).Select(word => new Word(word)).ToArray() })
.ToList();
this.WordsDictionary = vectorsToWords.Select(tuple => tuple.words).ToArray();
@@ -43,7 +43,7 @@
///
/// WordsDictionary[vectorIndex] = [word1, word2, ...]
///
- private byte[][][] WordsDictionary { get; }
+ private Word[][] WordsDictionary { get; }
private VectorsProcessor VectorsProcessor { get; }
@@ -72,10 +72,10 @@
.Sum(tuple => tuple.Item2 * PrecomputedPermutationsGenerator.GetPermutationsNumber(tuple.Item1));
}
- private byte[][][] ConvertVectorsToWords(int[] vectors)
+ private Word[][] ConvertVectorsToWords(int[] vectors)
{
var length = vectors.Length;
- var words = new byte[length][][];
+ var words = new Word[length][];
for (var i = 0; i < length; i++)
{
words[i] = this.WordsDictionary[vectors[i]];
@@ -95,7 +95,7 @@
return Tuple.Create(vectors.Length, result);
}
- private IEnumerable ConvertWordsToPhrases(byte[][] words)
+ private IEnumerable ConvertWordsToPhrases(Word[] words)
{
var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length);
var permutationsLength = permutations.Length;
diff --git a/dotnet/WhiteRabbit/WhiteRabbit.csproj b/dotnet/WhiteRabbit/WhiteRabbit.csproj
index e7e21c3..5549f68 100644
--- a/dotnet/WhiteRabbit/WhiteRabbit.csproj
+++ b/dotnet/WhiteRabbit/WhiteRabbit.csproj
@@ -9,10 +9,11 @@
Properties
WhiteRabbit
WhiteRabbit
- v4.6
+ v4.6.1
512
true
true
+
x64
@@ -69,6 +70,7 @@
+
diff --git a/dotnet/WhiteRabbit/Word.cs b/dotnet/WhiteRabbit/Word.cs
new file mode 100644
index 0000000..0ccf3b1
--- /dev/null
+++ b/dotnet/WhiteRabbit/Word.cs
@@ -0,0 +1,41 @@
+namespace WhiteRabbit
+{
+    // A word (plus trailing space) pre-rendered at each of the 32 possible byte offsets of a 32-byte slot.
+    class Word
+    {
+        // The bytes passed to the constructor followed by one trailing ' ' byte.
+        public byte[] Original;
+
+        // 128 longs = 32 groups of 4 longs; group k holds Original written from byte k on, clipped at byte 32, rest zero.
+        public long[] Buffers { get; }
+
+        // Original.Length * 4: how far an index into Buffers advances after this word has been placed.
+        public int LengthX4 { get; }
+
+        public unsafe Word(byte[] word)
+        {
+            var padded = new byte[word.Length + 1];
+            word.CopyTo(padded, 0);
+            padded[word.Length] = (byte)' ';
+            this.Original = padded;
+            this.LengthX4 = padded.Length * 4;
+
+            var groups = new long[128];
+            fixed (long* groupsPointer = groups)
+            {
+                var bytes = (byte*)groupsPointer;
+                for (var shift = 0; shift < 32; shift++)
+                {
+                    // Group `shift` starts at byte 32 * shift; the word begins `shift` bytes into that group.
+                    var groupStart = 32 * shift;
+                    for (var k = 0; k < padded.Length && shift + k < 32; k++)
+                    {
+                        bytes[groupStart + shift + k] = padded[k];
+                    }
+                }
+            }
+
+            this.Buffers = groups;
+        }
+    }
+}