From fba2d3e10e6160d1c224db18cc494b30c4503984 Mon Sep 17 00:00:00 2001
From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com>
Date: Tue, 4 Apr 2017 16:45:03 +0300
Subject: [PATCH] Refactored to use phrasesets

---
Review notes (placed after the three-dash separator, where format-patch
itself puts notes, so they are not part of the commit message):

* PhraseSet constructor: the first word of every phrase in a set was read
  from permutations[offset] instead of the current permutation
  (permutations[offset + i]), while all following words already used the
  current permutation. It now uses the local `permutation` throughout.
* StringsProcessor.ConvertWordsToPhrases: every PhraseSet consumes
  Constants.PhrasesPerSet consecutive permutations starting at the given
  offset, and GeneratePermutations pads each array to a multiple of that
  size, so the loop now advances by Constants.PhrasesPerSet. With a step of
  1 the sets would overlap and the last ones would index past the array.
* Both fixes change nothing while Constants.PhrasesPerSet == 1. Hunk line
  counts are unchanged; the abbreviated blob ids on the `index` lines of
  PhraseSet.cs and StringsProcessor.cs no longer describe the new contents.
* Not changed here: PrecomputedPermutationsGenerator.HamiltonianPermutations
  now indexes the 8-entry table directly, so n >= 8 has no fallback.
* NOTE(review): this copy of the patch appears to have lost everything that
  was written in angle brackets (generic type arguments, the csproj XML).
  Those places are left exactly as found - restore from the original commit.

 dotnet/WhiteRabbit/Constants.cs               |  7 +++
 dotnet/WhiteRabbit/MD5Digest.cs               | 23 ++++---
 dotnet/WhiteRabbit/Phrase.cs                  | 52 ----------------
 dotnet/WhiteRabbit/PhraseSet.cs               | 60 +++++++++++++++++++
 .../PrecomputedPermutationsGenerator.cs       | 30 ++++++----
 dotnet/WhiteRabbit/Program.cs                 | 49 +++++----------
 dotnet/WhiteRabbit/StringsProcessor.cs        | 10 ++--
 dotnet/WhiteRabbit/WhiteRabbit.csproj         |  3 +-
 8 files changed, 121 insertions(+), 113 deletions(-)
 create mode 100644 dotnet/WhiteRabbit/Constants.cs
 delete mode 100644 dotnet/WhiteRabbit/Phrase.cs
 create mode 100644 dotnet/WhiteRabbit/PhraseSet.cs

diff --git a/dotnet/WhiteRabbit/Constants.cs b/dotnet/WhiteRabbit/Constants.cs
new file mode 100644
index 0000000..4328b55
--- /dev/null
+++ b/dotnet/WhiteRabbit/Constants.cs
@@ -0,0 +1,7 @@
+namespace WhiteRabbit
+{
+    internal class Constants
+    {
+        public const int PhrasesPerSet = 1;
+    }
+}
diff --git a/dotnet/WhiteRabbit/MD5Digest.cs b/dotnet/WhiteRabbit/MD5Digest.cs
index d8e05d2..df27888 100644
--- a/dotnet/WhiteRabbit/MD5Digest.cs
+++ b/dotnet/WhiteRabbit/MD5Digest.cs
@@ -14,16 +14,21 @@ namespace WhiteRabbit
     internal static class MD5Digest
     {
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector Compute(Phrase input)
+        public static unsafe Vector[] Compute(PhraseSet input)
         {
-            var result = stackalloc uint[4];
-            MD5Unmanaged.ComputeMD5(input.Buffer, result);
-            return new Vector(new[] {
-                result[0],
-                result[1],
-                result[2],
-                result[3],
-            });
+            var rawResult = new uint[4 * Constants.PhrasesPerSet];
+            fixed (uint* resultPointer = rawResult)
+            {
+                MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer);
+            }
+
+            var result = new Vector[Constants.PhrasesPerSet];
+            for (var i = 0; i < Constants.PhrasesPerSet; i++)
+            {
+                result[i] = new Vector(rawResult, 4 * i);
+            }
+
+            return result;
         }
     }
 }
diff --git a/dotnet/WhiteRabbit/Phrase.cs b/dotnet/WhiteRabbit/Phrase.cs
deleted file mode 100644
index 5faab25..0000000
--- a/dotnet/WhiteRabbit/Phrase.cs
+++ /dev/null
@@ -1,52 +0,0 @@
-namespace WhiteRabbit
-{
-    // Anagram representation optimized for MD5
-    internal unsafe struct Phrase
-    {
-        public fixed uint Buffer[8];
-
-        public Phrase(byte[][] words, PermutationsGenerator.Permutation permutation, int numberOfCharacters)
-        {
-            fixed (uint* bufferPointer = this.Buffer)
-            {
-                var length = numberOfCharacters + words.Length - 1;
-
-                byte[] currentWord = words[permutation[0]];
-                var j = 0;
-                var wordIndex = 0;
-                var currentPointer = (byte*)bufferPointer;
-                byte* lastPointer = currentPointer + length;
-                for (; currentPointer < lastPointer; currentPointer++)
-                {
-                    if (j >= currentWord.Length)
-                    {
-                        j = 0;
-                        wordIndex++;
-                        currentWord = words[permutation[wordIndex]];
-                    }
-
-                    *currentPointer = currentWord[j];
-                    j++;
-                }
-                *currentPointer = 128;
-
-                bufferPointer[7] = (uint)(length << 3);
-            }
-        }
-
-        public byte[] GetBytes()
-        {
-            fixed(uint* bufferPointer = this.Buffer)
-            {
-                var length = bufferPointer[7] >> 3;
-                var result = new byte[length];
-                for (var i = 0; i < length; i++)
-                {
-                    result[i] = ((byte*)bufferPointer)[i];
-                }
-
-                return result;
-            }
-        }
-    }
-}
diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs
new file mode 100644
index 0000000..ed57af3
--- /dev/null
+++ b/dotnet/WhiteRabbit/PhraseSet.cs
@@ -0,0 +1,60 @@
+namespace WhiteRabbit
+{
+    // Anagram representation optimized for MD5
+    internal unsafe struct PhraseSet
+    {
+        public fixed uint Buffer[8 * Constants.PhrasesPerSet];
+
+        public PhraseSet(byte[][] words, PermutationsGenerator.Permutation[] permutations, int offset, int numberOfCharacters)
+        {
+            fixed (uint* bufferPointer = this.Buffer)
+            {
+                var length = numberOfCharacters + words.Length - 1;
+
+                for (var i = 0; i < Constants.PhrasesPerSet; i++)
+                {
+                    var permutation = permutations[offset + i];
+                    var startPointer = bufferPointer + i * 8;
+                    byte[] currentWord = words[permutation[0]];
+                    var j = 0;
+                    var wordIndex = 0;
+                    var currentPointer = (byte*)startPointer;
+                    byte* lastPointer = currentPointer + length;
+                    for (; currentPointer < lastPointer; currentPointer++)
+                    {
+                        if (j >= currentWord.Length)
+                        {
+                            j = 0;
+                            wordIndex++;
+                            currentWord = words[permutation[wordIndex]];
+                        }
+
+                        *currentPointer = currentWord[j];
+                        j++;
+                    }
+                    *currentPointer = 128;
+
+                    startPointer[7] = (uint)(length << 3);
+                }
+            }
+        }
+
+        public byte[] GetBytes(int number)
+        {
+            System.Diagnostics.Debug.Assert(number < Constants.PhrasesPerSet);
+
+            fixed(uint* bufferPointer = this.Buffer)
+            {
+                var phrasePointer = bufferPointer + 8 * number;
+                var length = phrasePointer[7] >> 3;
+                var result = new byte[length];
+                for (var i = 0; i < length; i++)
+                {
+                    result[i] = ((byte*)phrasePointer)[i];
+                }
+
+                return result;
+            }
+        }
+    }
+}
diff --git a/dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs b/dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs
index 78e7b93..6424083 100644
--- a/dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs
+++ b/dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs
@@ -7,24 +7,30 @@
     {
         private static PermutationsGenerator.Permutation[][] Permutations { get; } = new[]
         {
-            PermutationsGenerator.HamiltonianPermutations(0).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(1).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(2).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(3).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(4).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(5).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(6).ToArray(),
-            PermutationsGenerator.HamiltonianPermutations(7).ToArray(),
+            GeneratePermutations(0),
+            GeneratePermutations(1),
+            GeneratePermutations(2),
+            GeneratePermutations(3),
+            GeneratePermutations(4),
+            GeneratePermutations(5),
+            GeneratePermutations(6),
+            GeneratePermutations(7),
         };
 
-        public static IEnumerable HamiltonianPermutations(int n)
+        public static PermutationsGenerator.Permutation[] HamiltonianPermutations(int n)
         {
-            if (n > 9)
+            return Permutations[n];
+        }
+
+        private static PermutationsGenerator.Permutation[] GeneratePermutations(int n)
+        {
+            var result = PermutationsGenerator.HamiltonianPermutations(n).ToArray();
+            if (result.Length % Constants.PhrasesPerSet == 0)
             {
-                return PermutationsGenerator.HamiltonianPermutations(n);
+                return result;
             }
 
-            return Permutations[n];
+            return result.Concat(Enumerable.Repeat(result[0], Constants.PhrasesPerSet - (result.Length % Constants.PhrasesPerSet))).ToArray();
         }
     }
 }
diff --git a/dotnet/WhiteRabbit/Program.cs b/dotnet/WhiteRabbit/Program.cs
index 8b9178f..fb9f5d7 100644
--- a/dotnet/WhiteRabbit/Program.cs
+++ b/dotnet/WhiteRabbit/Program.cs
@@ -49,10 +49,6 @@
                 .Select(hash => new Vector(HexadecimalStringToUnsignedIntArray(hash)))
                 .ToArray();
 
-#if DEBUG
-            var anagramsBag = new ConcurrentBag();
-#endif
-
             var processor = new StringsProcessor(
                 Encoding.ASCII.GetBytes(sourcePhrase),
                 maxWordsInPhrase,
@@ -70,41 +66,24 @@
             processor.GeneratePhrases()
                 .ForAll(phraseBytes =>
                 {
-                    Debug.Assert(
-                        sourceChars == ToOrderedChars(ToString(phraseBytes)),
-                        $"StringsProcessor produced incorrect anagram: {ToString(phraseBytes)}");
-
-                    var hashVector = MD5Digest.Compute(phraseBytes);
-                    if (Array.IndexOf(expectedHashesAsVectors, hashVector) >= 0)
+                    var hashVectors = MD5Digest.Compute(phraseBytes);
+                    for (var i = 0; i < Constants.PhrasesPerSet; i++)
                     {
-                        var phrase = ToString(phraseBytes);
-                        var hash = VectorToHexadecimalString(hashVector);
-                        Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
+                        Debug.Assert(
+                            sourceChars == ToOrderedChars(ToString(phraseBytes, i)),
+                            $"StringsProcessor produced incorrect anagram: {ToString(phraseBytes, i)}");
+
+                        if (Array.IndexOf(expectedHashesAsVectors, hashVectors[i]) >= 0)
+                        {
+                            var phrase = ToString(phraseBytes, i);
+                            var hash = VectorToHexadecimalString(hashVectors[i]);
+                            Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
+                        }
                     }
-
-#if DEBUG
-                    anagramsBag.Add(ToString(phraseBytes));
-#endif
                 });
 
             Console.WriteLine($"Done; time from start: {stopwatch.Elapsed}");
 
-#if DEBUG
-            var anagramsArray = anagramsBag.ToArray();
-            var anagramsSet = new HashSet(anagramsArray);
-            Array.Sort(anagramsArray);
-
-            Console.WriteLine("All anagrams:");
-            for (var i = 0; i < anagramsArray.Length; i++)
-            {
-                Console.WriteLine(anagramsArray[i]);
-            }
-
-            // Duplicate anagrams are expected, as e.g. "norway spoils tut tut" will be taken twice:
-            // as "norway1 spoils2 tut3 tut4" and "norway1 spoils2 tut4 tut3"
-            // (in addition to e.g. "norway1 tut3 spoils2 tut4")
-            Console.WriteLine($"Total anagrams count: {anagramsArray.Length}; unique anagrams: {anagramsSet.Count}; time from start: {stopwatch.Elapsed}");
-#endif
         }
 
         // Code taken from http://stackoverflow.com/a/321404/831314
@@ -131,9 +110,9 @@
             return hex.Substring(6, 2) + hex.Substring(4, 2) + hex.Substring(2, 2) + hex.Substring(0, 2);
         }
 
-        private static string ToString(Phrase phrase)
+        private static string ToString(PhraseSet phrase, int offset)
         {
-            return Encoding.ASCII.GetString(phrase.GetBytes());
+            return Encoding.ASCII.GetString(phrase.GetBytes(offset));
         }
 
         private static IEnumerable ReadInput()
diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs
index 6f7ef16..db4d7f4 100644
--- a/dotnet/WhiteRabbit/StringsProcessor.cs
+++ b/dotnet/WhiteRabbit/StringsProcessor.cs
@@ -52,7 +52,7 @@
 #if SINGLE_THREADED
         public IEnumerable GeneratePhrases()
 #else
-        public ParallelQuery GeneratePhrases()
+        public ParallelQuery GeneratePhrases()
 #endif
         {
             // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
@@ -85,11 +85,13 @@
             return words;
         }
-        private IEnumerable ConvertWordsToPhrases(byte[][] words)
+        private IEnumerable ConvertWordsToPhrases(byte[][] words)
         {
-            foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length))
+            var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length);
+            var permutationsLength = permutations.Length;
+            for (var i = 0; i < permutationsLength; i += Constants.PhrasesPerSet)
             {
-                yield return new Phrase(words, permutation, this.NumberOfCharacters);
+                yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters);
             }
         }
     }
 }
diff --git a/dotnet/WhiteRabbit/WhiteRabbit.csproj b/dotnet/WhiteRabbit/WhiteRabbit.csproj
index 1592db4..7392e3e 100644
--- a/dotnet/WhiteRabbit/WhiteRabbit.csproj
+++ b/dotnet/WhiteRabbit/WhiteRabbit.csproj
@@ -58,9 +58,10 @@
+
-
+