From bbc77613336bb36b509ac6514dc4d06c5d4149c7 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Fri, 12 May 2017 10:00:48 +0300 Subject: [PATCH] Hash checking optimization --- README.md | 12 ++--- .../WhiteRabbit.UnmanagedBridge.cpp | 4 +- .../WhiteRabbit.UnmanagedBridge.h | 2 +- dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp | 33 ++++++-------- dotnet/WhiteRabbit.UnmanagedBridge/md5.h | 2 +- dotnet/WhiteRabbit/PhraseSet.cs | 44 +++++++++++++------ dotnet/WhiteRabbit/Program.cs | 10 ++--- dotnet/WhiteRabbit/StringsProcessor.cs | 8 ++-- 8 files changed, 62 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 1ecf567..2527bcb 100644 --- a/README.md +++ b/README.md @@ -47,12 +47,12 @@ Number of words|Time to check all anagrams no longer than that|Time to solve "ea ---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|--------------------------------------------- 3|0.04s||||4560 4|0.45s|||0.08s|7,431,984 -5|10s|0.15s|0.06s|0.29s|1,347,437,484 -6|5 minutes|0.9s|0.2s|2.3s|58,405,904,844 -7|94 minutes|5.1s|0.7s|14.5s|1,070,307,744,114 -8|16 hours|19s|2s|59s|10,893,594,396,594 -9||49s|4.4s|2.7 minutes|70,596,864,409,954 -10||89s|6.5s|5.2 minutes|314,972,701,475,754 +5|9.6s|0.15s|0.06s|0.29s|1,347,437,484 +6|4.5 minutes|0.85s|0.17s|2.05s|58,405,904,844 +7|83 minutes|4.7s|0.6s|13.3s|1,070,307,744,114 +8|14 hours|17.6s|1.8s|55s|10,893,594,396,594 +9||45s|4s|2.5 minutes|70,596,864,409,954 +10||80s|5.8s|4.8 minutes|314,972,701,475,754 Note that all measurements were done on a Release build; Debug build is significantly slower. diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp index e45ab64..4ba46fc 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp @@ -6,10 +6,10 @@ #include "md5.h" #include "phraseset.h" -void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input) +void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input, unsigned __int32 * expected) { #if AVX2 - md5(input + 0 * 8 * 8); + md5(input + 0 * 8 * 8, expected); #elif SIMD md5(input + 0 * 8 * 4); md5(input + 1 * 8 * 4); diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h index 35ecab5..8294cd2 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h +++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h @@ -12,7 +12,7 @@ namespace WhiteRabbitUnmanagedBridge { { public: literal int PhrasesPerSet = PHRASES_PER_SET; - static void ComputeMD5(unsigned int* input); + static void ComputeMD5(unsigned int* input, unsigned __int32 * expected); static void FillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords); }; } diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index e87f5db..600eb8a 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -81,23 +81,18 @@ _mm256_set_epi32( \ input[offset + 8 * 8]) \ ) -#define WRITE_TO_OUTPUT(a, output) \ - output[7 + 0 * 8] = a.m_V0.m256i_u32[0]; \ - output[7 + 1 * 8] = a.m_V0.m256i_u32[1]; \ - output[7 + 2 * 8] = a.m_V0.m256i_u32[2]; \ - output[7 + 3 * 8] = a.m_V0.m256i_u32[3]; \ - output[7 + 4 * 8] = a.m_V0.m256i_u32[4]; \ - output[7 + 5 * 8] = a.m_V0.m256i_u32[5]; \ - output[7 + 6 * 8] = a.m_V0.m256i_u32[6]; \ - output[7 + 7 * 8] = a.m_V0.m256i_u32[7]; \ - output[7 + 8 * 8] = a.m_V1.m256i_u32[0]; \ - output[7 + 9 * 8] = a.m_V1.m256i_u32[1]; \ - output[7 + 10 * 8] = a.m_V1.m256i_u32[2]; \ - output[7 + 11 * 8] = a.m_V1.m256i_u32[3]; \ - output[7 + 12 * 8] = a.m_V1.m256i_u32[4]; \ - output[7 + 13 * 8] = a.m_V1.m256i_u32[5]; \ - output[7 + 14 * 8] = a.m_V1.m256i_u32[6]; \ - output[7 + 15 * 8] = a.m_V1.m256i_u32[7]; +#define PERMUTE_MASK 85 + +#define WRITE_TO_OUTPUT(a, output, expected) \ + output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * PERMUTE_MASK))); \ + output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * PERMUTE_MASK))); \ + output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * PERMUTE_MASK))); \ + output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * PERMUTE_MASK))); \ + output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * PERMUTE_MASK))); \ + output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * PERMUTE_MASK))); \ + output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * PERMUTE_MASK))); \ + output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * PERMUTE_MASK))); \ + output[8] = _mm256_movemask_epi8(_mm256_or_si256(*((__m256i*)output), _mm256_slli_epi16(*((__m256i*)output), 4))); #elif SIMD @@ -246,7 +241,7 @@ static const MD5Parameters Parameters = { #define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) #define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))) -void md5(unsigned __int32 * input) +void md5(unsigned __int32 * input, unsigned __int32 * expected) { MD5Vector a = CREATE_VECTOR(Parameters.Init[0]); MD5Vector b = CREATE_VECTOR(Parameters.Init[1]); @@ -329,6 +324,6 @@ void md5(unsigned __int32 * input) a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a); - WRITE_TO_OUTPUT(a, input); + WRITE_TO_OUTPUT(a, ((__int32*)input), ((__m256i*)expected)); } #pragma managed diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.h b/dotnet/WhiteRabbit.UnmanagedBridge/md5.h index 6f5ff4f..082b505 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.h +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.h @@ -1,3 +1,3 @@ #pragma once -void md5(unsigned int* input); +void md5(unsigned int* input, unsigned __int32 * expected); diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs index ee0e3cc..8878378 100644 --- a/dotnet/WhiteRabbit/PhraseSet.cs +++ b/dotnet/WhiteRabbit/PhraseSet.cs @@ -31,7 +31,7 @@ } } - public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, Vector expectedHashes, Action action) + public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, uint[] expectedHashesVector, Action action) { fixed (uint* bufferPointer = this.Buffer, initialBufferPointer = initialPhraseSet.Buffer) { @@ -41,23 +41,39 @@ { fixed (Word* allWordsPointer = allWords) { - for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet) + fixed (uint* expectedHashesPointer = expectedHashesVector) { - MD5Unmanaged.FillPhraseSet( - (ulong*)initialBufferPointer, - (ulong*)bufferPointer, - (ulong*)allWordsPointer, - wordIndexesPointer, - permutationsPointer + i, - wordIndexes.Length); + for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet) + { + MD5Unmanaged.FillPhraseSet( + (ulong*)initialBufferPointer, + (ulong*)bufferPointer, + (ulong*)allWordsPointer, + wordIndexesPointer, + permutationsPointer + i, + wordIndexes.Length); - MD5Unmanaged.ComputeMD5(bufferPointer); + MD5Unmanaged.ComputeMD5(bufferPointer, expectedHashesPointer); - for (var j = 0; j < Constants.PhrasesPerSet; j++) - { - if (Vector.EqualsAny(expectedHashes, new Vector(bufferPointer[j * 8 + 7]))) + if (bufferPointer[Constants.PhrasesPerSet / 2] != 0) { - action(this.GetBytes(j), bufferPointer[j * 8 + 7]); + for (var j = 0; j < Constants.PhrasesPerSet; j++) + { + var match = (bufferPointer[j / 2] >> (4 * (j % 2))) & 0xF0F0F0F; + if (match != 0) + { + var bufferInfo = ((ulong)bufferPointer[Constants.PhrasesPerSet] << 32) | bufferPointer[j]; + MD5Unmanaged.FillPhraseSet( + (ulong*)initialBufferPointer, + (ulong*)bufferPointer, + (ulong*)allWordsPointer, + wordIndexesPointer, + permutationsPointer + i, + wordIndexes.Length); + action(this.GetBytes(j), match); + break; + } + } } } } diff --git a/dotnet/WhiteRabbit/Program.cs b/dotnet/WhiteRabbit/Program.cs index b88b699..fcfbb96 100644 --- a/dotnet/WhiteRabbit/Program.cs +++ b/dotnet/WhiteRabbit/Program.cs @@ -50,17 +50,15 @@ Console.WriteLine("Only 64-bit systems are supported due to MD5Digest optimizations"); } - Vector expectedHashesFirstComponents; + var expectedHashesFirstComponentsArray = new uint[8]; { - var expectedHashesFirstComponentsArray = new uint[Vector.Count]; int i = 0; foreach (var expectedHash in ConfigurationManager.AppSettings["ExpectedHashes"].Split(',')) { expectedHashesFirstComponentsArray[i] = HexadecimalStringToUnsignedIntArray(expectedHash)[0]; - i++; + expectedHashesFirstComponentsArray[i + 1] = HexadecimalStringToUnsignedIntArray(expectedHash)[0]; + i += 2; } - - expectedHashesFirstComponents = new Vector(expectedHashesFirstComponentsArray); } var processor = new StringsProcessor( @@ -77,7 +75,7 @@ stopwatch.Restart(); - processor.CheckPhrases(expectedHashesFirstComponents, (phraseBytes, hashFirstComponent) => + processor.CheckPhrases(expectedHashesFirstComponentsArray, (phraseBytes, hashFirstComponent) => { var phrase = Encoding.ASCII.GetString(phraseBytes); var hash = ComputeFullMD5(phraseBytes); diff --git a/dotnet/WhiteRabbit/StringsProcessor.cs b/dotnet/WhiteRabbit/StringsProcessor.cs index 1b01936..bd69920 100644 --- a/dotnet/WhiteRabbit/StringsProcessor.cs +++ b/dotnet/WhiteRabbit/StringsProcessor.cs @@ -61,13 +61,13 @@ private int NumberOfCharacters { get; } - public void CheckPhrases(Vector expectedHashes, Action action) + public void CheckPhrases(uint[] expectedHashesVector, Action action) { // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum var sums = this.VectorsProcessor.GenerateSequences(); // converting sequences of vectors to the sequences of words... - Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashes, action)); + Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashesVector, action)); } public long GetPhrasesCount() @@ -119,7 +119,7 @@ return result; } - private void ProcessSum(int[] sum, Vector expectedHashes, Action action) + private void ProcessSum(int[] sum, uint[] expectedHashesVector, Action action) { var initialPhraseSet = new PhraseSet(); initialPhraseSet.Init(); @@ -135,7 +135,7 @@ this.AllWords, wordsArray, PrecomputedPermutationsGenerator.HamiltonianPermutations(wordsArray.Length, permutationsFilter), - expectedHashes, + expectedHashesVector, action); } }