Hash checking optimization

8 years ago · bbc7761333
parent b6afbe9528
commit bbc7761333
8 changed files with 62 additions and 53 deletions
--- a/README.md
+++ b/README.md
@ -47,12 +47,12 @@ Number of words|Time to check all anagrams no longer than that|Time to solve "ea
 ---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|---------------------------------------------
 3|0.04s||||4560
 4|0.45s|||0.08s|7,431,984
-5|10s|0.15s|0.06s|0.29s|1,347,437,484
-6|5 minutes|0.9s|0.2s|2.3s|58,405,904,844
-7|94 minutes|5.1s|0.7s|14.5s|1,070,307,744,114
-8|16 hours|19s|2s|59s|10,893,594,396,594
-9||49s|4.4s|2.7 minutes|70,596,864,409,954
-10||89s|6.5s|5.2 minutes|314,972,701,475,754
+5|9.6s|0.15s|0.06s|0.29s|1,347,437,484
+6|4.5 minutes|0.85s|0.17s|2.05s|58,405,904,844
+7|83 minutes|4.7s|0.6s|13.3s|1,070,307,744,114
+8|14 hours|17.6s|1.8s|55s|10,893,594,396,594
+9||45s|4s|2.5 minutes|70,596,864,409,954
+10||80s|5.8s|4.8 minutes|314,972,701,475,754

 Note that all measurements were done on a Release build; Debug build is significantly slower.

--- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp
+++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp
@ -6,10 +6,10 @@
 #include "md5.h"
 #include "phraseset.h"

-void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input)
+void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input, unsigned __int32 * expected)
 {
 #if AVX2
-    md5(input + 0 * 8 * 8);
+    md5(input + 0 * 8 * 8, expected);
 #elif SIMD
    md5(input + 0 * 8 * 4);
    md5(input + 1 * 8 * 4);
--- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h
+++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h
@ -12,7 +12,7 @@ namespace WhiteRabbitUnmanagedBridge {
 	{
        public:
            literal int PhrasesPerSet = PHRASES_PER_SET;
-            static void ComputeMD5(unsigned int* input);
+            static void ComputeMD5(unsigned int* input, unsigned __int32 * expected);
            static void FillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords);
 	};
 }
--- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp
+++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp
@ -81,23 +81,18 @@ _mm256_set_epi32( \
    input[offset + 8 * 8]) \
 )

-#define WRITE_TO_OUTPUT(a, output) \
-    output[7 + 0 * 8] = a.m_V0.m256i_u32[0]; \
-    output[7 + 1 * 8] = a.m_V0.m256i_u32[1]; \
-    output[7 + 2 * 8] = a.m_V0.m256i_u32[2]; \
-    output[7 + 3 * 8] = a.m_V0.m256i_u32[3]; \
-    output[7 + 4 * 8] = a.m_V0.m256i_u32[4]; \
-    output[7 + 5 * 8] = a.m_V0.m256i_u32[5]; \
-    output[7 + 6 * 8] = a.m_V0.m256i_u32[6]; \
-    output[7 + 7 * 8] = a.m_V0.m256i_u32[7]; \
-    output[7 + 8 * 8] = a.m_V1.m256i_u32[0]; \
-    output[7 + 9 * 8] = a.m_V1.m256i_u32[1]; \
-    output[7 + 10 * 8] = a.m_V1.m256i_u32[2]; \
-    output[7 + 11 * 8] = a.m_V1.m256i_u32[3]; \
-    output[7 + 12 * 8] = a.m_V1.m256i_u32[4]; \
-    output[7 + 13 * 8] = a.m_V1.m256i_u32[5]; \
-    output[7 + 14 * 8] = a.m_V1.m256i_u32[6]; \
-    output[7 + 15 * 8] = a.m_V1.m256i_u32[7];
+#define PERMUTE_MASK 85 // 0x55 == 0b01010101: multiplied by k (0..3), all four 2-bit selectors of the permute immediate equal k
+/* WRITE_TO_OUTPUT compares the 32-bit elements of a.m_V0 and a.m_V1 against *expected and
+ * stores only match masks into output, instead of storing the hash words themselves.
+ * _mm256_permute4x64_epi64(v, k * PERMUTE_MASK) broadcasts 64-bit lane k of v (two adjacent
+ * 32-bit elements) to all four lanes; _mm256_cmpeq_epi32 sets every equal 32-bit lane to
+ * all-ones; _mm256_movemask_epi8 packs that into one 4-bit nibble per 32-bit lane.
+ * output[0..7]: one 32-bit match mask per broadcast 64-bit lane (4 from m_V0, then 4 from m_V1).
+ * output[8]: output[0..7] is re-read as one 256-bit vector and OR-ed with itself shifted left
+ * by 4 within 16-bit lanes, which copies bit 3 of every byte into bit 7; the final movemask is
+ * therefore nonzero iff some nibble of output[0..7] is set, i.e. iff anything matched.
+ * NOTE(review): expected and output are dereferenced through __m256i pointers; presumably the
+ * callers pass buffers for which that is safe - confirm the alignment assumptions. */
+#define WRITE_TO_OUTPUT(a, output, expected) \
+    output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * PERMUTE_MASK))); \
+    output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * PERMUTE_MASK))); \
+    output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * PERMUTE_MASK))); \
+    output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * PERMUTE_MASK))); \
+    output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * PERMUTE_MASK))); \
+    output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * PERMUTE_MASK))); \
+    output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * PERMUTE_MASK))); \
+    output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * PERMUTE_MASK))); \
+    output[8] = _mm256_movemask_epi8(_mm256_or_si256(*((__m256i*)output), _mm256_slli_epi16(*((__m256i*)output), 4)));

 #elif SIMD

@ -246,7 +241,7 @@ static const MD5Parameters Parameters = {
 #define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))
 #define Step4E(r, a, b, c, d, k)   StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a)))

-void md5(unsigned __int32 * input)
+void md5(unsigned __int32 * input, unsigned __int32 * expected)
 {
    MD5Vector a = CREATE_VECTOR(Parameters.Init[0]);
    MD5Vector b = CREATE_VECTOR(Parameters.Init[1]);
@ -329,6 +324,6 @@ void md5(unsigned __int32 * input)

    a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a);

-    WRITE_TO_OUTPUT(a, input);
+    WRITE_TO_OUTPUT(a, ((__int32*)input), ((__m256i*)expected));
 }
 #pragma managed
--- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.h
+++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.h
@ -1,3 +1,3 @@
 #pragma once

-void md5(unsigned int* input);
+void md5(unsigned int* input, unsigned __int32 * expected);
--- a/dotnet/WhiteRabbit/PhraseSet.cs
+++ b/dotnet/WhiteRabbit/PhraseSet.cs
@ -31,7 +31,7 @@
            }
        }

-        public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, Vector<uint> expectedHashes, Action<byte[], uint> action)
+        public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, uint[] expectedHashesVector, Action<byte[], uint> action)
        {
            fixed (uint* bufferPointer = this.Buffer, initialBufferPointer = initialPhraseSet.Buffer)
            {
@ -41,23 +41,39 @@
                    {
                        fixed (Word* allWordsPointer = allWords)
                        {
-                            for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet)
+                            fixed (uint* expectedHashesPointer = expectedHashesVector)
                            {
-                                MD5Unmanaged.FillPhraseSet(
-                                    (ulong*)initialBufferPointer,
-                                    (ulong*)bufferPointer,
-                                    (ulong*)allWordsPointer,
-                                    wordIndexesPointer,
-                                    permutationsPointer + i,
-                                    wordIndexes.Length);
+                                for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet)
+                                {
+                                    MD5Unmanaged.FillPhraseSet(
+                                        (ulong*)initialBufferPointer,
+                                        (ulong*)bufferPointer,
+                                        (ulong*)allWordsPointer,
+                                        wordIndexesPointer,
+                                        permutationsPointer + i,
+                                        wordIndexes.Length);

-                                MD5Unmanaged.ComputeMD5(bufferPointer);
+                                    MD5Unmanaged.ComputeMD5(bufferPointer, expectedHashesPointer);

-                                for (var j = 0; j < Constants.PhrasesPerSet; j++)
-                                {
-                                    if (Vector.EqualsAny(expectedHashes, new Vector<uint>(bufferPointer[j * 8 + 7])))
+                                    if (bufferPointer[Constants.PhrasesPerSet / 2] != 0)
                                    {
-                                        action(this.GetBytes(j), bufferPointer[j * 8 + 7]);
+                                        for (var j = 0; j < Constants.PhrasesPerSet; j++)
+                                        {
+                                            var match = (bufferPointer[j / 2] >> (4 * (j % 2))) & 0xF0F0F0F;
+                                            if (match != 0)
+                                            {
+                                                var bufferInfo = ((ulong)bufferPointer[Constants.PhrasesPerSet] << 32) | bufferPointer[j];
+                                                MD5Unmanaged.FillPhraseSet(
+                                                    (ulong*)initialBufferPointer,
+                                                    (ulong*)bufferPointer,
+                                                    (ulong*)allWordsPointer,
+                                                    wordIndexesPointer,
+                                                    permutationsPointer + i,
+                                                    wordIndexes.Length);
+                                                action(this.GetBytes(j), match);
+                                                break;
+                                            }
+                                        }
                                    }
                                }
                            }
--- a/dotnet/WhiteRabbit/Program.cs
+++ b/dotnet/WhiteRabbit/Program.cs
@ -50,17 +50,15 @@
                Console.WriteLine("Only 64-bit systems are supported due to MD5Digest optimizations");
            }

-            Vector<uint> expectedHashesFirstComponents;
+            var expectedHashesFirstComponentsArray = new uint[8];
            {
-                var expectedHashesFirstComponentsArray = new uint[Vector<uint>.Count];
                int i = 0;
                foreach (var expectedHash in ConfigurationManager.AppSettings["ExpectedHashes"].Split(','))
                {
                    expectedHashesFirstComponentsArray[i] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
-                    i++;
+                    expectedHashesFirstComponentsArray[i + 1] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
+                    i += 2;
                }
-
-                expectedHashesFirstComponents = new Vector<uint>(expectedHashesFirstComponentsArray);
            }

            var processor = new StringsProcessor(
@ -77,7 +75,7 @@

            stopwatch.Restart();

-            processor.CheckPhrases(expectedHashesFirstComponents, (phraseBytes, hashFirstComponent) =>
+            processor.CheckPhrases(expectedHashesFirstComponentsArray, (phraseBytes, hashFirstComponent) =>
            {
                var phrase = Encoding.ASCII.GetString(phraseBytes);
                var hash = ComputeFullMD5(phraseBytes);
--- a/dotnet/WhiteRabbit/StringsProcessor.cs
+++ b/dotnet/WhiteRabbit/StringsProcessor.cs
@ -61,13 +61,13 @@

        private int NumberOfCharacters { get; }

-        public void CheckPhrases(Vector<uint> expectedHashes, Action<byte[], uint> action)
+        public void CheckPhrases(uint[] expectedHashesVector, Action<byte[], uint> action)
        {
            // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
            var sums = this.VectorsProcessor.GenerateSequences();

            // converting sequences of vectors to the sequences of words...
-            Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashes, action));
+            Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashesVector, action));
        }

        public long GetPhrasesCount()
@ -119,7 +119,7 @@
            return result;
        }

-        private void ProcessSum(int[] sum, Vector<uint> expectedHashes, Action<byte[], uint> action)
+        private void ProcessSum(int[] sum, uint[] expectedHashesVector, Action<byte[], uint> action)
        {
            var initialPhraseSet = new PhraseSet();
            initialPhraseSet.Init();
@ -135,7 +135,7 @@
                    this.AllWords,
                    wordsArray,
                    PrecomputedPermutationsGenerator.HamiltonianPermutations(wordsArray.Length, permutationsFilter),
-                    expectedHashes,
+                    expectedHashesVector,
                    action);
            }
        }