From a41a57b0e4517737b8908424b8b4de103e75bae2 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Fri, 12 May 2017 11:03:29 +0300 Subject: [PATCH] Microoptimization --- dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp | 20 +++++++++----------- dotnet/WhiteRabbit/PhraseSet.cs | 6 ++++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index 600eb8a..50dbbf8 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -81,18 +81,16 @@ _mm256_set_epi32( \ input[offset + 8 * 8]) \ ) -#define PERMUTE_MASK 85 - #define WRITE_TO_OUTPUT(a, output, expected) \ - output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * PERMUTE_MASK))); \ - output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * PERMUTE_MASK))); \ - output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * PERMUTE_MASK))); \ - output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * PERMUTE_MASK))); \ - output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * PERMUTE_MASK))); \ - output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * PERMUTE_MASK))); \ - output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * PERMUTE_MASK))); \ - output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * PERMUTE_MASK))); \ - output[8] = _mm256_movemask_epi8(_mm256_or_si256(*((__m256i*)output), _mm256_slli_epi16(*((__m256i*)output), 4))); + output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * 0x55))); \ + output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * 0x55))); \ + output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * 0x55))); \ + output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * 0x55))); \ + output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * 0x55))); \ + output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * 0x55))); \ + output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * 0x55))); \ + output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * 0x55))); \ + output[8] = _mm256_movemask_epi8(_mm256_cmpeq_epi8(*((__m256i*)output), _mm256_setzero_si256())); #elif SIMD diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs index 8878378..1c93bf3 100644 --- a/dotnet/WhiteRabbit/PhraseSet.cs +++ b/dotnet/WhiteRabbit/PhraseSet.cs @@ -55,11 +55,13 @@ MD5Unmanaged.ComputeMD5(bufferPointer, expectedHashesPointer); - if (bufferPointer[Constants.PhrasesPerSet / 2] != 0) + if (bufferPointer[Constants.PhrasesPerSet / 2] != 0xFFFFFFFF) { for (var j = 0; j < Constants.PhrasesPerSet; j++) { - var match = (bufferPointer[j / 2] >> (4 * (j % 2))) & 0xF0F0F0F; + // 16 matches are packed in 8 32-bit numbers: [0,1], [8,9], [2,3], [10,11], [4, 5], [12, 13], [6, 7], [14, 15] + var position = ((j / 2) % 4) * 2 + (j / 8); + var match = (bufferPointer[position] >> (4 * (j % 2))) & 0xF0F0F0F; if (match != 0) { var bufferInfo = ((ulong)bufferPointer[Constants.PhrasesPerSet] << 32) | bufferPointer[j];