Hash checking optimization

master
Inga 🏳‍🌈 8 years ago
parent b6afbe9528
commit bbc7761333
  1. 12
      README.md
  2. 4
      dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp
  3. 2
      dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h
  4. 33
      dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp
  5. 2
      dotnet/WhiteRabbit.UnmanagedBridge/md5.h
  6. 44
      dotnet/WhiteRabbit/PhraseSet.cs
  7. 10
      dotnet/WhiteRabbit/Program.cs
  8. 8
      dotnet/WhiteRabbit/StringsProcessor.cs

@ -47,12 +47,12 @@ Number of words|Time to check all anagrams no longer than that|Time to solve "ea
---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|--------------------------------------------- ---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|---------------------------------------------
3|0.04s||||4560 3|0.04s||||4560
4|0.45s|||0.08s|7,431,984 4|0.45s|||0.08s|7,431,984
5|10s|0.15s|0.06s|0.29s|1,347,437,484 5|9.6s|0.15s|0.06s|0.29s|1,347,437,484
6|5 minutes|0.9s|0.2s|2.3s|58,405,904,844 6|4.5 minutes|0.85s|0.17s|2.05s|58,405,904,844
7|94 minutes|5.1s|0.7s|14.5s|1,070,307,744,114 7|83 minutes|4.7s|0.6s|13.3s|1,070,307,744,114
8|16 hours|19s|2s|59s|10,893,594,396,594 8|14 hours|17.6s|1.8s|55s|10,893,594,396,594
9||49s|4.4s|2.7 minutes|70,596,864,409,954 9||45s|4s|2.5 minutes|70,596,864,409,954
10||89s|6.5s|5.2 minutes|314,972,701,475,754 10||80s|5.8s|4.8 minutes|314,972,701,475,754
Note that all measurements were done on a Release build; Debug build is significantly slower. Note that all measurements were done on a Release build; Debug build is significantly slower.

@ -6,10 +6,10 @@
#include "md5.h" #include "md5.h"
#include "phraseset.h" #include "phraseset.h"
void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input) void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input, unsigned __int32 * expected)
{ {
#if AVX2 #if AVX2
md5(input + 0 * 8 * 8); md5(input + 0 * 8 * 8, expected);
#elif SIMD #elif SIMD
md5(input + 0 * 8 * 4); md5(input + 0 * 8 * 4);
md5(input + 1 * 8 * 4); md5(input + 1 * 8 * 4);

@ -12,7 +12,7 @@ namespace WhiteRabbitUnmanagedBridge {
{ {
public: public:
literal int PhrasesPerSet = PHRASES_PER_SET; literal int PhrasesPerSet = PHRASES_PER_SET;
static void ComputeMD5(unsigned int* input); static void ComputeMD5(unsigned int* input, unsigned __int32 * expected);
static void FillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords); static void FillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords);
}; };
} }

@ -81,23 +81,18 @@ _mm256_set_epi32( \
input[offset + 8 * 8]) \ input[offset + 8 * 8]) \
) )
#define WRITE_TO_OUTPUT(a, output) \ #define PERMUTE_MASK 85
output[7 + 0 * 8] = a.m_V0.m256i_u32[0]; \
output[7 + 1 * 8] = a.m_V0.m256i_u32[1]; \ #define WRITE_TO_OUTPUT(a, output, expected) \
output[7 + 2 * 8] = a.m_V0.m256i_u32[2]; \ output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * PERMUTE_MASK))); \
output[7 + 3 * 8] = a.m_V0.m256i_u32[3]; \ output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * PERMUTE_MASK))); \
output[7 + 4 * 8] = a.m_V0.m256i_u32[4]; \ output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * PERMUTE_MASK))); \
output[7 + 5 * 8] = a.m_V0.m256i_u32[5]; \ output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * PERMUTE_MASK))); \
output[7 + 6 * 8] = a.m_V0.m256i_u32[6]; \ output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * PERMUTE_MASK))); \
output[7 + 7 * 8] = a.m_V0.m256i_u32[7]; \ output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * PERMUTE_MASK))); \
output[7 + 8 * 8] = a.m_V1.m256i_u32[0]; \ output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * PERMUTE_MASK))); \
output[7 + 9 * 8] = a.m_V1.m256i_u32[1]; \ output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * PERMUTE_MASK))); \
output[7 + 10 * 8] = a.m_V1.m256i_u32[2]; \ output[8] = _mm256_movemask_epi8(_mm256_or_si256(*((__m256i*)output), _mm256_slli_epi16(*((__m256i*)output), 4)));
output[7 + 11 * 8] = a.m_V1.m256i_u32[3]; \
output[7 + 12 * 8] = a.m_V1.m256i_u32[4]; \
output[7 + 13 * 8] = a.m_V1.m256i_u32[5]; \
output[7 + 14 * 8] = a.m_V1.m256i_u32[6]; \
output[7 + 15 * 8] = a.m_V1.m256i_u32[7];
#elif SIMD #elif SIMD
@ -246,7 +241,7 @@ static const MD5Parameters Parameters = {
#define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) #define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))
#define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))) #define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a)))
void md5(unsigned __int32 * input) void md5(unsigned __int32 * input, unsigned __int32 * expected)
{ {
MD5Vector a = CREATE_VECTOR(Parameters.Init[0]); MD5Vector a = CREATE_VECTOR(Parameters.Init[0]);
MD5Vector b = CREATE_VECTOR(Parameters.Init[1]); MD5Vector b = CREATE_VECTOR(Parameters.Init[1]);
@ -329,6 +324,6 @@ void md5(unsigned __int32 * input)
a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a); a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a);
WRITE_TO_OUTPUT(a, input); WRITE_TO_OUTPUT(a, ((__int32*)input), ((__m256i*)expected));
} }
#pragma managed #pragma managed

@ -1,3 +1,3 @@
#pragma once #pragma once
void md5(unsigned int* input); void md5(unsigned int* input, unsigned __int32 * expected);

@ -31,7 +31,7 @@
} }
} }
public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, Vector<uint> expectedHashes, Action<byte[], uint> action) public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, uint[] expectedHashesVector, Action<byte[], uint> action)
{ {
fixed (uint* bufferPointer = this.Buffer, initialBufferPointer = initialPhraseSet.Buffer) fixed (uint* bufferPointer = this.Buffer, initialBufferPointer = initialPhraseSet.Buffer)
{ {
@ -41,23 +41,39 @@
{ {
fixed (Word* allWordsPointer = allWords) fixed (Word* allWordsPointer = allWords)
{ {
for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet) fixed (uint* expectedHashesPointer = expectedHashesVector)
{ {
MD5Unmanaged.FillPhraseSet( for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet)
(ulong*)initialBufferPointer, {
(ulong*)bufferPointer, MD5Unmanaged.FillPhraseSet(
(ulong*)allWordsPointer, (ulong*)initialBufferPointer,
wordIndexesPointer, (ulong*)bufferPointer,
permutationsPointer + i, (ulong*)allWordsPointer,
wordIndexes.Length); wordIndexesPointer,
permutationsPointer + i,
wordIndexes.Length);
MD5Unmanaged.ComputeMD5(bufferPointer); MD5Unmanaged.ComputeMD5(bufferPointer, expectedHashesPointer);
for (var j = 0; j < Constants.PhrasesPerSet; j++) if (bufferPointer[Constants.PhrasesPerSet / 2] != 0)
{
if (Vector.EqualsAny(expectedHashes, new Vector<uint>(bufferPointer[j * 8 + 7])))
{ {
action(this.GetBytes(j), bufferPointer[j * 8 + 7]); for (var j = 0; j < Constants.PhrasesPerSet; j++)
{
var match = (bufferPointer[j / 2] >> (4 * (j % 2))) & 0xF0F0F0F;
if (match != 0)
{
var bufferInfo = ((ulong)bufferPointer[Constants.PhrasesPerSet] << 32) | bufferPointer[j];
MD5Unmanaged.FillPhraseSet(
(ulong*)initialBufferPointer,
(ulong*)bufferPointer,
(ulong*)allWordsPointer,
wordIndexesPointer,
permutationsPointer + i,
wordIndexes.Length);
action(this.GetBytes(j), match);
break;
}
}
} }
} }
} }

@ -50,17 +50,15 @@
Console.WriteLine("Only 64-bit systems are supported due to MD5Digest optimizations"); Console.WriteLine("Only 64-bit systems are supported due to MD5Digest optimizations");
} }
Vector<uint> expectedHashesFirstComponents; var expectedHashesFirstComponentsArray = new uint[8];
{ {
var expectedHashesFirstComponentsArray = new uint[Vector<uint>.Count];
int i = 0; int i = 0;
foreach (var expectedHash in ConfigurationManager.AppSettings["ExpectedHashes"].Split(',')) foreach (var expectedHash in ConfigurationManager.AppSettings["ExpectedHashes"].Split(','))
{ {
expectedHashesFirstComponentsArray[i] = HexadecimalStringToUnsignedIntArray(expectedHash)[0]; expectedHashesFirstComponentsArray[i] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
i++; expectedHashesFirstComponentsArray[i + 1] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
i += 2;
} }
expectedHashesFirstComponents = new Vector<uint>(expectedHashesFirstComponentsArray);
} }
var processor = new StringsProcessor( var processor = new StringsProcessor(
@ -77,7 +75,7 @@
stopwatch.Restart(); stopwatch.Restart();
processor.CheckPhrases(expectedHashesFirstComponents, (phraseBytes, hashFirstComponent) => processor.CheckPhrases(expectedHashesFirstComponentsArray, (phraseBytes, hashFirstComponent) =>
{ {
var phrase = Encoding.ASCII.GetString(phraseBytes); var phrase = Encoding.ASCII.GetString(phraseBytes);
var hash = ComputeFullMD5(phraseBytes); var hash = ComputeFullMD5(phraseBytes);

@ -61,13 +61,13 @@
private int NumberOfCharacters { get; } private int NumberOfCharacters { get; }
public void CheckPhrases(Vector<uint> expectedHashes, Action<byte[], uint> action) public void CheckPhrases(uint[] expectedHashesVector, Action<byte[], uint> action)
{ {
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences(); var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words... // converting sequences of vectors to the sequences of words...
Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashes, action)); Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashesVector, action));
} }
public long GetPhrasesCount() public long GetPhrasesCount()
@ -119,7 +119,7 @@
return result; return result;
} }
private void ProcessSum(int[] sum, Vector<uint> expectedHashes, Action<byte[], uint> action) private void ProcessSum(int[] sum, uint[] expectedHashesVector, Action<byte[], uint> action)
{ {
var initialPhraseSet = new PhraseSet(); var initialPhraseSet = new PhraseSet();
initialPhraseSet.Init(); initialPhraseSet.Init();
@ -135,7 +135,7 @@
this.AllWords, this.AllWords,
wordsArray, wordsArray,
PrecomputedPermutationsGenerator.HamiltonianPermutations(wordsArray.Length, permutationsFilter), PrecomputedPermutationsGenerator.HamiltonianPermutations(wordsArray.Length, permutationsFilter),
expectedHashes, expectedHashesVector,
action); action);
} }
} }

Loading…
Cancel
Save