Hash checking optimization

master
Inga 🏳‍🌈 7 years ago
parent b6afbe9528
commit bbc7761333
  1. 12
      README.md
  2. 4
      dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp
  3. 2
      dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h
  4. 33
      dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp
  5. 2
      dotnet/WhiteRabbit.UnmanagedBridge/md5.h
  6. 44
      dotnet/WhiteRabbit/PhraseSet.cs
  7. 10
      dotnet/WhiteRabbit/Program.cs
  8. 8
      dotnet/WhiteRabbit/StringsProcessor.cs

@ -47,12 +47,12 @@ Number of words|Time to check all anagrams no longer than that|Time to solve "ea
---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|---------------------------------------------
3|0.04s||||4560
4|0.45s|||0.08s|7,431,984
5|10s|0.15s|0.06s|0.29s|1,347,437,484
6|5 minutes|0.9s|0.2s|2.3s|58,405,904,844
7|94 minutes|5.1s|0.7s|14.5s|1,070,307,744,114
8|16 hours|19s|2s|59s|10,893,594,396,594
9||49s|4.4s|2.7 minutes|70,596,864,409,954
10||89s|6.5s|5.2 minutes|314,972,701,475,754
5|9.6s|0.15s|0.06s|0.29s|1,347,437,484
6|4.5 minutes|0.85s|0.17s|2.05s|58,405,904,844
7|83 minutes|4.7s|0.6s|13.3s|1,070,307,744,114
8|14 hours|17.6s|1.8s|55s|10,893,594,396,594
9||45s|4s|2.5 minutes|70,596,864,409,954
10||80s|5.8s|4.8 minutes|314,972,701,475,754
Note that all measurements were done on a Release build; Debug build is significantly slower.

@ -6,10 +6,10 @@
#include "md5.h"
#include "phraseset.h"
void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input)
void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input, unsigned __int32 * expected)
{
#if AVX2
md5(input + 0 * 8 * 8);
md5(input + 0 * 8 * 8, expected);
#elif SIMD
md5(input + 0 * 8 * 4);
md5(input + 1 * 8 * 4);

@ -12,7 +12,7 @@ namespace WhiteRabbitUnmanagedBridge {
{
public:
literal int PhrasesPerSet = PHRASES_PER_SET;
static void ComputeMD5(unsigned int* input);
static void ComputeMD5(unsigned int* input, unsigned __int32 * expected);
static void FillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords);
};
}

@ -81,23 +81,18 @@ _mm256_set_epi32( \
input[offset + 8 * 8]) \
)
// Scatters the sixteen 32-bit lanes of `a` (eight from a.m_V0, then eight from a.m_V1)
// into `output`: lane k is stored at output[7 + k * 8].
#define WRITE_TO_OUTPUT(a, output) \
output[7 + 0 * 8] = a.m_V0.m256i_u32[0]; \
output[7 + 1 * 8] = a.m_V0.m256i_u32[1]; \
output[7 + 2 * 8] = a.m_V0.m256i_u32[2]; \
output[7 + 3 * 8] = a.m_V0.m256i_u32[3]; \
output[7 + 4 * 8] = a.m_V0.m256i_u32[4]; \
output[7 + 5 * 8] = a.m_V0.m256i_u32[5]; \
output[7 + 6 * 8] = a.m_V0.m256i_u32[6]; \
output[7 + 7 * 8] = a.m_V0.m256i_u32[7]; \
output[7 + 8 * 8] = a.m_V1.m256i_u32[0]; \
output[7 + 9 * 8] = a.m_V1.m256i_u32[1]; \
output[7 + 10 * 8] = a.m_V1.m256i_u32[2]; \
output[7 + 11 * 8] = a.m_V1.m256i_u32[3]; \
output[7 + 12 * 8] = a.m_V1.m256i_u32[4]; \
output[7 + 13 * 8] = a.m_V1.m256i_u32[5]; \
output[7 + 14 * 8] = a.m_V1.m256i_u32[6]; \
output[7 + 15 * 8] = a.m_V1.m256i_u32[7];
// 85 == 0b01010101: multiplying it by k (0..3) sets all four 2-bit selector fields of the
// _mm256_permute4x64_epi64 immediate to k, so the permute copies 64-bit lane k (two adjacent
// 32-bit values) into every 64-bit position of the result.
#define PERMUTE_MASK 85
// For each of the eight 64-bit lanes of a.m_V0 / a.m_V1: broadcast the lane, compare it
// element-wise (32-bit) with *expected, and store the resulting byte mask in output[0..7].
// output[8] is then a byte mask taken over output[0..7] OR-ed with itself shifted left by 4
// within each 16-bit element.
// NOTE(review): `output` and `expected` are dereferenced as __m256i*; confirm the callers'
// buffers meet whatever alignment the generated loads require.
#define WRITE_TO_OUTPUT(a, output, expected) \
output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * PERMUTE_MASK))); \
output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * PERMUTE_MASK))); \
output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * PERMUTE_MASK))); \
output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * PERMUTE_MASK))); \
output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * PERMUTE_MASK))); \
output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * PERMUTE_MASK))); \
output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * PERMUTE_MASK))); \
output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * PERMUTE_MASK))); \
output[8] = _mm256_movemask_epi8(_mm256_or_si256(*((__m256i*)output), _mm256_slli_epi16(*((__m256i*)output), 4)));
#elif SIMD
@ -246,7 +241,7 @@ static const MD5Parameters Parameters = {
#define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))
#define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a)))
void md5(unsigned __int32 * input)
void md5(unsigned __int32 * input, unsigned __int32 * expected)
{
MD5Vector a = CREATE_VECTOR(Parameters.Init[0]);
MD5Vector b = CREATE_VECTOR(Parameters.Init[1]);
@ -329,6 +324,6 @@ void md5(unsigned __int32 * input)
a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a);
WRITE_TO_OUTPUT(a, input);
WRITE_TO_OUTPUT(a, ((__int32*)input), ((__m256i*)expected));
}
#pragma managed

@ -1,3 +1,3 @@
#pragma once
void md5(unsigned int* input);
void md5(unsigned int* input, unsigned __int32 * expected);

@ -31,7 +31,7 @@
}
}
public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, Vector<uint> expectedHashes, Action<byte[], uint> action)
public unsafe void ProcessPermutations(PhraseSet initialPhraseSet, Word[] allWords, int[] wordIndexes, ulong[] permutations, uint[] expectedHashesVector, Action<byte[], uint> action)
{
fixed (uint* bufferPointer = this.Buffer, initialBufferPointer = initialPhraseSet.Buffer)
{
@ -41,23 +41,39 @@
{
fixed (Word* allWordsPointer = allWords)
{
for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet)
fixed (uint* expectedHashesPointer = expectedHashesVector)
{
MD5Unmanaged.FillPhraseSet(
(ulong*)initialBufferPointer,
(ulong*)bufferPointer,
(ulong*)allWordsPointer,
wordIndexesPointer,
permutationsPointer + i,
wordIndexes.Length);
for (var i = 0; i < permutations.Length; i += Constants.PhrasesPerSet)
{
MD5Unmanaged.FillPhraseSet(
(ulong*)initialBufferPointer,
(ulong*)bufferPointer,
(ulong*)allWordsPointer,
wordIndexesPointer,
permutationsPointer + i,
wordIndexes.Length);
MD5Unmanaged.ComputeMD5(bufferPointer);
MD5Unmanaged.ComputeMD5(bufferPointer, expectedHashesPointer);
for (var j = 0; j < Constants.PhrasesPerSet; j++)
{
if (Vector.EqualsAny(expectedHashes, new Vector<uint>(bufferPointer[j * 8 + 7])))
if (bufferPointer[Constants.PhrasesPerSet / 2] != 0)
{
action(this.GetBytes(j), bufferPointer[j * 8 + 7]);
// Walk the phrases of the current set and report the first one flagged in the comparison masks.
for (var j = 0; j < Constants.PhrasesPerSet; j++)
{
// Two phrases share one 32-bit mask word: an even j reads the low nibble of each byte,
// an odd j reads the high nibble (shifted down by 4 before masking).
var match = (bufferPointer[j / 2] >> (4 * (j % 2))) & 0xF0F0F0F;
if (match != 0)
{
// NOTE(review): bufferInfo is not used anywhere in the visible code — confirm whether it is leftover debugging state.
var bufferInfo = ((ulong)bufferPointer[Constants.PhrasesPerSet] << 32) | bufferPointer[j];
// Same arguments as the FillPhraseSet call made before ComputeMD5; presumably this rebuilds the
// phrase bytes because ComputeMD5 stored its masks into the same buffer — TODO confirm.
MD5Unmanaged.FillPhraseSet(
(ulong*)initialBufferPointer,
(ulong*)bufferPointer,
(ulong*)allWordsPointer,
wordIndexesPointer,
permutationsPointer + i,
wordIndexes.Length);
// NOTE(review): the callback receives the nibble mask, not a hash value — verify the caller expects that.
action(this.GetBytes(j), match);
// Stops after the first flagged phrase; NOTE(review): any later match in the same set is not reported.
break;
}
}
}
}
}

@ -50,17 +50,15 @@
Console.WriteLine("Only 64-bit systems are supported due to MD5Digest optimizations");
}
Vector<uint> expectedHashesFirstComponents;
var expectedHashesFirstComponentsArray = new uint[8];
{
var expectedHashesFirstComponentsArray = new uint[Vector<uint>.Count];
int i = 0;
// Parses the comma-separated "ExpectedHashes" setting and keeps the first 32-bit component of each hash.
foreach (var expectedHash in ConfigurationManager.AppSettings["ExpectedHashes"].Split(','))
{
expectedHashesFirstComponentsArray[i] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
i++;
// The next two lines store each component at an odd index and advance by two, leaving the even slots untouched.
// NOTE(review): with a `new uint[8]` array this fits at most four hashes; a fifth would index past the end —
// confirm the configuration never holds more.
expectedHashesFirstComponentsArray[i + 1] = HexadecimalStringToUnsignedIntArray(expectedHash)[0];
i += 2;
}
expectedHashesFirstComponents = new Vector<uint>(expectedHashesFirstComponentsArray);
}
var processor = new StringsProcessor(
@ -77,7 +75,7 @@
stopwatch.Restart();
processor.CheckPhrases(expectedHashesFirstComponents, (phraseBytes, hashFirstComponent) =>
processor.CheckPhrases(expectedHashesFirstComponentsArray, (phraseBytes, hashFirstComponent) =>
{
var phrase = Encoding.ASCII.GetString(phraseBytes);
var hash = ComputeFullMD5(phraseBytes);

@ -61,13 +61,13 @@
private int NumberOfCharacters { get; }
// Generates the vector sequences via VectorsProcessor.GenerateSequences and hands each one to ProcessSum
// in parallel, capped at Constants.NumberOfThreads concurrent workers. The expected-hash argument and
// the `action` callback are passed through to ProcessSum unchanged.
public void CheckPhrases(Vector<uint> expectedHashes, Action<byte[], uint> action)
public void CheckPhrases(uint[] expectedHashesVector, Action<byte[], uint> action)
{
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
var sums = this.VectorsProcessor.GenerateSequences();
// converting sequences of vectors to the sequences of words...
Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashes, action));
Parallel.ForEach(sums, new ParallelOptions { MaxDegreeOfParallelism = Constants.NumberOfThreads }, sum => ProcessSum(sum, expectedHashesVector, action));
}
public long GetPhrasesCount()
@ -119,7 +119,7 @@
return result;
}
private void ProcessSum(int[] sum, Vector<uint> expectedHashes, Action<byte[], uint> action)
private void ProcessSum(int[] sum, uint[] expectedHashesVector, Action<byte[], uint> action)
{
var initialPhraseSet = new PhraseSet();
initialPhraseSet.Init();
@ -135,7 +135,7 @@
this.AllWords,
wordsArray,
PrecomputedPermutationsGenerator.HamiltonianPermutations(wordsArray.Length, permutationsFilter),
expectedHashes,
expectedHashesVector,
action);
}
}

Loading…
Cancel
Save