From 16bc5f2c98e58498b00dfcb2d65b2011580a8288 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Fri, 5 May 2017 12:55:36 +0300 Subject: [PATCH] Optimized memory allocations (MD5 is stored inside a PhraseSet) --- README.md | 10 +++---- .../WhiteRabbit.UnmanagedBridge.cpp | 16 +++++------ .../WhiteRabbit.UnmanagedBridge.h | 2 +- dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp | 24 ++++++++++------ dotnet/WhiteRabbit.UnmanagedBridge/md5.h | 2 +- dotnet/WhiteRabbit/MD5Digest.cs | 12 ++------ dotnet/WhiteRabbit/PhraseSet.cs | 28 +++++++++++++------ dotnet/WhiteRabbit/Program.cs | 5 ++-- 8 files changed, 56 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index ba86faa..4c214d0 100644 --- a/README.md +++ b/README.md @@ -46,11 +46,11 @@ Multi-threaded performance with RyuJIT (.NET 4.6, 64-bit system) on i5-6500 is a Number of words|Time to check all anagrams no longer than that|Time to solve "easy" hash|Time to solve "more difficult" hash|Time to solve "hard" hash|Number of unique anagrams no longer than that ---------------|----------------------------------------------|-------------------------|-----------------------------------|-------------------------|--------------------------------------------- 3|0.04s||||4560 -4|0.65s|||0.1s|7,431,984 -5|19s|0.3s|0.1s|0.5s|1,347,437,484 -6|10.5 minutes|1.9s|0.35s|4.7s|58,405,904,844 -7|3.2 hours|11s|1.4s|31.5s|1,070,307,744,114 -8|49 hours|40s|4.1s|2.1 minutes|10,893,594,396,594 +4|0.45s|||0.08s|7,431,984 +5|18s|0.25s|0.07s|0.5s|1,347,437,484 +6|9.5 minutes|1.7s|0.3s|4.3s|58,405,904,844 +7|3.2 hours|10s|1.3s|28s|1,070,307,744,114 +8|49 hours|37s|3.8s|1.9 minutes|10,893,594,396,594 9||2.5 minutes|13s|9.5 minutes|70,596,864,409,954 10||5 minutes|21s|17.5 minutes|314,972,701,475,754 diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp index 2a1c516..9761b74 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.cpp @@ -6,23 +6,23 @@ #include "md5.h" #include "phraseset.h" -void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input, unsigned __int32 * output) +void WhiteRabbitUnmanagedBridge::MD5Unmanaged::ComputeMD5(unsigned __int32 * input) { #if AVX2 - md5(input + 0 * 8 * 8, output + 0 * 8); - md5(input + 1 * 8 * 8, output + 1 * 8); + md5(input + 0 * 8 * 8); + md5(input + 1 * 8 * 8); #elif SIMD - md5(input + 0 * 8 * 4, output + 0 * 4); - md5(input + 1 * 8 * 4, output + 1 * 4); + md5(input + 0 * 8 * 4); + md5(input + 1 * 8 * 4); if (input[2 * 8 * 4] != 0) { - md5(input + 2 * 8 * 4, output + 0 * 4); - md5(input + 3 * 8 * 4, output + 1 * 4); + md5(input + 2 * 8 * 4); + md5(input + 3 * 8 * 4); } #else for (int i = 0; i < 16; i++) { - md5(input + i * 8, output + i); + md5(input + i * 8); } #endif } diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h index 1299040..afc6add 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h +++ b/dotnet/WhiteRabbit.UnmanagedBridge/WhiteRabbit.UnmanagedBridge.h @@ -12,7 +12,7 @@ namespace WhiteRabbitUnmanagedBridge { { public: literal int PhrasesPerSet = PHRASES_PER_SET; - static void ComputeMD5(unsigned int* input, unsigned int* output); + static void ComputeMD5(unsigned int* input); static void FillPhraseSet(__int64* bufferPointer, __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int permutationOffset, int numberOfCharacters, int numberOfWords); }; } diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index 02b87a3..e105fbb 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -29,10 +29,14 @@ typedef __m256i MD5Vector; input[offset + 0 * 8]) #define WRITE_TO_OUTPUT(a, output) \ - ((unsigned __int64*)output)[0] = a.m256i_u64[0]; \ - ((unsigned __int64*)output)[1] = a.m256i_u64[1]; \ - ((unsigned __int64*)output)[2] = a.m256i_u64[2]; \ - ((unsigned __int64*)output)[3] = a.m256i_u64[3]; + output[7 + 0 * 8] = a.m256i_u32[0]; \ + output[7 + 1 * 8] = a.m256i_u32[1]; \ + output[7 + 2 * 8] = a.m256i_u32[2]; \ + output[7 + 3 * 8] = a.m256i_u32[3]; \ + output[7 + 4 * 8] = a.m256i_u32[4]; \ + output[7 + 5 * 8] = a.m256i_u32[5]; \ + output[7 + 6 * 8] = a.m256i_u32[6]; \ + output[7 + 7 * 8] = a.m256i_u32[7]; #elif SIMD @@ -55,8 +59,10 @@ typedef __m128i MD5Vector; input[offset + 0 * 8]) #define WRITE_TO_OUTPUT(a, output) \ - ((unsigned __int64*)output)[0] = a.m128i_u64[0]; \ - ((unsigned __int64*)output)[1] = a.m128i_u64[1]; + output[7 + 0 * 8] = a.m128i_u32[0]; \ + output[7 + 1 * 8] = a.m128i_u32[1]; \ + output[7 + 2 * 8] = a.m128i_u32[2]; \ + output[7 + 3 * 8] = a.m128i_u32[3]; #else @@ -74,7 +80,7 @@ typedef unsigned int MD5Vector; #define CREATE_VECTOR_FROM_INPUT(input, offset) (input[offset]) #define WRITE_TO_OUTPUT(a, output) \ - output[0] = a; + output[7] = a; #endif #define OP_NEG(a) OP_ANDNOT(a, CREATE_VECTOR(0xffffffff)) @@ -179,7 +185,7 @@ static const MD5Parameters Parameters = { #define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) #define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))) -void md5(unsigned __int32 * input, unsigned __int32 * output) +void md5(unsigned __int32 * input) { MD5Vector a = CREATE_VECTOR(Parameters.Init[0]); MD5Vector b = CREATE_VECTOR(Parameters.Init[1]); @@ -262,6 +268,6 @@ void md5(unsigned __int32 * input, unsigned __int32 * output) a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a); - WRITE_TO_OUTPUT(a, output); + WRITE_TO_OUTPUT(a, input); } #pragma managed diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.h b/dotnet/WhiteRabbit.UnmanagedBridge/md5.h index 737b9dc..6f5ff4f 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.h +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.h @@ -1,3 +1,3 @@ #pragma once -void md5(unsigned int* input, unsigned int* output); +void md5(unsigned int* input); diff --git a/dotnet/WhiteRabbit/MD5Digest.cs b/dotnet/WhiteRabbit/MD5Digest.cs index 7cfa311..6a8fb6d 100644 --- a/dotnet/WhiteRabbit/MD5Digest.cs +++ b/dotnet/WhiteRabbit/MD5Digest.cs @@ -7,18 +7,12 @@ { // It only returns first component of MD5 hash [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe uint[] Compute(PhraseSet input) + public static unsafe void Compute(PhraseSet input) { - var result = new uint[Constants.PhrasesPerSet]; - fixed (uint* resultPointer = result) + fixed (uint* inputBuffer = input.Buffer) { - fixed (long* inputBuffer = input.Buffer) - { - MD5Unmanaged.ComputeMD5((uint*)inputBuffer, resultPointer); - } + MD5Unmanaged.ComputeMD5(inputBuffer); } - - return result; } } } diff --git a/dotnet/WhiteRabbit/PhraseSet.cs b/dotnet/WhiteRabbit/PhraseSet.cs index 38c5316..6662b27 100644 --- a/dotnet/WhiteRabbit/PhraseSet.cs +++ b/dotnet/WhiteRabbit/PhraseSet.cs @@ -1,19 +1,20 @@ namespace WhiteRabbit { using System.Diagnostics; + using System.Runtime.CompilerServices; // Anagram representation optimized for MD5 internal struct PhraseSet { - public long[] Buffer; + public uint[] Buffer; public unsafe PhraseSet(Word[] allWords, int[] wordIndexes, ulong[] permutations, int permutationOffset, int numberOfCharacters) { Debug.Assert(numberOfCharacters + wordIndexes.Length - 1 < 27); - this.Buffer = new long[4 * Constants.PhrasesPerSet]; + this.Buffer = new uint[8 * Constants.PhrasesPerSet]; - fixed (long* bufferPointer = this.Buffer) + fixed (uint* bufferPointer = this.Buffer) { fixed (ulong* permutationsPointer = permutations) { @@ -21,25 +22,36 @@ { fixed (Word* allWordsPointer = allWords) { - WhiteRabbitUnmanagedBridge.MD5Unmanaged.FillPhraseSet(bufferPointer, (long*)allWordsPointer, wordIndexesPointer, permutationsPointer, permutationOffset, numberOfCharacters, wordIndexes.Length); + WhiteRabbitUnmanagedBridge.MD5Unmanaged.FillPhraseSet((long*)bufferPointer, (long*)allWordsPointer, wordIndexesPointer, permutationsPointer, permutationOffset, numberOfCharacters, wordIndexes.Length); } } } } } - private static unsafe void FillPhraseSet(long* bufferPointer, long* allWordsPointer, int* wordIndexes, ulong* permutationsPointer, int permutationOffset, int numberOfCharacters, int numberOfWords) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public uint GetMD5(int number) { + return this.Buffer[number * 8 + 7]; } public unsafe byte[] GetBytes(int number) { Debug.Assert(number < Constants.PhrasesPerSet); - fixed(long* bufferPointer = this.Buffer) + fixed(uint* bufferPointer = this.Buffer) { - var phrasePointer = bufferPointer + 4 * number; - var length = ((uint*)phrasePointer)[7] >> 3; + var phrasePointer = bufferPointer + 8 * number; + var length = 0; + for (var i = 27; i >= 0; i--) + { + if (((byte*)phrasePointer)[i] == 128) + { + length = i; + break; + } + } + var result = new byte[length]; for (var i = 0; i < length; i++) { diff --git a/dotnet/WhiteRabbit/Program.cs b/dotnet/WhiteRabbit/Program.cs index 7aadf19..c28cf37 100644 --- a/dotnet/WhiteRabbit/Program.cs +++ b/dotnet/WhiteRabbit/Program.cs @@ -80,7 +80,7 @@ processor.CheckPhrases(phraseSet => { - var hashesFirstComponents = MD5Digest.Compute(phraseSet); + MD5Digest.Compute(phraseSet); for (var i = 0; i < Constants.PhrasesPerSet; i++) { Debug.Assert( @@ -88,10 +88,11 @@ $"StringsProcessor produced incorrect anagram: {ToString(phraseSet, i)}"); if (Vector.EqualsAny(expectedHashesFirstComponents, new Vector(hashesFirstComponents[i]))) + if (Vector.EqualsAny(expectedHashesFirstComponents, new Vector(phraseSet.GetMD5(i)))) { var phrase = ToString(phraseSet, i); var hash = ComputeFullMD5(phrase); - Console.WriteLine($"Found phrase for {hash} ({hashesFirstComponents[i]:x8}): {phrase}; time from start is {stopwatch.Elapsed}"); + Console.WriteLine($"Found phrase for {hash} ({phraseSet.GetMD5(i):x8}): {phrase}; time from start is {stopwatch.Elapsed}"); } } });