From f2015b3d011f06c8fcde5310b80bbd164b99f119 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Fri, 12 May 2017 15:54:36 +0300 Subject: [PATCH] Refactored macros to templates --- dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp | 373 ++++++++---------- .../WhiteRabbit.UnmanagedBridge/phraseset.cpp | 152 ++----- 2 files changed, 206 insertions(+), 319 deletions(-) diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index 50dbbf8..df2fdf4 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -7,13 +7,12 @@ #if AVX2 -//typedef __m256i MD5Vector; struct MD5Vector { __m256i m_V0; __m256i m_V1; - inline MD5Vector() {} - inline MD5Vector(__m256i C0, __m256i C1) :m_V0(C0), m_V1(C1) {} + __forceinline MD5Vector() {} + __forceinline MD5Vector(__m256i C0, __m256i C1) :m_V0(C0), m_V1(C1) {} __forceinline MD5Vector MXor(MD5Vector R) const { @@ -30,66 +29,81 @@ struct MD5Vector return MD5Vector(_mm256_andnot_si256(m_V0, R.m_V0), _mm256_andnot_si256(m_V1, R.m_V1)); } - __forceinline MD5Vector MOr(MD5Vector R) const + __forceinline const MD5Vector MOr(const MD5Vector R) const { return MD5Vector(_mm256_or_si256(m_V0, R.m_V0), _mm256_or_si256(m_V1, R.m_V1)); } - __forceinline MD5Vector MAdd(MD5Vector R) const + __forceinline const MD5Vector MAdd(const MD5Vector R) const { return MD5Vector(_mm256_add_epi32(m_V0, R.m_V0), _mm256_add_epi32(m_V1, R.m_V1)); } - __forceinline MD5Vector MShiftLeft(int shift) const + __forceinline const MD5Vector MShiftLeft(const int shift) const { return MD5Vector(_mm256_slli_epi32(m_V0, shift), _mm256_slli_epi32(m_V1, shift)); } - __forceinline MD5Vector MShiftRight(int shift) const + __forceinline const MD5Vector MShiftRight(const int shift) const { return MD5Vector(_mm256_srli_epi32(m_V0, shift), _mm256_srli_epi32(m_V1, shift)); } -}; -#define OP_XOR(a, b) a.MXor(b) -#define OP_AND(a, b) a.MAnd(b) -#define OP_ANDNOT(a, b) a.MAndNot(b) -#define OP_OR(a, b) a.MOr(b) -#define OP_ADD(a, b) a.MAdd(b) -#define OP_ROT(a, r) OP_OR(a.MShiftLeft(r), a.MShiftRight(32 - (r))) -#define OP_BLEND(a, b, x) OP_OR(OP_AND(x, b), OP_ANDNOT(x, a)) + template + __forceinline const MD5Vector Permute() const + { + return MD5Vector(_mm256_permute4x64_epi64(m_V0, imm8), _mm256_permute4x64_epi64(m_V1, imm8)); + } -#define CREATE_VECTOR(a) MD5Vector(_mm256_set1_epi32(a), _mm256_set1_epi32(a)) -#define CREATE_VECTOR_FROM_INPUT(input, offset) MD5Vector(\ -_mm256_set_epi32( \ - input[offset + 7 * 8], \ - input[offset + 6 * 8], \ - input[offset + 5 * 8], \ - input[offset + 4 * 8], \ - input[offset + 3 * 8], \ - input[offset + 2 * 8], \ - input[offset + 1 * 8], \ - input[offset + 0 * 8]), \ -_mm256_set_epi32( \ - input[offset + 15 * 8], \ - input[offset + 14 * 8], \ - input[offset + 13 * 8], \ - input[offset + 12 * 8], \ - input[offset + 11 * 8], \ - input[offset + 10 * 8], \ - input[offset + 9 * 8], \ - input[offset + 8 * 8]) \ -) + __forceinline const MD5Vector CompareEquality32(const __m256i other) const + { + return MD5Vector(_mm256_cmpeq_epi32(m_V0, other), _mm256_cmpeq_epi32(m_V1, other)); + } + + __forceinline void WriteMoveMask8(__int32 * output) const + { + output[0] = _mm256_movemask_epi8(m_V0); + output[1] = _mm256_movemask_epi8(m_V1); + } +}; + +__forceinline const MD5Vector OP_XOR(const MD5Vector a, const MD5Vector b) { return a.MXor(b); } +__forceinline const MD5Vector OP_AND(const MD5Vector a, const MD5Vector b) { return a.MAnd(b); } +__forceinline const MD5Vector OP_ANDNOT(const MD5Vector a, const MD5Vector b) { return a.MAndNot(b); } +__forceinline const MD5Vector OP_OR(const MD5Vector a, const MD5Vector b) { return a.MOr(b); } +__forceinline const MD5Vector OP_ADD(const MD5Vector a, const MD5Vector b) { return a.MAdd(b); } +template +__forceinline const MD5Vector OP_ROT(const MD5Vector a) { return OP_OR(a.MShiftLeft(r), a.MShiftRight(32 - (r))); } +__forceinline const MD5Vector OP_BLEND(const MD5Vector a, const MD5Vector b, const MD5Vector x) { return OP_OR(OP_AND(x, b), OP_ANDNOT(x, a)); } + +__forceinline const MD5Vector CREATE_VECTOR(const int a) { return MD5Vector(_mm256_set1_epi32(a), _mm256_set1_epi32(a)); } +__forceinline const MD5Vector CREATE_VECTOR_FROM_INPUT(const unsigned __int32* input, const size_t offset) { + return MD5Vector( + _mm256_set_epi32( + input[offset + 7 * 8], + input[offset + 6 * 8], + input[offset + 5 * 8], + input[offset + 4 * 8], + input[offset + 3 * 8], + input[offset + 2 * 8], + input[offset + 1 * 8], + input[offset + 0 * 8]), + _mm256_set_epi32( + input[offset + 15 * 8], + input[offset + 14 * 8], + input[offset + 13 * 8], + input[offset + 12 * 8], + input[offset + 11 * 8], + input[offset + 10 * 8], + input[offset + 9 * 8], + input[offset + 8 * 8])); +} #define WRITE_TO_OUTPUT(a, output, expected) \ - output[0] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 0 * 0x55))); \ - output[1] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 0 * 0x55))); \ - output[2] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 1 * 0x55))); \ - output[3] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 1 * 0x55))); \ - output[4] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 2 * 0x55))); \ - output[5] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 2 * 0x55))); \ - output[6] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V0, 3 * 0x55))); \ - output[7] = _mm256_movemask_epi8(_mm256_cmpeq_epi32(*expected, _mm256_permute4x64_epi64(a.m_V1, 3 * 0x55))); \ + a.Permute<0 * 0x55>().CompareEquality32(*expected).WriteMoveMask8(output); \ + a.Permute<1 * 0x55>().CompareEquality32(*expected).WriteMoveMask8(output + 2); \ + a.Permute<2 * 0x55>().CompareEquality32(*expected).WriteMoveMask8(output + 4); \ + a.Permute<3 * 0x55>().CompareEquality32(*expected).WriteMoveMask8(output + 6); \ output[8] = _mm256_movemask_epi8(_mm256_cmpeq_epi8(*((__m256i*)output), _mm256_setzero_si256())); #elif SIMD @@ -139,112 +153,59 @@ typedef unsigned int MD5Vector; #define OP_NEG(a) OP_ANDNOT(a, CREATE_VECTOR(0xffffffff)) -typedef struct { - unsigned int K[64]; - unsigned int Init[4]; -} MD5Parameters; +__forceinline const MD5Vector Blend(const MD5Vector a, const MD5Vector b, const MD5Vector x) { return OP_BLEND(a, b, x); } +__forceinline const MD5Vector Xor(const MD5Vector a, const MD5Vector b, const MD5Vector c) { return OP_XOR(a, OP_XOR(b, c)); } +__forceinline const MD5Vector I(const MD5Vector a, const MD5Vector b, const MD5Vector c) { return OP_XOR(a, OP_OR(b, OP_NEG(c))); } -static const MD5Parameters Parameters = { - { - 0xd76aa478, - 0xe8c7b756, - 0x242070db, - 0xc1bdceee, - 0xf57c0faf, - 0x4787c62a, - 0xa8304613, - 0xfd469501, - 0x698098d8, - 0x8b44f7af, - 0xffff5bb1, - 0x895cd7be, - 0x6b901122, - 0xfd987193, - 0xa679438e, - 0x49b40821, - 0xf61e2562, - 0xc040b340, - 0x265e5a51, - 0xe9b6c7aa, - 0xd62f105d, - 0x02441453, - 0xd8a1e681, - 0xe7d3fbc8, - 0x21e1cde6, - 0xc33707d6, - 0xf4d50d87, - 0x455a14ed, - 0xa9e3e905, - 0xfcefa3f8, - 0x676f02d9, - 0x8d2a4c8a, - 0xfffa3942, - 0x8771f681, - 0x6d9d6122, - 0xfde5380c, - 0xa4beea44, - 0x4bdecfa9, - 0xf6bb4b60, - 0xbebfbc70, - 0x289b7ec6, - 0xeaa127fa, - 0xd4ef3085, - 0x04881d05, - 0xd9d4d039, - 0xe6db99e5, - 0x1fa27cf8, - 0xc4ac5665, - 0xf4292244, - 0x432aff97, - 0xab9423a7, - 0xfc93a039, - 0x655b59c3, - 0x8f0ccc92, - 0xffeff47d, - 0x85845dd1, - 0x6fa87e4f, - 0xfe2ce6e0, - 0xa3014314, - 0x4e0811a1, - 0xf7537e82, - 0xbd3af235, - 0x2ad7d2bb, - 0xeb86d391, - }, - { - 0x67452301, - 0xefcdab89, - 0x98badcfe, - 0x10325476, - }, -}; +template +__forceinline const MD5Vector StepOuter(const MD5Vector a, const MD5Vector b, const MD5Vector x) { return OP_ADD(b, OP_ROT(x)); } + +template +__forceinline const MD5Vector Step1(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d, const MD5Vector w) { + return StepOuter(a, b, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))); +} + +template +__forceinline const MD5Vector Step1(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d) { + return StepOuter(a, b, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))); +} -#define Blend(a, b, x) OP_BLEND(a, b, x) -#define Xor(a, b, c) OP_XOR(a, OP_XOR(b, c)) -#define I(a, b, c) OP_XOR(a, OP_OR(b, OP_NEG(c))) +template +__forceinline const MD5Vector Step2(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d, const MD5Vector w) { + return StepOuter(a, c, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))); +} -#define StepOuter(r, a, b, x) \ - a = x; \ - a = OP_ADD(b, OP_ROT(a, r)); +template +__forceinline const MD5Vector Step2(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d) { + return StepOuter(a, c, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))); +} -#define Step1(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) -#define Step1E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))) +template +__forceinline const MD5Vector Step3(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d, const MD5Vector w) { + return StepOuter(a, b, OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))); +} -#define Step2(r, a, b, c, d, k, w) StepOuter(r, a, c, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) -#define Step2E(r, a, b, c, d, k) StepOuter(r, a, c, OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))) +template +__forceinline const MD5Vector Step3(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d) { + return StepOuter(a, b, OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), a))); +} -#define Step3(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) -#define Step3E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), a))) +template +__forceinline const MD5Vector Step4(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d, const MD5Vector w) { + return StepOuter(a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))); +} -#define Step4(r, a, b, c, d, k, w) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))) -#define Step4E(r, a, b, c, d, k) StepOuter(r, a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))) +template +__forceinline const MD5Vector Step4(const MD5Vector a, const MD5Vector b, const MD5Vector c, const MD5Vector d) { + return StepOuter(a, b, OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))); +} void md5(unsigned __int32 * input, unsigned __int32 * expected) { - MD5Vector a = CREATE_VECTOR(Parameters.Init[0]); - MD5Vector b = CREATE_VECTOR(Parameters.Init[1]); - MD5Vector c = CREATE_VECTOR(Parameters.Init[2]); - MD5Vector d = CREATE_VECTOR(Parameters.Init[3]); + MD5Vector a = CREATE_VECTOR(0x67452301); + MD5Vector b = CREATE_VECTOR(0xefcdab89); + MD5Vector c = CREATE_VECTOR(0x98badcfe); + MD5Vector d = CREATE_VECTOR(0x10325476); MD5Vector inputVector0 = CREATE_VECTOR_FROM_INPUT(input, 0); MD5Vector inputVector1 = CREATE_VECTOR_FROM_INPUT(input, 1); @@ -255,72 +216,72 @@ void md5(unsigned __int32 * input, unsigned __int32 * expected) MD5Vector inputVector6 = CREATE_VECTOR_FROM_INPUT(input, 6); MD5Vector inputVector7 = CREATE_VECTOR_FROM_INPUT(input, 7); - a = Step1 ( 7, a, b, c, d, Parameters.K[ 0], inputVector0); - d = Step1 (12, d, a, b, c, Parameters.K[ 1], inputVector1); - c = Step1 (17, c, d, a, b, Parameters.K[ 2], inputVector2); - b = Step1 (22, b, c, d, a, Parameters.K[ 3], inputVector3); - a = Step1 ( 7, a, b, c, d, Parameters.K[ 4], inputVector4); - d = Step1 (12, d, a, b, c, Parameters.K[ 5], inputVector5); - c = Step1 (17, c, d, a, b, Parameters.K[ 6], inputVector6); - b = Step1E(22, b, c, d, a, Parameters.K[ 7]); - a = Step1E( 7, a, b, c, d, Parameters.K[ 8]); - d = Step1E(12, d, a, b, c, Parameters.K[ 9]); - c = Step1E(17, c, d, a, b, Parameters.K[10]); - b = Step1E(22, b, c, d, a, Parameters.K[11]); - a = Step1E( 7, a, b, c, d, Parameters.K[12]); - d = Step1E(12, d, a, b, c, Parameters.K[13]); - c = Step1 (17, c, d, a, b, Parameters.K[14], inputVector7); - b = Step1E(22, b, c, d, a, Parameters.K[15]); - - a = Step2 ( 5, a, d, b, c, Parameters.K[16], inputVector1); - d = Step2 ( 9, d, c, a, b, Parameters.K[17], inputVector6); - c = Step2E(14, c, b, d, a, Parameters.K[18]); - b = Step2 (20, b, a, c, d, Parameters.K[19], inputVector0); - a = Step2 ( 5, a, d, b, c, Parameters.K[20], inputVector5); - d = Step2E( 9, d, c, a, b, Parameters.K[21]); - c = Step2E(14, c, b, d, a, Parameters.K[22]); - b = Step2 (20, b, a, c, d, Parameters.K[23], inputVector4); - a = Step2E( 5, a, d, b, c, Parameters.K[24]); - d = Step2 ( 9, d, c, a, b, Parameters.K[25], inputVector7); - c = Step2 (14, c, b, d, a, Parameters.K[26], inputVector3); - b = Step2E(20, b, a, c, d, Parameters.K[27]); - a = Step2E( 5, a, d, b, c, Parameters.K[28]); - d = Step2 ( 9, d, c, a, b, Parameters.K[29], inputVector2); - c = Step2E(14, c, b, d, a, Parameters.K[30]); - b = Step2E(20, b, a, c, d, Parameters.K[31]); - - a = Step3 ( 4, a, b, c, d, Parameters.K[32], inputVector5); - d = Step3E(11, d, a, b, c, Parameters.K[33]); - c = Step3E(16, c, d, a, b, Parameters.K[34]); - b = Step3 (23, b, c, d, a, Parameters.K[35], inputVector7); - a = Step3 ( 4, a, b, c, d, Parameters.K[36], inputVector1); - d = Step3 (11, d, a, b, c, Parameters.K[37], inputVector4); - c = Step3E(16, c, d, a, b, Parameters.K[38]); - b = Step3E(23, b, c, d, a, Parameters.K[39]); - a = Step3E( 4, a, b, c, d, Parameters.K[40]); - d = Step3 (11, d, a, b, c, Parameters.K[41], inputVector0); - c = Step3 (16, c, d, a, b, Parameters.K[42], inputVector3); - b = Step3 (23, b, c, d, a, Parameters.K[43], inputVector6); - a = Step3E( 4, a, b, c, d, Parameters.K[44]); - d = Step3E(11, d, a, b, c, Parameters.K[45]); - c = Step3E(16, c, d, a, b, Parameters.K[46]); - b = Step3 (23, b, c, d, a, Parameters.K[47], inputVector2); - - a = Step4 ( 6, a, b, c, d, Parameters.K[48], inputVector0); - d = Step4E(10, d, a, b, c, Parameters.K[49]); - c = Step4 (15, c, d, a, b, Parameters.K[50], inputVector7); - b = Step4 (21, b, c, d, a, Parameters.K[51], inputVector5); - a = Step4E( 6, a, b, c, d, Parameters.K[52]); - d = Step4 (10, d, a, b, c, Parameters.K[53], inputVector3); - c = Step4E(15, c, d, a, b, Parameters.K[54]); - b = Step4 (21, b, c, d, a, Parameters.K[55], inputVector1); - a = Step4E( 6, a, b, c, d, Parameters.K[56]); - d = Step4E(10, d, a, b, c, Parameters.K[57]); - c = Step4 (15, c, d, a, b, Parameters.K[58], inputVector6); - b = Step4E(21, b, c, d, a, Parameters.K[59]); - a = Step4 ( 6, a, b, c, d, Parameters.K[60], inputVector4); - - a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a); + a = Step1< 7, 0xd76aa478>(a, b, c, d, inputVector0); + d = Step1<12, 0xe8c7b756>(d, a, b, c, inputVector1); + c = Step1<17, 0x242070db>(c, d, a, b, inputVector2); + b = Step1<22, 0xc1bdceee>(b, c, d, a, inputVector3); + a = Step1< 7, 0xf57c0faf>(a, b, c, d, inputVector4); + d = Step1<12, 0x4787c62a>(d, a, b, c, inputVector5); + c = Step1<17, 0xa8304613>(c, d, a, b, inputVector6); + b = Step1<22, 0xfd469501>(b, c, d, a); + a = Step1< 7, 0x698098d8>(a, b, c, d); + d = Step1<12, 0x8b44f7af>(d, a, b, c); + c = Step1<17, 0xffff5bb1>(c, d, a, b); + b = Step1<22, 0x895cd7be>(b, c, d, a); + a = Step1< 7, 0x6b901122>(a, b, c, d); + d = Step1<12, 0xfd987193>(d, a, b, c); + c = Step1<17, 0xa679438e>(c, d, a, b, inputVector7); + b = Step1<22, 0x49b40821>(b, c, d, a); + + a = Step2< 5, 0xf61e2562>(a, d, b, c, inputVector1); + d = Step2< 9, 0xc040b340>(d, c, a, b, inputVector6); + c = Step2<14, 0x265e5a51>(c, b, d, a); + b = Step2<20, 0xe9b6c7aa>(b, a, c, d, inputVector0); + a = Step2< 5, 0xd62f105d>(a, d, b, c, inputVector5); + d = Step2< 9, 0x02441453>(d, c, a, b); + c = Step2<14, 0xd8a1e681>(c, b, d, a); + b = Step2<20, 0xe7d3fbc8>(b, a, c, d, inputVector4); + a = Step2< 5, 0x21e1cde6>(a, d, b, c); + d = Step2< 9, 0xc33707d6>(d, c, a, b, inputVector7); + c = Step2<14, 0xf4d50d87>(c, b, d, a, inputVector3); + b = Step2<20, 0x455a14ed>(b, a, c, d); + a = Step2< 5, 0xa9e3e905>(a, d, b, c); + d = Step2< 9, 0xfcefa3f8>(d, c, a, b, inputVector2); + c = Step2<14, 0x676f02d9>(c, b, d, a); + b = Step2<20, 0x8d2a4c8a>(b, a, c, d); + + a = Step3< 4, 0xfffa3942>(a, b, c, d, inputVector5); + d = Step3<11, 0x8771f681>(d, a, b, c); + c = Step3<16, 0x6d9d6122>(c, d, a, b); + b = Step3<23, 0xfde5380c>(b, c, d, a, inputVector7); + a = Step3< 4, 0xa4beea44>(a, b, c, d, inputVector1); + d = Step3<11, 0x4bdecfa9>(d, a, b, c, inputVector4); + c = Step3<16, 0xf6bb4b60>(c, d, a, b); + b = Step3<23, 0xbebfbc70>(b, c, d, a); + a = Step3< 4, 0x289b7ec6>(a, b, c, d); + d = Step3<11, 0xeaa127fa>(d, a, b, c, inputVector0); + c = Step3<16, 0xd4ef3085>(c, d, a, b, inputVector3); + b = Step3<23, 0x04881d05>(b, c, d, a, inputVector6); + a = Step3< 4, 0xd9d4d039>(a, b, c, d); + d = Step3<11, 0xe6db99e5>(d, a, b, c); + c = Step3<16, 0x1fa27cf8>(c, d, a, b); + b = Step3<23, 0xc4ac5665>(b, c, d, a, inputVector2); + + a = Step4< 6, 0xf4292244>(a, b, c, d, inputVector0); + d = Step4<10, 0x432aff97>(d, a, b, c); + c = Step4<15, 0xab9423a7>(c, d, a, b, inputVector7); + b = Step4<21, 0xfc93a039>(b, c, d, a, inputVector5); + a = Step4< 6, 0x655b59c3>(a, b, c, d); + d = Step4<10, 0x8f0ccc92>(d, a, b, c, inputVector3); + c = Step4<15, 0xffeff47d>(c, d, a, b); + b = Step4<21, 0x85845dd1>(b, c, d, a, inputVector1); + a = Step4< 6, 0x6fa87e4f>(a, b, c, d); + d = Step4<10, 0xfe2ce6e0>(d, a, b, c); + c = Step4<15, 0xa3014314>(c, d, a, b, inputVector6); + b = Step4<21, 0x4e0811a1>(b, c, d, a); + a = Step4< 6, 0xf7537e82>(a, b, c, d, inputVector4); + + a = OP_ADD(CREATE_VECTOR(0x67452301), a); WRITE_TO_OUTPUT(a, ((__int32*)input), ((__m256i*)expected)); } diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/phraseset.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/phraseset.cpp index 38f3fcb..86fb4ae 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/phraseset.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/phraseset.cpp @@ -6,116 +6,42 @@ #pragma unmanaged -#define REPEAT_PHRASES(macro) \ - macro(0); \ - macro(1); \ - macro(2); \ - macro(3); \ - macro(4); \ - macro(5); \ - macro(6); \ - macro(7); \ - macro(8); \ - macro(9); \ - macro(10); \ - macro(11); \ - macro(12); \ - macro(13); \ - macro(14); \ - macro(15); - -#define INIT_WORD(phraseNumber) \ - auto permutation = permutationsPointer[phraseNumber]; \ - unsigned __int64 cumulativeWordOffset = 0; \ - auto phrase = avx2initialBuffer[phraseNumber]; - -#define PROCESS_WORD(phraseNumber, wordNumber) \ - { \ - auto currentWord = allWordsPointer + wordIndexes[_bextr_u64(permutation, 4 * wordNumber, 4)] * 128; \ - phrase = _mm256_xor_si256(phrase, *(__m256i*)(currentWord + cumulativeWordOffset)); \ - cumulativeWordOffset += currentWord[127]; \ - } - -#define DONE_WORD(phraseNumber) \ - avx2buffer[phraseNumber] = phrase; - -#define REPEAT_WORDS_SIMPLE1(phraseNumber) \ - { \ - PROCESS_WORD(phraseNumber, 0); \ - } - -#define REPEAT_WORDS_SIMPLE2(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE1(phraseNumber); \ - PROCESS_WORD(phraseNumber, 1); \ - } - -#define REPEAT_WORDS_SIMPLE3(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE2(phraseNumber); \ - PROCESS_WORD(phraseNumber, 2); \ - } - -#define REPEAT_WORDS_SIMPLE4(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE3(phraseNumber); \ - PROCESS_WORD(phraseNumber, 3); \ - } - -#define REPEAT_WORDS_SIMPLE5(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE4(phraseNumber); \ - PROCESS_WORD(phraseNumber, 4); \ - } - -#define REPEAT_WORDS_SIMPLE6(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE5(phraseNumber); \ - PROCESS_WORD(phraseNumber, 5); \ - } - -#define REPEAT_WORDS_SIMPLE7(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE6(phraseNumber); \ - PROCESS_WORD(phraseNumber, 6); \ - } - -#define REPEAT_WORDS_SIMPLE8(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE7(phraseNumber); \ - PROCESS_WORD(phraseNumber, 7); \ +template +class Processor +{ +public: + template + static __forceinline const __m256i ProcessWord(const __m256i phrase, const unsigned __int64 cumulativeWordOffset, const unsigned __int64 permutation, unsigned __int64* allWordsPointer, __int32* wordIndexes) + { + auto currentWord = allWordsPointer + wordIndexes[_bextr_u64(permutation, 4 * wordNumber, 4)] * 128; + + return ProcessWord( + _mm256_xor_si256(phrase, *(__m256i*)(currentWord + cumulativeWordOffset)), + cumulativeWordOffset + currentWord[127], + permutation, + allWordsPointer, + wordIndexes); } -#define REPEAT_WORDS_SIMPLE9(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE8(phraseNumber); \ - PROCESS_WORD(phraseNumber, 8); \ + template<> + static __forceinline const __m256i ProcessWord(const __m256i phrase, const unsigned __int64 cumulativeWordOffset, const unsigned __int64 permutation, unsigned __int64* allWordsPointer, __int32* wordIndexes) + { + return phrase; } -#define REPEAT_WORDS_SIMPLE10(phraseNumber) \ - { \ - REPEAT_WORDS_SIMPLE9(phraseNumber); \ - PROCESS_WORD(phraseNumber, 9); \ + template + static __forceinline void ProcessWordsForPhrase(__m256i* avx2initialBuffer, __m256i* avx2buffer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer) + { + avx2buffer[phraseNumber] = ProcessWord<0>(*avx2initialBuffer, 0, permutationsPointer[phraseNumber], allWordsPointer, wordIndexes); + ProcessWordsForPhrase(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); } -#define REPEAT_WORDS(phraseNumber, repeater) \ - { \ - INIT_WORD(phraseNumber); \ - repeater(phraseNumber); \ - DONE_WORD(phraseNumber); \ + template<> + static __forceinline void ProcessWordsForPhrase(__m256i* avx2initialBuffer, __m256i* avx2buffer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer) + { + return; } - -#define REPEAT_WORDS1(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE1) -#define REPEAT_WORDS2(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE2) -#define REPEAT_WORDS3(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE3) -#define REPEAT_WORDS4(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE4) -#define REPEAT_WORDS5(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE5) -#define REPEAT_WORDS6(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE6) -#define REPEAT_WORDS7(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE7) -#define REPEAT_WORDS8(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE8) -#define REPEAT_WORDS9(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE9) -#define REPEAT_WORDS10(phraseNumber) REPEAT_WORDS(phraseNumber, REPEAT_WORDS_SIMPLE10) - +}; void fillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* bufferPointer, unsigned __int64* allWordsPointer, __int32* wordIndexes, unsigned __int64* permutationsPointer, int numberOfWords) { @@ -125,34 +51,34 @@ void fillPhraseSet(unsigned __int64* initialBufferPointer, unsigned __int64* buf switch (numberOfWords) { case 1: - REPEAT_PHRASES(REPEAT_WORDS1); + Processor<1>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 2: - REPEAT_PHRASES(REPEAT_WORDS2); + Processor<2>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 3: - REPEAT_PHRASES(REPEAT_WORDS3); + Processor<3>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 4: - REPEAT_PHRASES(REPEAT_WORDS4); + Processor<4>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 5: - REPEAT_PHRASES(REPEAT_WORDS5); + Processor<5>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 6: - REPEAT_PHRASES(REPEAT_WORDS6); + Processor<6>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 7: - REPEAT_PHRASES(REPEAT_WORDS7); + Processor<7>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 8: - REPEAT_PHRASES(REPEAT_WORDS8); + Processor<8>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 9: - REPEAT_PHRASES(REPEAT_WORDS9); + Processor<9>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; case 10: - REPEAT_PHRASES(REPEAT_WORDS10); + Processor<10>::ProcessWordsForPhrase<0>(avx2initialBuffer, avx2buffer, allWordsPointer, wordIndexes, permutationsPointer); break; } }