From 836361a66c31e415d46bc6dea39666ee5456957c Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Wed, 5 Apr 2017 13:26:19 +0300 Subject: [PATCH] Refactoring + SIMD/AVX support --- dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp | 241 ++++++++++++--------- 1 file changed, 140 insertions(+), 101 deletions(-) diff --git a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp index 819ee76..62f04d2 100644 --- a/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp +++ b/dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp @@ -1,102 +1,139 @@ #include "stdafx.h" - #include "md5.h" #include "intrin.h" -#include "immintrin.h" #pragma unmanaged -#define OP_XOR(a, b) (a ^ b) -#define OP_AND(a, b) (a & b) -#define OP_OR(a, b) (a | b) -#define OP_NEG(a) (~a) -#define OP_ADD(a, b) (a + b) -#define OP_ROT(a, r) (_rotl(a, r)) -#define OP_BLEND(a, b, x) (OP_OR(OP_AND(x, b), OP_AND(OP_NEG(x), a))) +#if AVX2 +typedef __m256i MD5Vector; + +#define OP_XOR(a, b) _mm256_xor_si256(a, b) +#define OP_AND(a, b) _mm256_and_si256(a, b) +#define OP_ANDNOT(a, b) _mm256_andnot_si256(a, b) +#define OP_OR(a, b) _mm256_or_si256(a, b) +#define OP_ADD(a, b) _mm256_add_epi32(a, b) +#define OP_ROT(a, r) OP_OR(_mm256_slli_epi32(a, r), _mm256_srli_epi32(a, 32 - (r))) +#define OP_BLEND(a, b, x) OP_OR(OP_AND(x, b), OP_ANDNOT(x, a)) + +#define CREATE_VECTOR(a) _mm256_set1_epi32(a) +#define CREATE_VECTOR_FROM_INPUT(input, offset) _mm256_set1_epi32(input[offset]) + +#define WRITE_TO_OUTPUT(a, output) \ + output[0] = a.m256i_u32[0]; +#elif SIMD +typedef __m128i MD5Vector; + +#define OP_XOR(a, b) _mm_xor_si128(a, b) +#define OP_AND(a, b) _mm_and_si128(a, b) +#define OP_ANDNOT(a, b) _mm_andnot_si128(a, b) +#define OP_OR(a, b) _mm_or_si128(a, b) +#define OP_ADD(a, b) _mm_add_epi32(a, b) +#define OP_ROT(a, r) OP_OR(_mm_slli_epi32(a, r), _mm_srli_epi32(a, 32 - (r))) +#define OP_BLEND(a, b, x) OP_OR(OP_AND(x, b), OP_ANDNOT(x, a)) +#define CREATE_VECTOR(a) _mm_set1_epi32(a) +#define CREATE_VECTOR_FROM_INPUT(input, offset) _mm_set1_epi32(input[offset]) + +#define WRITE_TO_OUTPUT(a, output) \ + output[0] = a.m128i_u32[0]; +#else typedef unsigned int MD5Vector; -#define CREATE_VECTOR(a) (a) +#define OP_XOR(a, b) (a) ^ (b) +#define OP_AND(a, b) (a) & (b) +#define OP_ANDNOT(a, b) ~(a) & (b) +#define OP_OR(a, b) (a) | (b) +#define OP_ADD(a, b) (a) + (b) +#define OP_ROT(a, r) _rotl(a, r) +#define OP_BLEND(a, b, x) ((x) & (b)) | (~(x) & (a)) + +#define CREATE_VECTOR(a) a #define CREATE_VECTOR_FROM_INPUT(input, offset) (input[offset]) +#define WRITE_TO_OUTPUT(a, output) \ + output[0] = a; +#endif + +#define OP_NEG(a) OP_ANDNOT(a, CREATE_VECTOR(0xffffffff)) + typedef struct { - MD5Vector K[64]; - MD5Vector Init[4]; + unsigned int K[64]; + unsigned int Init[4]; } MD5Parameters; static const MD5Parameters Parameters = { { - CREATE_VECTOR(0xd76aa478), - CREATE_VECTOR(0xe8c7b756), - CREATE_VECTOR(0x242070db), - CREATE_VECTOR(0xc1bdceee), - CREATE_VECTOR(0xf57c0faf), - CREATE_VECTOR(0x4787c62a), - CREATE_VECTOR(0xa8304613), - CREATE_VECTOR(0xfd469501), - CREATE_VECTOR(0x698098d8), - CREATE_VECTOR(0x8b44f7af), - CREATE_VECTOR(0xffff5bb1), - CREATE_VECTOR(0x895cd7be), - CREATE_VECTOR(0x6b901122), - CREATE_VECTOR(0xfd987193), - CREATE_VECTOR(0xa679438e), - CREATE_VECTOR(0x49b40821), - CREATE_VECTOR(0xf61e2562), - CREATE_VECTOR(0xc040b340), - CREATE_VECTOR(0x265e5a51), - CREATE_VECTOR(0xe9b6c7aa), - CREATE_VECTOR(0xd62f105d), - CREATE_VECTOR(0x02441453), - CREATE_VECTOR(0xd8a1e681), - CREATE_VECTOR(0xe7d3fbc8), - CREATE_VECTOR(0x21e1cde6), - CREATE_VECTOR(0xc33707d6), - CREATE_VECTOR(0xf4d50d87), - CREATE_VECTOR(0x455a14ed), - CREATE_VECTOR(0xa9e3e905), - CREATE_VECTOR(0xfcefa3f8), - CREATE_VECTOR(0x676f02d9), - CREATE_VECTOR(0x8d2a4c8a), - CREATE_VECTOR(0xfffa3942), - CREATE_VECTOR(0x8771f681), - CREATE_VECTOR(0x6d9d6122), - CREATE_VECTOR(0xfde5380c), - CREATE_VECTOR(0xa4beea44), - CREATE_VECTOR(0x4bdecfa9), - CREATE_VECTOR(0xf6bb4b60), - CREATE_VECTOR(0xbebfbc70), - CREATE_VECTOR(0x289b7ec6), - CREATE_VECTOR(0xeaa127fa), - CREATE_VECTOR(0xd4ef3085), - CREATE_VECTOR(0x04881d05), - CREATE_VECTOR(0xd9d4d039), - CREATE_VECTOR(0xe6db99e5), - CREATE_VECTOR(0x1fa27cf8), - CREATE_VECTOR(0xc4ac5665), - CREATE_VECTOR(0xf4292244), - CREATE_VECTOR(0x432aff97), - CREATE_VECTOR(0xab9423a7), - CREATE_VECTOR(0xfc93a039), - CREATE_VECTOR(0x655b59c3), - CREATE_VECTOR(0x8f0ccc92), - CREATE_VECTOR(0xffeff47d), - CREATE_VECTOR(0x85845dd1), - CREATE_VECTOR(0x6fa87e4f), - CREATE_VECTOR(0xfe2ce6e0), - CREATE_VECTOR(0xa3014314), - CREATE_VECTOR(0x4e0811a1), - CREATE_VECTOR(0xf7537e82), - CREATE_VECTOR(0xbd3af235), - CREATE_VECTOR(0x2ad7d2bb), - CREATE_VECTOR(0xeb86d391), + 0xd76aa478, + 0xe8c7b756, + 0x242070db, + 0xc1bdceee, + 0xf57c0faf, + 0x4787c62a, + 0xa8304613, + 0xfd469501, + 0x698098d8, + 0x8b44f7af, + 0xffff5bb1, + 0x895cd7be, + 0x6b901122, + 0xfd987193, + 0xa679438e, + 0x49b40821, + 0xf61e2562, + 0xc040b340, + 0x265e5a51, + 0xe9b6c7aa, + 0xd62f105d, + 0x02441453, + 0xd8a1e681, + 0xe7d3fbc8, + 0x21e1cde6, + 0xc33707d6, + 0xf4d50d87, + 0x455a14ed, + 0xa9e3e905, + 0xfcefa3f8, + 0x676f02d9, + 0x8d2a4c8a, + 0xfffa3942, + 0x8771f681, + 0x6d9d6122, + 0xfde5380c, + 0xa4beea44, + 0x4bdecfa9, + 0xf6bb4b60, + 0xbebfbc70, + 0x289b7ec6, + 0xeaa127fa, + 0xd4ef3085, + 0x04881d05, + 0xd9d4d039, + 0xe6db99e5, + 0x1fa27cf8, + 0xc4ac5665, + 0xf4292244, + 0x432aff97, + 0xab9423a7, + 0xfc93a039, + 0x655b59c3, + 0x8f0ccc92, + 0xffeff47d, + 0x85845dd1, + 0x6fa87e4f, + 0xfe2ce6e0, + 0xa3014314, + 0x4e0811a1, + 0xf7537e82, + 0xbd3af235, + 0x2ad7d2bb, + 0xeb86d391, }, { - CREATE_VECTOR(0x67452301), - CREATE_VECTOR(0xefcdab89), - CREATE_VECTOR(0x98badcfe), - CREATE_VECTOR(0x10325476), + 0x67452301, + 0xefcdab89, + 0x98badcfe, + 0x10325476, }, }; @@ -122,59 +159,59 @@ inline MD5Vector LeftRotate(MD5Vector x) } template -inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) +inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w) { - return OP_ADD(b, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(k, OP_ADD(a, w))))); + return OP_ADD(b, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))); } template -inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) +inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k) { - return OP_ADD(b, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(k, a)))); + return OP_ADD(b, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a)))); } template -inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) +inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w) { - return OP_ADD(c, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(k, OP_ADD(a, w))))); + return OP_ADD(c, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))); } template -inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) +inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k) { - return OP_ADD(c, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(k, a)))); + return OP_ADD(c, LeftRotate(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a)))); } template -inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) +inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w) { - return OP_ADD(b, LeftRotate(OP_ADD(Xor(b, c, d), OP_ADD(k, OP_ADD(a, w))))); + return OP_ADD(b, LeftRotate(OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))); } template -inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) +inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k) { - return OP_ADD(b, LeftRotate(OP_ADD(Xor(b, c, d), OP_ADD(k, a)))); + return OP_ADD(b, LeftRotate(OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), a)))); } template -inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) +inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w) { - return OP_ADD(b, LeftRotate(OP_ADD(I(c, b, d), OP_ADD(k, OP_ADD(a, w))))); + return OP_ADD(b, LeftRotate(OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w))))); } template -inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) +inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k) { - return OP_ADD(b, LeftRotate(OP_ADD(I(c, b, d), OP_ADD(k, a)))); + return OP_ADD(b, LeftRotate(OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a)))); } void md5(unsigned int * input, unsigned int * output) { - MD5Vector a = Parameters.Init[0]; - MD5Vector b = Parameters.Init[1]; - MD5Vector c = Parameters.Init[2]; - MD5Vector d = Parameters.Init[3]; + MD5Vector a = CREATE_VECTOR(Parameters.Init[0]); + MD5Vector b = CREATE_VECTOR(Parameters.Init[1]); + MD5Vector c = CREATE_VECTOR(Parameters.Init[2]); + MD5Vector d = CREATE_VECTOR(Parameters.Init[3]); MD5Vector inputVectors[8] = { CREATE_VECTOR_FROM_INPUT(input, 0), @@ -252,6 +289,8 @@ void md5(unsigned int * input, unsigned int * output) b = Step4<21>(b, c, d, a, Parameters.K[59]); a = Step4< 6>(a, b, c, d, Parameters.K[60], inputVectors[4]); - output[0] = OP_ADD(Parameters.Init[0], a); + a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a); + + WRITE_TO_OUTPUT(a, output); } #pragma managed