Refactoring + SIMD/AVX support

unmanaged
Inga 🏳‍🌈 8 years ago
parent c60d4cbcaf
commit 836361a66c
  1. 241
      dotnet/WhiteRabbit.UnmanagedBridge/md5.cpp

@ -1,102 +1,139 @@
#include "stdafx.h" #include "stdafx.h"
#include "md5.h" #include "md5.h"
#include "intrin.h" #include "intrin.h"
#include "immintrin.h"
#pragma unmanaged #pragma unmanaged
#define OP_XOR(a, b) (a ^ b) #if AVX2
#define OP_AND(a, b) (a & b) typedef __m256i MD5Vector;
#define OP_OR(a, b) (a | b)
#define OP_NEG(a) (~a) #define OP_XOR(a, b) _mm256_xor_si256(a, b)
#define OP_ADD(a, b) (a + b) #define OP_AND(a, b) _mm256_and_si256(a, b)
#define OP_ROT(a, r) (_rotl(a, r)) #define OP_ANDNOT(a, b) _mm256_andnot_si256(a, b)
#define OP_BLEND(a, b, x) (OP_OR(OP_AND(x, b), OP_AND(OP_NEG(x), a))) #define OP_OR(a, b) _mm256_or_si256(a, b)
#define OP_ADD(a, b) _mm256_add_epi32(a, b)
#define OP_ROT(a, r) OP_OR(_mm256_slli_epi32(a, r), _mm256_srli_epi32(a, 32 - (r)))
#define OP_BLEND(a, b, x) OP_OR(OP_AND(x, b), OP_ANDNOT(x, a))
#define CREATE_VECTOR(a) _mm256_set1_epi32(a)
#define CREATE_VECTOR_FROM_INPUT(input, offset) _mm256_set1_epi32(input[offset])
#define WRITE_TO_OUTPUT(a, output) \
output[0] = a.m256i_u32[0];
#elif SIMD
typedef __m128i MD5Vector;
#define OP_XOR(a, b) _mm_xor_si128(a, b)
#define OP_AND(a, b) _mm_and_si128(a, b)
#define OP_ANDNOT(a, b) _mm_andnot_si128(a, b)
#define OP_OR(a, b) _mm_or_si128(a, b)
#define OP_ADD(a, b) _mm_add_epi32(a, b)
#define OP_ROT(a, r) OP_OR(_mm_slli_epi32(a, r), _mm_srli_epi32(a, 32 - (r)))
#define OP_BLEND(a, b, x) OP_OR(OP_AND(x, b), OP_ANDNOT(x, a))
#define CREATE_VECTOR(a) _mm_set1_epi32(a)
#define CREATE_VECTOR_FROM_INPUT(input, offset) _mm_set1_epi32(input[offset])
#define WRITE_TO_OUTPUT(a, output) \
output[0] = a.m128i_u32[0];
#else
typedef unsigned int MD5Vector; typedef unsigned int MD5Vector;
#define CREATE_VECTOR(a) (a) #define OP_XOR(a, b) (a) ^ (b)
#define OP_AND(a, b) (a) & (b)
#define OP_ANDNOT(a, b) ~(a) & (b)
#define OP_OR(a, b) (a) | (b)
#define OP_ADD(a, b) (a) + (b)
#define OP_ROT(a, r) _rotl(a, r)
#define OP_BLEND(a, b, x) ((x) & (b)) | (~(x) & (a))
#define CREATE_VECTOR(a) a
#define CREATE_VECTOR_FROM_INPUT(input, offset) (input[offset]) #define CREATE_VECTOR_FROM_INPUT(input, offset) (input[offset])
#define WRITE_TO_OUTPUT(a, output) \
output[0] = a;
#endif
#define OP_NEG(a) OP_ANDNOT(a, CREATE_VECTOR(0xffffffff))
typedef struct { typedef struct {
MD5Vector K[64]; unsigned int K[64];
MD5Vector Init[4]; unsigned int Init[4];
} MD5Parameters; } MD5Parameters;
static const MD5Parameters Parameters = { static const MD5Parameters Parameters = {
{ {
CREATE_VECTOR(0xd76aa478), 0xd76aa478,
CREATE_VECTOR(0xe8c7b756), 0xe8c7b756,
CREATE_VECTOR(0x242070db), 0x242070db,
CREATE_VECTOR(0xc1bdceee), 0xc1bdceee,
CREATE_VECTOR(0xf57c0faf), 0xf57c0faf,
CREATE_VECTOR(0x4787c62a), 0x4787c62a,
CREATE_VECTOR(0xa8304613), 0xa8304613,
CREATE_VECTOR(0xfd469501), 0xfd469501,
CREATE_VECTOR(0x698098d8), 0x698098d8,
CREATE_VECTOR(0x8b44f7af), 0x8b44f7af,
CREATE_VECTOR(0xffff5bb1), 0xffff5bb1,
CREATE_VECTOR(0x895cd7be), 0x895cd7be,
CREATE_VECTOR(0x6b901122), 0x6b901122,
CREATE_VECTOR(0xfd987193), 0xfd987193,
CREATE_VECTOR(0xa679438e), 0xa679438e,
CREATE_VECTOR(0x49b40821), 0x49b40821,
CREATE_VECTOR(0xf61e2562), 0xf61e2562,
CREATE_VECTOR(0xc040b340), 0xc040b340,
CREATE_VECTOR(0x265e5a51), 0x265e5a51,
CREATE_VECTOR(0xe9b6c7aa), 0xe9b6c7aa,
CREATE_VECTOR(0xd62f105d), 0xd62f105d,
CREATE_VECTOR(0x02441453), 0x02441453,
CREATE_VECTOR(0xd8a1e681), 0xd8a1e681,
CREATE_VECTOR(0xe7d3fbc8), 0xe7d3fbc8,
CREATE_VECTOR(0x21e1cde6), 0x21e1cde6,
CREATE_VECTOR(0xc33707d6), 0xc33707d6,
CREATE_VECTOR(0xf4d50d87), 0xf4d50d87,
CREATE_VECTOR(0x455a14ed), 0x455a14ed,
CREATE_VECTOR(0xa9e3e905), 0xa9e3e905,
CREATE_VECTOR(0xfcefa3f8), 0xfcefa3f8,
CREATE_VECTOR(0x676f02d9), 0x676f02d9,
CREATE_VECTOR(0x8d2a4c8a), 0x8d2a4c8a,
CREATE_VECTOR(0xfffa3942), 0xfffa3942,
CREATE_VECTOR(0x8771f681), 0x8771f681,
CREATE_VECTOR(0x6d9d6122), 0x6d9d6122,
CREATE_VECTOR(0xfde5380c), 0xfde5380c,
CREATE_VECTOR(0xa4beea44), 0xa4beea44,
CREATE_VECTOR(0x4bdecfa9), 0x4bdecfa9,
CREATE_VECTOR(0xf6bb4b60), 0xf6bb4b60,
CREATE_VECTOR(0xbebfbc70), 0xbebfbc70,
CREATE_VECTOR(0x289b7ec6), 0x289b7ec6,
CREATE_VECTOR(0xeaa127fa), 0xeaa127fa,
CREATE_VECTOR(0xd4ef3085), 0xd4ef3085,
CREATE_VECTOR(0x04881d05), 0x04881d05,
CREATE_VECTOR(0xd9d4d039), 0xd9d4d039,
CREATE_VECTOR(0xe6db99e5), 0xe6db99e5,
CREATE_VECTOR(0x1fa27cf8), 0x1fa27cf8,
CREATE_VECTOR(0xc4ac5665), 0xc4ac5665,
CREATE_VECTOR(0xf4292244), 0xf4292244,
CREATE_VECTOR(0x432aff97), 0x432aff97,
CREATE_VECTOR(0xab9423a7), 0xab9423a7,
CREATE_VECTOR(0xfc93a039), 0xfc93a039,
CREATE_VECTOR(0x655b59c3), 0x655b59c3,
CREATE_VECTOR(0x8f0ccc92), 0x8f0ccc92,
CREATE_VECTOR(0xffeff47d), 0xffeff47d,
CREATE_VECTOR(0x85845dd1), 0x85845dd1,
CREATE_VECTOR(0x6fa87e4f), 0x6fa87e4f,
CREATE_VECTOR(0xfe2ce6e0), 0xfe2ce6e0,
CREATE_VECTOR(0xa3014314), 0xa3014314,
CREATE_VECTOR(0x4e0811a1), 0x4e0811a1,
CREATE_VECTOR(0xf7537e82), 0xf7537e82,
CREATE_VECTOR(0xbd3af235), 0xbd3af235,
CREATE_VECTOR(0x2ad7d2bb), 0x2ad7d2bb,
CREATE_VECTOR(0xeb86d391), 0xeb86d391,
}, },
{ {
CREATE_VECTOR(0x67452301), 0x67452301,
CREATE_VECTOR(0xefcdab89), 0xefcdab89,
CREATE_VECTOR(0x98badcfe), 0x98badcfe,
CREATE_VECTOR(0x10325476), 0x10325476,
}, },
}; };
@ -122,59 +159,59 @@ inline MD5Vector LeftRotate(MD5Vector x)
} }
template<int r> template<int r>
inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(k, OP_ADD(a, w))))); return OP_ADD(b, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))));
} }
template<int r> template<int r>
inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) inline MD5Vector Step1(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(k, a)))); return OP_ADD(b, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))));
} }
template<int r> template<int r>
inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w)
{ {
return OP_ADD(c, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(k, OP_ADD(a, w))))); return OP_ADD(c, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))));
} }
template<int r> template<int r>
inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) inline MD5Vector Step2(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k)
{ {
return OP_ADD(c, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(k, a)))); return OP_ADD(c, LeftRotate<r>(OP_ADD(Blend(d, c, b), OP_ADD(CREATE_VECTOR(k), a))));
} }
template<int r> template<int r>
inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(Xor(b, c, d), OP_ADD(k, OP_ADD(a, w))))); return OP_ADD(b, LeftRotate<r>(OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))));
} }
template<int r> template<int r>
inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) inline MD5Vector Step3(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(Xor(b, c, d), OP_ADD(k, a)))); return OP_ADD(b, LeftRotate<r>(OP_ADD(Xor(b, c, d), OP_ADD(CREATE_VECTOR(k), a))));
} }
template<int r> template<int r>
inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k, MD5Vector w) inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k, MD5Vector w)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(I(c, b, d), OP_ADD(k, OP_ADD(a, w))))); return OP_ADD(b, LeftRotate<r>(OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), OP_ADD(a, w)))));
} }
template<int r> template<int r>
inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, MD5Vector k) inline MD5Vector Step4(MD5Vector a, MD5Vector b, MD5Vector c, MD5Vector d, unsigned int k)
{ {
return OP_ADD(b, LeftRotate<r>(OP_ADD(I(c, b, d), OP_ADD(k, a)))); return OP_ADD(b, LeftRotate<r>(OP_ADD(I(c, b, d), OP_ADD(CREATE_VECTOR(k), a))));
} }
void md5(unsigned int * input, unsigned int * output) void md5(unsigned int * input, unsigned int * output)
{ {
MD5Vector a = Parameters.Init[0]; MD5Vector a = CREATE_VECTOR(Parameters.Init[0]);
MD5Vector b = Parameters.Init[1]; MD5Vector b = CREATE_VECTOR(Parameters.Init[1]);
MD5Vector c = Parameters.Init[2]; MD5Vector c = CREATE_VECTOR(Parameters.Init[2]);
MD5Vector d = Parameters.Init[3]; MD5Vector d = CREATE_VECTOR(Parameters.Init[3]);
MD5Vector inputVectors[8] = { MD5Vector inputVectors[8] = {
CREATE_VECTOR_FROM_INPUT(input, 0), CREATE_VECTOR_FROM_INPUT(input, 0),
@ -252,6 +289,8 @@ void md5(unsigned int * input, unsigned int * output)
b = Step4<21>(b, c, d, a, Parameters.K[59]); b = Step4<21>(b, c, d, a, Parameters.K[59]);
a = Step4< 6>(a, b, c, d, Parameters.K[60], inputVectors[4]); a = Step4< 6>(a, b, c, d, Parameters.K[60], inputVectors[4]);
output[0] = OP_ADD(Parameters.Init[0], a); a = OP_ADD(CREATE_VECTOR(Parameters.Init[0]), a);
WRITE_TO_OUTPUT(a, output);
} }
#pragma managed #pragma managed

Loading…
Cancel
Save