Refactored to use phrasesets

unmanaged
Inga 🏳‍🌈 7 years ago
parent 15e2687f31
commit fba2d3e10e
  1. 7
      dotnet/WhiteRabbit/Constants.cs
  2. 23
      dotnet/WhiteRabbit/MD5Digest.cs
  3. 52
      dotnet/WhiteRabbit/Phrase.cs
  4. 60
      dotnet/WhiteRabbit/PhraseSet.cs
  5. 30
      dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs
  6. 49
      dotnet/WhiteRabbit/Program.cs
  7. 10
      dotnet/WhiteRabbit/StringsProcessor.cs
  8. 3
      dotnet/WhiteRabbit/WhiteRabbit.csproj

@ -0,0 +1,7 @@
namespace WhiteRabbit
{
    /// <summary>
    /// Compile-time settings shared by the hashing and phrase-generation code.
    /// </summary>
    /// <remarks>
    /// Declared <c>static</c> because the type only holds constants and is never meant to be instantiated.
    /// </remarks>
    internal static class Constants
    {
        /// <summary>
        /// Number of phrases packed into a single <c>PhraseSet</c>; its buffer holds
        /// eight 32-bit words per phrase.
        /// </summary>
        public const int PhrasesPerSet = 1;
    }
}

@ -14,16 +14,21 @@ namespace WhiteRabbit
internal static class MD5Digest internal static class MD5Digest
{ {
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector<uint> Compute(Phrase input) public static unsafe Vector<uint>[] Compute(PhraseSet input)
{ {
var result = stackalloc uint[4]; var rawResult = new uint[4 * Constants.PhrasesPerSet];
MD5Unmanaged.ComputeMD5(input.Buffer, result); fixed (uint* resultPointer = rawResult)
return new Vector<uint>(new[] { {
result[0], MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer);
result[1], }
result[2],
result[3], var result = new Vector<uint>[Constants.PhrasesPerSet];
}); for (var i = 0; i < Constants.PhrasesPerSet; i++)
{
result[i] = new Vector<uint>(rawResult, 4 * i);
}
return result;
} }
} }
} }

@ -1,52 +0,0 @@
namespace WhiteRabbit
{
    /// <summary>
    /// A single anagram kept in a fixed eight-word buffer, the representation optimized for MD5.
    /// </summary>
    internal unsafe struct Phrase
    {
        // The text bytes come first, followed by a single 128 (0x80) byte;
        // word 7 stores the text length shifted left by 3 (i.e. length * 8).
        public fixed uint Buffer[8];

        /// <summary>
        /// Fills the buffer with the bytes of <paramref name="words"/> taken in the order
        /// given by <paramref name="permutation"/>.
        /// </summary>
        /// <param name="words">Words to concatenate; presumably each carries its own separator byte (TODO confirm against the caller).</param>
        /// <param name="permutation">Order in which the words are visited.</param>
        /// <param name="numberOfCharacters">Character count used to derive the total number of bytes to copy.</param>
        public Phrase(byte[][] words, PermutationsGenerator.Permutation permutation, int numberOfCharacters)
        {
            fixed (uint* buffer = this.Buffer)
            {
                var totalBytes = numberOfCharacters + words.Length - 1;
                var bytes = (byte*)buffer;

                var slot = 0;
                var word = words[permutation[slot]];
                var indexInWord = 0;

                var position = 0;
                while (position < totalBytes)
                {
                    // Current word exhausted: continue with the next one in permutation order.
                    if (indexInWord >= word.Length)
                    {
                        slot++;
                        word = words[permutation[slot]];
                        indexInWord = 0;
                    }

                    bytes[position] = word[indexInWord];
                    indexInWord++;
                    position++;
                }

                // Terminating marker byte right after the text, then the shifted length in the last word.
                bytes[position] = 128;
                buffer[7] = (uint)(totalBytes << 3);
            }
        }

        /// <summary>
        /// Copies the text bytes back out of the buffer.
        /// </summary>
        /// <returns>A new array whose length is the value of word 7 shifted right by 3.</returns>
        public byte[] GetBytes()
        {
            fixed (uint* buffer = this.Buffer)
            {
                var source = (byte*)buffer;
                var bytes = new byte[buffer[7] >> 3];
                for (var i = 0; i < bytes.Length; i++)
                {
                    bytes[i] = source[i];
                }

                return bytes;
            }
        }
    }
}

@ -0,0 +1,60 @@
namespace WhiteRabbit
{
    /// <summary>
    /// A group of <see cref="Constants.PhrasesPerSet"/> anagrams kept in one contiguous buffer,
    /// the representation optimized for MD5. Each phrase occupies eight consecutive 32-bit words.
    /// </summary>
    internal unsafe struct PhraseSet
    {
        // Per phrase: the text bytes come first, followed by a single 128 (0x80) byte;
        // word 7 of the phrase stores the text length shifted left by 3 (i.e. length * 8).
        public fixed uint Buffer[8 * Constants.PhrasesPerSet];

        /// <summary>
        /// Fills the buffer with <see cref="Constants.PhrasesPerSet"/> phrases, one for each of the
        /// permutations <c>permutations[offset] .. permutations[offset + PhrasesPerSet - 1]</c>.
        /// </summary>
        /// <param name="words">Words to concatenate; presumably each carries its own separator byte (TODO confirm against the caller).</param>
        /// <param name="permutations">All available word orders.</param>
        /// <param name="offset">Index of the first permutation to use for this set.</param>
        /// <param name="numberOfCharacters">Character count used to derive the number of bytes copied per phrase.</param>
        public PhraseSet(byte[][] words, PermutationsGenerator.Permutation[] permutations, int offset, int numberOfCharacters)
        {
            fixed (uint* bufferPointer = this.Buffer)
            {
                var length = numberOfCharacters + words.Length - 1;

                // The text plus the marker byte must fit into bytes 0..27 of a phrase slot,
                // because bytes 28..31 (word 7) are overwritten with the shifted length below.
                System.Diagnostics.Debug.Assert(length >= 0 && length <= 27, "Phrase does not fit into the 8-word buffer slot");
                System.Diagnostics.Debug.Assert(
                    offset >= 0 && offset + Constants.PhrasesPerSet <= permutations.Length,
                    "Not enough permutations left to fill a whole phrase set");

                for (var i = 0; i < Constants.PhrasesPerSet; i++)
                {
                    var permutation = permutations[offset + i];
                    var startPointer = bufferPointer + i * 8;

                    // Start from the first word of THIS phrase's permutation; taking it from
                    // permutations[offset] would give every phrase in the set the first word of phrase 0.
                    byte[] currentWord = words[permutation[0]];
                    var j = 0;
                    var wordIndex = 0;
                    var currentPointer = (byte*)startPointer;
                    byte* lastPointer = currentPointer + length;
                    for (; currentPointer < lastPointer; currentPointer++)
                    {
                        // Current word exhausted: continue with the next one in permutation order.
                        if (j >= currentWord.Length)
                        {
                            j = 0;
                            wordIndex++;
                            currentWord = words[permutation[wordIndex]];
                        }

                        *currentPointer = currentWord[j];
                        j++;
                    }

                    // Terminating marker byte right after the text, then the shifted length in the last word.
                    *currentPointer = 128;
                    startPointer[7] = (uint)(length << 3);
                }
            }
        }

        /// <summary>
        /// Copies the text bytes of one phrase back out of the buffer.
        /// </summary>
        /// <param name="number">Zero-based index of the phrase within the set.</param>
        /// <returns>A new array whose length is the value of that phrase's word 7 shifted right by 3.</returns>
        public byte[] GetBytes(int number)
        {
            System.Diagnostics.Debug.Assert(number >= 0 && number < Constants.PhrasesPerSet);
            fixed (uint* bufferPointer = this.Buffer)
            {
                var phrasePointer = bufferPointer + 8 * number;
                var length = phrasePointer[7] >> 3;
                var result = new byte[length];
                for (var i = 0; i < length; i++)
                {
                    result[i] = ((byte*)phrasePointer)[i];
                }

                return result;
            }
        }
    }
}

@ -7,24 +7,30 @@
{ {
private static PermutationsGenerator.Permutation[][] Permutations { get; } = new[] private static PermutationsGenerator.Permutation[][] Permutations { get; } = new[]
{ {
PermutationsGenerator.HamiltonianPermutations(0).ToArray(), GeneratePermutations(0),
PermutationsGenerator.HamiltonianPermutations(1).ToArray(), GeneratePermutations(1),
PermutationsGenerator.HamiltonianPermutations(2).ToArray(), GeneratePermutations(2),
PermutationsGenerator.HamiltonianPermutations(3).ToArray(), GeneratePermutations(3),
PermutationsGenerator.HamiltonianPermutations(4).ToArray(), GeneratePermutations(4),
PermutationsGenerator.HamiltonianPermutations(5).ToArray(), GeneratePermutations(5),
PermutationsGenerator.HamiltonianPermutations(6).ToArray(), GeneratePermutations(6),
PermutationsGenerator.HamiltonianPermutations(7).ToArray(), GeneratePermutations(7),
}; };
public static IEnumerable<PermutationsGenerator.Permutation> HamiltonianPermutations(int n) public static PermutationsGenerator.Permutation[] HamiltonianPermutations(int n)
{ {
if (n > 9) return Permutations[n];
}
private static PermutationsGenerator.Permutation[] GeneratePermutations(int n)
{
var result = PermutationsGenerator.HamiltonianPermutations(n).ToArray();
if (result.Length % Constants.PhrasesPerSet == 0)
{ {
return PermutationsGenerator.HamiltonianPermutations(n); return result;
} }
return Permutations[n]; return result.Concat(Enumerable.Repeat(result[0], Constants.PhrasesPerSet - (result.Length % Constants.PhrasesPerSet))).ToArray();
} }
} }
} }

@ -49,10 +49,6 @@
.Select(hash => new Vector<uint>(HexadecimalStringToUnsignedIntArray(hash))) .Select(hash => new Vector<uint>(HexadecimalStringToUnsignedIntArray(hash)))
.ToArray(); .ToArray();
#if DEBUG
var anagramsBag = new ConcurrentBag<string>();
#endif
var processor = new StringsProcessor( var processor = new StringsProcessor(
Encoding.ASCII.GetBytes(sourcePhrase), Encoding.ASCII.GetBytes(sourcePhrase),
maxWordsInPhrase, maxWordsInPhrase,
@ -70,41 +66,24 @@
processor.GeneratePhrases() processor.GeneratePhrases()
.ForAll(phraseBytes => .ForAll(phraseBytes =>
{ {
Debug.Assert( var hashVectors = MD5Digest.Compute(phraseBytes);
sourceChars == ToOrderedChars(ToString(phraseBytes)), for (var i = 0; i < Constants.PhrasesPerSet; i++)
$"StringsProcessor produced incorrect anagram: {ToString(phraseBytes)}");
var hashVector = MD5Digest.Compute(phraseBytes);
if (Array.IndexOf(expectedHashesAsVectors, hashVector) >= 0)
{ {
var phrase = ToString(phraseBytes); Debug.Assert(
var hash = VectorToHexadecimalString(hashVector); sourceChars == ToOrderedChars(ToString(phraseBytes, i)),
Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}"); $"StringsProcessor produced incorrect anagram: {ToString(phraseBytes, i)}");
if (Array.IndexOf(expectedHashesAsVectors, hashVectors[i]) >= 0)
{
var phrase = ToString(phraseBytes, i);
var hash = VectorToHexadecimalString(hashVectors[i]);
Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
}
} }
#if DEBUG
anagramsBag.Add(ToString(phraseBytes));
#endif
}); });
Console.WriteLine($"Done; time from start: {stopwatch.Elapsed}"); Console.WriteLine($"Done; time from start: {stopwatch.Elapsed}");
#if DEBUG
var anagramsArray = anagramsBag.ToArray();
var anagramsSet = new HashSet<string>(anagramsArray);
Array.Sort(anagramsArray);
Console.WriteLine("All anagrams:");
for (var i = 0; i < anagramsArray.Length; i++)
{
Console.WriteLine(anagramsArray[i]);
}
// Duplicate anagrams are expected, as e.g. "norway spoils tut tut" will be taken twice:
// as "norway1 spoils2 tut3 tut4" and "norway1 spoils2 tut4 tut3"
// (in addition to e.g. "norway1 tut3 spoils2 tut4")
Console.WriteLine($"Total anagrams count: {anagramsArray.Length}; unique anagrams: {anagramsSet.Count}; time from start: {stopwatch.Elapsed}");
#endif
} }
// Code taken from http://stackoverflow.com/a/321404/831314 // Code taken from http://stackoverflow.com/a/321404/831314
@ -131,9 +110,9 @@
return hex.Substring(6, 2) + hex.Substring(4, 2) + hex.Substring(2, 2) + hex.Substring(0, 2); return hex.Substring(6, 2) + hex.Substring(4, 2) + hex.Substring(2, 2) + hex.Substring(0, 2);
} }
private static string ToString(Phrase phrase) private static string ToString(PhraseSet phrase, int offset)
{ {
return Encoding.ASCII.GetString(phrase.GetBytes()); return Encoding.ASCII.GetString(phrase.GetBytes(offset));
} }
private static IEnumerable<byte[]> ReadInput() private static IEnumerable<byte[]> ReadInput()

@ -52,7 +52,7 @@
#if SINGLE_THREADED #if SINGLE_THREADED
public IEnumerable<byte[]> GeneratePhrases() public IEnumerable<byte[]> GeneratePhrases()
#else #else
public ParallelQuery<Phrase> GeneratePhrases() public ParallelQuery<PhraseSet> GeneratePhrases()
#endif #endif
{ {
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
@ -85,11 +85,13 @@
return words; return words;
} }
private IEnumerable<Phrase> ConvertWordsToPhrases(byte[][] words) private IEnumerable<PhraseSet> ConvertWordsToPhrases(byte[][] words)
{ {
foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length)) var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length);
var permutationsLength = permutations.Length;
for (var i = 0; i < permutationsLength; i++)
{ {
yield return new Phrase(words, permutation, this.NumberOfCharacters); yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters);
} }
} }
} }

@ -58,9 +58,10 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Compile Include="ByteArrayEqualityComparer.cs" /> <Compile Include="ByteArrayEqualityComparer.cs" />
<Compile Include="Constants.cs" />
<Compile Include="Flattener.cs" /> <Compile Include="Flattener.cs" />
<Compile Include="MD5Digest.cs" /> <Compile Include="MD5Digest.cs" />
<Compile Include="Phrase.cs" /> <Compile Include="PhraseSet.cs" />
<Compile Include="PrecomputedPermutationsGenerator.cs" /> <Compile Include="PrecomputedPermutationsGenerator.cs" />
<Compile Include="PermutationsGenerator.cs" /> <Compile Include="PermutationsGenerator.cs" />
<Compile Include="StringsProcessor.cs" /> <Compile Include="StringsProcessor.cs" />

Loading…
Cancel
Save