Refactored to use phrasesets

unmanaged
Inga 🏳‍🌈 7 years ago
parent 15e2687f31
commit fba2d3e10e
  1. 7
      dotnet/WhiteRabbit/Constants.cs
  2. 23
      dotnet/WhiteRabbit/MD5Digest.cs
  3. 52
      dotnet/WhiteRabbit/Phrase.cs
  4. 60
      dotnet/WhiteRabbit/PhraseSet.cs
  5. 30
      dotnet/WhiteRabbit/PrecomputedPermutationsGenerator.cs
  6. 49
      dotnet/WhiteRabbit/Program.cs
  7. 10
      dotnet/WhiteRabbit/StringsProcessor.cs
  8. 3
      dotnet/WhiteRabbit/WhiteRabbit.csproj

@ -0,0 +1,7 @@
namespace WhiteRabbit
{
    /// <summary>
    /// Compile-time settings shared across the project.
    /// </summary>
    /// <remarks>
    /// Declared <c>static</c> because the type only holds constants and is never meant to be instantiated.
    /// </remarks>
    internal static class Constants
    {
        /// <summary>
        /// Number of phrases that are grouped into a single set and processed together.
        /// Must stay a <c>const</c>: it is used where a compile-time constant is required
        /// (e.g. as a fixed-size buffer length).
        /// </summary>
        public const int PhrasesPerSet = 1;
    }
}

@ -14,16 +14,21 @@ namespace WhiteRabbit
internal static class MD5Digest
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector<uint> Compute(Phrase input)
public static unsafe Vector<uint>[] Compute(PhraseSet input)
{
var result = stackalloc uint[4];
MD5Unmanaged.ComputeMD5(input.Buffer, result);
return new Vector<uint>(new[] {
result[0],
result[1],
result[2],
result[3],
});
var rawResult = new uint[4 * Constants.PhrasesPerSet];
fixed (uint* resultPointer = rawResult)
{
MD5Unmanaged.ComputeMD5(input.Buffer, resultPointer);
}
var result = new Vector<uint>[Constants.PhrasesPerSet];
for (var i = 0; i < Constants.PhrasesPerSet; i++)
{
result[i] = new Vector<uint>(rawResult, 4 * i);
}
return result;
}
}
}

@ -1,52 +0,0 @@
namespace WhiteRabbit
{
    /// <summary>
    /// Anagram representation optimized for MD5: a single phrase stored in a 32-byte (8 x uint) inline buffer.
    /// </summary>
    /// <remarks>
    /// Layout written by the constructor: the phrase bytes, then one byte with value 128,
    /// and the phrase length in bits in the last uint (index 7).
    /// </remarks>
    internal unsafe struct Phrase
    {
        public fixed uint Buffer[8];

        /// <summary>
        /// Fills the buffer with the bytes of <paramref name="words"/> taken in the order given by <paramref name="permutation"/>.
        /// </summary>
        /// <param name="words">Word byte arrays to concatenate.</param>
        /// <param name="permutation">Maps the position in the phrase to an index into <paramref name="words"/>.</param>
        /// <param name="numberOfCharacters">
        /// Character count; the number of bytes copied is this value plus <c>words.Length - 1</c>.
        /// NOTE(review): the extra bytes are read from the word arrays themselves, so presumably
        /// every word carries its own separator byte - confirm against the caller.
        /// </param>
        public Phrase(byte[][] words, PermutationsGenerator.Permutation permutation, int numberOfCharacters)
        {
            fixed (uint* bufferPointer = this.Buffer)
            {
                var totalBytes = numberOfCharacters + words.Length - 1;
                var target = (byte*)bufferPointer;

                var wordPosition = 0;
                byte[] word = words[permutation[wordPosition]];
                var offsetInWord = 0;

                var written = 0;
                while (written < totalBytes)
                {
                    // Current word exhausted: continue with the next word of the permutation.
                    if (offsetInWord >= word.Length)
                    {
                        wordPosition++;
                        word = words[permutation[wordPosition]];
                        offsetInWord = 0;
                    }

                    target[written] = word[offsetInWord];
                    offsetInWord++;
                    written++;
                }

                // Terminating marker byte right after the phrase.
                target[written] = 128;

                // Phrase length expressed in bits.
                bufferPointer[7] = (uint)(totalBytes << 3);
            }
        }

        /// <summary>
        /// Extracts the phrase bytes (without the marker byte) into a new array.
        /// </summary>
        /// <returns>A copy of the phrase bytes; its size is decoded from the bit length stored in the last uint.</returns>
        public byte[] GetBytes()
        {
            fixed (uint* bufferPointer = this.Buffer)
            {
                var source = (byte*)bufferPointer;

                // Stored value is the length in bits; shift back to bytes.
                var byteCount = (int)(bufferPointer[7] >> 3);
                var bytes = new byte[byteCount];

                var index = 0;
                while (index < bytes.Length)
                {
                    bytes[index] = source[index];
                    index++;
                }

                return bytes;
            }
        }
    }
}

@ -0,0 +1,60 @@
namespace WhiteRabbit
{
    /// <summary>
    /// Anagram representation optimized for MD5: <see cref="Constants.PhrasesPerSet"/> phrases
    /// stored back to back, each in its own 32-byte (8 x uint) slot of the inline buffer.
    /// </summary>
    /// <remarks>
    /// Layout of every slot as written by the constructor: the phrase bytes, then one byte with value 128,
    /// and the phrase length in bits in the last uint of the slot (index 7).
    /// The constructor does not check that the phrase plus the marker byte fit into the first 28 bytes of the slot.
    /// </remarks>
    internal unsafe struct PhraseSet
    {
        public fixed uint Buffer[8 * Constants.PhrasesPerSet];

        /// <summary>
        /// Builds one phrase per permutation, using the permutations
        /// <c>permutations[offset] .. permutations[offset + Constants.PhrasesPerSet - 1]</c>.
        /// </summary>
        /// <param name="words">Word byte arrays to concatenate.</param>
        /// <param name="permutations">Permutations mapping the position in a phrase to an index into <paramref name="words"/>.</param>
        /// <param name="offset">Index of the first permutation to use for this set.</param>
        /// <param name="numberOfCharacters">
        /// Character count; the number of bytes copied per phrase is this value plus <c>words.Length - 1</c>.
        /// NOTE(review): the extra bytes are read from the word arrays themselves, so presumably
        /// every word carries its own separator byte - confirm against the caller.
        /// </param>
        public PhraseSet(byte[][] words, PermutationsGenerator.Permutation[] permutations, int offset, int numberOfCharacters)
        {
            fixed (uint* bufferPointer = this.Buffer)
            {
                var length = numberOfCharacters + words.Length - 1;

                for (var i = 0; i < Constants.PhrasesPerSet; i++)
                {
                    var permutation = permutations[offset + i];
                    var startPointer = bufferPointer + i * 8;

                    // The first word must come from the permutation of THIS phrase.
                    // (Previously it was always taken from permutations[offset], which made every
                    // phrase after the first one start with the wrong word when PhrasesPerSet > 1.)
                    byte[] currentWord = words[permutation[0]];
                    var j = 0;
                    var wordIndex = 0;
                    var currentPointer = (byte*)startPointer;
                    byte* lastPointer = currentPointer + length;

                    for (; currentPointer < lastPointer; currentPointer++)
                    {
                        // Current word exhausted: continue with the next word of the permutation.
                        if (j >= currentWord.Length)
                        {
                            j = 0;
                            wordIndex++;
                            currentWord = words[permutation[wordIndex]];
                        }

                        *currentPointer = currentWord[j];
                        j++;
                    }

                    // Terminating marker byte right after the phrase.
                    *currentPointer = 128;

                    // Phrase length expressed in bits, kept in the last uint of the slot.
                    startPointer[7] = (uint)(length << 3);
                }
            }
        }

        /// <summary>
        /// Extracts the bytes of one phrase (without the marker byte) into a new array.
        /// </summary>
        /// <param name="number">Zero-based index of the phrase inside the set; must be less than <see cref="Constants.PhrasesPerSet"/>.</param>
        /// <returns>A copy of the phrase bytes; its size is decoded from the bit length stored in the slot.</returns>
        public byte[] GetBytes(int number)
        {
            // Debug-only guard: an out-of-range index would read outside of the fixed buffer.
            System.Diagnostics.Debug.Assert(number >= 0 && number < Constants.PhrasesPerSet);

            fixed (uint* bufferPointer = this.Buffer)
            {
                var phrasePointer = bufferPointer + 8 * number;

                // Stored value is the length in bits; shift back to bytes.
                var length = phrasePointer[7] >> 3;
                var result = new byte[length];
                for (var i = 0; i < length; i++)
                {
                    result[i] = ((byte*)phrasePointer)[i];
                }

                return result;
            }
        }
    }
}

@ -7,24 +7,30 @@
{
private static PermutationsGenerator.Permutation[][] Permutations { get; } = new[]
{
PermutationsGenerator.HamiltonianPermutations(0).ToArray(),
PermutationsGenerator.HamiltonianPermutations(1).ToArray(),
PermutationsGenerator.HamiltonianPermutations(2).ToArray(),
PermutationsGenerator.HamiltonianPermutations(3).ToArray(),
PermutationsGenerator.HamiltonianPermutations(4).ToArray(),
PermutationsGenerator.HamiltonianPermutations(5).ToArray(),
PermutationsGenerator.HamiltonianPermutations(6).ToArray(),
PermutationsGenerator.HamiltonianPermutations(7).ToArray(),
GeneratePermutations(0),
GeneratePermutations(1),
GeneratePermutations(2),
GeneratePermutations(3),
GeneratePermutations(4),
GeneratePermutations(5),
GeneratePermutations(6),
GeneratePermutations(7),
};
public static IEnumerable<PermutationsGenerator.Permutation> HamiltonianPermutations(int n)
public static PermutationsGenerator.Permutation[] HamiltonianPermutations(int n)
{
if (n > 9)
return Permutations[n];
}
private static PermutationsGenerator.Permutation[] GeneratePermutations(int n)
{
var result = PermutationsGenerator.HamiltonianPermutations(n).ToArray();
if (result.Length % Constants.PhrasesPerSet == 0)
{
return PermutationsGenerator.HamiltonianPermutations(n);
return result;
}
return Permutations[n];
return result.Concat(Enumerable.Repeat(result[0], Constants.PhrasesPerSet - (result.Length % Constants.PhrasesPerSet))).ToArray();
}
}
}

@ -49,10 +49,6 @@
.Select(hash => new Vector<uint>(HexadecimalStringToUnsignedIntArray(hash)))
.ToArray();
#if DEBUG
var anagramsBag = new ConcurrentBag<string>();
#endif
var processor = new StringsProcessor(
Encoding.ASCII.GetBytes(sourcePhrase),
maxWordsInPhrase,
@ -70,41 +66,24 @@
processor.GeneratePhrases()
.ForAll(phraseBytes =>
{
Debug.Assert(
sourceChars == ToOrderedChars(ToString(phraseBytes)),
$"StringsProcessor produced incorrect anagram: {ToString(phraseBytes)}");
var hashVector = MD5Digest.Compute(phraseBytes);
if (Array.IndexOf(expectedHashesAsVectors, hashVector) >= 0)
var hashVectors = MD5Digest.Compute(phraseBytes);
for (var i = 0; i < Constants.PhrasesPerSet; i++)
{
var phrase = ToString(phraseBytes);
var hash = VectorToHexadecimalString(hashVector);
Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
Debug.Assert(
sourceChars == ToOrderedChars(ToString(phraseBytes, i)),
$"StringsProcessor produced incorrect anagram: {ToString(phraseBytes, i)}");
if (Array.IndexOf(expectedHashesAsVectors, hashVectors[i]) >= 0)
{
var phrase = ToString(phraseBytes, i);
var hash = VectorToHexadecimalString(hashVectors[i]);
Console.WriteLine($"Found phrase for {hash}: {phrase}; time from start is {stopwatch.Elapsed}");
}
}
#if DEBUG
anagramsBag.Add(ToString(phraseBytes));
#endif
});
Console.WriteLine($"Done; time from start: {stopwatch.Elapsed}");
#if DEBUG
var anagramsArray = anagramsBag.ToArray();
var anagramsSet = new HashSet<string>(anagramsArray);
Array.Sort(anagramsArray);
Console.WriteLine("All anagrams:");
for (var i = 0; i < anagramsArray.Length; i++)
{
Console.WriteLine(anagramsArray[i]);
}
// Duplicate anagrams are expected, as e.g. "norway spoils tut tut" will be taken twice:
// as "norway1 spoils2 tut3 tut4" and "norway1 spoils2 tut4 tut3"
// (in addition to e.g. "norway1 tut3 spoils2 tut4")
Console.WriteLine($"Total anagrams count: {anagramsArray.Length}; unique anagrams: {anagramsSet.Count}; time from start: {stopwatch.Elapsed}");
#endif
}
// Code taken from http://stackoverflow.com/a/321404/831314
@ -131,9 +110,9 @@
return hex.Substring(6, 2) + hex.Substring(4, 2) + hex.Substring(2, 2) + hex.Substring(0, 2);
}
private static string ToString(Phrase phrase)
private static string ToString(PhraseSet phrase, int offset)
{
return Encoding.ASCII.GetString(phrase.GetBytes());
return Encoding.ASCII.GetString(phrase.GetBytes(offset));
}
private static IEnumerable<byte[]> ReadInput()

@ -52,7 +52,7 @@
#if SINGLE_THREADED
public IEnumerable<byte[]> GeneratePhrases()
#else
public ParallelQuery<Phrase> GeneratePhrases()
public ParallelQuery<PhraseSet> GeneratePhrases()
#endif
{
// task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum
@ -85,11 +85,13 @@
return words;
}
private IEnumerable<Phrase> ConvertWordsToPhrases(byte[][] words)
private IEnumerable<PhraseSet> ConvertWordsToPhrases(byte[][] words)
{
foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length))
var permutations = PrecomputedPermutationsGenerator.HamiltonianPermutations(words.Length);
var permutationsLength = permutations.Length;
for (var i = 0; i < permutationsLength; i++)
{
yield return new Phrase(words, permutation, this.NumberOfCharacters);
yield return new PhraseSet(words, permutations, i, this.NumberOfCharacters);
}
}
}

@ -58,9 +58,10 @@
</ItemGroup>
<ItemGroup>
<Compile Include="ByteArrayEqualityComparer.cs" />
<Compile Include="Constants.cs" />
<Compile Include="Flattener.cs" />
<Compile Include="MD5Digest.cs" />
<Compile Include="Phrase.cs" />
<Compile Include="PhraseSet.cs" />
<Compile Include="PrecomputedPermutationsGenerator.cs" />
<Compile Include="PermutationsGenerator.cs" />
<Compile Include="StringsProcessor.cs" />

Loading…
Cancel
Save