New optimization: there is no point in checking vectors that are too small

feature-optimized-md5
Inga 🏳‍🌈 7 years ago
parent 8210dd27b3
commit 5d2cd465d4
  1. 9
      WhiteRabbit/VectorsConverter.cs
  2. 72
      WhiteRabbit/VectorsProcessor.cs
  3. 2
      WhiteRabbit/WhiteRabbit.csproj

@ -1,5 +1,6 @@
namespace WhiteRabbit
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
@ -14,6 +15,12 @@
{
var rawNumberOfOccurrences = sourceString.GroupBy(ch => ch).ToDictionary(group => group.Key, group => group.Count());
this.IntToChar = rawNumberOfOccurrences.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Key).ToArray();
if (this.IntToChar.Length > Vector<byte>.Count)
{
throw new ArgumentException($"String should not contain more than {Vector<byte>.Count} different characters", nameof(sourceString));
}
this.CharToInt = Enumerable.Range(0, this.IntToChar.Length).ToDictionary(i => this.IntToChar[i], i => i);
}
@ -28,7 +35,7 @@
return null;
}
var arr = new byte[16];
var arr = new byte[Vector<byte>.Count];
foreach (var ch in word)
{
arr[this.CharToInt[ch]]++;

@ -17,22 +17,36 @@
public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> dictionary, Func<Vector<byte>, string> vectorToString)
{
#if SUPPORT_LARGE_STRINGS
if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > 8))
{
throw new ArgumentException("Every value should be at most 8 (at most 8 same characters allowed in the source string)", nameof(target));
}
#else
if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > 4))
{
throw new ArgumentException("Every value should be at most 4 (at most 4 same characters allowed in the source string)", nameof(target));
}
#endif
this.Target = target;
#if !SUPPORT_LARGE_STRINGS
this.TargetComplement = new Vector<byte>(Enumerable.Range(0, Vector<byte>.Count).Select(i => (byte)(this.Target[i] == 0 ? 0 : (byte)(12 / this.Target[i]))).ToArray());
#endif
this.TargetNorm = Vector.Dot(target, Vector<byte>.One);
this.MaxVectorsCount = maxVectorsCount;
this.VectorToString = vectorToString;
this.Dictionary = ImmutableStack.Create<Vector<byte>>(FilterVectors(dictionary, target).ToArray());
this.Dictionary = ImmutableStack.Create(FilterVectors(dictionary, target, this.TargetComplement).ToArray());
}
/// <summary>
/// Mask with the sign bit (128) set in every byte lane.
/// (byte)b &amp; (byte)128 equals zero for non-negative (0..127) bytes and equals (byte)128 for negative (128..255) bytes.
/// Similarly, vector &amp; Negative equals zero if all bytes are non-negative, and does not equal zero if some bytes are negative.
/// Use <code>(vector &amp; Negative) == Vector&lt;byte&gt;.Zero</code> to determine if all components are non-negative.
/// </summary>
/// <remarks>
/// Built with the broadcasting constructor so that the mask always covers exactly
/// <see cref="Vector{T}.Count"/> lanes; a hard-coded 16-element array is too short
/// when the hardware vector width is larger than 128 bits.
/// </remarks>
private static Vector<byte> Negative { get; } = new Vector<byte>((byte)128);
private Vector<byte> Target { get; }
private Vector<byte> TargetComplement { get; }
private byte TargetNorm { get; }
private int MaxVectorsCount { get; }
private ImmutableStack<Vector<byte>> Dictionary { get; }
@ -57,22 +71,26 @@
// And total number of quintuplets becomes reasonable 1412M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target, Vector<byte> targetComplement)
{
#if SUPPORT_LARGE_STRINGS
var weight = 0;
for (var i = 0; target[i] != 0; i++)
{
weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6)
weight += (840 * vector[i]) / target[i]; // 840 = LCM(1, 2, .., 8), so that the result will be a whole number (unless Target[i] > 8)
}
return weight;
#else
return Vector.Dot(vector, targetComplement);
#endif
}
private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target)
private static IEnumerable<Vector<byte>> FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target, Vector<byte> targetComplement)
{
return vectors
.Where(vector => ((target - vector) & Negative) == Vector<byte>.Zero)
.OrderBy(vector => GetVectorWeight(vector, target));
.Where(vector => Vector.GreaterThanOrEqualAll(target, vector))
.OrderBy(vector => GetVectorWeight(vector, target, targetComplement));
}
[Conditional("DEBUG")]
@ -91,9 +109,17 @@
// That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x])
private IEnumerable<Vector<byte>[]> GenerateUnorderedSequences(Vector<byte> remainder, ImmutableStack<Vector<byte>> partialSumStack, ImmutableStack<Vector<byte>> dictionaryStack)
{
var count = partialSumStack.Count() + 1;
if (count < this.MaxVectorsCount)
var allowedRemainingWords = this.MaxVectorsCount - partialSumStack.Count();
if (allowedRemainingWords > 1)
{
#if !SUPPORT_LARGE_STRINGS
// e.g. if remainder norm is 7, 8 or 9, and allowedRemainingWords is 3,
// we need the largest remaining word to have a norm of at least 3
var remainderNorm = Vector.Dot(remainder, this.TargetComplement);
var requiredRemainder = (remainderNorm + allowedRemainingWords - 1) / allowedRemainingWords;
#endif
var dictionaryTail = dictionaryStack;
while (!dictionaryTail.IsEmpty)
{
@ -102,13 +128,19 @@
this.DebugState(partialSumStack, currentVector);
var newRemainder = remainder - currentVector;
if (newRemainder == Vector<byte>.Zero)
if (currentVector == remainder)
{
yield return partialSumStack.Push(currentVector).Reverse().ToArray();
}
else if ((newRemainder & Negative) == Vector<byte>.Zero)
#if !SUPPORT_LARGE_STRINGS
else if (Vector.Dot(currentVector, this.TargetComplement) < requiredRemainder)
{
break;
}
#endif
else if (Vector.LessThanOrEqualAll(currentVector, remainder))
{
var newRemainder = remainder - currentVector;
foreach (var result in this.GenerateUnorderedSequences(newRemainder, partialSumStack.Push(currentVector), dictionaryTail))
{
yield return result;
@ -118,7 +150,7 @@
dictionaryTail = nextDictionaryTail;
}
}
else if (count == this.MaxVectorsCount)
else
{
var dictionaryTail = dictionaryStack;
while (!dictionaryTail.IsEmpty)

@ -19,7 +19,7 @@
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<DefineConstants>TRACE;DEBUG</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
<DocumentationFile>bin\Debug\WhiteRabbit.XML</DocumentationFile>

Loading…
Cancel
Save