namespace WhiteRabbit
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Immutable;
    using System.Diagnostics;
    using System.Linq;
    using System.Numerics;

    // Finds every sequence of dictionary vectors (at most MaxVectorsCount long) whose component-wise sum equals the target vector.
    // Judging by the error messages and comments below, each vector component is the number of occurrences of one character.
    internal class VectorsProcessor
    {
        // Ensure that permutations are precomputed prior to main run, so that processing times will be correct
        static VectorsProcessor()
        {
            PrecomputedPermutationsGenerator.HamiltonianPermutations(0);
        }

        // target: the vector every produced sequence must add up to.
        // maxVectorsCount: maximum number of vectors in a produced sequence.
        // dictionary: candidate vectors; those exceeding the target in any component are dropped.
        // vectorToString: used only for debug output.
        // Throws ArgumentException if any target component exceeds the supported maximum (4, or 8 with SUPPORT_LARGE_STRINGS).
        public VectorsProcessor(Vector<byte> target, int maxVectorsCount, IEnumerable<Vector<byte>> dictionary, Func<Vector<byte>, string> vectorToString)
        {
#if SUPPORT_LARGE_STRINGS
            if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > 8))
            {
                throw new ArgumentException("Every value should be at most 8 (at most 8 same characters allowed in the source string)", nameof(target));
            }
#else
            if (Enumerable.Range(0, Vector<byte>.Count).Any(i => target[i] > 4))
            {
                throw new ArgumentException("Every value should be at most 4 (at most 4 same characters allowed in the source string)", nameof(target));
            }
#endif

            this.Target = target;

#if !SUPPORT_LARGE_STRINGS
            // 12 = LCM(1, 2, 3, 4), so 12 / Target[i] is a whole number for every allowed non-zero component.
            this.TargetComplement = new Vector<byte>(Enumerable.Range(0, Vector<byte>.Count).Select(i => (byte)(this.Target[i] == 0 ? 0 : (byte)(12 / this.Target[i]))).ToArray());
#endif
            this.TargetNorm = Vector.Dot(target, Vector<byte>.One);
            this.MaxVectorsCount = maxVectorsCount;
            this.VectorToString = vectorToString;
            this.Dictionary = ImmutableArray.Create(FilterVectors(dictionary, target, this.TargetComplement).ToArray());
        }

        private Vector<byte> Target { get; }

        // Left at its default value when SUPPORT_LARGE_STRINGS is defined.
        private Vector<byte> TargetComplement { get; }

        private byte TargetNorm { get; }

        private int MaxVectorsCount { get; }

        // Filtered dictionary, ordered by descending weight (see FilterVectors).
        private ImmutableArray<VectorInfo> Dictionary { get; }

        private Func<Vector<byte>, string> VectorToString { get; }

        // Counter used by DebugState only.
        private long Iterations { get; set; } = 0;

        // Produces all sequences of vectors with the target sum
        public ParallelQuery<Vector<byte>[]> GenerateSequences()
        {
            return this.GenerateUnorderedSequences(this.Target, this.MaxVectorsCount, 0)
                .AsParallel()
                .Select(Enumerable.ToArray)
                .SelectMany(this.GeneratePermutations);
        }

        // We want words with more letters (and among these, words with more "rare" letters) to appear first, to reduce the searching time somewhat.
        // Applying such a sort, we reduce the total number of triplets to check for anagrams from ~62M to ~29M.
        // Total number of quadruplets is reduced from 1468M to mere 311M.
        // And total number of quintuplets becomes reasonable 1412M.
        // Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
        // This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
#if SUPPORT_LARGE_STRINGS
        private static int GetVectorWeight(Vector<byte> vector, Vector<byte> target)
        {
            var weight = 0;

            // Stops at the first zero component of the target; the explicit bound prevents reading past the
            // end of the vector when the target has no zero component at all.
            for (var i = 0; i < Vector<byte>.Count && target[i] != 0; i++)
            {
                weight += (840 * vector[i]) / target[i]; // 840 = LCM(1, 2, .., 8), so that the result will be a whole number (unless Target[i] > 8)
            }

            return weight;
        }
#else
        private static byte GetVectorWeight(Vector<byte> vector, Vector<byte> targetComplement)
        {
            return Vector.Dot(vector, targetComplement);
        }
#endif

        // Keeps only the vectors that do not exceed the target in any component, and orders them by descending weight.
        // With SUPPORT_LARGE_STRINGS the stored norm is always 0 (the weight does not fit a byte there); otherwise it is the weight itself.
        private static VectorInfo[] FilterVectors(IEnumerable<Vector<byte>> vectors, Vector<byte> target, Vector<byte> targetComplement)
        {
            return vectors
                .Where(vector => Vector.GreaterThanOrEqualAll(target, vector))
#if SUPPORT_LARGE_STRINGS
                .Select(vector => new { vector = vector, weight = GetVectorWeight(vector, target) })
#else
                .Select(vector => new { vector = vector, weight = GetVectorWeight(vector, targetComplement) })
#endif
                .OrderByDescending(tuple => tuple.weight)
#if SUPPORT_LARGE_STRINGS
                .Select(tuple => new VectorInfo(tuple.vector, 0))
#else
                .Select(tuple => new VectorInfo(tuple.vector, tuple.weight))
#endif
                .ToArray();
        }

        // Prints a progress line every 1,000,000 calls; compiled into DEBUG builds only.
        [Conditional("DEBUG")]
        private void DebugState(int allowedRemainingWords, Vector<byte> currentVector)
        {
            this.Iterations++;
            if (this.Iterations % 1000000 == 0)
            {
                Console.WriteLine($"Iteration #{this.Iterations}: {allowedRemainingWords}, {this.VectorToString(currentVector)}");
            }
        }

        // This method takes most of the time, so everything related to it must be optimized.
        // In every sequence, next vector never comes before the previous one in the dictionary (the same entry may be used again).
        // E.g. if dictionary is [x, y, z], then only [x, y] sequence could be generated, and [y, x] will never be generated.
        // That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x])
        private IEnumerable<ImmutableStack<Vector<byte>>> GenerateUnorderedSequences(Vector<byte> remainder, int allowedRemainingWords, int currentDictionaryPosition)
        {
#if SUPPORT_LARGE_STRINGS
            // Norms are not tracked in this mode (every stored norm is 0), so no dictionary entries can be skipped.
            var firstCandidate = currentDictionaryPosition;
#else
            var remainderNorm = Vector.Dot(remainder, this.TargetComplement);

            // Entries with a norm greater than the remainder's cannot fit into the remainder; skip them at once.
            var firstCandidate = this.FindFirstWithNormLessOrEqual(remainderNorm, currentDictionaryPosition);
#endif

            if (allowedRemainingWords > 1)
            {
                var newAllowedRemainingWords = allowedRemainingWords - 1;

#if !SUPPORT_LARGE_STRINGS
                // e.g. if remainder norm is 7, 8 or 9, and allowedRemainingWords is 3,
                // we need the largest remaining word to have a norm of at least 3
                var requiredRemainder = (remainderNorm + allowedRemainingWords - 1) / allowedRemainingWords;
#endif

                for (var i = firstCandidate; i < this.Dictionary.Length; i++)
                {
                    Vector<byte> currentVector = this.Dictionary[i].Vector;

                    this.DebugState(allowedRemainingWords, currentVector);

                    if (currentVector == remainder)
                    {
                        yield return ImmutableStack.Create(currentVector);
                    }
#if !SUPPORT_LARGE_STRINGS
                    else if (this.Dictionary[i].Norm < requiredRemainder)
                    {
                        // The dictionary is sorted by descending norm, so no later entry can be large enough either.
                        break;
                    }
#endif
                    else if (Vector.LessThanOrEqualAll(currentVector, remainder))
                    {
                        var newRemainder = remainder - currentVector;
                        foreach (var result in this.GenerateUnorderedSequences(newRemainder, newAllowedRemainingWords, i))
                        {
                            yield return result.Push(currentVector);
                        }
                    }
                }
            }
            else
            {
                // Only one more vector is allowed, so it has to be exactly equal to the remainder.
                for (var i = firstCandidate; i < this.Dictionary.Length; i++)
                {
                    Vector<byte> currentVector = this.Dictionary[i].Vector;

                    this.DebugState(allowedRemainingWords, currentVector);

                    if (currentVector == remainder)
                    {
                        yield return ImmutableStack.Create(currentVector);
                    }
#if !SUPPORT_LARGE_STRINGS
                    else if (this.Dictionary[i].Norm < remainderNorm)
                    {
                        break;
                    }
#endif
                }
            }
        }

        // BCL BinarySearch would find any vector with required norm, not the first one; or would find nothing if there is no such vector
        // Returns the index (not less than offset) of the first entry whose norm is at most expectedNorm,
        // or Dictionary.Length if there is no such entry.
        private int FindFirstWithNormLessOrEqual(byte expectedNorm, int offset)
        {
            var start = offset;
            var end = this.Dictionary.Length - 1;

            // Nothing to search: the dictionary is empty, or the offset is already past its last entry.
            if (start > end)
            {
                return this.Dictionary.Length;
            }

            if (this.Dictionary[start].Norm <= expectedNorm)
            {
                return start;
            }

            if (this.Dictionary[end].Norm > expectedNorm)
            {
                return this.Dictionary.Length;
            }

            // Norm for start is always greater than expected norm, or start is the required position; norm for end is always less than or equal to expected norm
            // The loop always ends, because the difference always decreases; if start + 1 = end, then middle will be equal to start, and either end := middle = start or start := middle + 1 = end.
            while (start < end)
            {
                var middle = (start + end) / 2;
                var middleNorm = this.Dictionary[middle].Norm;
                if (middleNorm <= expectedNorm)
                {
                    end = middle;
                }
                else
                {
                    start = middle + 1;
                }
            }

            return start;
        }

        // Yields one reordered copy of the original array for every index permutation provided by PrecomputedPermutationsGenerator.
        private IEnumerable<T[]> GeneratePermutations<T>(T[] original)
        {
            foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(original.Length))
            {
                yield return permutation.Select(i => original[i]).ToArray();
            }
        }

        // A dictionary vector together with its precomputed norm (weight).
        private struct VectorInfo
        {
            public VectorInfo(Vector<byte> vector, byte norm)
            {
                this.Vector = vector;
                this.Norm = norm;
            }

            public Vector<byte> Vector { get; }

            public byte Norm { get; }
        }
    }
}