diff --git a/WhiteRabbit/VectorsConverter.cs b/WhiteRabbit/VectorsConverter.cs index 491c4ae..6421f5c 100644 --- a/WhiteRabbit/VectorsConverter.cs +++ b/WhiteRabbit/VectorsConverter.cs @@ -1,5 +1,6 @@ namespace WhiteRabbit { + using System; using System.Collections.Generic; using System.Linq; using System.Numerics; @@ -14,6 +15,12 @@ { var rawNumberOfOccurrences = sourceString.GroupBy(ch => ch).ToDictionary(group => group.Key, group => group.Count()); this.IntToChar = rawNumberOfOccurrences.OrderBy(kvp => kvp.Key).Select(kvp => kvp.Key).ToArray(); + + if (this.IntToChar.Length > Vector.Count) + { + throw new ArgumentException($"String should not contain more than {Vector.Count} different characters", nameof(sourceString)); + } + this.CharToInt = Enumerable.Range(0, this.IntToChar.Length).ToDictionary(i => this.IntToChar[i], i => i); } @@ -28,7 +35,7 @@ return null; } - var arr = new byte[16]; + var arr = new byte[Vector.Count]; foreach (var ch in word) { arr[this.CharToInt[ch]]++; diff --git a/WhiteRabbit/VectorsProcessor.cs b/WhiteRabbit/VectorsProcessor.cs index 4a7eaa2..1a47a9b 100644 --- a/WhiteRabbit/VectorsProcessor.cs +++ b/WhiteRabbit/VectorsProcessor.cs @@ -17,22 +17,36 @@ public VectorsProcessor(Vector target, int maxVectorsCount, IEnumerable> dictionary, Func, string> vectorToString) { +#if SUPPORT_LARGE_STRINGS + if (Enumerable.Range(0, Vector.Count).Any(i => target[i] > 8)) + { + throw new ArgumentException("Every value should be at most 8 (at most 8 same characters allowed in the source string)", nameof(target)); + } +#else + if (Enumerable.Range(0, Vector.Count).Any(i => target[i] > 4)) + { + throw new ArgumentException("Every value should be at most 4 (at most 4 same characters allowed in the source string)", nameof(target)); + } +#endif + this.Target = target; + +#if !SUPPORT_LARGE_STRINGS + this.TargetComplement = new Vector(Enumerable.Range(0, Vector.Count).Select(i => (byte)(this.Target[i] == 0 ? 0 : (byte)(12 / this.Target[i]))).ToArray()); +#endif + + this.TargetNorm = Vector.Dot(target, Vector.One); this.MaxVectorsCount = maxVectorsCount; this.VectorToString = vectorToString; - this.Dictionary = ImmutableStack.Create>(FilterVectors(dictionary, target).ToArray()); + this.Dictionary = ImmutableStack.Create(FilterVectors(dictionary, target, this.TargetComplement).ToArray()); } - /// - /// Negative sign bit. - /// (byte)b & (byte)128 equals zero for non-negative (0..127) bytes and equals (byte)128 for negative (128..255) bytes. - /// Similarly, vector & Negative equals zero if all bytes are non-negative, and does not equal zero if some bytes are negative. - /// Use (vector & Negative) == Vector<byte>.Zero to determine if all components are non-negative. - /// - private static Vector Negative { get; } = new Vector(Enumerable.Repeat((byte)128, 16).ToArray()); - private Vector Target { get; } + private Vector TargetComplement { get; } + + private byte TargetNorm { get; } + private int MaxVectorsCount { get; } private ImmutableStack> Dictionary { get; } @@ -57,22 +71,26 @@ // And total number of quintuplets becomes reasonable 1412M. // Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s"). // This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1]. - private static int GetVectorWeight(Vector vector, Vector target) + private static int GetVectorWeight(Vector vector, Vector target, Vector targetComplement) { +#if SUPPORT_LARGE_STRINGS var weight = 0; for (var i = 0; target[i] != 0; i++) { - weight += (720 * vector[i]) / target[i]; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6) + weight += (840 * vector[i]) / target[i]; // 840 = LCM(1, 2, .., 8), so that the result will be a whole number (unless Target[i] > 8) } return weight; +#else + return Vector.Dot(vector, targetComplement); +#endif } - private static IEnumerable> FilterVectors(IEnumerable> vectors, Vector target) + private static IEnumerable> FilterVectors(IEnumerable> vectors, Vector target, Vector targetComplement) { return vectors - .Where(vector => ((target - vector) & Negative) == Vector.Zero) - .OrderBy(vector => GetVectorWeight(vector, target)); + .Where(vector => Vector.GreaterThanOrEqualAll(target, vector)) + .OrderBy(vector => GetVectorWeight(vector, target, targetComplement)); } [Conditional("DEBUG")] @@ -91,9 +109,17 @@ // That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x]) private IEnumerable[]> GenerateUnorderedSequences(Vector remainder, ImmutableStack> partialSumStack, ImmutableStack> dictionaryStack) { - var count = partialSumStack.Count() + 1; - if (count < this.MaxVectorsCount) + var allowedRemainingWords = this.MaxVectorsCount - partialSumStack.Count(); + if (allowedRemainingWords > 1) { + +#if !SUPPORT_LARGE_STRINGS + // e.g. if remainder norm is 7, 8 or 9, and allowedRemainingWords is 3, + // we need the largest remaining word to have a norm of at least 3 + var remainderNorm = Vector.Dot(remainder, this.TargetComplement); + var requiredRemainder = (remainderNorm + allowedRemainingWords - 1) / allowedRemainingWords; +#endif + var dictionaryTail = dictionaryStack; while (!dictionaryTail.IsEmpty) { @@ -102,13 +128,19 @@ this.DebugState(partialSumStack, currentVector); - var newRemainder = remainder - currentVector; - if (newRemainder == Vector.Zero) + if (currentVector == remainder) { yield return partialSumStack.Push(currentVector).Reverse().ToArray(); } - else if ((newRemainder & Negative) == Vector.Zero) +#if !SUPPORT_LARGE_STRINGS + else if (Vector.Dot(currentVector, this.TargetComplement) < requiredRemainder) + { + break; + } +#endif + else if (Vector.LessThanOrEqualAll(currentVector, remainder)) { + var newRemainder = remainder - currentVector; foreach (var result in this.GenerateUnorderedSequences(newRemainder, partialSumStack.Push(currentVector), dictionaryTail)) { yield return result; @@ -118,7 +150,7 @@ dictionaryTail = nextDictionaryTail; } } - else if (count == this.MaxVectorsCount) + else { var dictionaryTail = dictionaryStack; while (!dictionaryTail.IsEmpty) diff --git a/WhiteRabbit/WhiteRabbit.csproj b/WhiteRabbit/WhiteRabbit.csproj index 483df1a..0d1a9cb 100644 --- a/WhiteRabbit/WhiteRabbit.csproj +++ b/WhiteRabbit/WhiteRabbit.csproj @@ -19,7 +19,7 @@ full false bin\Debug\ - DEBUG;TRACE + TRACE;DEBUG prompt 4 bin\Debug\WhiteRabbit.XML