diff --git a/WhiteRabbit/PrecomputedPermutationsGenerator.cs b/WhiteRabbit/PrecomputedPermutationsGenerator.cs new file mode 100644 index 0000000..f26ce24 --- /dev/null +++ b/WhiteRabbit/PrecomputedPermutationsGenerator.cs @@ -0,0 +1,37 @@ +namespace WhiteRabbit +{ + using System.Collections.Generic; + using System.Linq; + + internal class PrecomputedPermutationsGenerator + { + private static PermutationsGenerator.Permutation[] Permutations1 { get; } = PermutationsGenerator.HamiltonianPermutations(1).ToArray(); + + private static PermutationsGenerator.Permutation[] Permutations2 { get; } = PermutationsGenerator.HamiltonianPermutations(2).ToArray(); + + private static PermutationsGenerator.Permutation[] Permutations3 { get; } = PermutationsGenerator.HamiltonianPermutations(3).ToArray(); + + private static PermutationsGenerator.Permutation[] Permutations4 { get; } = PermutationsGenerator.HamiltonianPermutations(4).ToArray(); + + private static PermutationsGenerator.Permutation[] Permutations5 { get; } = PermutationsGenerator.HamiltonianPermutations(5).ToArray(); + + public static IEnumerable HamiltonianPermutations(int n) + { + switch(n) + { + case 1: + return Permutations1; + case 2: + return Permutations2; + case 3: + return Permutations3; + case 4: + return Permutations4; + case 5: + return Permutations5; + default: + return PermutationsGenerator.HamiltonianPermutations(n); + } + } + } +} diff --git a/WhiteRabbit/Program.cs b/WhiteRabbit/Program.cs index fe6f2a4..a80e8db 100644 --- a/WhiteRabbit/Program.cs +++ b/WhiteRabbit/Program.cs @@ -3,6 +3,7 @@ using System; using System.Collections.Generic; using System.Linq; + using System.Numerics; using System.Security.Cryptography; using System.Text; @@ -16,20 +17,43 @@ /// public static void Main() { - var processor = new StringsProcessor("poultry outwits ants", 3); - foreach (var phrase in processor.GeneratePhrases(ReadInput())) + var processor = new StringsProcessor("poultry outwits ants", 4); + var expectedHashes = new[] { - var hash = GetHash(phrase); - Console.WriteLine(hash + ": " + phrase); + "e4820b45d2277f3844eac66c903e84be", + "23170acc097c24edb98fc5488ab033fe", + "665e5bcb0c20062fe8abaaf4628bb154", + }; + + var expectedHashesAsVectors = new HashSet>(expectedHashes.Select(hash => new Vector(StringToByteArray(hash)))); + + foreach (var result in AddHashes(processor.GeneratePhrases(ReadInput()))) + { + if (expectedHashesAsVectors.Contains(result.Item2)) + { + Console.WriteLine("Found phrase: " + result.Item1); + } } } - private static string GetHash(string input) + // Code taken from http://stackoverflow.com/a/321404/831314 + private static byte[] StringToByteArray(string hex) + { + return Enumerable.Range(0, hex.Length) + .Where(x => x % 2 == 0) + .Select(x => Convert.ToByte(hex.Substring(x, 2), 16)) + .ToArray(); + } + + private static IEnumerable>> AddHashes(IEnumerable input) { using (MD5 hasher = MD5.Create()) { - var data = hasher.ComputeHash(Encoding.UTF8.GetBytes(input)); - return string.Concat(data.Select(b => b.ToString("x2"))); + foreach (var line in input) + { + var data = hasher.ComputeHash(Encoding.ASCII.GetBytes(line)); + yield return Tuple.Create(line, new Vector(data)); + } } } diff --git a/WhiteRabbit/StringsProcessor.cs b/WhiteRabbit/StringsProcessor.cs index d34b53a..a585410 100644 --- a/WhiteRabbit/StringsProcessor.cs +++ b/WhiteRabbit/StringsProcessor.cs @@ -22,6 +22,7 @@ public IEnumerable GeneratePhrases(IEnumerable words) { + // Dictionary of vectors to array of words represented by this vector var formattedWords = words .Distinct() .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) }) @@ -30,9 +31,10 @@ .GroupBy(tuple => tuple.vector) .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray()); + // task of finding anagrams could be reduced to the task of finding sequences of dictionary vectors with the target sum + var sums = this.VectorsProcessor.GenerateSequences(formattedWords.Keys); - var sums = this.VectorsProcessor.GenerateSums(formattedWords.Keys); - + // converting sequences of vectors to the sequences of words... var anagramsWords = sums .Select(sum => ImmutableStack.Create(sum.Select(vector => formattedWords[vector]).ToArray())) .SelectMany(Flatten) @@ -41,14 +43,15 @@ return anagramsWords.Select(list => string.Join(" ", list)); } - private IEnumerable> Flatten(ImmutableStack phrase) + // Converts e.g. pair of variants [[a, b, c], [d, e]] into all possible pairs: [[a, d], [a, e], [b, d], [b, e], [c, d], [c, e]] + private IEnumerable> Flatten(ImmutableStack phrase) { if (phrase.IsEmpty) { - return new[] { ImmutableStack.Create() }; + return new[] { ImmutableStack.Create() }; } - string[] wordVariants; + T[] wordVariants; var newStack = phrase.Pop(out wordVariants); return Flatten(newStack).SelectMany(remainder => wordVariants.Select(word => remainder.Push(word))); } diff --git a/WhiteRabbit/VectorsConverter.cs b/WhiteRabbit/VectorsConverter.cs index dbeea65..8c16d7f 100644 --- a/WhiteRabbit/VectorsConverter.cs +++ b/WhiteRabbit/VectorsConverter.cs @@ -4,6 +4,10 @@ using System.Linq; using System.Numerics; + /// + /// Converts strings to vectors containing chars count, based on a source string. + /// E.g. for source string "abc", string "a" is converted to [1, 0, 0], while string "bcb" is converted to [0, 2, 1]. + /// internal class VectorsConverter { public VectorsConverter(string sourceString) diff --git a/WhiteRabbit/VectorsProcessor.cs b/WhiteRabbit/VectorsProcessor.cs index 6d457d4..def5f2a 100644 --- a/WhiteRabbit/VectorsProcessor.cs +++ b/WhiteRabbit/VectorsProcessor.cs @@ -32,14 +32,16 @@ private long Iterations { get; set; } = 0; - public IEnumerable[]> GenerateSums(IEnumerable> vectors) + + // Produces all sequences of vectors with the target sum + public IEnumerable[]> GenerateSequences(IEnumerable> vectors) { var filteredVectors = FilterVectors(vectors); var dictionary = ImmutableStack.Create(filteredVectors.ToArray()); - var orderedSums = GenerateOrderedSums(this.Target, ImmutableStack.Create>(), dictionary); - var allSums = orderedSums.SelectMany(GeneratePermutations); + var unorderedSequences = GenerateUnorderedSequences(this.Target, ImmutableStack.Create>(), dictionary); + var allSequences = unorderedSequences.SelectMany(GeneratePermutations); - return allSums; + return allSequences; } private IEnumerable> FilterVectors(IEnumerable> vectors) @@ -58,8 +60,11 @@ } } - // This method takes most of the time, so everything related to it must be optimized - private IEnumerable[]> GenerateOrderedSums(Vector remainder, ImmutableStack> partialSumStack, ImmutableStack> dictionaryStack) + // This method takes most of the time, so everything related to it must be optimized. + // In every sequence, next vector always goes after the previous one from dictionary. + // E.g. if dictionary is [x, y, z], then only [x, y] sequence could be generated, and [y, x] will never be generated. + // That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x]) + private IEnumerable[]> GenerateUnorderedSequences(Vector remainder, ImmutableStack> partialSumStack, ImmutableStack> dictionaryStack) { var count = partialSumStack.Count() + 1; if (count < this.MaxVectorsCount) @@ -79,7 +84,7 @@ } else if ((newRemainder & Negative) == Vector.Zero) { - foreach (var result in GenerateOrderedSums(newRemainder, partialSumStack.Push(currentVector), dictionaryTail)) + foreach (var result in GenerateUnorderedSequences(newRemainder, partialSumStack.Push(currentVector), dictionaryTail)) { yield return result; } @@ -109,7 +114,7 @@ private IEnumerable GeneratePermutations(T[] original) { - foreach (var permutation in PermutationsGenerator.HamiltonianPermutations(original.Length)) + foreach (var permutation in PrecomputedPermutationsGenerator.HamiltonianPermutations(original.Length)) { yield return permutation.Select(i => original[i]).ToArray(); } diff --git a/WhiteRabbit/WhiteRabbit.csproj b/WhiteRabbit/WhiteRabbit.csproj index ad3c285..1af7ec9 100644 --- a/WhiteRabbit/WhiteRabbit.csproj +++ b/WhiteRabbit/WhiteRabbit.csproj @@ -53,6 +53,7 @@ +