namespace WhiteRabbit
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Immutable;
    using System.Linq;
    using System.Numerics;
    using System.Threading.Tasks.Dataflow;

    /// <summary>
    /// Maps words to vectors (via <see cref="VectorsConverter"/>), delegates the search for
    /// vector sequences to <see cref="VectorsProcessor"/>, and turns the resulting vector
    /// sequences back into space-separated phrases.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the element type of <c>Vector&lt;byte&gt;</c> is not visible in this file;
    /// it must match whatever <c>VectorsConverter.GetVector</c> returns - confirm against that type.
    /// </remarks>
    internal class StringsProcessor
    {
        /// <summary>
        /// Builds the word lookup table and the underlying vectors processor.
        /// </summary>
        /// <param name="sourceString">Source string; space characters are removed before it is used.</param>
        /// <param name="maxWordsCount">Passed through unchanged to <see cref="VectorsProcessor"/>.</param>
        /// <param name="words">Candidate words; duplicates are ignored.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceString"/> or <paramref name="words"/> is <c>null</c>.
        /// </exception>
        public StringsProcessor(string sourceString, int maxWordsCount, IEnumerable<string> words)
        {
            // Fail before doing any work, and with the caller's parameter name
            // (LINQ would otherwise throw the same exception type later, naming its own "source").
            if (sourceString == null)
            {
                throw new ArgumentNullException(nameof(sourceString));
            }

            if (words == null)
            {
                throw new ArgumentNullException(nameof(words));
            }

            var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
            this.VectorsConverter = new VectorsConverter(filteredSource);

            // Dictionary of vectors to array of words represented by this vector.
            // Words for which GetVector returns null are dropped.
            this.VectorsToWords = words
                .Distinct()
                .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
                .Where(tuple => tuple.vector != null)
                .Select(tuple => new { tuple.word, vector = tuple.vector.Value })
                .GroupBy(tuple => tuple.vector)
                .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());

            this.VectorsProcessor = new VectorsProcessor(
                this.VectorsConverter.GetVector(filteredSource).Value,
                maxWordsCount,
                this.VectorsToWords.Keys,
                this.VectorsConverter.GetString);
        }

        private VectorsProcessor VectorsProcessor { get; }

        private VectorsConverter VectorsConverter { get; }

        private Dictionary<Vector<byte>, string[]> VectorsToWords { get; }

        /// <summary>
        /// Writes the unordered vector sequences generated by the vectors processor to <paramref name="target"/>.
        /// </summary>
        /// <param name="target">Dataflow block that receives the sequences.</param>
        public void PostUnorderedSequences(ITargetBlock<Vector<byte>[]> target)
        {
            this.VectorsProcessor.GenerateUnorderedSequences().WriteToTargetBlock(target);
        }

        /// <summary>
        /// Creates a dataflow pipeline that accepts unordered vector sequences and emits phrases.
        /// </summary>
        /// <returns>
        /// A propagator block chaining: unordered sequence -> ordered sequences -> word variants
        /// -> every concrete word combination -> space-joined phrase.
        /// </returns>
        public IPropagatorBlock<Vector<byte>[], string> CreateUnorderedSequencesToPhrasesTransform()
        {
            return DataflowBlockHelpers.Id<Vector<byte>[]>()
                .PipeMany(this.VectorsProcessor.UnorderedSequenceToOrderedSequences)
                .Pipe(this.OrderedSequenceToWordVariants)
                .PipeMany(this.WordVariantsToFlatWords)
                .Pipe(this.FlatWordsToPhrase);
        }

        // Converts e.g. pair of variants [[a, b, c], [d, e]] into all possible pairs:
        // [[a, d], [a, e], [b, d], [b, e], [c, d], [c, e]]
        private IEnumerable<ImmutableStack<T>> Flatten<T>(ImmutableStack<T[]> phrase)
        {
            // Base case: an empty stack of variants has exactly one combination - the empty one.
            if (phrase.IsEmpty)
            {
                return new[] { ImmutableStack.Create<T>() };
            }

            // Take the top set of variants, flatten everything below it, then put each
            // variant back on top of every combination of the remainder.
            T[] wordVariants;
            var newStack = phrase.Pop(out wordVariants);
            return this.Flatten(newStack).SelectMany(remainder => wordVariants.Select(word => remainder.Push(word)));
        }

        /// <summary>
        /// Replaces every vector of the sequence with the array of words stored for it in the lookup table.
        /// </summary>
        private ImmutableStack<string[]> OrderedSequenceToWordVariants(Vector<byte>[] sum)
        {
            return ImmutableStack.CreateRange(sum.Select(vector => this.VectorsToWords[vector]));
        }

        /// <summary>
        /// Expands per-position word variants into every concrete sequence of words.
        /// </summary>
        private IEnumerable<ImmutableStack<string>> WordVariantsToFlatWords(ImmutableStack<string[]> wordVariants)
        {
            return this.Flatten(wordVariants);
        }

        /// <summary>
        /// Joins the words of the stack, in the stack's enumeration order, with single spaces.
        /// </summary>
        private string FlatWordsToPhrase(ImmutableStack<string> words)
        {
            return string.Join(" ", words);
        }
    }
}