namespace WhiteRabbit
{
    using System;
    using System.Collections.Generic;
    using System.Collections.Immutable;
    using System.Linq;
    using System.Numerics;
    using System.Threading.Tasks.Dataflow;

    // NOTE(review): the generic type arguments in this file were reconstructed after
    // they had been stripped from the source text. The Vector<> element type (byte) is
    // an assumption - confirm against VectorsConverter.GetVector and VectorsProcessor.

    /// <summary>
    /// Maps words to vectors and back, and exposes dataflow blocks that turn
    /// sequences of vectors into space-separated phrases.
    /// </summary>
    internal class StringsProcessor
    {
        /// <summary>
        /// Initializes the converter, the vector-to-words lookup and the vectors processor.
        /// </summary>
        /// <param name="sourceString">Source string; space characters are removed before use.</param>
        /// <param name="maxWordsCount">Passed through unchanged to <see cref="WhiteRabbit.VectorsProcessor"/>.</param>
        /// <param name="words">Candidate words; duplicates and words without a vector are dropped.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="sourceString"/> or <paramref name="words"/> is <c>null</c>.
        /// </exception>
        public StringsProcessor(string sourceString, int maxWordsCount, IEnumerable<string> words)
        {
            if (sourceString == null)
            {
                throw new ArgumentNullException(nameof(sourceString));
            }

            if (words == null)
            {
                throw new ArgumentNullException(nameof(words));
            }

            var filteredSource = new string(sourceString.Where(ch => ch != ' ').ToArray());
            this.VectorsConverter = new VectorsConverter(filteredSource);

            // Dictionary of vectors to array of words represented by this vector.
            // Words for which the converter returns null are skipped.
            this.VectorsToWords = words
                .Distinct()
                .Select(word => new { word, vector = this.VectorsConverter.GetVector(word) })
                .Where(tuple => tuple.vector != null)
                .Select(tuple => new { tuple.word, vector = tuple.vector.Value })
                .GroupBy(tuple => tuple.vector)
                .ToDictionary(group => group.Key, group => group.Select(tuple => tuple.word).ToArray());

            this.VectorsProcessor = new VectorsProcessor(
                this.VectorsConverter.GetVector(filteredSource).Value,
                maxWordsCount,
                this.VectorsToWords.Keys,
                this.VectorsConverter.GetString);
        }

        private VectorsProcessor VectorsProcessor { get; }

        private VectorsConverter VectorsConverter { get; }

        // Every vector that has at least one word, mapped to all distinct words sharing that vector.
        private Dictionary<Vector<byte>, string[]> VectorsToWords { get; }

        /// <summary>
        /// Forwards <paramref name="target"/> to <c>VectorsProcessor.PostUnorderedSequences</c>.
        /// </summary>
        /// <param name="target">Block that receives the sequences of vectors.</param>
        public void PostUnorderedSequences(ITargetBlock<Vector<byte>[]> target) =>
            this.VectorsProcessor.PostUnorderedSequences(target);

        /// <summary>
        /// Builds a four-stage pipeline (unordered sequences -> ordered sequences ->
        /// word variants -> flat words -> phrases) and wraps it as a single block.
        /// </summary>
        /// <returns>
        /// A propagator whose input side is the first stage and whose output side is the last stage.
        /// </returns>
        public IPropagatorBlock<Vector<byte>[], string> CreateUnorderedSequencesToPhrasesTransform()
        {
            var unorderedSequencesToOrderedSequences = this.VectorsProcessor.CreateUnorderedSequencesToOrderedSequencesTransform();
            var orderedSequencesToWordVariants = this.CreateOrderedSequencesToWordVariantsTransform();
            var wordVariantsToFlatWords = this.CreateWordVariantsToFlatWordsTransform();
            var flatWordsToPhrases = this.CreateFlatWordsToPhrasesTransform();

            // LinkForever is an extension method defined elsewhere in the project.
            unorderedSequencesToOrderedSequences.LinkForever(orderedSequencesToWordVariants);
            orderedSequencesToWordVariants.LinkForever(wordVariantsToFlatWords);
            wordVariantsToFlatWords.LinkForever(flatWordsToPhrases);

            return DataflowBlock.Encapsulate(unorderedSequencesToOrderedSequences, flatWordsToPhrases);
        }

        /// <summary>
        /// Expands a stack of alternatives into every combination that takes exactly one
        /// item from each position, e.g. the pair of variants [[a, b, c], [d, e]] becomes
        /// all possible pairs: [a, d], [a, e], [b, d], [b, e], [c, d], [c, e].
        /// </summary>
        /// <typeparam name="T">Item type.</typeparam>
        /// <param name="phrase">Stack whose every element is the array of alternatives for one position.</param>
        /// <returns>One stack per combination; a single empty stack when <paramref name="phrase"/> is empty.</returns>
        private IEnumerable<ImmutableStack<T>> Flatten<T>(ImmutableStack<T[]> phrase)
        {
            if (phrase.IsEmpty)
            {
                return new[] { ImmutableStack.Create<T>() };
            }

            T[] wordVariants;
            var newStack = phrase.Pop(out wordVariants);

            // Flatten everything below the top first, then put each alternative of the
            // top position back on top, so positions keep their original stack order.
            return this.Flatten(newStack)
                .SelectMany(remainder => wordVariants.Select(word => remainder.Push(word)));
        }

        /// <summary>
        /// Creates the stage that replaces every vector of a sequence with the array of words
        /// registered for it. Items are pushed in sequence order, so the words of the last
        /// vector end up on top of the resulting stack.
        /// </summary>
        private IPropagatorBlock<Vector<byte>[], ImmutableStack<string[]>> CreateOrderedSequencesToWordVariantsTransform()
        {
            return new TransformBlock<Vector<byte>[], ImmutableStack<string[]>>(
                sum => ImmutableStack.CreateRange(sum.Select(vector => this.VectorsToWords[vector])));
        }

        /// <summary>
        /// Creates the stage that emits, for each stack of word variants, every concrete
        /// combination of words produced by <see cref="Flatten{T}"/>.
        /// </summary>
        private IPropagatorBlock<ImmutableStack<string[]>, ImmutableStack<string>> CreateWordVariantsToFlatWordsTransform()
        {
            return new TransformManyBlock<ImmutableStack<string[]>, ImmutableStack<string>>(
                wordVariants => this.Flatten(wordVariants));
        }

        /// <summary>
        /// Creates the stage that joins the words of a stack, top first, with single spaces.
        /// </summary>
        private IPropagatorBlock<ImmutableStack<string>, string> CreateFlatWordsToPhrasesTransform()
        {
            return new TransformBlock<ImmutableStack<string>, string>(
                words => string.Join(" ", words));
        }
    }
}