namespace WhiteRabbit
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Numerics;

    /// <summary>
    /// Converts strings to vectors containing chars count, based on a source string.
    /// E.g. for source string "abc", string "a" is converted to [1, 0, 0], while string "bcb" is converted to [0, 2, 1].
    /// </summary>
    internal sealed class VectorsConverter
    {
        /// <summary>
        /// Builds the byte-to-component mapping from the distinct bytes of the source string.
        /// Distinct bytes are sorted ascending; the byte at sorted position i maps to vector component i.
        /// </summary>
        /// <param name="sourceString">Bytes of the source string that defines the alphabet.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sourceString"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The source contains more distinct bytes than a <see cref="Vector{T}"/> of bytes has components.
        /// </exception>
        public VectorsConverter(byte[] sourceString)
        {
            if (sourceString == null)
            {
                throw new ArgumentNullException(nameof(sourceString));
            }

            // Only the set of distinct bytes matters here; occurrence counts are not needed for the mapping.
            this.IntToChar = sourceString.Distinct().OrderBy(ch => ch).ToArray();

            if (this.IntToChar.Length > Vector<byte>.Count)
            {
                throw new ArgumentException($"String should not contain more than {Vector<byte>.Count} different characters", nameof(sourceString));
            }

            this.CharToInt = Enumerable.Range(0, this.IntToChar.Length).ToDictionary(i => this.IntToChar[i], i => i);
        }

        /// <summary>
        /// Gets the mapping from a source byte to its vector component index.
        /// </summary>
        private Dictionary<byte, int> CharToInt { get; }

        /// <summary>
        /// Gets the distinct source bytes in ascending order; the array index is the vector component index.
        /// </summary>
        private byte[] IntToChar { get; }

        /// <summary>
        /// Counts how many times each source byte occurs in <paramref name="word"/>.
        /// </summary>
        /// <param name="word">Bytes of the word to convert.</param>
        /// <returns>
        /// A vector whose component i holds the number of occurrences of the i-th source byte
        /// (components not mapped to any source byte are zero),
        /// or null if the word contains a byte that is absent from the source string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
        /// <exception cref="OverflowException">A single byte occurs more than 255 times in the word.</exception>
        public Vector<byte>? GetVector(byte[] word)
        {
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }

            var counts = new byte[Vector<byte>.Count];
            foreach (var ch in word)
            {
                // Single lookup per byte: an unknown byte means the word cannot be expressed at all.
                if (!this.CharToInt.TryGetValue(ch, out var index))
                {
                    return null;
                }

                // Components are bytes; fail loudly instead of silently wrapping past 255.
                checked
                {
                    counts[index]++;
                }
            }

            return new Vector<byte>(counts);
        }
    }
}