namespace WhiteRabbit
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
/// <summary>
/// Converts strings to vectors containing chars count, based on a source string.
/// E.g. for source string "abc", string "a" is converted to [1, 0, 0], while string "bcb" is converted to [0, 2, 1].
/// </summary>
internal sealed class VectorsConverter
{
    /// <summary>
    /// Initializes a new instance of the <see cref="VectorsConverter"/> class.
    /// Every distinct byte of <paramref name="sourceString"/> is assigned one vector component;
    /// components are ordered by ascending byte value.
    /// </summary>
    /// <param name="sourceString">The bytes that define the alphabet of the converter.</param>
    /// <exception cref="ArgumentNullException"><paramref name="sourceString"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="sourceString"/> contains more distinct bytes than a <see cref="Vector{T}"/> of bytes has components.
    /// </exception>
    public VectorsConverter(byte[] sourceString)
    {
        if (sourceString == null)
        {
            throw new ArgumentNullException(nameof(sourceString));
        }

        // Only the set of distinct characters matters here; how often each one occurs is irrelevant.
        this.IntToChar = sourceString.Distinct().OrderBy(ch => ch).ToArray();
        if (this.IntToChar.Length > Vector<byte>.Count)
        {
            throw new ArgumentException($"String should not contain more than {Vector<byte>.Count} different characters", nameof(sourceString));
        }

        // Inverse mapping of IntToChar: character -> index of its vector component.
        this.CharToInt = new Dictionary<byte, int>(this.IntToChar.Length);
        for (var i = 0; i < this.IntToChar.Length; i++)
        {
            this.CharToInt[this.IntToChar[i]] = i;
        }
    }

    /// <summary>
    /// Gets the mapping from a character to the index of its vector component.
    /// </summary>
    private Dictionary<byte, int> CharToInt { get; }

    /// <summary>
    /// Gets the distinct characters of the source string in ascending order;
    /// the array index is the index of the character's vector component.
    /// </summary>
    private byte[] IntToChar { get; }

    /// <summary>
    /// Counts the occurrences of every character of <paramref name="word"/>.
    /// </summary>
    /// <param name="word">The word to convert.</param>
    /// <returns>
    /// A vector whose i-th component is the number of occurrences of the i-th source character in
    /// <paramref name="word"/> (unused trailing components are zero), or null if the word contains
    /// a character that is absent from the source string.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
    public Vector<byte>? GetVector(byte[] word)
    {
        if (word == null)
        {
            throw new ArgumentNullException(nameof(word));
        }

        // NOTE: each counter is a byte, so more than 255 occurrences of one character cannot be
        // represented (the increment wraps around unless compiled in a checked context).
        var counts = new byte[Vector<byte>.Count];
        foreach (var ch in word)
        {
            // Single lookup per character: it both validates the character and finds its component.
            int index;
            if (!this.CharToInt.TryGetValue(ch, out index))
            {
                return null;
            }

            counts[index]++;
        }

        return new Vector<byte>(counts);
    }
}
}