@ -17,22 +17,36 @@
/// <summary>
/// Validates <paramref name="target"/>, stores the search parameters, and builds the filtered,
/// weight-ordered dictionary stack used by the search.
/// </summary>
/// <param name="target">Per-component counts that a sequence of dictionary vectors has to add up to.</param>
/// <param name="maxVectorsCount">Upper bound on the number of vectors in one result sequence.</param>
/// <param name="dictionary">Candidate vectors; those that do not fit inside <paramref name="target"/> are dropped.</param>
/// <param name="vectorToString">Converter from a vector to text; only stored here.</param>
/// <exception cref="ArgumentException">
/// Some component of <paramref name="target"/> exceeds 8 (with SUPPORT_LARGE_STRINGS) or 4 (without it).
/// </exception>
// NOTE(review): this chunk appears to interleave the pre-change and post-change revisions of the
// constructor, which is why the signature, braces and several assignments occur twice.
public VectorsProcessor ( Vector < byte > target , int maxVectorsCount , IEnumerable < Vector < byte > > dictionary , Func < Vector < byte > , string > vectorToString )
public VectorsProcessor ( Vector < byte > target , int maxVectorsCount , IEnumerable < Vector < byte > > dictionary , Func < Vector < byte > , string > vectorToString )
{
{
// Reject targets whose components are too large for the weight arithmetic of the active build flavour.
#if SUPPORT_LARGE_STRINGS
if ( Enumerable . Range ( 0 , Vector < byte > . Count ) . Any ( i = > target [ i ] > 8 ) )
{
throw new ArgumentException ( "Every value should be at most 8 (at most 8 same characters allowed in the source string)" , nameof ( target ) ) ;
}
# else
if ( Enumerable . Range ( 0 , Vector < byte > . Count ) . Any ( i = > target [ i ] > 4 ) )
{
throw new ArgumentException ( "Every value should be at most 4 (at most 4 same characters allowed in the source string)" , nameof ( target ) ) ;
}
# endif
this . Target = target ;
this . Target = target ;
#if !SUPPORT_LARGE_STRINGS
// Per-component scale factor: 12 / Target[i], or 0 where Target[i] is 0.
// 12 is divisible by every allowed component value (1..4), so the division is exact.
this . TargetComplement = new Vector < byte > ( Enumerable . Range ( 0 , Vector < byte > . Count ) . Select ( i = > ( byte ) ( this . Target [ i ] = = 0 ? 0 : ( byte ) ( 1 2 / this . Target [ i ] ) ) ) . ToArray ( ) ) ;
# endif
// Dot product with the all-ones vector, i.e. the sum of the target's components (computed in byte arithmetic).
this . TargetNorm = Vector . Dot ( target , Vector < byte > . One ) ;
this . MaxVectorsCount = maxVectorsCount ;
this . MaxVectorsCount = maxVectorsCount ;
this . VectorToString = vectorToString ;
this . VectorToString = vectorToString ;
// Materialise the filtered, weight-ordered vectors into an immutable stack.
// NOTE(review): presumably the ordering is chosen so that the heaviest vectors end up on top of the stack - confirm.
this . Dictionary = ImmutableStack . Create < Vector < byte > > ( FilterVectors ( dictionary , target ) . ToArray ( ) ) ;
// NOTE(review): the identifier in the next line looks split by extraction ("TargetComplemen t"); presumably TargetComplement.
// With SUPPORT_LARGE_STRINGS defined the property is never assigned above, so it would hold its default value here.
this . Dictionary = ImmutableStack . Create ( FilterVectors ( dictionary , target , this . TargetComplemen t ) . ToArray ( ) ) ;
}
}
/// <summary>
/// Mask with the sign bit (128) set in every byte lane.
/// <c>(byte)b &amp; (byte)128</c> is zero for bytes in 0..127 and equals 128 for bytes in 128..255.
/// Likewise, <c>vector &amp; Negative</c> is all-zero when no lane has its top bit set, and non-zero otherwise.
/// Use <c>(vector &amp; Negative) == Vector&lt;byte&gt;.Zero</c> to determine whether all components are
/// non-negative when the bytes are interpreted as signed values.
/// </summary>
/// <remarks>
/// Built with the broadcast constructor so that the mask always covers exactly
/// <c>Vector&lt;byte&gt;.Count</c> lanes. The lane count depends on the hardware (e.g. 32 with AVX2),
/// so a fixed 16-element source array would be too short there and the array constructor would throw.
/// </remarks>
private static Vector<byte> Negative { get; } = new Vector<byte>((byte)128);
// Read-only state assigned once in the constructor.
// NOTE(review): duplicated declarations appear to come from interleaved pre-/post-change revisions of this chunk.

// The vector that result sequences have to add up to.
private Vector < byte > Target { get ; }
private Vector < byte > Target { get ; }
// Per-component scale factors (12 / Target[i], 0 for zero components); only assigned when SUPPORT_LARGE_STRINGS is not defined.
private Vector < byte > TargetComplement { get ; }
// Sum of the components of Target (dot product with the all-ones vector).
private byte TargetNorm { get ; }
// Maximum number of vectors allowed in one result sequence.
private int MaxVectorsCount { get ; }
private int MaxVectorsCount { get ; }
// Dictionary vectors that fit inside Target, pushed in order of ascending weight.
private ImmutableStack < Vector < byte > > Dictionary { get ; }
private ImmutableStack < Vector < byte > > Dictionary { get ; }
@ -57,22 +71,26 @@
// And total number of quintuplets becomes reasonable 1412M.
// And total number of quintuplets becomes reasonable 1412M.
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// Also, it produces the intended results faster (as these are more likely to contain longer words - e.g. "poultry outwits ants" is more likely than "p o u l t r y o u t w i t s a n t s").
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
// This method basically gives us the 1-norm of the vector in the space rescaled so that the target is [1, 1, ..., 1].
// Computes the sort weight of `vector` relative to `target`: every component is scaled by a constant
// divided by the matching target component, and the scaled components are summed.
// With SUPPORT_LARGE_STRINGS the sum is computed in a scalar loop; otherwise it is a single dot product
// with the precomputed `targetComplement` (12 / target[i] per component, as built by the constructor).
// NOTE(review): both the old two-parameter and the new three-parameter signatures appear below because
// this chunk interleaves pre- and post-change revisions.
private static int GetVectorWeight ( Vector < byte > vector , Vector < byte > target )
private static int GetVectorWeight ( Vector < byte > vector , Vector < byte > target , Vector < byte > targetComplement )
{
{
#if SUPPORT_LARGE_STRINGS
var weight = 0 ;
var weight = 0 ;
// The loop stops at the first zero component of target, so components after that position are ignored.
// NOTE(review): there is no `i < Vector<byte>.Count` bound; a target without any zero component would
// index past the last lane - confirm that callers always leave at least one trailing zero.
for ( var i = 0 ; target [ i ] ! = 0 ; i + + )
for ( var i = 0 ; target [ i ] ! = 0 ; i + + )
{
{
weight + = ( 7 2 0 * vector [ i ] ) / target [ i ] ; // 720 = 6!, so that the result will be a whole number (unless Target[i] > 6 )
weight + = ( 8 4 0 * vector [ i ] ) / target [ i ] ; // 840 = LCM(1, 2, .., 8), so that the result will be a whole number (unless Target[i] > 8 )
}
}
return weight ;
return weight ;
# else
// NOTE(review): Vector.Dot over Vector<byte> produces a byte, so the sum presumably has to stay below 256 - confirm.
return Vector . Dot ( vector , targetComplement ) ;
# endif
}
}
// Keeps only the vectors that fit inside `target` component-wise and returns them ordered by ascending
// weight (see GetVectorWeight). The result is a lazily evaluated LINQ query.
// NOTE(review): old and new revisions are interleaved below - each pair of lines shows the previous form
// followed by its replacement.
private static IEnumerable < Vector < byte > > FilterVectors ( IEnumerable < Vector < byte > > vectors , Vector < byte > target )
private static IEnumerable < Vector < byte > > FilterVectors ( IEnumerable < Vector < byte > > vectors , Vector < byte > target , Vector < byte > targetComplement )
{
{
return vectors
return vectors
// Old form: a component of (target - vector) with its top bit set is treated as a wrapped (negative) difference.
. Where ( vector = > ( ( target - vector ) & Negative ) = = Vector < byte > . Zero )
// New form: direct component-wise comparison, target[i] >= vector[i] for every i.
. Where ( vector = > Vector . GreaterThanOrEqualAll ( target , vector ) )
. OrderBy ( vector = > GetVectorWeight ( vector , target ) ) ;
. OrderBy ( vector = > GetVectorWeight ( vector , target , targetComplement ) ) ;
}
}
[Conditional("DEBUG")]
[Conditional("DEBUG")]
@ -91,9 +109,17 @@
// That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x])
// That way, the complexity of search goes down by a factor of MaxVectorsCount! (as if [x, y] does not add up to a required target, there is no point in checking [y, x])
private IEnumerable < Vector < byte > [ ] > GenerateUnorderedSequences ( Vector < byte > remainder , ImmutableStack < Vector < byte > > partialSumStack , ImmutableStack < Vector < byte > > dictionaryStack )
private IEnumerable < Vector < byte > [ ] > GenerateUnorderedSequences ( Vector < byte > remainder , ImmutableStack < Vector < byte > > partialSumStack , ImmutableStack < Vector < byte > > dictionaryStack )
{
{
var count = partialSumStack . Count ( ) + 1 ;
var allowedRemainingWords = this . MaxVectorsCount - partialSumStack . Count ( ) ;
if ( count < this . MaxVectorsCount )
if ( allowedRemainingWords > 1 )
{
{
#if !SUPPORT_LARGE_STRINGS
// e.g. if remainder norm is 7, 8 or 9, and allowedRemainingWords is 3,
// we need the largest remaining word to have a norm of at least 3
var remainderNorm = Vector . Dot ( remainder , this . TargetComplement ) ;
var requiredRemainder = ( remainderNorm + allowedRemainingWords - 1 ) / allowedRemainingWords ;
# endif
var dictionaryTail = dictionaryStack ;
var dictionaryTail = dictionaryStack ;
while ( ! dictionaryTail . IsEmpty )
while ( ! dictionaryTail . IsEmpty )
{
{
@ -102,13 +128,19 @@
this . DebugState ( partialSumStack , currentVector ) ;
this . DebugState ( partialSumStack , currentVector ) ;
var newRemainder = remainder - currentVector ;
if ( currentVector = = remainder )
if ( newRemainder = = Vector < byte > . Zero )
{
{
yield return partialSumStack . Push ( currentVector ) . Reverse ( ) . ToArray ( ) ;
yield return partialSumStack . Push ( currentVector ) . Reverse ( ) . ToArray ( ) ;
}
}
else if ( ( newRemainder & Negative ) = = Vector < byte > . Zero )
#if !SUPPORT_LARGE_STRINGS
else if ( Vector . Dot ( currentVector , this . TargetComplement ) < requiredRemainder )
{
break ;
}
# endif
else if ( Vector . LessThanOrEqualAll ( currentVector , remainder ) )
{
{
var newRemainder = remainder - currentVector ;
foreach ( var result in this . GenerateUnorderedSequences ( newRemainder , partialSumStack . Push ( currentVector ) , dictionaryTail ) )
foreach ( var result in this . GenerateUnorderedSequences ( newRemainder , partialSumStack . Push ( currentVector ) , dictionaryTail ) )
{
{
yield return result ;
yield return result ;
@ -118,7 +150,7 @@
dictionaryTail = nextDictionaryTail ;
dictionaryTail = nextDictionaryTail ;
}
}
}
}
else if ( count = = this . MaxVectorsCount )
else
{
{
var dictionaryTail = dictionaryStack ;
var dictionaryTail = dictionaryStack ;
while ( ! dictionaryTail . IsEmpty )
while ( ! dictionaryTail . IsEmpty )