Implemented generation of all anagrams (permutations / substitutions)

main
Inga 🏳‍🌈 4 years ago
parent d6ba6a9199
commit 956c550998
  1. 1
      Cargo.toml
  2. 2
      README.md
  3. 7
      src/anagram_finder.rs
  4. 44
      src/anagram_logger.rs
  5. 1
      src/lib.rs
  6. 9
      src/main.rs
  7. 42
      src/permutation_type.rs
  8. 59
      src/permutations_cache.rs

@ -7,6 +7,7 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bit_field = "0.10.1"
crunchy = "0.2.2"
packed_simd = { version = "0.3.4", package = "packed_simd_2", features = ["into_bits"] }
permutohedron = "0.2.4"

@ -21,7 +21,7 @@ which produce given MD5 hashes.
in the dictionary; and it makes the total number of anagrams astronomically large)
This is a working draft; so far the code is extremely dirty (this is my first Rust project),
and it only lists all anagrams (not including words reordering)
and it only lists all anagrams
and does not yet do actual MD5 calculation.
## Algorithm description

@ -15,9 +15,10 @@ fn find_anagrams_recursive(remainder: &vector_alphabet::Vector, remaining_depth:
.take_while(|(_, vector)| vector.norm * remaining_depth >= remainder.norm)
.map(|(index, vector)| match remainder.safe_substract(&vector) {
Some(new_remainder) => find_anagrams_recursive(&new_remainder, remaining_depth-1, word_vectors, index)
.iter()
.map(|partial_phrase| {
vec![index].iter().chain(partial_phrase).cloned().collect()
.into_iter()
.map(|mut partial_phrase| {
partial_phrase.push(index);
partial_phrase
})
.collect(),
_ => vec![],

@ -1,6 +1,8 @@
use crate::dictionary_builder;
use crate::dictionary_builder::Dictionary;
use crate::dictionary_builder::WordInfo;
use crate::permutations_cache::PermutationsCache;
pub fn get_anagram_view(anagram: Vec<usize>, dictionary: &dictionary_builder::Dictionary) -> String {
pub fn get_anagram_view(anagram: &Vec<usize>, dictionary: &Dictionary) -> String {
anagram.iter()
.map(|&index| {
let word_options = &dictionary.words[index];
@ -13,3 +15,41 @@ pub fn get_anagram_view(anagram: Vec<usize>, dictionary: &dictionary_builder::Di
.collect::<Vec<_>>()
.join(" ")
}
/// Builds every way of picking one option per position from the first
/// `remaining_length` entries of `simple_dictionary`.
///
/// Each returned vector has `remaining_length` elements; element `i` is taken
/// from `simple_dictionary[i]`. The choice for the last position varies slowest
/// and the choice for the first position varies fastest. With
/// `remaining_length == 0` the result is a single empty vector.
///
/// # Panics
/// Panics if `remaining_length` exceeds `simple_dictionary.len()`.
fn generate_substitutions<T: Copy>(simple_dictionary: &[Vec<T>], remaining_length: usize) -> Vec<Vec<T>> {
    // Start from the single empty substitution and grow it one position at a time.
    let mut substitutions: Vec<Vec<T>> = vec![Vec::new()];
    for options in &simple_dictionary[..remaining_length] {
        let mut extended: Vec<Vec<T>> = Vec::new();
        // Outer loop over the new option, inner loop over the existing prefixes:
        // this keeps earlier positions varying fastest.
        for &option in options {
            for prefix in &substitutions {
                let mut candidate = Vec::with_capacity(prefix.len() + 1);
                candidate.extend_from_slice(prefix);
                candidate.push(option);
                extended.push(candidate);
            }
        }
        substitutions = extended;
    }
    substitutions
}
/// Prints every phrase that can be formed from one anagram vector.
///
/// `anagram_vector` holds indices into `dictionary.words`; each indexed entry is
/// a collection of interchangeable `WordInfo` options. For every ordering
/// returned by `permutations.get_permuted_vectors` and every substitution of
/// concrete options, the chosen words are joined with single spaces and written
/// to stdout, one phrase per line.
pub fn log_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache) -> () {
    let word_count = anagram_vector.len();

    // Positions 0..word_count are permuted instead of the words themselves.
    let positions: Vec<usize> = (0..word_count).collect();

    // For each position, the list of concrete words that may fill it.
    let options_per_position: Vec<Vec<&WordInfo>> = anagram_vector
        .iter()
        .map(|&word_index| dictionary.words[word_index].iter().collect())
        .collect();

    let substitutions: Vec<Vec<&WordInfo>> =
        generate_substitutions(&options_per_position, word_count);

    for ordering in permutations.get_permuted_vectors(&positions).iter() {
        for substitution in substitutions.iter() {
            let phrase = ordering
                .iter()
                .map(|&position| substitution[position].word.clone())
                .collect::<Vec<_>>()
                .join(" ");
            println!("{}", phrase);
        }
    }
}

@ -4,6 +4,7 @@ pub mod anagram_finder;
pub mod anagram_logger;
pub mod dictionary_builder;
pub mod hash_computer;
pub mod permutation_type;
pub mod permutations_cache;
pub mod read_lines;
pub mod vector_alphabet;

@ -8,6 +8,7 @@ use trustpilot_challenge_rust::anagram_finder;
use trustpilot_challenge_rust::anagram_logger;
use trustpilot_challenge_rust::dictionary_builder;
use trustpilot_challenge_rust::hash_computer;
use trustpilot_challenge_rust::permutations_cache;
use trustpilot_challenge_rust::read_lines;
fn main() {
@ -39,9 +40,11 @@ fn main() {
let dictionary = dictionary_builder::build_dictionary(phrase, words);
for number_of_words in 0..=max_number_of_words {
for number_of_words in 1..=max_number_of_words {
//println!("======= Number of words: {} =======", number_of_words);
let permutations = permutations_cache::PermutationsCache::new(number_of_words);
let result = anagram_finder::find_anagrams(&dictionary, number_of_words);
result.into_par_iter()
.for_each(|anagram| println!("{}", anagram_logger::get_anagram_view(anagram, &dictionary)));
result.par_iter()
.for_each(|anagram_vector| anagram_logger::log_anagrams(anagram_vector, &dictionary, &permutations));
}
}

@ -0,0 +1,42 @@
use bit_field::BitField;
// A permutation type is a bit field: a 1 at the i-th position means that,
// in the permutation, value i must occur earlier than value i+1.

/// Computes the permutation type required to enumerate the unique permutations
/// of `ordered_vector_to_permute`.
///
/// The input must be ordered the way `Vec::dedup` expects, that is, matching
/// values only appear next to each other. Bit `i` of the result is set exactly
/// when element `i` equals element `i + 1`, so that all permutations of this
/// type applied to the vector produce all unique permutations of its values.
///
/// Slices with fewer than two elements have no adjacent pairs and yield `0`.
///
/// # Panics
/// NOTE(review): relies on `BitField::set_bit`, which is expected to panic when
/// the bit index does not fit in a `u16` (equal neighbours at index 16 or
/// later) - confirm against the `bit_field` documentation.
pub fn get_required_permutation_type<T: Eq>(ordered_vector_to_permute: &[T]) -> u16 {
    let mut result: u16 = 0;
    // `windows(2)` is empty for slices shorter than two elements, which avoids
    // the `len() - 1` underflow on an empty slice.
    for (i, pair) in ordered_vector_to_permute.windows(2).enumerate() {
        if pair[0] == pair[1] {
            result.set_bit(i, true);
        }
    }
    result
}
/// Lists every permutation type that a permutation supports, given its inverse.
///
/// Bit `i` of the largest supported type is set when
/// `inverse_permutation[i] < inverse_permutation[i + 1]`. The result contains
/// every `u16` whose set bits are a subset of that largest type, in ascending
/// order (so `0` is always first and the largest type is always last).
///
/// Inputs with fewer than two elements have no adjacent pairs and yield `[0]`.
///
/// # Panics
/// Panics if `inverse_permutation` has more than 16 elements.
pub fn get_supported_permutation_types(inverse_permutation: &[usize]) -> Vec<u16> {
    assert!(inverse_permutation.len() <= 16);
    let mut max_supported_type: u16 = 0;
    // `windows(2)` is empty for slices shorter than two elements, which avoids
    // the `len() - 1` underflow on an empty slice.
    for (i, pair) in inverse_permutation.windows(2).enumerate() {
        if pair[0] < pair[1] {
            max_supported_type.set_bit(i, true);
        }
    }

    // Walk the sub-masks of `max_supported_type` directly instead of testing
    // all 65536 `u16` values. `(s - m) & m` is the next sub-mask of `m` after
    // `s` in ascending order, so the output order matches a linear scan.
    let mut supported_types: Vec<u16> =
        Vec::with_capacity(1usize << max_supported_type.count_ones());
    let mut subset: u16 = 0;
    loop {
        supported_types.push(subset);
        if subset == max_supported_type {
            break;
        }
        subset = subset.wrapping_sub(max_supported_type) & max_supported_type;
    }
    supported_types
}

@ -1 +1,58 @@
use permutohedron::Heap;
use permutohedron::Heap;
use crate::permutation_type::get_required_permutation_type;
use crate::permutation_type::get_supported_permutation_types;
/// Precomputed index permutations for vectors of one fixed length,
/// grouped by permutation type.
pub struct PermutationsCache {
// Indexed by permutation type (a `u16` bit field cast to `usize`); each slot
// lists the permutations that `new` filed under that type.
permutations_by_type: Vec<Vec<Vec<usize>>>,
// Length the cache was built for; `get_permuted_vectors` asserts that its
// input has exactly this many elements.
set_length: usize,
}
/// Returns the inverse of `permutation`: if `permutation[i] == v`, then the
/// result holds `i` at index `v`.
///
/// The output buffer starts as a copy of the input, so any slot that no entry
/// maps to keeps the input's value at that index.
///
/// # Panics
/// Panics if any entry is not a valid index into `permutation`.
fn invert_permutation(permutation: &Vec<usize>) -> Vec<usize> {
    let mut inverse = permutation.to_vec();
    for (position, &value) in permutation.iter().enumerate() {
        inverse[value] = position;
    }
    inverse
}
impl PermutationsCache {
    /// Builds the cache for vectors of exactly `set_length` elements.
    ///
    /// Every permutation of `0..set_length` produced by `Heap` is filed under
    /// each permutation type that `get_supported_permutation_types` reports for
    /// its inverse.
    ///
    /// # Panics
    /// Panics if `set_length` is greater than 16.
    pub fn new(set_length: usize) -> PermutationsCache {
        assert!(set_length <= 16);

        // One (initially empty) bucket for every possible `u16` permutation type.
        let mut permutations_by_type: Vec<Vec<Vec<usize>>> = vec![Vec::new(); 1 << 16];

        let mut indices: Vec<usize> = (0..set_length).collect();
        for permutation in Heap::new(&mut indices) {
            let inverse = invert_permutation(&permutation);
            for permutation_type in get_supported_permutation_types(&inverse) {
                permutations_by_type[usize::from(permutation_type)].push(permutation.clone());
            }
        }

        PermutationsCache {
            permutations_by_type,
            set_length,
        }
    }

    /// Applies every cached permutation of the matching type to
    /// `ordered_vector_to_permute` and returns the permuted copies.
    ///
    /// The type is obtained from `get_required_permutation_type`; each cached
    /// permutation is a list of indices into the input vector.
    ///
    /// # Panics
    /// Panics if the input length differs from the `set_length` the cache was
    /// built with.
    pub fn get_permuted_vectors<T: Eq + Copy>(&self, ordered_vector_to_permute: &Vec<T>) -> Vec<Vec<T>> {
        assert_eq!(ordered_vector_to_permute.len(), self.set_length);

        let permutation_type = get_required_permutation_type(ordered_vector_to_permute);
        let mut permuted_vectors: Vec<Vec<T>> = Vec::new();
        for permutation in &self.permutations_by_type[usize::from(permutation_type)] {
            let permuted: Vec<T> = permutation
                .iter()
                .map(|&index| ordered_vector_to_permute[index])
                .collect();
            permuted_vectors.push(permuted);
        }
        permuted_vectors
    }
}
Loading…
Cancel
Save