From 956c5509987b9dad59a81291d561217d2dedcc94 Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Sun, 6 Dec 2020 21:42:44 +0100 Subject: [PATCH] Implemented generation of all anagrams (permutations / substitutions) --- Cargo.toml | 1 + README.md | 2 +- src/anagram_finder.rs | 7 +++-- src/anagram_logger.rs | 44 +++++++++++++++++++++++++++-- src/lib.rs | 1 + src/main.rs | 9 ++++-- src/permutation_type.rs | 42 ++++++++++++++++++++++++++++ src/permutations_cache.rs | 59 ++++++++++++++++++++++++++++++++++++++- 8 files changed, 155 insertions(+), 10 deletions(-) create mode 100644 src/permutation_type.rs diff --git a/Cargo.toml b/Cargo.toml index cc59c88..8e9de58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bit_field = "0.10.1" crunchy = "0.2.2" packed_simd = { version = "0.3.4", package = "packed_simd_2", features = ["into_bits"] } permutohedron = "0.2.4" diff --git a/README.md b/README.md index ce480e8..950eebe 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ which produce given MD5 hashes. in the dictionary; and it makes the total number of anagrams astronomically large) This is a working draft, so far the code is extremely dirty (this is my first Rust project), -and it only lists all anagrams (not including words reordering) +and it only lists all anagrams and does not yet do actual MD5 calculation. 
/// Builds every way of picking one value per position from the first
/// `remaining_length` entries of `simple_dictionary`.
///
/// `simple_dictionary[i]` lists the candidate values for position `i`.
/// Each returned vector has length `remaining_length`, with the element at
/// index `i` taken from `simple_dictionary[i]`.
///
/// Ordering: the choice for the *last* position varies slowest and the choice
/// for position 0 varies fastest (the same order the recursive formulation
/// produces).
///
/// Returns `vec![vec![]]` when `remaining_length == 0`, and an empty vector
/// if any of the used positions has no candidates.
///
/// # Panics
///
/// Panics if `remaining_length > simple_dictionary.len()`.
fn generate_substitutions<T: Copy>(simple_dictionary: &[Vec<T>], remaining_length: usize) -> Vec<Vec<T>> {
    // Start from the single empty substitution and extend it one position at
    // a time. Each level is computed exactly once; the recursive version
    // recomputed the whole lower-level result for every candidate value.
    let mut substitutions: Vec<Vec<T>> = vec![Vec::new()];

    for options in &simple_dictionary[..remaining_length] {
        let mut extended = Vec::with_capacity(options.len() * substitutions.len());
        // The value loop is the outer one so the newest position varies slowest.
        for &value in options {
            for partial in &substitutions {
                let mut next = Vec::with_capacity(partial.len() + 1);
                next.extend_from_slice(partial);
                next.push(value);
                extended.push(next);
            }
        }
        substitutions = extended;
    }

    substitutions
}
Vec = (0..anagram_vector.len()).collect(); + let simple_dictionary: Vec> = (0..anagram_vector.len()) + .map(|i| dictionary.words[anagram_vector[i]].iter().map(|word_info| word_info).collect()) + .collect(); + let substitutions: Vec> = generate_substitutions::<&WordInfo>(&simple_dictionary, simple_dictionary.len()); + + permutations.get_permuted_vectors(&simple_vector).iter() + .flat_map(|permuted_vector| { + substitutions.iter().map(move |substitution| { + permuted_vector.iter().map(|&index| substitution[index]).collect::>() + }) + }) + .for_each(|anagram| { + let phrase = anagram.iter() + .map(|word_info| word_info.word.clone()) + .collect::>() + .join(" "); + println!("{}", phrase); + }) +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 6875814..6275e08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod anagram_finder; pub mod anagram_logger; pub mod dictionary_builder; pub mod hash_computer; +pub mod permutation_type; pub mod permutations_cache; pub mod read_lines; pub mod vector_alphabet; diff --git a/src/main.rs b/src/main.rs index aa7cb4d..3d78c42 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ use trustpilot_challenge_rust::anagram_finder; use trustpilot_challenge_rust::anagram_logger; use trustpilot_challenge_rust::dictionary_builder; use trustpilot_challenge_rust::hash_computer; +use trustpilot_challenge_rust::permutations_cache; use trustpilot_challenge_rust::read_lines; fn main() { @@ -39,9 +40,11 @@ fn main() { let dictionary = dictionary_builder::build_dictionary(phrase, words); - for number_of_words in 0..=max_number_of_words { + for number_of_words in 1..=max_number_of_words { + //println!("======= Number of words: {} =======", number_of_words); + let permutations = permutations_cache::PermutationsCache::new(number_of_words); let result = anagram_finder::find_anagrams(&dictionary, number_of_words); - result.into_par_iter() - .for_each(|anagram| println!("{}", anagram_logger::get_anagram_view(anagram, 
// A permutation type is a bit field:
// 1 at the i-th position means that in the permutation,
// value i must occur earlier than value i+1.

/// Computes the permutation type required to enumerate only the *unique*
/// permutations of `ordered_vector_to_permute`.
///
/// The input must be ordered the way `Vec::dedup` expects, i.e. equal values
/// only appear next to each other. Bit `i` of the result is set exactly when
/// element `i` equals element `i + 1`.
///
/// Empty and single-element inputs have no adjacent pairs and yield `0`.
///
/// # Panics
///
/// Panics if an equal adjacent pair starts at index 16 or later, because that
/// bit does not fit in a `u16`.
pub fn get_required_permutation_type<T: PartialEq>(ordered_vector_to_permute: &[T]) -> u16 {
    ordered_vector_to_permute
        .windows(2) // yields nothing for len < 2, so no `len() - 1` underflow
        .enumerate()
        .filter(|(_, pair)| pair[0] == pair[1])
        .fold(0u16, |mask, (i, _)| {
            assert!(i < 16, "permutation type bit {} does not fit in u16", i);
            mask | (1u16 << i)
        })
}

/// Lists every permutation type that the permutation with the given inverse
/// satisfies, in ascending numeric order.
///
/// Bit `i` of the maximal supported type is set when
/// `inverse_permutation[i] < inverse_permutation[i + 1]`. Every submask of that
/// maximal type (including `0`) is supported and is returned.
///
/// Empty and single-element inputs yield `vec![0]`.
///
/// # Panics
///
/// Panics if `inverse_permutation.len() > 16`.
pub fn get_supported_permutation_types(inverse_permutation: &[usize]) -> Vec<u16> {
    assert!(inverse_permutation.len() <= 16);

    let max_supported_type = inverse_permutation
        .windows(2)
        .enumerate()
        .filter(|(_, pair)| pair[0] < pair[1])
        .fold(0u16, |mask, (i, _)| mask | (1u16 << i));

    // Enumerate the submasks of `max_supported_type` directly instead of
    // scanning all 65536 u16 values: `(sub - mask) & mask` steps to the next
    // larger submask and wraps back to 0 after the mask itself.
    let mut supported_types: Vec<u16> =
        Vec::with_capacity(1usize << max_supported_type.count_ones());
    let mut sub: u16 = 0;
    loop {
        supported_types.push(sub);
        sub = sub.wrapping_sub(max_supported_type) & max_supported_type;
        if sub == 0 {
            break;
        }
    }

    supported_types
}
struct PermutationsCache { + permutations_by_type: Vec>>, + set_length: usize, +} + +fn invert_permutation(permutation: &Vec) -> Vec { + let mut result = permutation.clone(); + for i in 0..permutation.len() { + result[permutation[i]] = i; + } + + result +} + +impl PermutationsCache { + pub fn new(set_length: usize) -> PermutationsCache { + assert!(set_length <= 16); + + let mut permutations_by_type: Vec>> = Vec::with_capacity(1 << 16); + + for _i in 0..=u16::MAX { + permutations_by_type.push(Vec::new()); + } + + let mut placeholder: Vec = (0..set_length).collect(); + let heap = Heap::new(&mut placeholder); + for permutation in heap { + for permutation_type in get_supported_permutation_types(&invert_permutation(&permutation)) { + permutations_by_type[permutation_type as usize].push(permutation.clone()); + } + } + + PermutationsCache { + permutations_by_type, + set_length, + } + } + + pub fn get_permuted_vectors(&self, ordered_vector_to_permute: &Vec) -> Vec> { + //println!("set_length: {}, vector: {:?}", self.set_length, ordered_vector_to_permute); + assert_eq!(ordered_vector_to_permute.len(), self.set_length); + + let permutation_type = get_required_permutation_type(ordered_vector_to_permute); + let permutations = &self.permutations_by_type[permutation_type as usize]; + + return permutations.iter() + .map(|permutation| { + permutation.iter() + .map(|&index| ordered_vector_to_permute[index]) + .collect() + }) + .collect() + } +} \ No newline at end of file