From 02eb57cd8d7d3e373b03bf61d503c033b32048ba Mon Sep 17 00:00:00 2001 From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com> Date: Wed, 9 Dec 2020 22:09:08 +0100 Subject: [PATCH] Code reorganized --- src/anagram_analyzer.rs | 17 +++--- src/dictionary_builder.rs | 76 +++++++++++++------------- src/lib.rs | 5 +- src/main.rs | 41 ++------------ src/permutations_cache.rs | 1 - src/{anagram_logger.rs => solution.rs} | 24 ++++++-- src/solver.rs | 68 +++++++++++++++++++++++ tests/solver_tests.rs | 2 + 8 files changed, 146 insertions(+), 88 deletions(-) rename src/{anagram_logger.rs => solution.rs} (69%) create mode 100644 src/solver.rs create mode 100644 tests/solver_tests.rs diff --git a/src/anagram_analyzer.rs b/src/anagram_analyzer.rs index 94642a5..2d3c7d6 100644 --- a/src/anagram_analyzer.rs +++ b/src/anagram_analyzer.rs @@ -1,9 +1,9 @@ use packed_simd::u8x32; -use crate::anagram_logger; use crate::dictionary_builder::Dictionary; use crate::hash_computer::CHUNK_SIZE; use crate::hash_computer::find_hashes; use crate::permutations_cache::PermutationsCache; +use crate::solution::Solution; fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutation: &'a [usize], current_phrase: u8x32, current_phrase_length: usize) -> Box + 'a> { if permutation.len() == 0 { @@ -17,20 +17,21 @@ fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutat return Box::new(result); } -fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32]) -> () { +fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32], solutions: &mut Vec) -> () { match find_hashes(chunk, phrase_length, hashes_to_find) { Some(anagrams) => { for anagram in anagrams { - anagram_logger::log_anagram_with_hash(anagram, phrase_length); + solutions.push(Solution::from_simd(anagram, phrase_length)); } } _ => () } } -pub fn analyze_anagrams(anagram_vector: &Vec, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () { +pub fn analyze_anagrams(anagram_vector: Vec, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> Vec { let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE]; let mut chunk_position: usize = 0; + let mut result: Vec<_> = Vec::new(); //let mut total: usize = 0; permutations.get_permuted_vectors(&anagram_vector).iter() @@ -44,13 +45,15 @@ pub fn analyze_anagrams(anagram_vector: &Vec, dictionary: &Dictionary, pe chunk_position = (chunk_position + 1) % CHUNK_SIZE; //total = total + 1; if chunk_position == 0 { - process_anagram_chunk(&chunk, phrase_length, hashes_to_find); + process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result); } }); if chunk_position != 0 { - process_anagram_chunk(&chunk, phrase_length, hashes_to_find); + process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result); } - //println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total); + //println!("{} {}", anagram_logger::get_anagram_vector_view(&anagram_vector, dictionary), total); + + result } \ No newline at end of file diff --git a/src/dictionary_builder.rs b/src/dictionary_builder.rs index 7846d3b..017c4b3 100644 --- a/src/dictionary_builder.rs +++ b/src/dictionary_builder.rs @@ -42,52 +42,54 @@ pub struct Dictionary { pub words: Vec>, } -pub fn build_dictionary(phrase: &String, unique_words: Vec) -> Dictionary { - let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap(); +impl Dictionary { + pub fn from_phrase_and_words(phrase: &str, unique_words: Vec) -> Dictionary { + let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap(); - let phrase_with_metadata = alphabet.vectorize(phrase).unwrap(); + let phrase_with_metadata = alphabet.vectorize(phrase).unwrap(); - let words_with_vectors: Vec<_> = unique_words - .into_iter() - .map(|word| { - let vector_option = alphabet.vectorize(&word); - match vector_option { - Some(vector_with_metadata) => { - if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) { - return Some((word, vector_with_metadata)); - } else { + let words_with_vectors: Vec<_> = unique_words + .into_iter() + .map(|word| { + let vector_option = alphabet.vectorize(&word); + match vector_option { + Some(vector_with_metadata) => { + if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) { + return Some((word, vector_with_metadata)); + } else { + return None; + } + } + None => { return None; } } - None => { - return None; - } - } - }) - .flatten() - .collect(); + }) + .flatten() + .collect(); - let mut words_by_vectors: HashMap<_, _> = HashMap::new(); - for (word, vector_with_metadata) in words_with_vectors { - let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![])); - words_for_vector.push(WordInfo::new(word)); - } + let mut words_by_vectors: HashMap<_, _> = HashMap::new(); + for (word, vector_with_metadata) in words_with_vectors { + let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![])); + words_for_vector.push(WordInfo::new(word)); + } - let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect(); - words_by_vectors.sort_by_key(|(vector, _)| vector.norm); - words_by_vectors.reverse(); + let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect(); + words_by_vectors.sort_by_key(|(vector, _)| vector.norm); + words_by_vectors.reverse(); - let mut vectors = vec![]; - let mut words_by_vectors_vec = vec![]; + let mut vectors = vec![]; + let mut words_by_vectors_vec = vec![]; - for (vector, words_by_vector) in words_by_vectors { - vectors.push(vector); - words_by_vectors_vec.push(words_by_vector); - } + for (vector, words_by_vector) in words_by_vectors { + vectors.push(vector); + words_by_vectors_vec.push(words_by_vector); + } - Dictionary { - phrase_vector: phrase_with_metadata.vector, - vectors, - words: words_by_vectors_vec, + Dictionary { + phrase_vector: phrase_with_metadata.vector, + vectors, + words: words_by_vectors_vec, + } } } diff --git a/src/lib.rs b/src/lib.rs index 4d3710a..1a087fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,14 @@ #![feature(map_into_keys_values)] +#![feature(trait_alias)] pub mod anagram_analyzer; pub mod anagram_finder; -pub mod anagram_logger; pub mod dictionary_builder; pub mod hash_computer; pub mod permutation_type; pub mod permutations_cache; pub mod read_lines; +pub mod solution; +pub mod solver; pub mod vector_alphabet; + diff --git a/src/main.rs b/src/main.rs index 8c3336b..4a13480 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,8 @@ -#![feature(map_into_keys_values)] - -use std::cmp; use std::env; use rayon::prelude::*; -use trustpilot_challenge_rust::anagram_analyzer; -use trustpilot_challenge_rust::anagram_finder; -use trustpilot_challenge_rust::dictionary_builder; -use trustpilot_challenge_rust::hash_computer; -use trustpilot_challenge_rust::permutations_cache; use trustpilot_challenge_rust::read_lines; +use trustpilot_challenge_rust::solver::Solver; fn main() { let args: Vec<_> = env::args().collect(); @@ -20,34 +13,10 @@ fn main() { let max_requested_number_of_words = (&args[3]).parse::().unwrap(); let phrase = &args[4]; - let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count(); - let max_supported_number_of_words = (hash_computer::MAX_PHRASE_LENGTH - phrase_byte_length_without_spaces) + 1; - - if max_requested_number_of_words > max_supported_number_of_words { - println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words); - } - let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words); - - let mut words = read_lines::lines_from_file(words_file_path).unwrap(); - words.sort(); - words.dedup(); - - let dictionary = dictionary_builder::build_dictionary(phrase, words); - + let words = read_lines::lines_from_file(words_file_path).unwrap(); let hashes_strings = read_lines::lines_from_file(hashes_file_path).unwrap(); - let mut hashes_to_find: Vec = Vec::new(); - for hash_string in hashes_strings { - let hash: u128 = u128::from_str_radix(&hash_string, 16).unwrap(); - hashes_to_find.push(((hash >> 96) as u32).to_be()); - } - for number_of_words in 1..=max_number_of_words { - let phrase_length = phrase_byte_length_without_spaces + number_of_words - 1; - let permutations = permutations_cache::PermutationsCache::new(number_of_words); - let anagram_vectors = anagram_finder::find_anagrams(&dictionary, number_of_words); - anagram_vectors.par_iter() - .for_each(|anagram_vector| { - anagram_analyzer::analyze_anagrams(anagram_vector, &dictionary, &permutations, phrase_length, &hashes_to_find) - }); - } + let solver = Solver::create_from_input_data(words, hashes_strings, max_requested_number_of_words, phrase); + solver.find_solutions() + .for_each(|solution| println!("{} {}", solution.hash, solution.anagram_string)); } diff --git a/src/permutations_cache.rs b/src/permutations_cache.rs index 3c871af..858fe3f 100644 --- a/src/permutations_cache.rs +++ b/src/permutations_cache.rs @@ -41,7 +41,6 @@ impl PermutationsCache { } pub fn get_permuted_vectors(&self, ordered_vector_to_permute: &Vec) -> Vec> { - //println!("set_length: {}, vector: {:?}", self.set_length, ordered_vector_to_permute); assert_eq!(ordered_vector_to_permute.len(), self.set_length); let permutation_type = get_required_permutation_type(ordered_vector_to_permute); diff --git a/src/anagram_logger.rs b/src/solution.rs similarity index 69% rename from src/anagram_logger.rs rename to src/solution.rs index 308314c..8486faa 100644 --- a/src/anagram_logger.rs +++ b/src/solution.rs @@ -2,6 +2,24 @@ use md5; use packed_simd::u8x32; use crate::dictionary_builder::Dictionary; +#[derive(Debug)] +pub struct Solution { + pub anagram_string: String, + pub hash: String, +} + +impl Solution { + pub fn from_simd(simd_vector: u8x32, phrase_length: usize) -> Solution { + let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length); + let hash = format!("{:x}", md5::compute(anagram_string.as_bytes())); + + Solution { + anagram_string, + hash, + } + } +} + pub fn get_anagram_vector_view(anagram: &Vec, dictionary: &Dictionary) -> String { anagram.iter() .map(|&index| { @@ -26,9 +44,3 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () { println!("{}", get_anagram_string_from_simd(simd_vector, phrase_length)); } - -pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () { - let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length); - let hash = md5::compute(anagram_string.as_bytes()); - println!("{:x} {}", hash, anagram_string); -} diff --git a/src/solver.rs b/src/solver.rs new file mode 100644 index 0000000..594bb69 --- /dev/null +++ b/src/solver.rs @@ -0,0 +1,68 @@ +use std::cmp; +use rayon::prelude::*; + +use crate::anagram_analyzer; +use crate::anagram_finder; +use crate::dictionary_builder::Dictionary; +use crate::hash_computer; +use crate::permutations_cache::PermutationsCache; +use crate::solution::Solution; + +pub struct Solver { + dictionary: Dictionary, + hashes_to_find: Vec, + phrase_byte_length_without_spaces: usize, + max_number_of_words: usize, +} + +impl Solver { + pub fn create_from_input_data(words: Vec, hashes_strings: Vec, max_requested_number_of_words: usize, phrase: &str) -> Solver { + let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count(); + let max_supported_number_of_words = (hash_computer::MAX_PHRASE_LENGTH - phrase_byte_length_without_spaces) + 1; + + if max_requested_number_of_words > max_supported_number_of_words { + println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words); + } + let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words); + + let mut words = words; + words.sort(); + words.dedup(); + + let dictionary = Dictionary::from_phrase_and_words(phrase, words); + + let mut hashes_to_find: Vec = Vec::new(); + for hash_string in hashes_strings { + let hash: u128 = u128::from_str_radix(&hash_string, 16).unwrap(); + hashes_to_find.push(((hash >> 96) as u32).to_be()); + } + + Solver { + dictionary, + hashes_to_find, + phrase_byte_length_without_spaces, + max_number_of_words, + } + } + + fn solve_for_anagram_vectors<'a>(&'a self, anagram_vectors: Vec>, permutations: PermutationsCache, phrase_length: usize) -> impl ParallelIterator + 'a { + anagram_vectors.into_par_iter() + .flat_map(move |anagram_vector| { + anagram_analyzer::analyze_anagrams(anagram_vector, &self.dictionary, &permutations, phrase_length, &self.hashes_to_find) + }) + } + + fn solve_for_number_of_words<'a>(&'a self, number_of_words: usize) -> impl ParallelIterator + 'a { + let phrase_length = self.phrase_byte_length_without_spaces + number_of_words - 1; + let permutations = PermutationsCache::new(number_of_words); + let anagram_vectors = anagram_finder::find_anagrams(&self.dictionary, number_of_words); + self.solve_for_anagram_vectors(anagram_vectors, permutations, phrase_length) + } + + pub fn find_solutions<'a>(&'a self) -> impl ParallelIterator + 'a { + (1..=self.max_number_of_words).into_par_iter() + .flat_map(move |number_of_words| { + self.solve_for_number_of_words(number_of_words) + }) + } +} diff --git a/tests/solver_tests.rs b/tests/solver_tests.rs new file mode 100644 index 0000000..d893a57 --- /dev/null +++ b/tests/solver_tests.rs @@ -0,0 +1,2 @@ +extern crate trustpilot_challenge_rust; +use trustpilot_challenge_rust::hash_computer;