From 02eb57cd8d7d3e373b03bf61d503c033b32048ba Mon Sep 17 00:00:00 2001
From: inga-lovinde <52715130+inga-lovinde@users.noreply.github.com>
Date: Wed, 9 Dec 2020 22:09:08 +0100
Subject: [PATCH] Code reorganized

---
 src/anagram_analyzer.rs                | 17 +++---
 src/dictionary_builder.rs              | 76 +++++++++++++-------------
 src/lib.rs                             |  5 +-
 src/main.rs                            | 41 ++------------
 src/permutations_cache.rs              |  1 -
 src/{anagram_logger.rs => solution.rs} | 24 ++++++--
 src/solver.rs                          | 68 +++++++++++++++++++++++
 tests/solver_tests.rs                  |  2 +
 8 files changed, 146 insertions(+), 88 deletions(-)
 rename src/{anagram_logger.rs => solution.rs} (69%)
 create mode 100644 src/solver.rs
 create mode 100644 tests/solver_tests.rs
diff --git a/src/anagram_analyzer.rs b/src/anagram_analyzer.rs
index 94642a5..2d3c7d6 100644
--- a/src/anagram_analyzer.rs
+++ b/src/anagram_analyzer.rs
@@ -1,9 +1,9 @@
 use packed_simd::u8x32;
-use crate::anagram_logger;
 use crate::dictionary_builder::Dictionary;
 use crate::hash_computer::CHUNK_SIZE;
 use crate::hash_computer::find_hashes;
 use crate::permutations_cache::PermutationsCache;
+use crate::solution::Solution;
 
 fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutation: &'a [usize], current_phrase: u8x32, current_phrase_length: usize) -> Box<dyn Iterator<Item = u8x32> + 'a> {
     if permutation.len() == 0 {
@@ -17,20 +17,17 @@ fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutat
     return Box::new(result);
 }
 
-fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32]) -> () {
-    match find_hashes(chunk, phrase_length, hashes_to_find) {
-        Some(anagrams) => {
-            for anagram in anagrams {
-                anagram_logger::log_anagram_with_hash(anagram, phrase_length);
-            }
-        }
-        _ => ()
-    }
-}
+/// Runs `find_hashes` over `chunk` and appends a `Solution` to `solutions` for every anagram it reports.
+fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32], solutions: &mut Vec<Solution>) {
+    if let Some(anagrams) = find_hashes(chunk, phrase_length, hashes_to_find) {
+        solutions.extend(anagrams.into_iter().map(|anagram| Solution::from_simd(anagram, phrase_length)));
+    }
+}
 
-pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () {
+pub fn analyze_anagrams(anagram_vector: Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> Vec<Solution> {
     let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE];
     let mut chunk_position: usize = 0;
+    let mut result: Vec<_> = Vec::new();
     //let mut total: usize = 0;
 
     permutations.get_permuted_vectors(&anagram_vector).iter()
@@ -44,13 +45,15 @@ pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, pe
             chunk_position = (chunk_position + 1) % CHUNK_SIZE;
             //total = total + 1;
             if chunk_position == 0 {
-                process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
+                process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result);
             }
         });
 
     if chunk_position != 0 {
-        process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
+        process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result); // NOTE(review): passes the whole chunk; slots past chunk_position may hold stale entries - confirm no duplicate solutions
     }
 
-    //println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total);
+    //println!("{} {}", crate::solution::get_anagram_vector_view(&anagram_vector, dictionary), total);
+
+    result
 }
\ No newline at end of file
diff --git a/src/dictionary_builder.rs b/src/dictionary_builder.rs
index 7846d3b..017c4b3 100644
--- a/src/dictionary_builder.rs
+++ b/src/dictionary_builder.rs
@@ -42,52 +42,45 @@ pub struct Dictionary {
     pub words: Vec<Vec<WordInfo>>,
 }
 
-pub fn build_dictionary(phrase: &String, unique_words: Vec<String>) -> Dictionary {
-    let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap();
-
-    let phrase_with_metadata = alphabet.vectorize(phrase).unwrap();
-
-    let words_with_vectors: Vec<_> = unique_words
-        .into_iter()
-        .map(|word| {
-            let vector_option = alphabet.vectorize(&word);
-            match vector_option {
-                Some(vector_with_metadata) => {
-                    if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) {
-                        return Some((word, vector_with_metadata));
-                    } else {
-                        return None;
-                    }
-                }
-                None => {
-                    return None;
-                }
-            }
-        })
-        .flatten()
-        .collect();
-
-    let mut words_by_vectors: HashMap<_, _> = HashMap::new();
-    for (word, vector_with_metadata) in words_with_vectors {
-        let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![]));
-        words_for_vector.push(WordInfo::new(word));
-    }
-
-    let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect();
-    words_by_vectors.sort_by_key(|(vector, _)| vector.norm);
-    words_by_vectors.reverse();
-
-    let mut vectors = vec![];
-    let mut words_by_vectors_vec = vec![];
-
-    for (vector, words_by_vector) in words_by_vectors {
-        vectors.push(vector);
-        words_by_vectors_vec.push(words_by_vector);
-    }
-
-    Dictionary {
-        phrase_vector: phrase_with_metadata.vector,
-        vectors,
-        words: words_by_vectors_vec,
+impl Dictionary {
+    /// Builds the dictionary of words that can take part in anagrams of `phrase`.
+    ///
+    /// Words that the phrase's alphabet cannot vectorize, or whose vector is not a subset of the
+    /// phrase vector, are dropped. The remaining words are grouped by vector key and the groups
+    /// are ordered by descending vector norm; `vectors[i]` is the vector shared by `words[i]`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the alphabet cannot be created from `phrase` or `phrase` cannot be vectorized.
+    pub fn from_phrase_and_words(phrase: &str, unique_words: Vec<String>) -> Dictionary {
+        let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap();
+        let phrase_with_metadata = alphabet.vectorize(phrase).unwrap();
+
+        let usable_words = unique_words
+            .into_iter()
+            .filter_map(|word| {
+                alphabet.vectorize(&word)
+                    .filter(|vector_with_metadata| vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector))
+                    .map(|vector_with_metadata| (word, vector_with_metadata))
+            });
+
+        let mut words_by_vectors: HashMap<_, _> = HashMap::new();
+        for (word, vector_with_metadata) in usable_words {
+            let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![]));
+            words_for_vector.push(WordInfo::new(word));
+        }
+
+        // Ascending sort followed by `reverse` leaves the groups in descending norm order.
+        let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect();
+        words_by_vectors.sort_by_key(|(vector, _)| vector.norm);
+        words_by_vectors.reverse();
+
+        let (vectors, words): (Vec<_>, Vec<_>) = words_by_vectors.into_iter().unzip();
+
+        Dictionary {
+            phrase_vector: phrase_with_metadata.vector,
+            vectors,
+            words,
+        }
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 4d3710a..1a087fc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,11 +1,14 @@
 #![feature(map_into_keys_values)]
+#![feature(trait_alias)]
 
 pub mod anagram_analyzer;
 pub mod anagram_finder;
-pub mod anagram_logger;
 pub mod dictionary_builder;
 pub mod hash_computer;
 pub mod permutation_type;
 pub mod permutations_cache;
 pub mod read_lines;
+pub mod solution;
+pub mod solver;
 pub mod vector_alphabet;
+
diff --git a/src/main.rs b/src/main.rs
index 8c3336b..4a13480 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,8 @@
-#![feature(map_into_keys_values)]
-
-use std::cmp;
 use std::env;
 use rayon::prelude::*;
 
-use trustpilot_challenge_rust::anagram_analyzer;
-use trustpilot_challenge_rust::anagram_finder;
-use trustpilot_challenge_rust::dictionary_builder;
-use trustpilot_challenge_rust::hash_computer;
-use trustpilot_challenge_rust::permutations_cache;
 use trustpilot_challenge_rust::read_lines;
+use trustpilot_challenge_rust::solver::Solver;
 
 fn main() {
     let args: Vec<_> = env::args().collect();
@@ -20,34 +13,10 @@ fn main() {
     let max_requested_number_of_words = (&args[3]).parse::<usize>().unwrap();
     let phrase = &args[4];
 
-    let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count();
-    let max_supported_number_of_words = (hash_computer::MAX_PHRASE_LENGTH - phrase_byte_length_without_spaces) + 1;
-
-    if max_requested_number_of_words > max_supported_number_of_words {
-        println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words);
-    }
-    let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words);
-
-    let mut words = read_lines::lines_from_file(words_file_path).unwrap();
-    words.sort();
-    words.dedup();
-
-    let dictionary = dictionary_builder::build_dictionary(phrase, words);
-
+    let words = read_lines::lines_from_file(words_file_path).unwrap();
     let hashes_strings = read_lines::lines_from_file(hashes_file_path).unwrap();
-    let mut hashes_to_find: Vec<u32> = Vec::new();
-    for hash_string in hashes_strings {
-        let hash: u128 = u128::from_str_radix(&hash_string, 16).unwrap();
-        hashes_to_find.push(((hash >> 96) as u32).to_be());
-    }
 
-    for number_of_words in 1..=max_number_of_words {
-        let phrase_length = phrase_byte_length_without_spaces + number_of_words - 1;
-        let permutations = permutations_cache::PermutationsCache::new(number_of_words);
-        let anagram_vectors = anagram_finder::find_anagrams(&dictionary, number_of_words);
-        anagram_vectors.par_iter()
-            .for_each(|anagram_vector| {
-                anagram_analyzer::analyze_anagrams(anagram_vector, &dictionary, &permutations, phrase_length, &hashes_to_find)
-            });
-    }
+    let solver = Solver::create_from_input_data(words, hashes_strings, max_requested_number_of_words, phrase);
+    solver.find_solutions()
+        .for_each(|solution| println!("{} {}", solution.hash, solution.anagram_string));
 }
diff --git a/src/permutations_cache.rs b/src/permutations_cache.rs
index 3c871af..858fe3f 100644
--- a/src/permutations_cache.rs
+++ b/src/permutations_cache.rs
@@ -41,7 +41,6 @@ impl PermutationsCache {
     }
 
     pub fn get_permuted_vectors<T: Eq + Copy>(&self, ordered_vector_to_permute: &Vec<T>) -> Vec<Vec<T>> {
-        //println!("set_length: {}, vector: {:?}", self.set_length, ordered_vector_to_permute);
         assert_eq!(ordered_vector_to_permute.len(), self.set_length);
 
         let permutation_type = get_required_permutation_type(ordered_vector_to_permute);
diff --git a/src/anagram_logger.rs b/src/solution.rs
similarity index 69%
rename from src/anagram_logger.rs
rename to src/solution.rs
index 308314c..8486faa 100644
--- a/src/anagram_logger.rs
+++ b/src/solution.rs
@@ -2,6 +2,28 @@ use md5;
 use packed_simd::u8x32;
 use crate::dictionary_builder::Dictionary;
 
+/// An anagram string together with the hexadecimal form of its MD5 digest.
+#[derive(Debug)]
+pub struct Solution {
+    /// Text produced by `get_anagram_string_from_simd` for the source vector.
+    pub anagram_string: String,
+    /// MD5 digest of `anagram_string`'s bytes, formatted with `{:x}`.
+    pub hash: String,
+}
+
+impl Solution {
+    /// Decodes `simd_vector` into its anagram string and pairs it with that string's MD5 hash.
+    pub fn from_simd(simd_vector: u8x32, phrase_length: usize) -> Solution {
+        let text = get_anagram_string_from_simd(simd_vector, phrase_length);
+        let digest = md5::compute(text.as_bytes());
+
+        Solution {
+            hash: format!("{:x}", digest),
+            anagram_string: text,
+        }
+    }
+}
+
 pub fn get_anagram_vector_view(anagram: &Vec<usize>, dictionary: &Dictionary) -> String {
     anagram.iter()
         .map(|&index| {
@@ -26,9 +44,3 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str
 pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () {
     println!("{}", get_anagram_string_from_simd(simd_vector, phrase_length));
 }
-
-pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () {
-    let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
-    let hash = md5::compute(anagram_string.as_bytes());
-    println!("{:x} {}", hash, anagram_string);
-}
diff --git a/src/solver.rs b/src/solver.rs
new file mode 100644
index 0000000..594bb69
--- /dev/null
+++ b/src/solver.rs
@@ -0,0 +1,96 @@
+//! Drives the search: builds the dictionary, enumerates anagram vectors and collects solutions.
+
+use std::cmp;
+use rayon::prelude::*;
+
+use crate::anagram_analyzer;
+use crate::anagram_finder;
+use crate::dictionary_builder::Dictionary;
+use crate::hash_computer;
+use crate::permutations_cache::PermutationsCache;
+use crate::solution::Solution;
+
+/// Precomputed inputs for searching anagrams of a phrase against a set of wanted hashes.
+pub struct Solver {
+    /// Dictionary built from the phrase and the sorted, deduplicated word list.
+    dictionary: Dictionary,
+    /// For each input hash: `(hash >> 96) as u32` of the parsed `u128`, passed through `to_be()`.
+    hashes_to_find: Vec<u32>,
+    /// Number of bytes in the phrase other than ASCII spaces.
+    phrase_byte_length_without_spaces: usize,
+    /// Inclusive upper bound on the number of words per anagram that will be searched.
+    max_number_of_words: usize,
+}
+
+impl Solver {
+    /// Builds a solver from the word list, the hex-encoded hashes, a word limit and the phrase.
+    ///
+    /// The limit is clamped so that the anagram length computed per word count (phrase bytes
+    /// plus `number_of_words - 1`) stays within `hash_computer::MAX_PHRASE_LENGTH`; a notice is
+    /// printed when the request is reduced. A phrase that already exceeds that length yields a
+    /// limit of zero, so `find_solutions` produces nothing instead of the subtraction underflowing.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any entry of `hashes_strings` is not a hexadecimal number that fits in `u128`,
+    /// or if `Dictionary::from_phrase_and_words` panics for `phrase`.
+    pub fn create_from_input_data(mut words: Vec<String>, hashes_strings: Vec<String>, max_requested_number_of_words: usize, phrase: &str) -> Solver {
+        let phrase_byte_length_without_spaces = phrase.bytes().filter(|&byte| byte != b' ').count();
+
+        // `checked_sub` guards against a phrase longer than MAX_PHRASE_LENGTH, which would
+        // otherwise underflow `usize` (panic in debug builds, wrap-around in release builds).
+        let max_supported_number_of_words = hash_computer::MAX_PHRASE_LENGTH
+            .checked_sub(phrase_byte_length_without_spaces)
+            .map_or(0, |spare_bytes| spare_bytes + 1);
+
+        if max_requested_number_of_words > max_supported_number_of_words {
+            println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words);
+        }
+        let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words);
+
+        words.sort();
+        words.dedup();
+
+        let dictionary = Dictionary::from_phrase_and_words(phrase, words);
+
+        // Only the top 32 bits of each 128-bit hash are kept for matching.
+        let hashes_to_find: Vec<u32> = hashes_strings.iter()
+            .map(|hash_string| {
+                let hash = u128::from_str_radix(hash_string, 16).unwrap();
+                ((hash >> 96) as u32).to_be()
+            })
+            .collect();
+
+        Solver {
+            dictionary,
+            hashes_to_find,
+            phrase_byte_length_without_spaces,
+            max_number_of_words,
+        }
+    }
+
+    /// Analyzes every anagram vector in parallel and flattens the solutions that are found.
+    fn solve_for_anagram_vectors<'a>(&'a self, anagram_vectors: Vec<Vec<usize>>, permutations: PermutationsCache, phrase_length: usize) -> impl ParallelIterator<Item = Solution> + 'a {
+        anagram_vectors.into_par_iter()
+            .flat_map(move |anagram_vector| {
+                anagram_analyzer::analyze_anagrams(anagram_vector, &self.dictionary, &permutations, phrase_length, &self.hashes_to_find)
+            })
+    }
+
+    /// Finds the solutions that consist of exactly `number_of_words` words.
+    fn solve_for_number_of_words<'a>(&'a self, number_of_words: usize) -> impl ParallelIterator<Item = Solution> + 'a {
+        // One extra byte per gap between adjacent words (presumably the separating space).
+        let phrase_length = self.phrase_byte_length_without_spaces + number_of_words - 1;
+        let permutations = PermutationsCache::new(number_of_words);
+        let anagram_vectors = anagram_finder::find_anagrams(&self.dictionary, number_of_words);
+        self.solve_for_anagram_vectors(anagram_vectors, permutations, phrase_length)
+    }
+
+    /// Returns a parallel iterator over the solutions for every word count from 1 to the limit.
+    pub fn find_solutions<'a>(&'a self) -> impl ParallelIterator<Item = Solution> + 'a {
+        (1..=self.max_number_of_words).into_par_iter()
+            .flat_map(move |number_of_words| {
+                self.solve_for_number_of_words(number_of_words)
+            })
+    }
+}
diff --git a/tests/solver_tests.rs b/tests/solver_tests.rs
new file mode 100644
index 0000000..d893a57
--- /dev/null
+++ b/tests/solver_tests.rs
@@ -0,0 +1,2 @@
+extern crate trustpilot_challenge_rust;
+use trustpilot_challenge_rust::hash_computer;