Code reorganized

main
Inga 🏳‍🌈 3 years ago
parent 2d71f1733b
commit 02eb57cd8d
  1. src/anagram_analyzer.rs (17 changed lines)
  2. src/dictionary_builder.rs (76 changed lines)
  3. src/lib.rs (5 changed lines)
  4. src/main.rs (41 changed lines)
  5. src/permutations_cache.rs (1 changed line)
  6. src/solution.rs (24 changed lines)
  7. src/solver.rs (68 changed lines)
  8. tests/solver_tests.rs (2 changed lines)

@@ -1,9 +1,9 @@
 use packed_simd::u8x32;
-use crate::anagram_logger;
 use crate::dictionary_builder::Dictionary;
 use crate::hash_computer::CHUNK_SIZE;
 use crate::hash_computer::find_hashes;
 use crate::permutations_cache::PermutationsCache;
+use crate::solution::Solution;
 
 fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutation: &'a [usize], current_phrase: u8x32, current_phrase_length: usize) -> Box<dyn Iterator<Item = u8x32> + 'a> {
     if permutation.len() == 0 {
@@ -17,20 +17,21 @@ fn generate_vector_substitutions<'a>(simple_dictionary: &'a Dictionary, permutat
     return Box::new(result);
 }
 
-fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32]) -> () {
+fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hashes_to_find: &[u32], solutions: &mut Vec<Solution>) -> () {
     match find_hashes(chunk, phrase_length, hashes_to_find) {
         Some(anagrams) => {
             for anagram in anagrams {
-                anagram_logger::log_anagram_with_hash(anagram, phrase_length);
+                solutions.push(Solution::from_simd(anagram, phrase_length));
             }
         }
         _ => ()
     }
 }
 
-pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () {
+pub fn analyze_anagrams(anagram_vector: Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> Vec<Solution> {
     let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE];
     let mut chunk_position: usize = 0;
+    let mut result: Vec<_> = Vec::new();
     //let mut total: usize = 0;
 
     permutations.get_permuted_vectors(&anagram_vector).iter()
@@ -44,13 +45,15 @@ pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, pe
             chunk_position = (chunk_position + 1) % CHUNK_SIZE;
             //total = total + 1;
             if chunk_position == 0 {
-                process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
+                process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result);
             }
         });
 
     if chunk_position != 0 {
-        process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
+        process_anagram_chunk(&chunk, phrase_length, hashes_to_find, &mut result);
    }
 
-    //println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total);
+    //println!("{} {}", anagram_logger::get_anagram_vector_view(&anagram_vector, dictionary), total);
+
+    result
 }

@@ -42,52 +42,54 @@ pub struct Dictionary {
     pub words: Vec<Vec<WordInfo>>,
 }
 
-pub fn build_dictionary(phrase: &String, unique_words: Vec<String>) -> Dictionary {
-    let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap();
+impl Dictionary {
+    pub fn from_phrase_and_words(phrase: &str, unique_words: Vec<String>) -> Dictionary {
+        let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap();
 
-    let phrase_with_metadata = alphabet.vectorize(phrase).unwrap();
+        let phrase_with_metadata = alphabet.vectorize(phrase).unwrap();
 
-    let words_with_vectors: Vec<_> = unique_words
-        .into_iter()
-        .map(|word| {
-            let vector_option = alphabet.vectorize(&word);
-            match vector_option {
-                Some(vector_with_metadata) => {
-                    if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) {
-                        return Some((word, vector_with_metadata));
-                    } else {
+        let words_with_vectors: Vec<_> = unique_words
+            .into_iter()
+            .map(|word| {
+                let vector_option = alphabet.vectorize(&word);
+                match vector_option {
+                    Some(vector_with_metadata) => {
+                        if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) {
+                            return Some((word, vector_with_metadata));
+                        } else {
+                            return None;
+                        }
+                    }
+                    None => {
                         return None;
                     }
                 }
-                None => {
-                    return None;
-                }
-            }
-        })
-        .flatten()
-        .collect();
+            })
+            .flatten()
+            .collect();
 
-    let mut words_by_vectors: HashMap<_, _> = HashMap::new();
-    for (word, vector_with_metadata) in words_with_vectors {
-        let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![]));
-        words_for_vector.push(WordInfo::new(word));
-    }
+        let mut words_by_vectors: HashMap<_, _> = HashMap::new();
+        for (word, vector_with_metadata) in words_with_vectors {
+            let (_, words_for_vector) = words_by_vectors.entry(vector_with_metadata.key).or_insert((vector_with_metadata.vector, vec![]));
+            words_for_vector.push(WordInfo::new(word));
+        }
 
-    let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect();
-    words_by_vectors.sort_by_key(|(vector, _)| vector.norm);
-    words_by_vectors.reverse();
+        let mut words_by_vectors: Vec<_> = words_by_vectors.into_values().collect();
+        words_by_vectors.sort_by_key(|(vector, _)| vector.norm);
+        words_by_vectors.reverse();
 
-    let mut vectors = vec![];
-    let mut words_by_vectors_vec = vec![];
+        let mut vectors = vec![];
+        let mut words_by_vectors_vec = vec![];
 
-    for (vector, words_by_vector) in words_by_vectors {
-        vectors.push(vector);
-        words_by_vectors_vec.push(words_by_vector);
-    }
+        for (vector, words_by_vector) in words_by_vectors {
+            vectors.push(vector);
+            words_by_vectors_vec.push(words_by_vector);
+        }
 
-    Dictionary {
-        phrase_vector: phrase_with_metadata.vector,
-        vectors,
-        words: words_by_vectors_vec,
+        Dictionary {
+            phrase_vector: phrase_with_metadata.vector,
+            vectors,
+            words: words_by_vectors_vec,
+        }
     }
 }

@@ -1,11 +1,14 @@
 #![feature(map_into_keys_values)]
 #![feature(trait_alias)]
 pub mod anagram_analyzer;
 pub mod anagram_finder;
 pub mod anagram_logger;
 pub mod dictionary_builder;
 pub mod hash_computer;
 pub mod permutation_type;
 pub mod permutations_cache;
 pub mod read_lines;
+pub mod solution;
+pub mod solver;
 pub mod vector_alphabet;

@@ -1,15 +1,8 @@
-#![feature(map_into_keys_values)]
 
-use std::cmp;
 use std::env;
-use rayon::prelude::*;
 
-use trustpilot_challenge_rust::anagram_analyzer;
-use trustpilot_challenge_rust::anagram_finder;
-use trustpilot_challenge_rust::dictionary_builder;
-use trustpilot_challenge_rust::hash_computer;
-use trustpilot_challenge_rust::permutations_cache;
 use trustpilot_challenge_rust::read_lines;
+use trustpilot_challenge_rust::solver::Solver;
 
 fn main() {
     let args: Vec<_> = env::args().collect();
@@ -20,34 +13,10 @@ fn main() {
     let max_requested_number_of_words = (&args[3]).parse::<usize>().unwrap();
     let phrase = &args[4];
 
-    let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count();
-    let max_supported_number_of_words = (hash_computer::MAX_PHRASE_LENGTH - phrase_byte_length_without_spaces) + 1;
-    if max_requested_number_of_words > max_supported_number_of_words {
-        println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words);
-    }
-    let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words);
-
-    let mut words = read_lines::lines_from_file(words_file_path).unwrap();
-    words.sort();
-    words.dedup();
-    let dictionary = dictionary_builder::build_dictionary(phrase, words);
-
+    let words = read_lines::lines_from_file(words_file_path).unwrap();
     let hashes_strings = read_lines::lines_from_file(hashes_file_path).unwrap();
 
-    let mut hashes_to_find: Vec<u32> = Vec::new();
-    for hash_string in hashes_strings {
-        let hash: u128 = u128::from_str_radix(&hash_string, 16).unwrap();
-        hashes_to_find.push(((hash >> 96) as u32).to_be());
-    }
-
-    for number_of_words in 1..=max_number_of_words {
-        let phrase_length = phrase_byte_length_without_spaces + number_of_words - 1;
-        let permutations = permutations_cache::PermutationsCache::new(number_of_words);
-        let anagram_vectors = anagram_finder::find_anagrams(&dictionary, number_of_words);
-        anagram_vectors.par_iter()
-            .for_each(|anagram_vector| {
-                anagram_analyzer::analyze_anagrams(anagram_vector, &dictionary, &permutations, phrase_length, &hashes_to_find)
-            });
-    }
+    let solver = Solver::create_from_input_data(words, hashes_strings, max_requested_number_of_words, phrase);
+    solver.find_solutions()
+        .for_each(|solution| println!("{} {}", solution.hash, solution.anagram_string));
 }

@@ -41,7 +41,6 @@ impl PermutationsCache {
     }
 
     pub fn get_permuted_vectors<T: Eq + Copy>(&self, ordered_vector_to_permute: &Vec<T>) -> Vec<Vec<T>> {
-        //println!("set_length: {}, vector: {:?}", self.set_length, ordered_vector_to_permute);
         assert_eq!(ordered_vector_to_permute.len(), self.set_length);
 
         let permutation_type = get_required_permutation_type(ordered_vector_to_permute);

@@ -2,6 +2,24 @@ use md5;
 use packed_simd::u8x32;
 use crate::dictionary_builder::Dictionary;
 
+#[derive(Debug)]
+pub struct Solution {
+    pub anagram_string: String,
+    pub hash: String,
+}
+
+impl Solution {
+    pub fn from_simd(simd_vector: u8x32, phrase_length: usize) -> Solution {
+        let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
+        let hash = format!("{:x}", md5::compute(anagram_string.as_bytes()));
+
+        Solution {
+            anagram_string,
+            hash,
+        }
+    }
+}
+
 pub fn get_anagram_vector_view(anagram: &Vec<usize>, dictionary: &Dictionary) -> String {
     anagram.iter()
         .map(|&index| {
@@ -26,9 +44,3 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str
 pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () {
     println!("{}", get_anagram_string_from_simd(simd_vector, phrase_length));
 }
-
-pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () {
-    let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
-    let hash = md5::compute(anagram_string.as_bytes());
-    println!("{:x} {}", hash, anagram_string);
-}

@@ -0,0 +1,68 @@
+use std::cmp;
+
+use rayon::prelude::*;
+
+use crate::anagram_analyzer;
+use crate::anagram_finder;
+use crate::dictionary_builder::Dictionary;
+use crate::hash_computer;
+use crate::permutations_cache::PermutationsCache;
+use crate::solution::Solution;
+
+pub struct Solver {
+    dictionary: Dictionary,
+    hashes_to_find: Vec<u32>,
+    phrase_byte_length_without_spaces: usize,
+    max_number_of_words: usize,
+}
+
+impl Solver {
+    pub fn create_from_input_data(words: Vec<String>, hashes_strings: Vec<String>, max_requested_number_of_words: usize, phrase: &str) -> Solver {
+        let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count();
+        let max_supported_number_of_words = (hash_computer::MAX_PHRASE_LENGTH - phrase_byte_length_without_spaces) + 1;
+        if max_requested_number_of_words > max_supported_number_of_words {
+            println!("Requested number of words unsupported; using {} as maximum number of words", max_supported_number_of_words);
+        }
+        let max_number_of_words = cmp::min(max_requested_number_of_words, max_supported_number_of_words);
+
+        let mut words = words;
+        words.sort();
+        words.dedup();
+        let dictionary = Dictionary::from_phrase_and_words(phrase, words);
+
+        let mut hashes_to_find: Vec<u32> = Vec::new();
+        for hash_string in hashes_strings {
+            let hash: u128 = u128::from_str_radix(&hash_string, 16).unwrap();
+            hashes_to_find.push(((hash >> 96) as u32).to_be());
+        }
+
+        Solver {
+            dictionary,
+            hashes_to_find,
+            phrase_byte_length_without_spaces,
+            max_number_of_words,
+        }
+    }
+
+    fn solve_for_anagram_vectors<'a>(&'a self, anagram_vectors: Vec<Vec<usize>>, permutations: PermutationsCache, phrase_length: usize) -> impl ParallelIterator<Item = Solution> + 'a {
+        anagram_vectors.into_par_iter()
+            .flat_map(move |anagram_vector| {
+                anagram_analyzer::analyze_anagrams(anagram_vector, &self.dictionary, &permutations, phrase_length, &self.hashes_to_find)
+            })
+    }
+
+    fn solve_for_number_of_words<'a>(&'a self, number_of_words: usize) -> impl ParallelIterator<Item = Solution> + 'a {
+        let phrase_length = self.phrase_byte_length_without_spaces + number_of_words - 1;
+        let permutations = PermutationsCache::new(number_of_words);
+        let anagram_vectors = anagram_finder::find_anagrams(&self.dictionary, number_of_words);
+
+        self.solve_for_anagram_vectors(anagram_vectors, permutations, phrase_length)
+    }
+
+    pub fn find_solutions<'a>(&'a self) -> impl ParallelIterator<Item = Solution> + 'a {
+        (1..=self.max_number_of_words).into_par_iter()
+            .flat_map(move |number_of_words| {
+                self.solve_for_number_of_words(number_of_words)
+            })
+    }
+}

@@ -0,0 +1,2 @@
+extern crate trustpilot_challenge_rust;
+use trustpilot_challenge_rust::hash_computer;