diff --git a/README.md b/README.md index e9c1c83..664bd65 100644 --- a/README.md +++ b/README.md @@ -167,4 +167,16 @@ In addition to the right solutions it will also output some wrong ones, because for performance and transparency reasons only the first 8 bytes of hashes are compared. This means that for every requested hash there is 1/1^32 chance of collision, so for 10 requested hashes you will get one false positive every 430 millions of anagrams, on average, -which allows one to roughly measure the perfomance of MD5 calculation. \ No newline at end of file +which allows one to roughly measure the performance of MD5 calculation. + +(On my laptop with a Ryzen 3550H, a search for all solutions of up to 5 words takes around 100 seconds, +and there are about 1.35 billion 5-word anagrams, which means that it computes +more than 100 million hashes *per second*, and that's in addition to actually finding the anagrams!) + +It might also output some duplicate 2-word or 3-word solutions, which is, again, +caused by an optimization: we have to pack anagrams into chunks of 8, +and the number of permutations and substitutions (that is, the number of +all possible anagrams obtained from a given set of vectors in the character space) +might not be a multiple of 8. +In that case, the last chunk might also contain some leftovers from the previous one, +and if the previous chunk produced some solutions, the last chunk may produce the same solutions again. 
\ No newline at end of file diff --git a/src/anagram_analyzer.rs b/src/anagram_analyzer.rs index 9b704e7..94642a5 100644 --- a/src/anagram_analyzer.rs +++ b/src/anagram_analyzer.rs @@ -1,5 +1,5 @@ use packed_simd::u8x32; -use crate::anagram_logger::log_anagram; +use crate::anagram_logger; use crate::dictionary_builder::Dictionary; use crate::hash_computer::CHUNK_SIZE; use crate::hash_computer::find_hashes; @@ -21,7 +21,7 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash match find_hashes(chunk, phrase_length, hashes_to_find) { Some(anagrams) => { for anagram in anagrams { - log_anagram(anagram, phrase_length); + anagram_logger::log_anagram_with_hash(anagram, phrase_length); } } _ => () @@ -31,18 +31,26 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash pub fn analyze_anagrams(anagram_vector: &Vec, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () { let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE]; let mut chunk_position: usize = 0; + //let mut total: usize = 0; permutations.get_permuted_vectors(&anagram_vector).iter() .flat_map(|permuted_vector| { generate_vector_substitutions(&dictionary, &permuted_vector, u8x32::splat(0), 0) }) .for_each(|anagram| { + //anagram_logger::log_anagram(anagram, phrase_length); chunk[chunk_position] = anagram; + chunk_position = (chunk_position + 1) % CHUNK_SIZE; + //total = total + 1; if chunk_position == 0 { process_anagram_chunk(&chunk, phrase_length, hashes_to_find); } }); - process_anagram_chunk(&chunk, phrase_length, hashes_to_find); + if chunk_position != 0 { + process_anagram_chunk(&chunk, phrase_length, hashes_to_find); + } + + //println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total); } \ No newline at end of file diff --git a/src/anagram_logger.rs b/src/anagram_logger.rs index c462a86..308314c 100644 --- a/src/anagram_logger.rs +++ 
b/src/anagram_logger.rs @@ -1,5 +1,20 @@ use md5; use packed_simd::u8x32; +use crate::dictionary_builder::Dictionary; + +pub fn get_anagram_vector_view(anagram: &Vec, dictionary: &Dictionary) -> String { + anagram.iter() + .map(|&index| { + let word_options = &dictionary.words[index]; + if word_options.len() == 1 { + word_options[0].word.clone() + } else { + format!("[{}]", word_options.iter().map(|word_info| word_info.word.clone()).collect::>().join(",")) + } + }) + .collect::>() + .join(" ") +} fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> String { let mut string_bytes: [u8; 32] = [0; 32]; @@ -9,6 +24,10 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str } pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () { + println!("{}", get_anagram_string_from_simd(simd_vector, phrase_length)); +} + +pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () { let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length); let hash = md5::compute(anagram_string.as_bytes()); println!("{:x} {}", hash, anagram_string);