Minor performance improvement; readme updated

3 years ago · 2c68d8bc71
parent e57b327c57
commit 2c68d8bc71
3 changed files with 43 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -168,3 +168,15 @@ because for performance and transparency reasons only the first 8 bytes of hashe
 This means that for every requested hash there is a 1/2^32 chance of collision,
 so for 10 requested hashes you will get one false positive every 430 million anagrams, on average,
 which allows one to roughly measure the performance of MD5 calculation.
 (On my laptop with Ryzen 3550H, a search for all solutions of up to 5 words takes around 100 seconds,
 and there are about 1.35 billion 5-word anagrams, which means that it computes
 more than 100 million hashes *per second*, and that's in addition to actually finding the anagrams!)
 It might also output some duplicate 2-word or 3-word solutions, which is, again,
 caused by an optimization: we have to pack anagrams into chunks of 8,
 and the number of permutations and substitutions (that is, the number of
 all possible anagrams obtained from a given set of vectors in the character space)
 might not be a multiple of 8.
 In that case, the last chunk might also contain some leftovers from the previous one,
 and if the previous chunk produced some solutions, the last chunk may produce the same solutions again.
--- a/src/anagram_analyzer.rs
+++ b/src/anagram_analyzer.rs
@ -1,5 +1,5 @@
 use packed_simd::u8x32;
-use crate::anagram_logger::log_anagram;
+use crate::anagram_logger;
 use crate::dictionary_builder::Dictionary;
 use crate::hash_computer::CHUNK_SIZE;
 use crate::hash_computer::find_hashes;
@ -21,7 +21,7 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash
    match find_hashes(chunk, phrase_length, hashes_to_find) {
        Some(anagrams) => {
            for anagram in anagrams {
-                log_anagram(anagram, phrase_length);
+                anagram_logger::log_anagram_with_hash(anagram, phrase_length);
            }
        }
        _ => ()
@ -31,18 +31,26 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash
 /// Feeds every anagram derived from `anagram_vector` to the hash search in batches of `CHUNK_SIZE`.
 ///
 /// For each vector yielded by `permutations.get_permuted_vectors`, the candidates produced by
 /// `generate_vector_substitutions` are written into a fixed-size buffer. Each time the buffer
 /// becomes full it is passed to `process_anagram_chunk` along with `phrase_length` and
 /// `hashes_to_find`; a partially filled buffer is passed once more after the last candidate.
 pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () {
    let mut buffer: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE];
    let mut filled: usize = 0;
    // Debug aid: uncomment together with the lines below to count generated anagrams.
    //let mut total: usize = 0;
    for permuted_vector in permutations.get_permuted_vectors(&anagram_vector).iter() {
        for anagram in generate_vector_substitutions(&dictionary, &permuted_vector, u8x32::splat(0), 0) {
            //anagram_logger::log_anagram(anagram, phrase_length);
            buffer[filled] = anagram;
            filled += 1;
            //total = total + 1;
            if filled == CHUNK_SIZE {
                // Buffer is full: wrap around to the first slot and hand the batch over.
                filled = 0;
                process_anagram_chunk(&buffer, phrase_length, hashes_to_find);
            }
        }
    }
    // Flush the trailing partial batch. The buffer is never cleared, so slots at and after
    // `filled` still hold whatever the previous batch (or the initial zero fill) left there.
    if filled > 0 {
        process_anagram_chunk(&buffer, phrase_length, hashes_to_find);
    }
    //println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total);
 }
--- a/src/anagram_logger.rs
+++ b/src/anagram_logger.rs
@ -1,5 +1,20 @@
 use md5;
 use packed_simd::u8x32;
 use crate::dictionary_builder::Dictionary;
 /// Builds a human-readable, space-separated view of an anagram vector.
 ///
 /// Each entry of `anagram` is used as an index into `dictionary.words`. When the entry has
 /// exactly one word option, that word is emitted as-is; otherwise all of its options are
 /// emitted comma-separated inside square brackets.
 pub fn get_anagram_vector_view(anagram: &Vec<usize>, dictionary: &Dictionary) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(anagram.len());
    for &index in anagram.iter() {
        let candidates = &dictionary.words[index];
        let piece = if candidates.len() != 1 {
            // Several (or zero) interchangeable words: list them all in brackets.
            let alternatives = candidates.iter()
                .map(|word_info| word_info.word.clone())
                .collect::<Vec<_>>();
            format!("[{}]", alternatives.join(","))
        } else {
            candidates[0].word.clone()
        };
        rendered.push(piece);
    }
    rendered.join(" ")
 }
 fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> String {
    let mut string_bytes: [u8; 32] = [0; 32];
@ -9,6 +24,10 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str
 }
 /// Prints the string form of `simd_vector` (as produced by `get_anagram_string_from_simd`
 /// for the given `phrase_length`) on its own line to standard output.
 pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () {
    let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
    println!("{}", anagram_string);
 }
 pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () {
    let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
    let hash = md5::compute(anagram_string.as_bytes());
    println!("{:x} {}", hash, anagram_string);