Minor performance improvement; readme updated

main
Inga 🏳‍🌈 4 years ago
parent e57b327c57
commit 2c68d8bc71
  1. 12
      README.md
  2. 12
      src/anagram_analyzer.rs
  3. 19
      src/anagram_logger.rs

@ -168,3 +168,15 @@ because for performance and transparency reasons only the first 8 bytes of hashe
This means that for every requested hash there is a 1/2^32 chance of collision, This means that for every requested hash there is a 1/2^32 chance of collision,
so for 10 requested hashes you will get one false positive every 430 million anagrams, on average, so for 10 requested hashes you will get one false positive every 430 million anagrams, on average,
which allows one to roughly measure the performance of MD5 calculation. which allows one to roughly measure the performance of MD5 calculation.
(On my laptop with a Ryzen 3550H, searching for all solutions of up to 5 words takes around 100 seconds,
and there are about 1.35 billion 5-word anagrams, which means that it computes
more than 100 million hashes *per second*, and that's in addition to actually finding the anagrams!)
It might also output some duplicate 2-word or 3-word solutions, which is, again,
caused by an optimization: we have to pack anagrams into chunks of 8,
and the number of permutations and substitutions (that is, the number of
all possible anagrams obtained from a given set of vectors in the character space)
might not be a multiple of 8.
In that case, the last chunk might also contain some leftovers from the previous one,
and if the previous chunk produced some solutions, the last chunk may produce the same solutions again.

@ -1,5 +1,5 @@
use packed_simd::u8x32; use packed_simd::u8x32;
use crate::anagram_logger::log_anagram; use crate::anagram_logger;
use crate::dictionary_builder::Dictionary; use crate::dictionary_builder::Dictionary;
use crate::hash_computer::CHUNK_SIZE; use crate::hash_computer::CHUNK_SIZE;
use crate::hash_computer::find_hashes; use crate::hash_computer::find_hashes;
@ -21,7 +21,7 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash
match find_hashes(chunk, phrase_length, hashes_to_find) { match find_hashes(chunk, phrase_length, hashes_to_find) {
Some(anagrams) => { Some(anagrams) => {
for anagram in anagrams { for anagram in anagrams {
log_anagram(anagram, phrase_length); anagram_logger::log_anagram_with_hash(anagram, phrase_length);
} }
} }
_ => () _ => ()
@ -31,18 +31,26 @@ fn process_anagram_chunk(chunk: &[u8x32; CHUNK_SIZE], phrase_length: usize, hash
pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () { pub fn analyze_anagrams(anagram_vector: &Vec<usize>, dictionary: &Dictionary, permutations: &PermutationsCache, phrase_length: usize, hashes_to_find: &[u32]) -> () {
let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE]; let mut chunk: [u8x32; CHUNK_SIZE] = [u8x32::splat(0); CHUNK_SIZE];
let mut chunk_position: usize = 0; let mut chunk_position: usize = 0;
//let mut total: usize = 0;
permutations.get_permuted_vectors(&anagram_vector).iter() permutations.get_permuted_vectors(&anagram_vector).iter()
.flat_map(|permuted_vector| { .flat_map(|permuted_vector| {
generate_vector_substitutions(&dictionary, &permuted_vector, u8x32::splat(0), 0) generate_vector_substitutions(&dictionary, &permuted_vector, u8x32::splat(0), 0)
}) })
.for_each(|anagram| { .for_each(|anagram| {
//anagram_logger::log_anagram(anagram, phrase_length);
chunk[chunk_position] = anagram; chunk[chunk_position] = anagram;
chunk_position = (chunk_position + 1) % CHUNK_SIZE; chunk_position = (chunk_position + 1) % CHUNK_SIZE;
//total = total + 1;
if chunk_position == 0 { if chunk_position == 0 {
process_anagram_chunk(&chunk, phrase_length, hashes_to_find); process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
} }
}); });
if chunk_position != 0 {
process_anagram_chunk(&chunk, phrase_length, hashes_to_find); process_anagram_chunk(&chunk, phrase_length, hashes_to_find);
}
//println!("{} {}", anagram_logger::get_anagram_vector_view(anagram_vector, dictionary), total);
} }

@ -1,5 +1,20 @@
use md5; use md5;
use packed_simd::u8x32; use packed_simd::u8x32;
use crate::dictionary_builder::Dictionary;
/// Renders an anagram vector as a human-readable, space-separated string.
///
/// Each element of `anagram` is used as an index into `dictionary.words`.
/// An entry with exactly one word option is rendered as that word; any other
/// entry is rendered as a bracketed, comma-separated list of all of its
/// options, e.g. `[foo,bar]`.
///
/// NOTE(review): `dictionary.words` is indexed directly, so an out-of-range
/// index presumably panics — confirm against the `Dictionary` definition.
pub fn get_anagram_vector_view(anagram: &Vec<usize>, dictionary: &Dictionary) -> String {
    anagram
        .iter()
        .map(|&index| {
            let word_options = &dictionary.words[index];
            if word_options.len() == 1 {
                word_options[0].word.clone()
            } else {
                // Borrow the words instead of cloning each one: `join` only
                // needs string slices, so the temporary `String`s were wasted.
                let alternatives = word_options
                    .iter()
                    .map(|word_info| word_info.word.as_str())
                    .collect::<Vec<_>>()
                    .join(",");
                format!("[{}]", alternatives)
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> String { fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> String {
let mut string_bytes: [u8; 32] = [0; 32]; let mut string_bytes: [u8; 32] = [0; 32];
@ -9,6 +24,10 @@ fn get_anagram_string_from_simd(simd_vector: u8x32, phrase_length: usize) -> Str
} }
pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () { pub fn log_anagram(simd_vector: u8x32, phrase_length: usize) -> () {
println!("{}", get_anagram_string_from_simd(simd_vector, phrase_length));
}
pub fn log_anagram_with_hash(simd_vector: u8x32, phrase_length: usize) -> () {
let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length); let anagram_string = get_anagram_string_from_simd(simd_vector, phrase_length);
let hash = md5::compute(anagram_string.as_bytes()); let hash = md5::compute(anagram_string.as_bytes());
println!("{:x} {}", hash, anagram_string); println!("{:x} {}", hash, anagram_string);

Loading…
Cancel
Save