parent
89246c63da
commit
ddd9ae9b33
@ -0,0 +1,10 @@ |
|||||||
|
[package] |
||||||
|
name = "hello_cargo" |
||||||
|
version = "0.1.0" |
||||||
|
authors = ["inga-lovinde <52715130+inga-lovinde@users.noreply.github.com>"] |
||||||
|
edition = "2018" |
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html |
||||||
|
|
||||||
|
[dependencies] |
||||||
|
packed_simd = { version = "0.3.4", package = "packed_simd_2" } |
@ -1 +1,29 @@ |
|||||||
# TrustPilotChallengeRust |
# TrustPilotChallengeRust |
||||||
|
|
||||||
|
TrustPilot had this challenge (http://followthewhiterabbit.trustpilot.com/) |
||||||
|
where you had to, given the dictionary, and given three MD5 hashes, |
||||||
|
find anagrams of a phrase *"poultry outwits ants"* which result in these hashes. |
||||||
|
|
||||||
|
My original solution was a mixture of C# and plain C (with a bit of Visual C++ |
||||||
|
as a bridge), and heavily used AVX2 intrinsics for optimization. |
||||||
|
|
||||||
|
Rust now has a decent API frontend for AVX2 intrinsics |
||||||
|
(https://rust-lang.github.io/packed_simd/packed_simd_2/, and soon-to-be `std::simd`), |
||||||
|
so it makes perfect sense to try and reimplement the same ideas with Rust. |
||||||
|
|
||||||
|
The problem will sound a bit different: given a dictionary and given a string, |
||||||
|
find all anagrams no longer than N words and no longer than 27 bytes |
||||||
|
which produce given MD5 hashes. |
||||||
|
|
||||||
|
(The limit on the number of words is necessary, because there are single-letter words |
||||||
|
in the dictionary; and it makes the total number of anagrams astronomically large) |
||||||
|
|
||||||
|
This is a working draft, so far the code is extremely dirty (this is my first Rust project), |
||||||
|
and it only lists all anagrams (not including word reorderings) |
||||||
|
and does not yet do actual MD5 calculation. |
||||||
|
|
||||||
|
How to run to solve the original task for three-word anagrams: |
||||||
|
|
||||||
|
``` |
||||||
|
cargo run data\words.txt data\hashes.txt 3 "poultry outwits ants" |
||||||
|
``` |
||||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,24 @@ |
|||||||
|
use crate::vector_alphabet; |
||||||
|
|
||||||
|
pub fn find_anagrams(remainder: &vector_alphabet::Vector, remaining_depth: usize, current_words: &[(String, vector_alphabet::VectorWithMetadata)]) -> Vec<Vec<String>> { |
||||||
|
if remaining_depth == 0 { |
||||||
|
if remainder.norm == 0 { |
||||||
|
return vec![vec![]]; |
||||||
|
} |
||||||
|
return vec![]; |
||||||
|
} |
||||||
|
|
||||||
|
current_words.iter() |
||||||
|
.enumerate() |
||||||
|
.map(|(index, (word, word_metadata))| match remainder.safe_substract(&word_metadata.vector) { |
||||||
|
Some(new_remainder) => find_anagrams(&new_remainder, remaining_depth-1, ¤t_words[index..]) |
||||||
|
.iter() |
||||||
|
.map(|partial_phrase| { |
||||||
|
vec![word.clone()].iter().chain(partial_phrase).cloned().collect() |
||||||
|
}) |
||||||
|
.collect(), |
||||||
|
_ => vec![], |
||||||
|
}) |
||||||
|
.flatten() |
||||||
|
.collect() |
||||||
|
} |
@ -0,0 +1 @@ |
|||||||
|
/// Upper bound, in bytes, on the length of a phrase (letters plus the single
/// spaces separating the words) that the program accepts.
pub const MAX_PHRASE_LENGTH: usize = 27;
@ -0,0 +1,56 @@ |
|||||||
|
use std::env; |
||||||
|
|
||||||
|
mod anagram_finder; |
||||||
|
mod hash_computer; |
||||||
|
mod read_lines; |
||||||
|
mod vector_alphabet; |
||||||
|
|
||||||
|
fn main() { |
||||||
|
let args: Vec<_> = env::args().collect(); |
||||||
|
println!("{:?}", args); |
||||||
|
|
||||||
|
let words_file_path = &args[1]; |
||||||
|
let hashes_file_path = &args[2]; |
||||||
|
let max_number_of_words = &args[3].parse::<usize>().unwrap(); |
||||||
|
let phrase = &args[4]; |
||||||
|
|
||||||
|
let phrase_byte_length_without_spaces = phrase.as_bytes().into_iter().filter(|&b| *b != b' ').count(); |
||||||
|
let result_byte_length = phrase_byte_length_without_spaces + max_number_of_words - 1; |
||||||
|
|
||||||
|
if result_byte_length > hash_computer::MAX_PHRASE_LENGTH { |
||||||
|
panic!("Words number limit exceeded") |
||||||
|
} |
||||||
|
|
||||||
|
let alphabet = vector_alphabet::Alphabet::new(phrase).unwrap(); |
||||||
|
|
||||||
|
let phrase_with_metadata = alphabet.vectorize(phrase).unwrap(); |
||||||
|
|
||||||
|
let mut words = read_lines::lines_from_file(words_file_path).unwrap(); |
||||||
|
words.sort(); |
||||||
|
words.dedup(); |
||||||
|
|
||||||
|
let words_with_vectors: Vec<_> = words |
||||||
|
.into_iter() |
||||||
|
.map(|word| { |
||||||
|
let vector_option = alphabet.vectorize(&word); |
||||||
|
match vector_option { |
||||||
|
Some(vector_with_metadata) => { |
||||||
|
if vector_with_metadata.vector.is_subset_of(&phrase_with_metadata.vector) { |
||||||
|
return Some((word, vector_with_metadata)); |
||||||
|
} else { |
||||||
|
return None; |
||||||
|
} |
||||||
|
} |
||||||
|
None => { |
||||||
|
return None; |
||||||
|
} |
||||||
|
} |
||||||
|
}) |
||||||
|
.flatten() |
||||||
|
.collect(); |
||||||
|
|
||||||
|
let result = anagram_finder::find_anagrams(&phrase_with_metadata.vector, *max_number_of_words, &words_with_vectors); |
||||||
|
for result_words in result { |
||||||
|
println!("{}", result_words.join(" ")) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,7 @@ |
|||||||
|
use std::fs::File; |
||||||
|
use std::io::{self, BufRead}; |
||||||
|
use std::path::Path; |
||||||
|
|
||||||
|
/// Reads the whole file at `filename` and returns its lines, in order, without
/// their line terminators.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or if any
/// line cannot be read.
pub fn lines_from_file(filename: impl AsRef<Path>) -> io::Result<Vec<String>> {
    let reader = io::BufReader::new(File::open(filename)?);
    // Collecting into `io::Result<Vec<_>>` stops at the first failing line.
    reader.lines().collect()
}
@ -0,0 +1,93 @@ |
|||||||
|
use std::collections::HashMap; |
||||||
|
use packed_simd; |
||||||
|
|
||||||
|
#[derive(Debug)] |
||||||
|
pub struct Vector { |
||||||
|
pub norm: usize, |
||||||
|
simd_vector: packed_simd::u8x32, |
||||||
|
} |
||||||
|
|
||||||
|
#[derive(Debug)] |
||||||
|
pub struct VectorWithMetadata { |
||||||
|
pub key: String, |
||||||
|
pub vector: Vector, |
||||||
|
} |
||||||
|
|
||||||
|
impl Vector { |
||||||
|
fn new(&array: &[u8; 32], norm: usize) -> Vector { |
||||||
|
Vector { |
||||||
|
norm, |
||||||
|
simd_vector: packed_simd::u8x32::from_slice_unaligned(&array), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pub fn is_subset_of(&self, other: &Vector) -> bool { |
||||||
|
let comparison_result = packed_simd::u8x32::gt(self.simd_vector, other.simd_vector); |
||||||
|
packed_simd::m8x32::none(comparison_result as packed_simd::m8x32) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn safe_substract(&self, vector_to_substract: &Vector) -> Option<Vector> { |
||||||
|
if vector_to_substract.is_subset_of(self) { |
||||||
|
return Some(Vector { |
||||||
|
norm: self.norm - vector_to_substract.norm, |
||||||
|
simd_vector: self.simd_vector - vector_to_substract.simd_vector |
||||||
|
}); |
||||||
|
} else { |
||||||
|
return None; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
/// Maps each distinct character of a phrase to its lane offset in a letter-count vector.
#[derive(Debug)]
pub struct Alphabet {
    /// Character -> offset lookup table.
    chars_to_offsets: HashMap<char, usize>,
}
||||||
|
|
||||||
|
impl Alphabet { |
||||||
|
pub fn new(phrase: &str) -> Result<Alphabet, &'static str> { |
||||||
|
let mut chars: Vec<_> = phrase.chars().filter(|&ch| ch != ' ').collect(); |
||||||
|
chars.sort(); |
||||||
|
chars.dedup(); |
||||||
|
|
||||||
|
if chars.len() > 32 { |
||||||
|
return Err("Number of different chars should not exceed 32"); |
||||||
|
} |
||||||
|
|
||||||
|
let mut offsets_to_chars: [char; 32] = [' '; 32]; |
||||||
|
let mut chars_to_offsets: HashMap<char, usize> = HashMap::new(); |
||||||
|
for (pos, ch) in chars.iter().enumerate() { |
||||||
|
chars_to_offsets.insert(*ch, pos); |
||||||
|
offsets_to_chars[pos] = *ch; |
||||||
|
} |
||||||
|
|
||||||
|
Ok(Alphabet { |
||||||
|
chars_to_offsets, |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
pub fn vectorize(&self, phrase: &str) -> Option<VectorWithMetadata> { |
||||||
|
let mut chars: Vec<_> = phrase.chars().filter(|&ch| ch != ' ').collect(); |
||||||
|
chars.sort(); |
||||||
|
|
||||||
|
let norm = chars.len(); |
||||||
|
|
||||||
|
let mut array: [u8; 32] = [0; 32]; |
||||||
|
for ch in &chars { |
||||||
|
match self.chars_to_offsets.get(&ch) { |
||||||
|
Some(&index) => { |
||||||
|
if array[index] > 127 { |
||||||
|
return None; |
||||||
|
} |
||||||
|
|
||||||
|
array[index] += 1; |
||||||
|
}, |
||||||
|
_ => return None, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
let key: String = chars.into_iter().collect(); |
||||||
|
return Some(VectorWithMetadata { |
||||||
|
key, |
||||||
|
vector: Vector::new(&array, norm), |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue