Speedup word_is_uppercase_ascii

main
John Zacarias Jekel 1 year ago
parent d898f44220
commit 13a36fc15b
  1. 2
      README.md
  2. 10
      src/helpers.rs
  3. 24
      src/translate_strings.rs
  4. 147
      src/translate_words.rs

@ -102,7 +102,7 @@ See <a href="https://git.jekel.ca/JZJ/anslatortray-rs/wiki/Using-the-anslatortra
Check out the <a href="https://git.jekel.ca/JZJ/anslatortray-rs/wiki/Performance">wiki page about Anslatortray's performance</a>!
Spoiler: It can translate one word in under **140ns** on average in the default UTF-8 mode, and in under **100ns** on average in ASCII-only mode :)
Spoiler: It can translate one word in under **129ns** on average in the default UTF-8 mode, and in under **94ns** on average in ASCII-only mode on my dated system :)
# Useful Links

@ -21,8 +21,10 @@ pub(crate) fn is_y(letter: char) -> bool {
return letter.to_ascii_lowercase() == 'y';
}
//TODO testing
//Returns whether an entire word is upper case or not
pub(crate) fn word_is_uppercase(english_word: &str) -> bool {
//We can't get the last character without iterating through the whole string since this is UTF-8
//So the best we can do is exit out early if we encounter a lower-case character (we can't use the heuristic in word_is_uppercase_ascii)
for letter in english_word.chars() {
if letter.is_ascii_lowercase() {
return false;
@ -32,9 +34,11 @@ pub(crate) fn word_is_uppercase(english_word: &str) -> bool {
return true;
}
//TODO testing
//Returns whether an entire word is (probably) upper case (the word must only contain ASCII characters)
//Heuristic: If the last letter of the word is uppercase, likely the whole word is uppercase,
//so only the final byte is inspected instead of scanning the whole word
//An empty word is reported as not uppercase instead of panicking on an out-of-bounds index
pub(crate) fn word_is_uppercase_ascii(english_word: &str) -> bool {
    //The input is ASCII-only, so the last byte is the last character and no UTF-8 decoding is needed
    return english_word.as_bytes().last().map_or(false, |last_byte| last_byte.is_ascii_uppercase());
}
/* Tests */

@ -34,8 +34,8 @@ use crate::translate_words::translate_word_with_style_reuse_buffers_ascii;
/// "Etlay's ytray omesay edgeway asescay. Atthay isway away ontractioncay, asway ellway asway away ordway erewhay ethay onlyway owelvay isway yway. Eatnay, allway atthay orksway!"
///);
///
///assert_eq!(translate("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"),
/// "Atwhay ifway away ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZay"
///assert_eq!(translate("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz"),
/// "Atwhay ifway away ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzay"
///);
///
///assert_eq!(translate("Cool, so the heuristics make pretty good guesses with what they're fed!"),
@ -73,8 +73,8 @@ pub fn translate_ascii(english: &str) -> String {
/// "Etlay's ytray omesay edgeway asescay. Atthay isway away ontractioncay, asway ellway asway away ordway erewhay ethay onlyway owelvay isway yway. Eatnay, allway atthay orksway!"
///);
///
///assert_eq!(translate_way("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"),
/// "Atwhay ifway away ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZay"
///assert_eq!(translate_way("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz"),
/// "Atwhay ifway away ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzay"
///);
///
///assert_eq!(translate_way("Cool, so the heuristics make pretty good guesses with what they're fed!"),
@ -109,8 +109,8 @@ pub fn translate_way_ascii(english: &str) -> String {
/// "Etlay's ytray omesay edgeyay asescay. Atthay isyay ayay ontractioncay, asyay ellway asyay ayay ordway erewhay ethay onlyyay owelvay isyay yyay. Eatnay, allyay atthay orksway!"
///);
///
///assert_eq!(translate_yay("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"),
/// "Atwhay ifyay ayay ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZay"
///assert_eq!(translate_yay("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz"),
/// "Atwhay ifyay ayay ordway ashay onay owelsvay, ikelay isthay: bcdfghjklmnpqrstvwxzay"
///);
///
///assert_eq!(translate_yay("Cool, so the heuristics make pretty good guesses with what they're fed!"),
@ -157,8 +157,8 @@ pub fn translate_hay_ascii(english: &str) -> String {
///assert_eq!(translate_ferb("Let's try some edge cases. That is a contraction, as well as a word where the only vowel is y. Neat, all that works!"),
/// "Etlerb's ytrerb omeserb edgeferb asescerb. Attherb isferb aferb ontractioncerb, asferb ellwerb asferb aferb ordwerb erewherb etherb onlyferb owelverb isferb yferb. Eatnerb, allferb attherb orkswerb!"
///);
///assert_eq!(translate_ferb("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ"),
/// "Atwherb ifferb aferb ordwerb asherb onerb owelsverb, ikelerb istherb: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZerb"
///assert_eq!(translate_ferb("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz"),
/// "Atwherb ifferb aferb ordwerb asherb onerb owelsverb, ikelerb istherb: bcdfghjklmnpqrstvwxzerb"
///);
///assert_eq!(translate_ferb("Cool, so the heuristics make pretty good guesses with what they're fed!"),
/// "Oolcerb, oserb etherb euristicsherb akemerb ettyprerb oodgerb uessesgerb ithwerb atwherb eytherb're edferb!"
@ -205,8 +205,8 @@ pub fn translate_ferb_ascii(english: &str) -> String {
/// special_case_suffix + " atth" + suffix + " orksw" + suffix + "!"
///);
///
///assert_eq!(translate_with_style("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ", suffix, special_case_suffix),
/// "Atwh".to_string() + suffix + " if" + special_case_suffix + " a" + special_case_suffix + " ordw" + suffix + " ash" + suffix + " on" + suffix + " owelsv" + suffix + ", ikel" + suffix + " isth" + suffix + ": bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ" + suffix
///assert_eq!(translate_with_style("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz", suffix, special_case_suffix),
/// "Atwh".to_string() + suffix + " if" + special_case_suffix + " a" + special_case_suffix + " ordw" + suffix + " ash" + suffix + " on" + suffix + " owelsv" + suffix + ", ikel" + suffix + " isth" + suffix + ": bcdfghjklmnpqrstvwxz" + suffix
///);
///
///assert_eq!(translate_with_style("Cool, so the heuristics make pretty good guesses with what they're fed!", suffix, special_case_suffix),
@ -505,8 +505,8 @@ mod tests {
special_case_suffix + " atth" + suffix + " orksw" + suffix + "!"
);
assert_eq!(translate_with_style_ascii("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ", suffix, special_case_suffix),
"Atwh".to_string() + suffix + " if" + special_case_suffix + " a" + special_case_suffix + " ordw" + suffix + " ash" + suffix + " on" + suffix + " owelsv" + suffix + ", ikel" + suffix + " isth" + suffix + ": bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ" + suffix
assert_eq!(translate_with_style_ascii("What if a word has no vowels, like this: bcdfghjklmnpqrstvwxz", suffix, special_case_suffix),
"Atwh".to_string() + suffix + " if" + special_case_suffix + " a" + special_case_suffix + " ordw" + suffix + " ash" + suffix + " on" + suffix + " owelsv" + suffix + ", ikel" + suffix + " isth" + suffix + ": bcdfghjklmnpqrstvwxz" + suffix
);
assert_eq!(translate_with_style_ascii("Cool, so the heuristics make pretty good guesses with what they're fed!", suffix, special_case_suffix),

@ -104,12 +104,88 @@ pub(crate) fn translate_word_with_style_reuse_buffers_ascii (
suffix_lower: &str, special_case_suffix_lower: &str, suffix_upper: &str, special_case_suffix_upper: &str,
buffer_to_append_to: &mut String, starting_consonants: &mut String
) {
//TODO make optimizations since we can assume the string is UTF8 safe
translate_word_with_style_reuse_buffers (
english_word,
suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper,
buffer_to_append_to, starting_consonants
);
if english_word.is_empty() {
return;
}
if english_word.len() == 1 {
buffer_to_append_to.push_str(english_word);
buffer_to_append_to.push_str(special_case_suffix_lower);
return;
}
//TODO more ascii optimizations
let mut iterator = english_word.chars().peekable();
//Check the first letter
let first_letter: char = iterator.next().unwrap();
//Check if the word is uppercase
let word_uppercase = word_is_uppercase_ascii(&english_word);
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//However, if any word is only one letter long, this takes priority and the word is treated like a vowel
let first_letter_was_vowel: bool = {
is_vowel(first_letter)//Not including y
|| if let Some(character) = iterator.peek() { !character.is_alphabetic() } else { true }//Non-alphabetic character after the first letter, or the word ends after the first letter
};
starting_consonants.truncate(0);
if first_letter_was_vowel {
buffer_to_append_to.push(first_letter);
} else {
let first_char_was_upper = first_letter.is_ascii_uppercase();
starting_consonants.push(if word_uppercase { first_letter } else { first_letter.to_ascii_lowercase() });
//Grab all of the starting consonants, and push the first vowel we encounter to buffer_to_append_to
loop {
match iterator.next() {
None => { break; },//The word has no vowels, but it is a herustic to pass it on so that ex. the acroynm binary code decimal or bcd becomes bcdway, etc.
Some(character) => {
if character.is_alphabetic() {
if is_vowel(character) || is_y(character) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
//The vowel is the first letter of the word; we want it to match the capitalization of the first letter of the original word
if first_char_was_upper {
buffer_to_append_to.push(character.to_ascii_uppercase());
} else {
buffer_to_append_to.push(character.to_ascii_lowercase());
}
break;
} else {
starting_consonants.push(character);
}
} else {//The word ended without vowels or we met an apostrophe
break;//It is a herustic to pass it on so that ex. the letter y becomes yway, the word a becomes away, etc.
}
}
}
}
}
//Copy all of the remaining letters up to the end of the word
loop {
match iterator.next() {
None => { break; },//End of the word
Some(character) => { buffer_to_append_to.push(character); }
}
}
//Copy starting consonants and add the suffix, or add the special_case_suffix depending on the circumstances
if first_letter_was_vowel {
if word_uppercase {
buffer_to_append_to.push_str(special_case_suffix_upper);
} else {
buffer_to_append_to.push_str(special_case_suffix_lower);
}
} else {
buffer_to_append_to.push_str(&starting_consonants);
if word_uppercase {
buffer_to_append_to.push_str(suffix_upper);
} else {
buffer_to_append_to.push_str(suffix_lower);
}
}
}
/* Tests */
@ -135,6 +211,27 @@ fn translate_word_with_style(english_word: &str, suffix_lower: &str, special_cas
return pig_latin_word;
}
#[cfg(test)]
//Test-only convenience wrapper around translate_word_with_style_reuse_buffers_ascii
//Takes only the lower case suffixes, derives the upper case ones, and returns the translated word as a fresh String
fn translate_word_with_style_ascii(english_word: &str, suffix_lower: &str, special_case_suffix_lower: &str) -> String {
    //Only ASCII letters are changed by the conversion; every other character is copied through as-is
    let suffix_upper = suffix_lower.to_ascii_uppercase();
    let special_case_suffix_upper = special_case_suffix_lower.to_ascii_uppercase();

    //Scratch space for the leading consonants; 64 is longer than basically all English words, and this isn't even the whole word
    let mut starting_consonants_buffer = String::with_capacity(64);
    //Output buffer; doubled to leave room for whitespace, symbols, and the suffix without reallocating
    let mut pig_latin_word = String::with_capacity(64 * 2);

    translate_word_with_style_reuse_buffers_ascii (
        english_word,
        suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper,
        &mut pig_latin_word, &mut starting_consonants_buffer
    );

    return pig_latin_word;
}
#[cfg(test)]
mod tests {
use super::*;
@ -176,6 +273,44 @@ mod tests {
assert_eq!(translate_word_with_style("nice", suffix, special_case_suffix), "icen".to_string() + suffix);
}
}
#[test]
fn test_translate_word_with_style_ascii() {
    //Each entry is (suffix, special_case_suffix); every word below is checked against every pair
    let suffix_special_case_suffix_pairs = [
        ("ay", "way"), ("ay", "yay"), ("ay", "hay"), ("erb", "ferb"), ("ancy", "fancy"), ("orange", "porange"), ("anana", "banana"), ("atin", "latin"), ("ust", "rust")
    ];

    //Each entry is (english word, expected translation without its suffix, whether the special case suffix is expected)
    let cases = [
        ("Hello", "Elloh", false),
        ("World", "Orldw", false),
        ("This", "Isth", false),
        ("is", "is", true),
        ("a", "a", true),
        ("test", "estt", false),
        ("of", "of", true),
        ("the", "eth", false),
        ("function", "unctionf", false),
        ("translate", "anslatetr", false),
        ("word", "ordw", false),
        ("I", "I", true),
        ("Love", "Ovel", false),
        ("Pig", "Igp", false),
        ("Latin", "Atinl", false),
        ("You", "Ouy", false),//Y isn't a vowel here
        ("should", "ouldsh", false),
        ("try", "ytr", false),//Y is a vowel here
        ("yougurt", "ougurty", false),//Y isn't a vowel here
        //Contraction case is currently disabled: "it's" would be expected to become "it" + special_case_suffix + "'s"
        ("quite", "uiteq", false),//Awful to pronounce, but correct
        ("nice", "icen", false),
    ];

    for (suffix, special_case_suffix) in suffix_special_case_suffix_pairs {
        for (english_word, translated_stem, expects_special_case) in cases {
            let expected_ending = if expects_special_case { special_case_suffix } else { suffix };
            assert_eq!(
                translate_word_with_style_ascii(english_word, suffix, special_case_suffix),
                translated_stem.to_string() + expected_ending
            );
        }
    }
}
}
/* Benches */

Loading…
Cancel
Save