From 0179a99a32505e2ba4ab3da51bed31f62b94d0c9 Mon Sep 17 00:00:00 2001 From: John Zacarias Jekel Date: Wed, 14 Dec 2022 11:54:57 -0500 Subject: [PATCH] Optimizations and cleanup --- src/byte_string.rs | 676 +-------------------------------------------- 1 file changed, 14 insertions(+), 662 deletions(-) diff --git a/src/byte_string.rs b/src/byte_string.rs index a92dbd5..1dd836a 100644 --- a/src/byte_string.rs +++ b/src/byte_string.rs @@ -381,37 +381,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes ( let first_letter = english[word_start_index]; global_index += 1; - if (word_start_index + 1) == english.len() {//The word is only one letter long (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(first_letter); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - return;//The entire string is ending, so exit the function - } else if !english[word_start_index + 1].is_ascii_alphabetic() {//The next character exists but is not a letter (special case) + if (global_index == english.len()) || (!english[global_index].is_ascii_alphabetic()) {//The word is only one letter long (special case) //Push the letter and add the lowercase special suffix (even if the letter is uppercase) pig_latin_string.push(first_letter); pig_latin_string.extend_from_slice(special_case_suffix_lower); } else if is_vowel(first_letter) {//The word is longer than a letter and starts with a vowel (special case) //As a heuristic, we consider Y to be a vowel when it is not at the start of the word - //TODO better code reuse - - //Find the end of the word - let word_end_index: usize; + //Get the slice containing the whole word let slice_to_search_for_end = &english[global_index..]; + let word_slice: &[u8]; if let Some(found_end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word global_index += found_end_of_word_index; - 
word_end_index = global_index; + word_slice = &english[word_start_index..global_index]; } else {//The string ended - pig_latin_string.extend_from_slice(slice_to_search_for_end); - if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is - pig_latin_string.extend_from_slice(special_case_suffix_upper); - } else {//Word is entirely lowercase, or its first letter is uppercase only - pig_latin_string.extend_from_slice(special_case_suffix_lower); - } - return; + global_index = english.len(); + word_slice = &english[word_start_index..];//Slice from the first letter; slice_to_search_for_end starts after it } - let word_slice = &english[word_start_index..word_end_index]; + //Translate the word and push it pig_latin_string.extend_from_slice(word_slice); if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is pig_latin_string.extend_from_slice(special_case_suffix_upper); @@ -419,43 +407,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes ( pig_latin_string.extend_from_slice(special_case_suffix_lower); } } else {//The word is longer than a letter and doesn't start with a vowel - //Find the first vowel + //Find the first vowel; we assume the word actually has a vowel in it let first_vowel_index: usize; let slice_to_search_for_vowel = &english[global_index..]; - /*if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { !x.is_ascii_alphabetic() || is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word - if !english[first_vowel_of_word_index].is_ascii_alphabetic() {//The word ended and we never found a vowel - //We used to still check for a contraction suffix after this, but all valid words should have a vowel, - //including contractions. 
Really anything goes at this point, and this is the fastest and easiest thing to do - let word_slice = &english[word_start_index..(global_index + first_vowel_of_word_index)]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - continue; - } - global_index += first_vowel_of_word_index; - first_vowel_index = global_index; - } else {//This string ended and we never found a vowel - let word_slice = &english[word_start_index..]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - return; - }*/ if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word global_index += first_vowel_of_word_index; - first_vowel_index = global_index; } else {//This string ended and we never found a vowel return;//Just give up } + first_vowel_index = global_index; //Find the end of the word let word_end_index: usize; let slice_to_search_for_end = &english[global_index..]; if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word global_index += end_of_word_index; - word_end_index = global_index; } else {//The string ended global_index = english.len(); - word_end_index = global_index; } + word_end_index = global_index; //Translate the word //TODO improve code reuse here @@ -501,462 +471,20 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes ( //Push the normal suffix (lowercase) pig_latin_string.extend_from_slice(suffix_lower); } - - //Don't go on if we reached the end of the string during the word - if word_end_index == english.len() { - return; - } - } - - //At this point, global_index contains the index to the next character to check - } - - //Copies contraction suffixes, if present - if english[global_index] == b'\'' {//TODO if this is true we can 
also skip the regular inter-word loop on the next iteration - let mut start_of_contraction_suffix_index: usize = global_index;//Inclusive - global_index += 1; - loop { - if global_index == english.len() { - //Copy all of the characters so far (all that remain) and return - let remaining_characters_slice = &english[start_of_contraction_suffix_index..]; - pig_latin_string.extend_from_slice(remaining_characters_slice); - return; - } - - if !english[global_index].is_ascii_alphabetic() {//End of the contraction suffix - break; - } - - global_index += 1; - } - //Copy the contraction suffix as-is - let contraction_suffix_slice = &english[start_of_contraction_suffix_index..global_index]; - pig_latin_string.extend_from_slice(contraction_suffix_slice); - } - } -} - -//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime -pub(crate) fn translate_with_style_lower_and_upper_suffixes_old_inprogress ( - english: &[u8], - suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8], - pig_latin_string: &mut Vec:: -) { - if english.is_empty() { - return; - } - - let mut global_index: usize = 0; - loop { - //Copies characters in-between words - //TODO this could probably be optimized with vector instructions - { - //Fastest so far :) - let mut start_of_in_between_words_index: usize = global_index;//Inclusive - loop { - if english[global_index].is_ascii_alphabetic() {//Start of a word - break; - } - - global_index += 1; - if global_index == english.len() { - //Copy all of the characters so far (all that remain) and return - let remaining_characters_slice = &english[start_of_in_between_words_index..]; - pig_latin_string.extend_from_slice(remaining_characters_slice); - return; - } - } - //Copy the characters in-between words as-is - let in_between_words_characters_slice = &english[start_of_in_between_words_index..global_index]; - 
pig_latin_string.extend_from_slice(in_between_words_characters_slice); - - //The speed of this is in-between - /* - let mut start_of_in_between_words_index: usize = global_index;//Inclusive - loop { - if english[global_index].is_ascii_alphabetic() {//Start of a word - break; - } - - pig_latin_string.push(english[global_index]); - global_index += 1; - if global_index == english.len() { - return; - } - } - */ - - //This is the slowest - /*let slice_to_search = &english[global_index..]; - if let Some(rel_letter_index) = slice_to_search.iter().position(|&character| character.is_ascii_alphabetic()) { - let abs_letter_index = global_index + rel_letter_index; - let slice_to_copy = &english[global_index..abs_letter_index]; - pig_latin_string.extend_from_slice(slice_to_copy); - global_index = abs_letter_index; - } else { - pig_latin_string.extend_from_slice(slice_to_search); - return; - } - */ - } - - //Translates the current word and pushes the result - { - - //TESTING just call the original function for now - /* - let mut word_start_index: usize = global_index;//Inclusive - { - while global_index < english.len() { - if !english[global_index].is_ascii_alphabetic() { - break; - } - - global_index += 1; - } - let word_slice: &[u8] = &english[word_start_index..global_index]; - translate_word_with_style_reuse_buffers ( - word_slice, - suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper, - pig_latin_string - ); - if global_index == english.len() { return; } - }*/ - - //New - let word_start_index = global_index; - let first_letter = english[word_start_index]; - global_index += 1; - - if (word_start_index + 1) == english.len() {//The word is only one letter long (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(first_letter); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - return;//The entire string is ending, so exit the function - } else if 
!english[word_start_index + 1].is_ascii_alphabetic() {//The next character exists but is not a letter (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(first_letter); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - } else if is_vowel(first_letter) {//The word is longer than a letter and starts with a vowel (special case) - //As a heuristic, we consider Y to be a vowel when it is not at the start of the word - - //TODO better code reuse - - //Find the end of the word - let word_end_index: usize; - let slice_to_search_for_end = &english[global_index..]; - if let Some(found_end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word - global_index += found_end_of_word_index; - word_end_index = global_index; - } else {//The string ended - pig_latin_string.extend_from_slice(slice_to_search_for_end); - if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is - pig_latin_string.extend_from_slice(special_case_suffix_upper); - } else {//Word is entirely lowercase, or its first letter is uppercase only - pig_latin_string.extend_from_slice(special_case_suffix_lower); - } - return; - } - - let word_slice = &english[word_start_index..word_end_index]; - pig_latin_string.extend_from_slice(word_slice); - if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is - pig_latin_string.extend_from_slice(special_case_suffix_upper); - } else {//Word is entirely lowercase, or its first letter is uppercase only - pig_latin_string.extend_from_slice(special_case_suffix_lower); - } - } else {//The word is longer than a letter and doesn't start with a vowel - //Find the first vowel's index - /*loop { - if is_vowel(english[global_index]) || 
is_y(english[global_index]) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word - break; - } - - global_index += 1; - if global_index == english.len() { - return;//todo!();//TODO//No vowels in the word - } - } - let first_vowel_index: usize = global_index; - */ - - - //FIXME what if the word has no vowels, we go into the next word, and then enounter a vowel? - let first_vowel_index: usize; - let slice_to_search_for_vowel = &english[global_index..]; - if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) || !x.is_ascii_alphabetic() }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word - if english[first_vowel_of_word_index].is_ascii_alphabetic() { - global_index += first_vowel_of_word_index; - first_vowel_index = global_index; - - let word_end_index: usize; - let slice_to_search_for_end = &english[global_index..]; - if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word - global_index += end_of_word_index; - word_end_index = global_index; - } else {//The string ended - global_index = english.len(); - word_end_index = global_index; - } - - //TODO improve code reuse here - if fast_is_ascii_uppercase(first_letter) {//Check if the first letter is uppercase - if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..word_end_index]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal suffix (uppercase) - pig_latin_string.extend_from_slice(suffix_upper); - } else {//Word starts with 
an uppercase letter, but is otherwise lowercase - //Push the vowel, matching the starting case of the original word - pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index])); - - //Push all letters after the vowel - let after_vowel_slice = &english[(first_vowel_index + 1)..word_end_index]; - pig_latin_string.extend_from_slice(after_vowel_slice); - - //Push the first starting consonant, which should be lowercase now - pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index])); - - //Push the remaining starting consonants - let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index]; - pig_latin_string.extend_from_slice(after_start_to_vowel_slice); - - //Push the normal suffix - pig_latin_string.extend_from_slice(suffix_lower); - } - } else {//Word is entirely lowercase - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..word_end_index]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal suffix (lowercase) - pig_latin_string.extend_from_slice(suffix_lower); - } - } else { - let word_slice = &english[word_start_index..(global_index + first_vowel_of_word_index)]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - //todo!();//We should skip the next section - } - } else {//This word has no vowels - /*if let Some(end_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word - let word_slice = &english[word_start_index..(global_index + end_of_word_index)]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - todo!();//We should skip the next section - } else {//The string ended - /*let 
word_slice = &english[word_start_index..]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - return; - */ - todo!(); - } - */ - //return; - let word_slice = &english[word_start_index..]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - return; - } - /*let first_vowel_index: usize; - let slice_to_search_for_vowel = &english[global_index..]; - if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word - global_index += first_vowel_of_word_index; - first_vowel_index = global_index; - } else {//This word has no vowels - if let Some(end_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word - let word_slice = &english[word_start_index..(global_index + end_of_word_index)]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - todo!();//We should skip the next section - } else {//The string ended - /*let word_slice = &english[word_start_index..]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(suffix_lower); - return; - */ - todo!(); - } - //return; - } - */ - - //Find the end of the word - /*loop { - if !english[global_index].is_ascii_alphabetic() { - break; - } - - global_index += 1; - if global_index == english.len() { - return;//todo!();//TODO//Word ended - } - } - let word_end_index: usize = global_index; - */ - /* - let word_end_index: usize; - let slice_to_search_for_end = &english[global_index..]; - if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word - global_index += end_of_word_index; - word_end_index = global_index; - } else {//The string ended - 
global_index = english.len(); - word_end_index = global_index; - } - - //TODO improve code reuse here - if fast_is_ascii_uppercase(first_letter) {//Check if the first letter is uppercase - if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..word_end_index]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal suffix (uppercase) - pig_latin_string.extend_from_slice(suffix_upper); - } else {//Word starts with an uppercase letter, but is otherwise lowercase - //Push the vowel, matching the starting case of the original word - pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index])); - - //Push all letters after the vowel - let after_vowel_slice = &english[(first_vowel_index + 1)..word_end_index]; - pig_latin_string.extend_from_slice(after_vowel_slice); - - //Push the first starting consonant, which should be lowercase now - pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index])); - - //Push the remaining starting consonants - let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index]; - pig_latin_string.extend_from_slice(after_start_to_vowel_slice); - - //Push the normal suffix - pig_latin_string.extend_from_slice(suffix_lower); - } - } else {//Word is entirely lowercase - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..word_end_index]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal 
suffix (lowercase) - pig_latin_string.extend_from_slice(suffix_lower); - } - */ } - //OLD don't use - /* - if (global_index + 1) == english.len() {//The word is only one letter long (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(first_letter); - pig_latin_string.extend_from_slice(special_case_suffix_lower); + //Don't go on if we reached the end of the string during the word + if global_index == english.len() { return; } - let second_character = english[global_index + 1]; - if !second_character.is_ascii_alphabetic() {//The word is only one letter long (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(first_letter); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - global_index += 1; - } else {//The word is more than one letter long - let mut word_start_index: usize = global_index;//Inclusive - global_index += 1; - //TODO case if word starts with vowel - if is_vowel(first_letter) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word - //Copy all remaining letters in the word and append the special suffix - //TODO what about uppercase words? 
- /*loop { - if global_index == english.len() { - //Copy all of the characters so far (all that remain) and return - let remaining_characters_slice = &english[start_of_contraction_suffix_index..]; - pig_latin_string.extend_from_slice(remaining_characters_slice); - return; - } - - if !english[global_index].is_ascii_alphabetic() {//End of the contraction suffix - break; - } - - global_index += 1; - } - */ - } else { - //Find the first vowel's index - loop { - if is_vowel(english[global_index]) || is_y(english[global_index]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word - break; - } - - global_index += 1; - if global_index == english.len() { - todo!();//No vowels in the word - } - } - let first_vowel_index: usize = global_index; - - //Find the end of the word - loop { - if !english[global_index].is_ascii_alphabetic() { - break; - } - - global_index += 1; - if global_index == english.len() { - todo!();//Word ended - } - } - let word_end_index: usize = global_index; - - //Translate the word - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..word_end_index]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal suffix - pig_latin_string.extend_from_slice(suffix_lower); - }*/ - /*let mut word_start_index: usize = global_index;//Inclusive - loop { - - } - */ - //} - /**/ - //let mut first_vowel_index: usize = 0xDEADBEEF;//Also exclusive end of starting consonants - /*loop { - break;//TODO - }*/ - /**/ - //TODO Wrap-up this section here + //At this point, global_index contains the index to the next character to check } //Copies contraction suffixes, if present if english[global_index] == b'\'' {//TODO if this is true we can also skip the regular inter-word loop on the next iteration let mut 
start_of_contraction_suffix_index: usize = global_index;//Inclusive - global_index += 1; + global_index += 1;//We skip over the apostrophe for the loop below, but we still want to copy it in the end loop { if global_index == english.len() { //Copy all of the characters so far (all that remain) and return @@ -978,182 +506,6 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes_old_inprogress ( } } -//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime -pub(crate) fn translate_with_style_lower_and_upper_suffixes_abandoned ( - english: &[u8], - suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8], - pig_latin_string: &mut Vec:: -) { - if english.is_empty() { - return; - } - - #[derive(Debug, Clone, Copy)] - enum State { - InBetweenWords, - InFirstLetterOfWord, - InRegularWord, - FinishingRegularWord, - InWordStartingWithVowel, - InContractionSuffix - }; - - let mut current_state: State = State::InBetweenWords; - let mut word_start_index: usize = 0xDEADBEEF;//Inclusive - let mut first_vowel_index: usize = 0xDEADBEEF;//Also exclusive end of starting consonants - let mut start_of_in_between_words_index: usize = 0;//Inclusive; Also exclusive end of word - - for i in 0..english.len() { - //println!("{:?}", current_state); - match current_state {//FIXME this is a bottleneck; instead have two nested infinite loops within another infinite loops to reduce the size of the match - State::InBetweenWords => { - if english[i].is_ascii_alphabetic() {//Start of a word - //This is the start of the word, so copy all non-word characters up to this point since the last word - let in_between_words_characters_slice = &english[start_of_in_between_words_index..i]; - pig_latin_string.extend_from_slice(in_between_words_characters_slice); - - //Setup things to begin processing the word - word_start_index = i; - current_state = State::InFirstLetterOfWord; - } - }, - 
State::InFirstLetterOfWord => { - if english[i].is_ascii_alphabetic() {//This word is more than one letter - if is_vowel(english[word_start_index]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word - current_state = State::InWordStartingWithVowel; - } else { - if is_vowel(english[i]) || is_y(english[i]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word - first_vowel_index = i; - current_state = State::FinishingRegularWord; - } else { - current_state = State::InRegularWord; - } - } - } else {//This is a one-letter word (special case) - //Push the letter and add the lowercase special suffix (even if the letter is uppercase) - pig_latin_string.push(english[word_start_index]); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - - //Decide the next state - if english[i] == b'\'' { - current_state = State::InContractionSuffix; - } else { - current_state = State::InBetweenWords; - } - start_of_in_between_words_index = i; - } - }, - State::InRegularWord => { - if is_vowel(english[i]) || is_y(english[i]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word - first_vowel_index = i; - current_state = State::FinishingRegularWord; - } - //TODO handle the case where the word ends here before a vowel is encountered - }, - State::FinishingRegularWord => { - if !english[i].is_ascii_alphabetic() {//End of word - //We now need to actually translate the word - if fast_is_ascii_uppercase(english[word_start_index]) {//The first letter of the word was uppercase - if fast_is_ascii_uppercase(english[first_vowel_index]) {//Heuristic: Assume the word was uppercase if the first vowel is - //TODO - } else { - //Push the vowel, matching the starting case of the original word - pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index])); - - //Push all letters after the vowel - let after_vowel_slice = &english[(first_vowel_index + 1)..i]; - 
pig_latin_string.extend_from_slice(after_vowel_slice); - - //Push the first starting consonant, which should be lowercase now - pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index])); - - //Push the remaining starting consonants - let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index]; - pig_latin_string.extend_from_slice(after_start_to_vowel_slice); - - //Push the normal suffix - pig_latin_string.extend_from_slice(suffix_lower); - } - } else {//The first letter of the word was lowercase - //Push the vowel and all letters after it - let vowel_to_end_slice = &english[first_vowel_index..i]; - pig_latin_string.extend_from_slice(vowel_to_end_slice); - - //Push the starting consonants - let start_to_vowel_slice = &english[word_start_index..first_vowel_index]; - pig_latin_string.extend_from_slice(start_to_vowel_slice); - - //Push the normal suffix - pig_latin_string.extend_from_slice(suffix_lower); - } - - //Decide the next state - if english[i] == b'\'' { - current_state = State::InContractionSuffix; - } else { - current_state = State::InBetweenWords; - } - start_of_in_between_words_index = i; - } - } - State::InWordStartingWithVowel => { - if !english[i].is_ascii_alphabetic() {//End of word - //We now need to actually translate the word - //TODO handle uppercase - let word_slice = &english[word_start_index..i]; - pig_latin_string.extend_from_slice(word_slice); - pig_latin_string.extend_from_slice(special_case_suffix_lower); - - //Decide the next state - if english[i] == b'\'' { - current_state = State::InContractionSuffix; - } else { - current_state = State::InBetweenWords; - } - start_of_in_between_words_index = i; - } - }, - State::InContractionSuffix => { - if !english[i].is_ascii_alphabetic() {//End of contraction suffix - //Push the contraction suffix - let contraction_suffix_slice = &english[start_of_in_between_words_index..i]; - pig_latin_string.extend_from_slice(contraction_suffix_slice); - - //We're back 
in-between words - start_of_in_between_words_index = i; - current_state = State::InBetweenWords; - } - }, - } - } - - //Wrap-up based on the state we ended the loop in - match current_state { - State::InBetweenWords => { - //Copy remaining characters - let remaining_characters_slice = &english[start_of_in_between_words_index..]; - pig_latin_string.extend_from_slice(remaining_characters_slice); - }, - State::InFirstLetterOfWord => { - //TODO - }, - State::InRegularWord => { - //TODO - }, - State::FinishingRegularWord => { - //TODO - } - State::InWordStartingWithVowel => { - //TODO - }, - State::InContractionSuffix => { - //Push the contraction suffix - let contraction_suffix_slice = &english[start_of_in_between_words_index..]; - pig_latin_string.extend_from_slice(contraction_suffix_slice); - }, - } -} - //Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime pub(crate) fn translate_with_style_lower_and_upper_suffixes_old ( english: &[u8],