Optimizations and cleanup

main
John Zacarias Jekel 1 year ago
parent 1dabce5224
commit 0179a99a32
  1. 676
      src/byte_string.rs

@ -381,37 +381,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
let first_letter = english[word_start_index];
global_index += 1;
if (word_start_index + 1) == english.len() {//The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
return;//The entire string is ending, so exit the function
} else if !english[word_start_index + 1].is_ascii_alphabetic() {//The next character exists but is not a letter (special case)
if (global_index == english.len()) || (!english[global_index].is_ascii_alphabetic()) {//The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
} else if is_vowel(first_letter) {//The word is longer than a letter and starts with a vowel (special case)
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//TODO better code reuse
//Find the end of the word
let word_end_index: usize;
//Get the slice containing the whole word
let slice_to_search_for_end = &english[global_index..];
let word_slice: &[u8];
if let Some(found_end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
global_index += found_end_of_word_index;
word_end_index = global_index;
word_slice = &english[word_start_index..global_index];
} else {//The string ended
pig_latin_string.extend_from_slice(slice_to_search_for_end);
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string.extend_from_slice(special_case_suffix_upper);
} else {//Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string.extend_from_slice(special_case_suffix_lower);
}
return;
global_index = english.len();
word_slice = slice_to_search_for_end;
}
let word_slice = &english[word_start_index..word_end_index];
//Translate the word and push it
pig_latin_string.extend_from_slice(word_slice);
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string.extend_from_slice(special_case_suffix_upper);
@ -419,43 +407,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
pig_latin_string.extend_from_slice(special_case_suffix_lower);
}
} else {//The word is longer than a letter and doesn't start with a vowel
//Find the first vowel
//Find the first vowel; we assume the word actually has a vowel in it
let first_vowel_index: usize;
let slice_to_search_for_vowel = &english[global_index..];
/*if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { !x.is_ascii_alphabetic() || is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
if !english[first_vowel_of_word_index].is_ascii_alphabetic() {//The word ended and we never found a vowel
//We used to still check for a contraction suffix after this, but all valid words should have a vowel,
//including contractions. Really anything goes at this point, and this is the fastest and easiest thing to do
let word_slice = &english[word_start_index..(global_index + first_vowel_of_word_index)];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
continue;
}
global_index += first_vowel_of_word_index;
first_vowel_index = global_index;
} else {//This string ended and we never found a vowel
let word_slice = &english[word_start_index..];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
return;
}*/
if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
global_index += first_vowel_of_word_index;
first_vowel_index = global_index;
} else {//This string ended and we never found a vowel
return;//Just give up
}
first_vowel_index = global_index;
//Find the end of the word
let word_end_index: usize;
let slice_to_search_for_end = &english[global_index..];
if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
global_index += end_of_word_index;
word_end_index = global_index;
} else {//The string ended
global_index = english.len();
word_end_index = global_index;
}
word_end_index = global_index;
//Translate the word
//TODO improve code reuse here
@ -501,462 +471,20 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
//Push the normal suffix (lowercase)
pig_latin_string.extend_from_slice(suffix_lower);
}
//Don't go on if we reached the end of the string during the word
if word_end_index == english.len() {
return;
}
}
//At this point, global_index contains the index to the next character to check
}
//Copies contraction suffixes, if present
if english[global_index] == b'\'' {//TODO if this is true we can also skip the regular inter-word loop on the next iteration
let mut start_of_contraction_suffix_index: usize = global_index;//Inclusive
global_index += 1;
loop {
if global_index == english.len() {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = &english[start_of_contraction_suffix_index..];
pig_latin_string.extend_from_slice(remaining_characters_slice);
return;
}
if !english[global_index].is_ascii_alphabetic() {//End of the contraction suffix
break;
}
global_index += 1;
}
//Copy the contraction suffix as-is
let contraction_suffix_slice = &english[start_of_contraction_suffix_index..global_index];
pig_latin_string.extend_from_slice(contraction_suffix_slice);
}
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub(crate) fn translate_with_style_lower_and_upper_suffixes_old_inprogress (
english: &[u8],
suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8],
pig_latin_string: &mut Vec::<u8>
) {
if english.is_empty() {
return;
}
let mut global_index: usize = 0;
loop {
//Copies characters in-between words
//TODO this could probably be optimized with vector instructions
{
//Fastest so far :)
let mut start_of_in_between_words_index: usize = global_index;//Inclusive
loop {
if english[global_index].is_ascii_alphabetic() {//Start of a word
break;
}
global_index += 1;
if global_index == english.len() {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = &english[start_of_in_between_words_index..];
pig_latin_string.extend_from_slice(remaining_characters_slice);
return;
}
}
//Copy the characters in-between words as-is
let in_between_words_characters_slice = &english[start_of_in_between_words_index..global_index];
pig_latin_string.extend_from_slice(in_between_words_characters_slice);
//The speed of this is in-between
/*
let mut start_of_in_between_words_index: usize = global_index;//Inclusive
loop {
if english[global_index].is_ascii_alphabetic() {//Start of a word
break;
}
pig_latin_string.push(english[global_index]);
global_index += 1;
if global_index == english.len() {
return;
}
}
*/
//This is the slowest
/*let slice_to_search = &english[global_index..];
if let Some(rel_letter_index) = slice_to_search.iter().position(|&character| character.is_ascii_alphabetic()) {
let abs_letter_index = global_index + rel_letter_index;
let slice_to_copy = &english[global_index..abs_letter_index];
pig_latin_string.extend_from_slice(slice_to_copy);
global_index = abs_letter_index;
} else {
pig_latin_string.extend_from_slice(slice_to_search);
return;
}
*/
}
//Translates the current word and pushes the result
{
//TESTING just call the original function for now
/*
let mut word_start_index: usize = global_index;//Inclusive
{
while global_index < english.len() {
if !english[global_index].is_ascii_alphabetic() {
break;
}
global_index += 1;
}
let word_slice: &[u8] = &english[word_start_index..global_index];
translate_word_with_style_reuse_buffers (
word_slice,
suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper,
pig_latin_string
);
if global_index == english.len() { return; }
}*/
//New
let word_start_index = global_index;
let first_letter = english[word_start_index];
global_index += 1;
if (word_start_index + 1) == english.len() {//The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
return;//The entire string is ending, so exit the function
} else if !english[word_start_index + 1].is_ascii_alphabetic() {//The next character exists but is not a letter (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
} else if is_vowel(first_letter) {//The word is longer than a letter and starts with a vowel (special case)
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//TODO better code reuse
//Find the end of the word
let word_end_index: usize;
let slice_to_search_for_end = &english[global_index..];
if let Some(found_end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
global_index += found_end_of_word_index;
word_end_index = global_index;
} else {//The string ended
pig_latin_string.extend_from_slice(slice_to_search_for_end);
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string.extend_from_slice(special_case_suffix_upper);
} else {//Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string.extend_from_slice(special_case_suffix_lower);
}
return;
}
let word_slice = &english[word_start_index..word_end_index];
pig_latin_string.extend_from_slice(word_slice);
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string.extend_from_slice(special_case_suffix_upper);
} else {//Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string.extend_from_slice(special_case_suffix_lower);
}
} else {//The word is longer than a letter and doesn't start with a vowel
//Find the first vowel's index
/*loop {
if is_vowel(english[global_index]) || is_y(english[global_index]) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
break;
}
global_index += 1;
if global_index == english.len() {
return;//todo!();//TODO//No vowels in the word
}
}
let first_vowel_index: usize = global_index;
*/
//FIXME what if the word has no vowels, we go into the next word, and then enounter a vowel?
let first_vowel_index: usize;
let slice_to_search_for_vowel = &english[global_index..];
if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) || !x.is_ascii_alphabetic() }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
if english[first_vowel_of_word_index].is_ascii_alphabetic() {
global_index += first_vowel_of_word_index;
first_vowel_index = global_index;
let word_end_index: usize;
let slice_to_search_for_end = &english[global_index..];
if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
global_index += end_of_word_index;
word_end_index = global_index;
} else {//The string ended
global_index = english.len();
word_end_index = global_index;
}
//TODO improve code reuse here
if fast_is_ascii_uppercase(first_letter) {//Check if the first letter is uppercase
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..word_end_index];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix (uppercase)
pig_latin_string.extend_from_slice(suffix_upper);
} else {//Word starts with an uppercase letter, but is otherwise lowercase
//Push the vowel, matching the starting case of the original word
pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index]));
//Push all letters after the vowel
let after_vowel_slice = &english[(first_vowel_index + 1)..word_end_index];
pig_latin_string.extend_from_slice(after_vowel_slice);
//Push the first starting consonant, which should be lowercase now
pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index]));
//Push the remaining starting consonants
let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index];
pig_latin_string.extend_from_slice(after_start_to_vowel_slice);
//Push the normal suffix
pig_latin_string.extend_from_slice(suffix_lower);
}
} else {//Word is entirely lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..word_end_index];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix (lowercase)
pig_latin_string.extend_from_slice(suffix_lower);
}
} else {
let word_slice = &english[word_start_index..(global_index + first_vowel_of_word_index)];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
//todo!();//We should skip the next section
}
} else {//This word has no vowels
/*if let Some(end_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
let word_slice = &english[word_start_index..(global_index + end_of_word_index)];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
todo!();//We should skip the next section
} else {//The string ended
/*let word_slice = &english[word_start_index..];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
return;
*/
todo!();
}
*/
//return;
let word_slice = &english[word_start_index..];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
return;
}
/*let first_vowel_index: usize;
let slice_to_search_for_vowel = &english[global_index..];
if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { is_vowel(x) || is_y(x) }) {//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
global_index += first_vowel_of_word_index;
first_vowel_index = global_index;
} else {//This word has no vowels
if let Some(end_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
let word_slice = &english[word_start_index..(global_index + end_of_word_index)];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
todo!();//We should skip the next section
} else {//The string ended
/*let word_slice = &english[word_start_index..];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(suffix_lower);
return;
*/
todo!();
}
//return;
}
*/
//Find the end of the word
/*loop {
if !english[global_index].is_ascii_alphabetic() {
break;
}
global_index += 1;
if global_index == english.len() {
return;//todo!();//TODO//Word ended
}
}
let word_end_index: usize = global_index;
*/
/*
let word_end_index: usize;
let slice_to_search_for_end = &english[global_index..];
if let Some(end_of_word_index) = slice_to_search_for_end.iter().position(|&x| !x.is_ascii_alphabetic()) {//We found a non-letter that ends the word
global_index += end_of_word_index;
word_end_index = global_index;
} else {//The string ended
global_index = english.len();
word_end_index = global_index;
}
//TODO improve code reuse here
if fast_is_ascii_uppercase(first_letter) {//Check if the first letter is uppercase
if fast_is_ascii_uppercase(english[word_start_index + 1]) {//As a heuristic, we consider the word to be uppercase if the second letter is
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..word_end_index];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix (uppercase)
pig_latin_string.extend_from_slice(suffix_upper);
} else {//Word starts with an uppercase letter, but is otherwise lowercase
//Push the vowel, matching the starting case of the original word
pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index]));
//Push all letters after the vowel
let after_vowel_slice = &english[(first_vowel_index + 1)..word_end_index];
pig_latin_string.extend_from_slice(after_vowel_slice);
//Push the first starting consonant, which should be lowercase now
pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index]));
//Push the remaining starting consonants
let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index];
pig_latin_string.extend_from_slice(after_start_to_vowel_slice);
//Push the normal suffix
pig_latin_string.extend_from_slice(suffix_lower);
}
} else {//Word is entirely lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..word_end_index];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix (lowercase)
pig_latin_string.extend_from_slice(suffix_lower);
}
*/
}
//OLD don't use
/*
if (global_index + 1) == english.len() {//The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
//Don't go on if we reached the end of the string during the word
if global_index == english.len() {
return;
}
let second_character = english[global_index + 1];
if !second_character.is_ascii_alphabetic() {//The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(first_letter);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
global_index += 1;
} else {//The word is more than one letter long
let mut word_start_index: usize = global_index;//Inclusive
global_index += 1;
//TODO case if word starts with vowel
if is_vowel(first_letter) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
//Copy all remaining letters in the word and append the special suffix
//TODO what about uppercase words?
/*loop {
if global_index == english.len() {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = &english[start_of_contraction_suffix_index..];
pig_latin_string.extend_from_slice(remaining_characters_slice);
return;
}
if !english[global_index].is_ascii_alphabetic() {//End of the contraction suffix
break;
}
global_index += 1;
}
*/
} else {
//Find the first vowel's index
loop {
if is_vowel(english[global_index]) || is_y(english[global_index]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
break;
}
global_index += 1;
if global_index == english.len() {
todo!();//No vowels in the word
}
}
let first_vowel_index: usize = global_index;
//Find the end of the word
loop {
if !english[global_index].is_ascii_alphabetic() {
break;
}
global_index += 1;
if global_index == english.len() {
todo!();//Word ended
}
}
let word_end_index: usize = global_index;
//Translate the word
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..word_end_index];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix
pig_latin_string.extend_from_slice(suffix_lower);
}*/
/*let mut word_start_index: usize = global_index;//Inclusive
loop {
}
*/
//}
/**/
//let mut first_vowel_index: usize = 0xDEADBEEF;//Also exclusive end of starting consonants
/*loop {
break;//TODO
}*/
/**/
//TODO Wrap-up this section here
//At this point, global_index contains the index to the next character to check
}
//Copies contraction suffixes, if present
if english[global_index] == b'\'' {//TODO if this is true we can also skip the regular inter-word loop on the next iteration
let mut start_of_contraction_suffix_index: usize = global_index;//Inclusive
global_index += 1;
global_index += 1;//We skip over the apostrophe for the loop below, but we still want to copy it in the end
loop {
if global_index == english.len() {
//Copy all of the characters so far (all that remain) and return
@ -978,182 +506,6 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes_old_inprogress (
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub(crate) fn translate_with_style_lower_and_upper_suffixes_abandoned (
english: &[u8],
suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8],
pig_latin_string: &mut Vec::<u8>
) {
if english.is_empty() {
return;
}
#[derive(Debug, Clone, Copy)]
enum State {
InBetweenWords,
InFirstLetterOfWord,
InRegularWord,
FinishingRegularWord,
InWordStartingWithVowel,
InContractionSuffix
};
let mut current_state: State = State::InBetweenWords;
let mut word_start_index: usize = 0xDEADBEEF;//Inclusive
let mut first_vowel_index: usize = 0xDEADBEEF;//Also exclusive end of starting consonants
let mut start_of_in_between_words_index: usize = 0;//Inclusive; Also exclusive end of word
for i in 0..english.len() {
//println!("{:?}", current_state);
match current_state {//FIXME this is a bottleneck; instead have two nested infinite loops within another infinite loops to reduce the size of the match
State::InBetweenWords => {
if english[i].is_ascii_alphabetic() {//Start of a word
//This is the start of the word, so copy all non-word characters up to this point since the last word
let in_between_words_characters_slice = &english[start_of_in_between_words_index..i];
pig_latin_string.extend_from_slice(in_between_words_characters_slice);
//Setup things to begin processing the word
word_start_index = i;
current_state = State::InFirstLetterOfWord;
}
},
State::InFirstLetterOfWord => {
if english[i].is_ascii_alphabetic() {//This word is more than one letter
if is_vowel(english[word_start_index]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
current_state = State::InWordStartingWithVowel;
} else {
if is_vowel(english[i]) || is_y(english[i]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
first_vowel_index = i;
current_state = State::FinishingRegularWord;
} else {
current_state = State::InRegularWord;
}
}
} else {//This is a one-letter word (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string.push(english[word_start_index]);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
//Decide the next state
if english[i] == b'\'' {
current_state = State::InContractionSuffix;
} else {
current_state = State::InBetweenWords;
}
start_of_in_between_words_index = i;
}
},
State::InRegularWord => {
if is_vowel(english[i]) || is_y(english[i]) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
first_vowel_index = i;
current_state = State::FinishingRegularWord;
}
//TODO handle the case where the word ends here before a vowel is encountered
},
State::FinishingRegularWord => {
if !english[i].is_ascii_alphabetic() {//End of word
//We now need to actually translate the word
if fast_is_ascii_uppercase(english[word_start_index]) {//The first letter of the word was uppercase
if fast_is_ascii_uppercase(english[first_vowel_index]) {//Heuristic: Assume the word was uppercase if the first vowel is
//TODO
} else {
//Push the vowel, matching the starting case of the original word
pig_latin_string.push(fast_to_ascii_uppercase(english[first_vowel_index]));
//Push all letters after the vowel
let after_vowel_slice = &english[(first_vowel_index + 1)..i];
pig_latin_string.extend_from_slice(after_vowel_slice);
//Push the first starting consonant, which should be lowercase now
pig_latin_string.push(fast_to_ascii_lowercase(english[word_start_index]));
//Push the remaining starting consonants
let after_start_to_vowel_slice = &english[(word_start_index + 1)..first_vowel_index];
pig_latin_string.extend_from_slice(after_start_to_vowel_slice);
//Push the normal suffix
pig_latin_string.extend_from_slice(suffix_lower);
}
} else {//The first letter of the word was lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = &english[first_vowel_index..i];
pig_latin_string.extend_from_slice(vowel_to_end_slice);
//Push the starting consonants
let start_to_vowel_slice = &english[word_start_index..first_vowel_index];
pig_latin_string.extend_from_slice(start_to_vowel_slice);
//Push the normal suffix
pig_latin_string.extend_from_slice(suffix_lower);
}
//Decide the next state
if english[i] == b'\'' {
current_state = State::InContractionSuffix;
} else {
current_state = State::InBetweenWords;
}
start_of_in_between_words_index = i;
}
}
State::InWordStartingWithVowel => {
if !english[i].is_ascii_alphabetic() {//End of word
//We now need to actually translate the word
//TODO handle uppercase
let word_slice = &english[word_start_index..i];
pig_latin_string.extend_from_slice(word_slice);
pig_latin_string.extend_from_slice(special_case_suffix_lower);
//Decide the next state
if english[i] == b'\'' {
current_state = State::InContractionSuffix;
} else {
current_state = State::InBetweenWords;
}
start_of_in_between_words_index = i;
}
},
State::InContractionSuffix => {
if !english[i].is_ascii_alphabetic() {//End of contraction suffix
//Push the contraction suffix
let contraction_suffix_slice = &english[start_of_in_between_words_index..i];
pig_latin_string.extend_from_slice(contraction_suffix_slice);
//We're back in-between words
start_of_in_between_words_index = i;
current_state = State::InBetweenWords;
}
},
}
}
//Wrap-up based on the state we ended the loop in
match current_state {
State::InBetweenWords => {
//Copy remaining characters
let remaining_characters_slice = &english[start_of_in_between_words_index..];
pig_latin_string.extend_from_slice(remaining_characters_slice);
},
State::InFirstLetterOfWord => {
//TODO
},
State::InRegularWord => {
//TODO
},
State::FinishingRegularWord => {
//TODO
}
State::InWordStartingWithVowel => {
//TODO
},
State::InContractionSuffix => {
//Push the contraction suffix
let contraction_suffix_slice = &english[start_of_in_between_words_index..];
pig_latin_string.extend_from_slice(contraction_suffix_slice);
},
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub(crate) fn translate_with_style_lower_and_upper_suffixes_old (
english: &[u8],

Loading…
Cancel
Save