@ -381,37 +381,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
let first_letter = english [ word_start_index ] ;
global_index + = 1 ;
if ( word_start_index + 1 ) = = english . len ( ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
return ; //The entire string is ending, so exit the function
} else if ! english [ word_start_index + 1 ] . is_ascii_alphabetic ( ) { //The next character exists but is not a letter (special case)
if ( global_index = = english . len ( ) ) | | ( ! english [ global_index ] . is_ascii_alphabetic ( ) ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
} else if is_vowel ( first_letter ) { //The word is longer than a letter and starts with a vowel (special case)
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//TODO better code reuse
//Find the end of the word
let word_end_index : usize ;
//Get the slice containing the whole word
let slice_to_search_for_end = & english [ global_index .. ] ;
let word_slice : & [ u8 ] ;
if let Some ( found_end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = found_end_of_word_index ;
word_end_index = global_index ;
word_slice = & english [ word_start_index .. global_index ] ;
} else { //The string ended
pig_latin_string . extend_from_slice ( slice_to_search_for_end ) ;
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string . extend_from_slice ( special_case_suffix_upper ) ;
} else { //Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
}
return ;
global_index = english . len ( ) ;
word_slice = slice_to_search_for_end ;
}
let word_slice = & english [ word_start_index .. word_end_index ] ;
//Translate the word and push it
pig_latin_string . extend_from_slice ( word_slice ) ;
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string . extend_from_slice ( special_case_suffix_upper ) ;
@ -419,43 +407,25 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
}
} else { //The word is longer than a letter and doesn't start with a vowel
//Find the first vowel
//Find the first vowel; we assume the word actually has a vowel in it
let first_vowel_index : usize ;
let slice_to_search_for_vowel = & english [ global_index .. ] ;
/* if let Some(first_vowel_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| { !x.is_ascii_alphabetic() || is_vowel(x) || is_y(x) }) { / / As a heuristic, we consider Y to be a vowel when it is not at the start of the word
if ! english [ first_vowel_of_word_index ] . is_ascii_alphabetic ( ) { //The word ended and we never found a vowel
//We used to still check for a contraction suffix after this, but all valid words should have a vowel,
//including contractions. Really anything goes at this point, and this is the fastest and easiest thing to do
let word_slice = & english [ word_start_index .. ( global_index + first_vowel_of_word_index ) ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
continue ;
}
global_index + = first_vowel_of_word_index ;
first_vowel_index = global_index ;
} else { //This string ended and we never found a vowel
let word_slice = & english [ word_start_index .. ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
return ;
} * /
if let Some ( first_vowel_of_word_index ) = slice_to_search_for_vowel . iter ( ) . position ( | & x | { is_vowel ( x ) | | is_y ( x ) } ) { //As a heuristic, we consider Y to be a vowel when it is not at the start of the word
global_index + = first_vowel_of_word_index ;
first_vowel_index = global_index ;
} else { //This string ended and we never found a vowel
return ; //Just give up
}
first_vowel_index = global_index ;
//Find the end of the word
let word_end_index : usize ;
let slice_to_search_for_end = & english [ global_index .. ] ;
if let Some ( end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = end_of_word_index ;
word_end_index = global_index ;
} else { //The string ended
global_index = english . len ( ) ;
word_end_index = global_index ;
}
word_end_index = global_index ;
//Translate the word
//TODO improve code reuse here
@ -501,462 +471,20 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
//Push the normal suffix (lowercase)
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
//Don't go on if we reached the end of the string during the word
if word_end_index = = english . len ( ) {
return ;
}
}
//At this point, global_index contains the index to the next character to check
}
//Copies contraction suffixes, if present
if english [ global_index ] = = b'\'' { //TODO if this is true we can also skip the regular inter-word loop on the next iteration
let mut start_of_contraction_suffix_index : usize = global_index ; //Inclusive
global_index + = 1 ;
loop {
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = & english [ start_of_contraction_suffix_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
return ;
}
if ! english [ global_index ] . is_ascii_alphabetic ( ) { //End of the contraction suffix
break ;
}
global_index + = 1 ;
}
//Copy the contraction suffix as-is
let contraction_suffix_slice = & english [ start_of_contraction_suffix_index .. global_index ] ;
pig_latin_string . extend_from_slice ( contraction_suffix_slice ) ;
}
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub ( crate ) fn translate_with_style_lower_and_upper_suffixes_old_inprogress (
english : & [ u8 ] ,
suffix_lower : & [ u8 ] , special_case_suffix_lower : & [ u8 ] , suffix_upper : & [ u8 ] , special_case_suffix_upper : & [ u8 ] ,
pig_latin_string : & mut Vec ::< u8 >
) {
if english . is_empty ( ) {
return ;
}
let mut global_index : usize = 0 ;
loop {
//Copies characters in-between words
//TODO this could probably be optimized with vector instructions
{
//Fastest so far :)
let mut start_of_in_between_words_index : usize = global_index ; //Inclusive
loop {
if english [ global_index ] . is_ascii_alphabetic ( ) { //Start of a word
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = & english [ start_of_in_between_words_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
return ;
}
}
//Copy the characters in-between words as-is
let in_between_words_characters_slice = & english [ start_of_in_between_words_index .. global_index ] ;
pig_latin_string . extend_from_slice ( in_between_words_characters_slice ) ;
//The speed of this is in-between
/*
let mut start_of_in_between_words_index : usize = global_index ; //Inclusive
loop {
if english [ global_index ] . is_ascii_alphabetic ( ) { //Start of a word
break ;
}
pig_latin_string . push ( english [ global_index ] ) ;
global_index + = 1 ;
if global_index = = english . len ( ) {
return ;
}
}
* /
//This is the slowest
/* let slice_to_search = &english[global_index..];
if let Some ( rel_letter_index ) = slice_to_search . iter ( ) . position ( | & character | character . is_ascii_alphabetic ( ) ) {
let abs_letter_index = global_index + rel_letter_index ;
let slice_to_copy = & english [ global_index .. abs_letter_index ] ;
pig_latin_string . extend_from_slice ( slice_to_copy ) ;
global_index = abs_letter_index ;
} else {
pig_latin_string . extend_from_slice ( slice_to_search ) ;
return ;
}
* /
}
//Translates the current word and pushes the result
{
//TESTING just call the original function for now
/*
let mut word_start_index : usize = global_index ; //Inclusive
{
while global_index < english . len ( ) {
if ! english [ global_index ] . is_ascii_alphabetic ( ) {
break ;
}
global_index + = 1 ;
}
let word_slice : & [ u8 ] = & english [ word_start_index .. global_index ] ;
translate_word_with_style_reuse_buffers (
word_slice ,
suffix_lower , special_case_suffix_lower , suffix_upper , special_case_suffix_upper ,
pig_latin_string
) ;
if global_index = = english . len ( ) { return ; }
} * /
//New
let word_start_index = global_index ;
let first_letter = english [ word_start_index ] ;
global_index + = 1 ;
if ( word_start_index + 1 ) = = english . len ( ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
return ; //The entire string is ending, so exit the function
} else if ! english [ word_start_index + 1 ] . is_ascii_alphabetic ( ) { //The next character exists but is not a letter (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
} else if is_vowel ( first_letter ) { //The word is longer than a letter and starts with a vowel (special case)
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//TODO better code reuse
//Find the end of the word
let word_end_index : usize ;
let slice_to_search_for_end = & english [ global_index .. ] ;
if let Some ( found_end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = found_end_of_word_index ;
word_end_index = global_index ;
} else { //The string ended
pig_latin_string . extend_from_slice ( slice_to_search_for_end ) ;
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string . extend_from_slice ( special_case_suffix_upper ) ;
} else { //Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
}
return ;
}
let word_slice = & english [ word_start_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string . extend_from_slice ( special_case_suffix_upper ) ;
} else { //Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
}
} else { //The word is longer than a letter and doesn't start with a vowel
//Find the first vowel's index
/* loop {
if is_vowel ( english [ global_index ] ) | | is_y ( english [ global_index ] ) { //As a heuristic, we consider Y to be a vowel when it is not at the start of the word
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
return ; //todo!();//TODO//No vowels in the word
}
}
let first_vowel_index : usize = global_index ;
* /
//FIXME what if the word has no vowels, we go into the next word, and then encounter a vowel?
let first_vowel_index : usize ;
let slice_to_search_for_vowel = & english [ global_index .. ] ;
if let Some ( first_vowel_of_word_index ) = slice_to_search_for_vowel . iter ( ) . position ( | & x | { is_vowel ( x ) | | is_y ( x ) | | ! x . is_ascii_alphabetic ( ) } ) { //As a heuristic, we consider Y to be a vowel when it is not at the start of the word
if english [ first_vowel_of_word_index ] . is_ascii_alphabetic ( ) {
global_index + = first_vowel_of_word_index ;
first_vowel_index = global_index ;
let word_end_index : usize ;
let slice_to_search_for_end = & english [ global_index .. ] ;
if let Some ( end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = end_of_word_index ;
word_end_index = global_index ;
} else { //The string ended
global_index = english . len ( ) ;
word_end_index = global_index ;
}
//TODO improve code reuse here
if fast_is_ascii_uppercase ( first_letter ) { //Check if the first letter is uppercase
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (uppercase)
pig_latin_string . extend_from_slice ( suffix_upper ) ;
} else { //Word starts with an uppercase letter, but is otherwise lowercase
//Push the vowel, matching the starting case of the original word
pig_latin_string . push ( fast_to_ascii_uppercase ( english [ first_vowel_index ] ) ) ;
//Push all letters after the vowel
let after_vowel_slice = & english [ ( first_vowel_index + 1 ) .. word_end_index ] ;
pig_latin_string . extend_from_slice ( after_vowel_slice ) ;
//Push the first starting consonant, which should be lowercase now
pig_latin_string . push ( fast_to_ascii_lowercase ( english [ word_start_index ] ) ) ;
//Push the remaining starting consonants
let after_start_to_vowel_slice = & english [ ( word_start_index + 1 ) .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( after_start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
} else { //Word is entirely lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (lowercase)
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
} else {
let word_slice = & english [ word_start_index .. ( global_index + first_vowel_of_word_index ) ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
//todo!();//We should skip the next section
}
} else { //This word has no vowels
/* if let Some(end_of_word_index) = slice_to_search_for_vowel.iter().position(|&x| !x.is_ascii_alphabetic()) { / / We found a non-letter that ends the word
let word_slice = & english [ word_start_index .. ( global_index + end_of_word_index ) ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
todo! ( ) ; //We should skip the next section
} else { //The string ended
/* let word_slice = &english[word_start_index..];
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
return ;
* /
todo! ( ) ;
}
* /
//return;
let word_slice = & english [ word_start_index .. ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
return ;
}
/* let first_vowel_index: usize;
let slice_to_search_for_vowel = & english [ global_index .. ] ;
if let Some ( first_vowel_of_word_index ) = slice_to_search_for_vowel . iter ( ) . position ( | & x | { is_vowel ( x ) | | is_y ( x ) } ) { //As a heuristic, we consider Y to be a vowel when it is not at the start of the word
global_index + = first_vowel_of_word_index ;
first_vowel_index = global_index ;
} else { //This word has no vowels
if let Some ( end_of_word_index ) = slice_to_search_for_vowel . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
let word_slice = & english [ word_start_index .. ( global_index + end_of_word_index ) ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
todo! ( ) ; //We should skip the next section
} else { //The string ended
/* let word_slice = &english[word_start_index..];
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( suffix_lower ) ;
return ;
* /
todo! ( ) ;
}
//return;
}
* /
//Find the end of the word
/* loop {
if ! english [ global_index ] . is_ascii_alphabetic ( ) {
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
return ; //todo!();//TODO//Word ended
}
}
let word_end_index : usize = global_index ;
* /
/*
let word_end_index : usize ;
let slice_to_search_for_end = & english [ global_index .. ] ;
if let Some ( end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = end_of_word_index ;
word_end_index = global_index ;
} else { //The string ended
global_index = english . len ( ) ;
word_end_index = global_index ;
}
//TODO improve code reuse here
if fast_is_ascii_uppercase ( first_letter ) { //Check if the first letter is uppercase
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (uppercase)
pig_latin_string . extend_from_slice ( suffix_upper ) ;
} else { //Word starts with an uppercase letter, but is otherwise lowercase
//Push the vowel, matching the starting case of the original word
pig_latin_string . push ( fast_to_ascii_uppercase ( english [ first_vowel_index ] ) ) ;
//Push all letters after the vowel
let after_vowel_slice = & english [ ( first_vowel_index + 1 ) .. word_end_index ] ;
pig_latin_string . extend_from_slice ( after_vowel_slice ) ;
//Push the first starting consonant, which should be lowercase now
pig_latin_string . push ( fast_to_ascii_lowercase ( english [ word_start_index ] ) ) ;
//Push the remaining starting consonants
let after_start_to_vowel_slice = & english [ ( word_start_index + 1 ) .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( after_start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
} else { //Word is entirely lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (lowercase)
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
* /
}
//OLD don't use
/*
if ( global_index + 1 ) = = english . len ( ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
//Don't go on if we reached the end of the string during the word
if global_index = = english . len ( ) {
return ;
}
let second_character = english [ global_index + 1 ] ;
if ! second_character . is_ascii_alphabetic ( ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
global_index + = 1 ;
} else { //The word is more than one letter long
let mut word_start_index : usize = global_index ; //Inclusive
global_index + = 1 ;
//TODO case if word starts with vowel
if is_vowel ( first_letter ) { //As a herustic, we consider Y to be a vowel when it is not at the start of the word
//Copy all remaining letters in the word and append the special suffix
//TODO what about uppercase words?
/* loop {
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = & english [ start_of_contraction_suffix_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
return ;
}
if ! english [ global_index ] . is_ascii_alphabetic ( ) { //End of the contraction suffix
break ;
}
global_index + = 1 ;
}
* /
} else {
//Find the first vowel's index
loop {
if is_vowel ( english [ global_index ] ) | | is_y ( english [ global_index ] ) { //As a herustic, we consider Y to be a vowel when it is not at the start of the word
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
todo! ( ) ; //No vowels in the word
}
}
let first_vowel_index : usize = global_index ;
//Find the end of the word
loop {
if ! english [ global_index ] . is_ascii_alphabetic ( ) {
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
todo! ( ) ; //Word ended
}
}
let word_end_index : usize = global_index ;
//Translate the word
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
} * /
/* let mut word_start_index: usize = global_index; / / Inclusive
loop {
}
* /
//}
/* */
//let mut first_vowel_index: usize = 0xDEADBEEF;//Also exclusive end of starting consonants
/* loop {
break ; //TODO
} * /
/* */
//TODO Wrap-up this section here
//At this point, global_index contains the index to the next character to check
}
//Copies contraction suffixes, if present
if english [ global_index ] = = b'\'' { //TODO if this is true we can also skip the regular inter-word loop on the next iteration
let mut start_of_contraction_suffix_index : usize = global_index ; //Inclusive
global_index + = 1 ;
global_index + = 1 ; //We skip over the apostrophe for the loop below, but we still want to copy it in the end
loop {
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
@ -978,182 +506,6 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes_old_inprogress (
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub ( crate ) fn translate_with_style_lower_and_upper_suffixes_abandoned (
english : & [ u8 ] ,
suffix_lower : & [ u8 ] , special_case_suffix_lower : & [ u8 ] , suffix_upper : & [ u8 ] , special_case_suffix_upper : & [ u8 ] ,
pig_latin_string : & mut Vec ::< u8 >
) {
if english . is_empty ( ) {
return ;
}
#[ derive(Debug, Clone, Copy) ]
enum State {
InBetweenWords ,
InFirstLetterOfWord ,
InRegularWord ,
FinishingRegularWord ,
InWordStartingWithVowel ,
InContractionSuffix
} ;
let mut current_state : State = State ::InBetweenWords ;
let mut word_start_index : usize = 0xDEADBEEF ; //Inclusive
let mut first_vowel_index : usize = 0xDEADBEEF ; //Also exclusive end of starting consonants
let mut start_of_in_between_words_index : usize = 0 ; //Inclusive; Also exclusive end of word
for i in 0 .. english . len ( ) {
//println!("{:?}", current_state);
match current_state { //FIXME this is a bottleneck; instead have two nested infinite loops within another infinite loops to reduce the size of the match
State ::InBetweenWords = > {
if english [ i ] . is_ascii_alphabetic ( ) { //Start of a word
//This is the start of the word, so copy all non-word characters up to this point since the last word
let in_between_words_characters_slice = & english [ start_of_in_between_words_index .. i ] ;
pig_latin_string . extend_from_slice ( in_between_words_characters_slice ) ;
//Setup things to begin processing the word
word_start_index = i ;
current_state = State ::InFirstLetterOfWord ;
}
} ,
State ::InFirstLetterOfWord = > {
if english [ i ] . is_ascii_alphabetic ( ) { //This word is more than one letter
if is_vowel ( english [ word_start_index ] ) { //As a herustic, we consider Y to be a vowel when it is not at the start of the word
current_state = State ::InWordStartingWithVowel ;
} else {
if is_vowel ( english [ i ] ) | | is_y ( english [ i ] ) { //As a herustic, we consider Y to be a vowel when it is not at the start of the word
first_vowel_index = i ;
current_state = State ::FinishingRegularWord ;
} else {
current_state = State ::InRegularWord ;
}
}
} else { //This is a one-letter word (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( english [ word_start_index ] ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
//Decide the next state
if english [ i ] = = b'\'' {
current_state = State ::InContractionSuffix ;
} else {
current_state = State ::InBetweenWords ;
}
start_of_in_between_words_index = i ;
}
} ,
State ::InRegularWord = > {
if is_vowel ( english [ i ] ) | | is_y ( english [ i ] ) { //As a herustic, we consider Y to be a vowel when it is not at the start of the word
first_vowel_index = i ;
current_state = State ::FinishingRegularWord ;
}
//TODO handle the case where the word ends here before a vowel is encountered
} ,
State ::FinishingRegularWord = > {
if ! english [ i ] . is_ascii_alphabetic ( ) { //End of word
//We now need to actually translate the word
if fast_is_ascii_uppercase ( english [ word_start_index ] ) { //The first letter of the word was uppercase
if fast_is_ascii_uppercase ( english [ first_vowel_index ] ) { //Heuristic: Assume the word was uppercase if the first vowel is
//TODO
} else {
//Push the vowel, matching the starting case of the original word
pig_latin_string . push ( fast_to_ascii_uppercase ( english [ first_vowel_index ] ) ) ;
//Push all letters after the vowel
let after_vowel_slice = & english [ ( first_vowel_index + 1 ) .. i ] ;
pig_latin_string . extend_from_slice ( after_vowel_slice ) ;
//Push the first starting consonant, which should be lowercase now
pig_latin_string . push ( fast_to_ascii_lowercase ( english [ word_start_index ] ) ) ;
//Push the remaining starting consonants
let after_start_to_vowel_slice = & english [ ( word_start_index + 1 ) .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( after_start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
} else { //The first letter of the word was lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. i ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
//Decide the next state
if english [ i ] = = b'\'' {
current_state = State ::InContractionSuffix ;
} else {
current_state = State ::InBetweenWords ;
}
start_of_in_between_words_index = i ;
}
}
State ::InWordStartingWithVowel = > {
if ! english [ i ] . is_ascii_alphabetic ( ) { //End of word
//We now need to actually translate the word
//TODO handle uppercase
let word_slice = & english [ word_start_index .. i ] ;
pig_latin_string . extend_from_slice ( word_slice ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
//Decide the next state
if english [ i ] = = b'\'' {
current_state = State ::InContractionSuffix ;
} else {
current_state = State ::InBetweenWords ;
}
start_of_in_between_words_index = i ;
}
} ,
State ::InContractionSuffix = > {
if ! english [ i ] . is_ascii_alphabetic ( ) { //End of contraction suffix
//Push the contraction suffix
let contraction_suffix_slice = & english [ start_of_in_between_words_index .. i ] ;
pig_latin_string . extend_from_slice ( contraction_suffix_slice ) ;
//We're back in-between words
start_of_in_between_words_index = i ;
current_state = State ::InBetweenWords ;
}
} ,
}
}
//Wrap-up based on the state we ended the loop in
match current_state {
State ::InBetweenWords = > {
//Copy remaining characters
let remaining_characters_slice = & english [ start_of_in_between_words_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
} ,
State ::InFirstLetterOfWord = > {
//TODO
} ,
State ::InRegularWord = > {
//TODO
} ,
State ::FinishingRegularWord = > {
//TODO
}
State ::InWordStartingWithVowel = > {
//TODO
} ,
State ::InContractionSuffix = > {
//Push the contraction suffix
let contraction_suffix_slice = & english [ start_of_in_between_words_index .. ] ;
pig_latin_string . extend_from_slice ( contraction_suffix_slice ) ;
} ,
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub ( crate ) fn translate_with_style_lower_and_upper_suffixes_old (
english : & [ u8 ] ,