@ -349,6 +349,173 @@ pub(crate) fn translate_with_style_lower_and_upper_suffixes (
return ;
}
let mut global_index : usize = 0 ;
loop {
//Copies characters in-between words
//TODO this could probably be optimized with vector instructions
{
let mut start_of_in_between_words_index : usize = global_index ; //Inclusive
loop {
if english [ global_index ] . is_ascii_alphabetic ( ) { //Start of a word
break ;
}
global_index + = 1 ;
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = & english [ start_of_in_between_words_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
return ;
}
}
//Copy the characters in-between words as-is
let in_between_words_characters_slice = & english [ start_of_in_between_words_index .. global_index ] ;
pig_latin_string . extend_from_slice ( in_between_words_characters_slice ) ;
//At this point, global_index contains the index to the start of the word to translate
}
//Translates the current word
{
let word_start_index = global_index ;
let first_letter = english [ word_start_index ] ;
global_index + = 1 ;
if ( global_index = = english . len ( ) ) | | ( ! english [ global_index ] . is_ascii_alphabetic ( ) ) { //The word is only one letter long (special case)
//Push the letter and add the lowercase special suffix (even if the letter is uppercase)
pig_latin_string . push ( first_letter ) ;
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
} else if is_vowel ( first_letter ) { //The word is longer than a letter and starts with a vowel (special case)
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
//Get the slice containing the whole word
let slice_to_search_for_end = & english [ global_index .. ] ;
let word_slice : & [ u8 ] ;
if let Some ( found_end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = found_end_of_word_index ;
word_slice = & english [ word_start_index .. global_index ] ;
} else { //The string ended
global_index = english . len ( ) ;
word_slice = slice_to_search_for_end ;
}
//Translate the word and push it
pig_latin_string . extend_from_slice ( word_slice ) ;
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
pig_latin_string . extend_from_slice ( special_case_suffix_upper ) ;
} else { //Word is entirely lowercase, or its first letter is uppercase only
pig_latin_string . extend_from_slice ( special_case_suffix_lower ) ;
}
} else { //The word is longer than a letter and doesn't start with a vowel
//Find the first vowel; we assume the word actually has a vowel in it
let first_vowel_index : usize ;
let slice_to_search_for_vowel = & english [ global_index .. ] ;
if let Some ( first_vowel_of_word_index ) = slice_to_search_for_vowel . iter ( ) . position ( | & x | { is_vowel ( x ) | | is_y ( x ) } ) { //As a heuristic, we consider Y to be a vowel when it is not at the start of the word
global_index + = first_vowel_of_word_index ;
} else { //This string ended and we never found a vowel
return ; //Just give up
}
first_vowel_index = global_index ;
//Find the end of the word
let word_end_index : usize ;
let slice_to_search_for_end = & english [ global_index .. ] ;
if let Some ( end_of_word_index ) = slice_to_search_for_end . iter ( ) . position ( | & x | ! x . is_ascii_alphabetic ( ) ) { //We found a non-letter that ends the word
global_index + = end_of_word_index ;
} else { //The string ended
global_index = english . len ( ) ;
}
word_end_index = global_index ;
//Translate the word
//TODO improve code reuse here
if fast_is_ascii_uppercase ( first_letter ) { //Check if the first letter is uppercase
if fast_is_ascii_uppercase ( english [ word_start_index + 1 ] ) { //As a heuristic, we consider the word to be uppercase if the second letter is
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (uppercase)
pig_latin_string . extend_from_slice ( suffix_upper ) ;
} else { //Word starts with an uppercase letter, but is otherwise lowercase
//Push the vowel, matching the starting case of the original word
pig_latin_string . push ( fast_to_ascii_uppercase ( english [ first_vowel_index ] ) ) ;
//Push all letters after the vowel
let after_vowel_slice = & english [ ( first_vowel_index + 1 ) .. word_end_index ] ;
pig_latin_string . extend_from_slice ( after_vowel_slice ) ;
//Push the first starting consonant, which should be lowercase now
pig_latin_string . push ( fast_to_ascii_lowercase ( english [ word_start_index ] ) ) ;
//Push the remaining starting consonants
let after_start_to_vowel_slice = & english [ ( word_start_index + 1 ) .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( after_start_to_vowel_slice ) ;
//Push the normal suffix
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
} else { //Word is entirely lowercase
//Push the vowel and all letters after it
let vowel_to_end_slice = & english [ first_vowel_index .. word_end_index ] ;
pig_latin_string . extend_from_slice ( vowel_to_end_slice ) ;
//Push the starting consonants
let start_to_vowel_slice = & english [ word_start_index .. first_vowel_index ] ;
pig_latin_string . extend_from_slice ( start_to_vowel_slice ) ;
//Push the normal suffix (lowercase)
pig_latin_string . extend_from_slice ( suffix_lower ) ;
}
}
//Don't go on if we reached the end of the string during the word
if global_index = = english . len ( ) {
return ;
}
//At this point, global_index contains the index to the next character to check
}
//Copies contraction suffixes, if present
if english [ global_index ] = = b'\'' { //TODO if this is true we can also skip the regular inter-word loop on the next iteration
let mut start_of_contraction_suffix_index : usize = global_index ; //Inclusive
global_index + = 1 ; //We skip over the apostrophe for the loop below, but we still want to copy it in the end
loop {
if global_index = = english . len ( ) {
//Copy all of the characters so far (all that remain) and return
let remaining_characters_slice = & english [ start_of_contraction_suffix_index .. ] ;
pig_latin_string . extend_from_slice ( remaining_characters_slice ) ;
return ;
}
if ! english [ global_index ] . is_ascii_alphabetic ( ) { //End of the contraction suffix
break ;
}
global_index + = 1 ;
}
//Copy the contraction suffix as-is
let contraction_suffix_slice = & english [ start_of_contraction_suffix_index .. global_index ] ;
pig_latin_string . extend_from_slice ( contraction_suffix_slice ) ;
}
}
}
//Avoids the overhead of having to convert suffixes to uppercase for the standard translation functions at runtime
pub ( crate ) fn translate_with_style_lower_and_upper_suffixes_old (
english : & [ u8 ] ,
suffix_lower : & [ u8 ] , special_case_suffix_lower : & [ u8 ] , suffix_upper : & [ u8 ] , special_case_suffix_upper : & [ u8 ] ,
pig_latin_string : & mut Vec ::< u8 >
) {
if english . is_empty ( ) {
return ;
}
//TODO merge the word and the generic text function into one function to allow for optimizations with certain things
//TODO do an SSE/AVX optimized version of this