More ASCII progress

main
John Zacarias Jekel 1 year ago
parent 2d2333677c
commit d03c413ee7
  1. 6
      src/anslatortray.rs
  2. 105
      src/ascii.rs
  3. 2
      src/lib.rs

@ -9,7 +9,7 @@
/* Imports */
use anslatortray::translate;
use anslatortray::translate_ascii;
use anslatortray::ascii;
/* Constants */
@ -71,7 +71,7 @@ fn help() {
eprintln!("--translate-args Translates all remaining arguments provided and outputs them to stdout");
eprintln!("--stdin-to-stdout Translates input from stdin directly to stdout");
eprintln!("\n{}", translate_ascii("Have a good day!"));
eprintln!("\n{}", String::from_utf8(ascii::translate(b"Have a good day!")).unwrap());
}
fn interactive(args: &Vec<String>) {
@ -156,7 +156,7 @@ fn benchmark_file(args: &Vec<String>) {
for _ in 0..iterations {
let start_time = std::time::Instant::now();
let translated_file_contents = translate_ascii(&file_contents);
let translated_file_contents = ascii::translate(file_contents.as_bytes());
let time_to_translate = start_time.elapsed();
total_duration_ascii += time_to_translate;
std::fs::write("/dev/null", &translated_file_contents).unwrap();//TODO avoid needing unix

@ -31,6 +31,111 @@ use crate::helpers::{is_vowel_ascii, is_y_ascii, word_is_uppercase_ascii, push_s
/* Functions */
pub fn translate(english: &[u8]) -> Vec::<u8> {
return translate_way(english);
}
pub fn translate_way(english: &[u8]) -> Vec::<u8> {
return translate_with_style(english, b"ay", b"way");
}
pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_suffix_lower: &[u8]) -> Vec::<u8> {
if english.is_empty() {
return Vec::<u8>::new();
}
//TODO switch to fully operating on u8 slices/arrays/Vecs internally (converting from a string, then to a string at the end) in anslatortray 0.5.0
let mut pig_latin_string = Vec::<u8>::with_capacity(english.len() * 2);//Plenty of headroom in case the words are very small or the suffixes are long
//Convert the suffix and special_case_suffix we were provided to uppercase for words that are capitalized
let mut suffix_upper = Vec::<u8>::with_capacity(suffix_lower.len());
for letter in suffix_lower.iter() {
suffix_upper.push(letter.to_ascii_uppercase());
}
let mut special_case_suffix_upper = Vec::<u8>::with_capacity(special_case_suffix_lower.len());
for letter in special_case_suffix_lower.iter() {
special_case_suffix_upper.push(letter.to_ascii_uppercase());
}
//Flags used to remember if we're currently processing a word, contraction, contraction suffix or neither
let mut in_word: bool = false;
let mut in_contraction_suffix: bool = false;
//Buffer for improved performance (avoid repeated heap allocations)
let mut starting_consonants_buffer = Vec::<u8>::with_capacity(64);//Longer than basically all English words to avoid unneeded allocations, plus the fact that this isn't the whole word
//Indexes for improved performance (avoid copying characters to use as the english_word argument for translate_word_with_style_reuse_buffers)
//However, this assumes each character is one byte, so this only works with ASCII strings
let mut slice_start_index: usize = 0;//Inclusive
let mut slice_end_index: usize = 0;//Exclusive
for character in english.iter() {
if in_word {
if in_contraction_suffix {
if character.is_ascii_alphabetic() {
//We never translate the contraction suffix of a word, so just copy remaining letters as-is
} else {
//The contraction ended, and so too does the word
//We still want to copy the non-letter to the output though
in_contraction_suffix = false;
in_word = false;
}
pig_latin_string.push(*character);//Copy the character
slice_start_index += 1;//Keep the slice start index up to speed for later use
} else {
if character.is_ascii_alphabetic() {
//This character is part of the word, so increment the slice_end_index to include it in the slice
slice_end_index += 1;
} else {
//The word or first part of the contraction ended, so translate the word we've identified up until this point!
let word_slice: &[u8] = &english[slice_start_index..slice_end_index];
translate_word_with_style_reuse_buffers (
word_slice,
suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper,
&mut pig_latin_string, &mut starting_consonants_buffer
);
//Bring the slice_start_index to the end since we've finished the word and need it ready for the next one
slice_start_index = slice_end_index + 1;
//Append the symbol/whitespace we just got after the translated word
pig_latin_string.push(*character);
//If the symbol/whitespace we just got is an apostrophe, then this is a contraction suffix
if *character == b'\'' {
in_contraction_suffix = true;
} else {
in_word = false;//This wasn't a contraction, so we're done with the word
}
}
}
} else {
if character.is_ascii_alphabetic() {
//If we see a letter, we are in a word, so set the slice_end_index to the character after the slice_start_index
in_word = true;
slice_end_index = slice_start_index + 1;
} else {
//Otherwise copy symbols and whitespace as-is
pig_latin_string.push(*character);
slice_start_index += 1;
}
}
}
//If we ended on a word (but not on a contraction suffix), we translate it and push it to the end of the string
if in_word && !in_contraction_suffix {
let word_slice: &[u8] = &english[slice_start_index..slice_end_index];
translate_word_with_style_reuse_buffers (
word_slice,
suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper,
&mut pig_latin_string, &mut starting_consonants_buffer
);
}
return pig_latin_string;
}
/*
pub(super) fn translate_word_with_style_reuse_buffers_better_perhaps <
const SUFFIX_LEN: usize, const SPECIAL_CASE_SUFFIX_LEN: usize

@ -81,7 +81,7 @@
mod helpers;
mod translate_strings;
mod translate_words;
mod ascii;
pub mod ascii;
mod utf8;
pub use translate_strings::{translate, translate_way, translate_yay, translate_hay, translate_ferb, translate_with_style};

Loading…
Cancel
Save