Move more things to byte_string.rs

main
John Zacarias Jekel 1 year ago
parent 197fd51228
commit c37d6a1b94
  1. 3
      Cargo.toml
  2. 15
      src/anslatortray.rs
  3. 87
      src/byte_string.rs
  4. 10
      src/lib.rs
  5. 32
      src/string.rs

@ -16,9 +16,8 @@ documentation = "https://docs.rs/anslatortray/latest/anslatortray/"
[features]
default = []
nightly-features = ["nightly-features-benches", "nightly-features-generics"]
nightly-features = ["nightly-features-benches"]
nightly-features-benches = []
nightly-features-generics = []
[lib]
name = "anslatortray"

@ -8,7 +8,7 @@
/* Imports */
use anslatortray::string;
use anslatortray::translate;
use anslatortray::byte_string;
/* Constants */
@ -71,7 +71,7 @@ fn help() {
eprintln!("--translate-args Translates all remaining arguments provided and outputs them to stdout");
eprintln!("--stdin-to-stdout Translates input from stdin directly to stdout");
eprintln!("\n{}", string::translate("Have a good day!"));
eprintln!("\n{}", translate("Have a good day!"));
}
fn interactive(args: &Vec<String>) {
@ -89,7 +89,7 @@ fn interactive(args: &Vec<String>) {
loop {
eprint!("anslatortray> ");
stdin.read_line(&mut line_buffer).unwrap();
eprintln!("{}", string::translate(&line_buffer));
eprintln!("{}", translate(&line_buffer));
line_buffer.truncate(0);
}
}
@ -111,7 +111,7 @@ fn file(args: &Vec<String>) {
let file_contents = std::fs::read_to_string(input_file).unwrap();
let start_time = std::time::Instant::now();
let translated_file_contents = string::translate(&file_contents);
let translated_file_contents = translate(&file_contents);
//let translated_file_contents = ascii::translate(file_contents.as_bytes());//TESTING
let time_to_translate = start_time.elapsed();
std::fs::write(output_file, &translated_file_contents).unwrap();
@ -129,7 +129,6 @@ fn benchmark_file(args: &Vec<String>) {
}
//TODO error handling
//TODO just benchmark byte_string from now on
let input_file = &args[0];
let iterations = args[1].parse::<u128>().unwrap();//TODO error handling
@ -140,7 +139,7 @@ fn benchmark_file(args: &Vec<String>) {
for _ in 0..iterations {
let start_time = std::time::Instant::now();
let translated_file_contents = string::translate(&file_contents);
let translated_file_contents = translate(&file_contents);
let time_to_translate = start_time.elapsed();
total_duration_utf8 += time_to_translate;
@ -178,7 +177,7 @@ fn translate_args(args: &Vec<String>) {
//Translate the arguments and print them out for the user
for string in args {
print!("{} ", string::translate(&string));
print!("{} ", translate(&string));
}
println!();
}
@ -198,7 +197,7 @@ fn stdin_to_stdout(args: &Vec<String>) {
while let Ok(bytes_read) = stdin.read_to_string(&mut buffer) {
if bytes_read == 0 { return; }
write!(stdout, "{}", string::translate(&buffer)).unwrap();//TODO do this more efficiently (avoid format string)
write!(stdout, "{}", translate(&buffer)).unwrap();//TODO do this more efficiently (avoid format string)
buffer.truncate(0);//TODO is this needed here?
}
}

@ -1,14 +1,12 @@
/* NAME//TODO
* By: John Jekel
* Copyright (C) 2022 John Jekel
* See the LICENSE file at the root of the project for licensing info.
*
* TODO description
*
*/
/* Imports */
use crate::helpers::{is_vowel_ascii, is_y_ascii, word_is_uppercase_ascii, push_slice_to_vector};
/* Constants */
//TODO
@ -31,8 +29,6 @@ use crate::helpers::{is_vowel_ascii, is_y_ascii, word_is_uppercase_ascii, push_s
/* Functions */
//TODO rename the "ascii" functions to "byte" functions since they work on UTF8 bytestrings too
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
pub fn translate(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
translate_way(english, pig_latin_string);
@ -40,16 +36,25 @@ pub fn translate(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
pub fn translate_way(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
translate_with_style(english, b"ay", b"way", pig_latin_string);
translate_with_style_lower_and_upper_suffixes(english, b"ay", b"way", b"AY", b"WAY", pig_latin_string);
}
//TODO tests for this function
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_suffix_lower: &[u8], pig_latin_string: &mut Vec::<u8>) {
if english.is_empty() {
return;
}
/// Translates `english` into Pig Latin using the "yay" style, appending the output to `pig_latin_string`.
///
/// The regular suffix is `ay` (`AY` for uppercase words) and the special-case suffix is `yay`
/// (`YAY` for uppercase words). Both `english` and the output are byte strings; per the notes in
/// this file, non-ASCII bytes are not affected by the translation.
pub fn translate_yay(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
    //The uppercase special-case suffix must be the uppercased form of "yay"; it was mistakenly "WAY" before
    translate_with_style_lower_and_upper_suffixes(english, b"ay", b"yay", b"AY", b"YAY", pig_latin_string);
}
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
/// Translates `english` into Pig Latin using the "hay" style, appending the output to `pig_latin_string`.
///
/// Forwards to the shared translator with `ay`/`AY` as the regular suffix and `hay`/`HAY` as the
/// special-case suffix (lowercase/uppercase respectively).
pub fn translate_hay(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
    let (regular_lower, special_lower): (&[u8], &[u8]) = (b"ay", b"hay");
    let (regular_upper, special_upper): (&[u8], &[u8]) = (b"AY", b"HAY");
    translate_with_style_lower_and_upper_suffixes(
        english,
        regular_lower, special_lower, regular_upper, special_upper,
        pig_latin_string
    );
}
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
/// Translates `english` into Pig Latin using the "ferb" style, appending the output to `pig_latin_string`.
///
/// Forwards to the shared translator with `erb`/`ERB` as the regular suffix and `ferb`/`FERB` as
/// the special-case suffix (lowercase/uppercase respectively).
pub fn translate_ferb(english: &[u8], pig_latin_string: &mut Vec::<u8>) {
    let (regular_lower, special_lower): (&[u8], &[u8]) = (b"erb", b"ferb");
    let (regular_upper, special_upper): (&[u8], &[u8]) = (b"ERB", b"FERB");
    translate_with_style_lower_and_upper_suffixes(
        english,
        regular_lower, special_lower, regular_upper, special_upper,
        pig_latin_string
    );
}
pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_suffix_lower: &[u8], pig_latin_string: &mut Vec::<u8>) {
//Convert the suffix and special_case_suffix we were provided to uppercase for words that are capitalized
let mut suffix_upper = Vec::<u8>::with_capacity(suffix_lower.len());
for letter in suffix_lower.iter() {
@ -60,6 +65,20 @@ pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_su
special_case_suffix_upper.push(letter.to_ascii_uppercase());
}
//Argument order must match the callee: suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper
//(all four are &[u8], so passing them in the wrong order compiles but silently swaps the suffixes)
translate_with_style_lower_and_upper_suffixes(english, suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper, pig_latin_string);
}
//TODO tests for this function
//TODO be sure to mention that if the strings are not ascii, the non-ascii bytes won't be affected
pub(crate) fn translate_with_style_lower_and_upper_suffixes (
english: &[u8],
suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8],
pig_latin_string: &mut Vec::<u8>
) {
if english.is_empty() {
return;
}
//Flags used to remember if we're currently processing a word, contraction, contraction suffix or neither
let mut in_word: bool = false;
let mut in_contraction_suffix: bool = false;
@ -95,7 +114,7 @@ pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_su
let word_slice: &[u8] = &english[slice_start_index..slice_end_index];
translate_word_with_style_reuse_buffers (
word_slice,
suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper,
suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper,
pig_latin_string, &mut starting_consonants_buffer
);
@ -130,7 +149,7 @@ pub fn translate_with_style(english: &[u8], suffix_lower: &[u8], special_case_su
let word_slice: &[u8] = &english[slice_start_index..slice_end_index];
translate_word_with_style_reuse_buffers (
word_slice,
suffix_lower, special_case_suffix_lower, &suffix_upper, &special_case_suffix_upper,
suffix_lower, special_case_suffix_lower, suffix_upper, special_case_suffix_upper,
pig_latin_string, &mut starting_consonants_buffer
);
}
@ -153,6 +172,7 @@ fn translate_word_with_style_reuse_buffers (
suffix_lower: &[u8], special_case_suffix_lower: &[u8], suffix_upper: &[u8], special_case_suffix_upper: &[u8],
buffer_to_append_to: &mut Vec<u8>, starting_consonants: &mut Vec<u8>
) {
debug_assert!(english_word.len() != 0);
if english_word.len() == 1 {
push_slice_to_vector(buffer_to_append_to, english_word);
push_slice_to_vector(buffer_to_append_to, special_case_suffix_lower);
@ -165,10 +185,10 @@ fn translate_word_with_style_reuse_buffers (
let mut index = 1;
//Check if the word is uppercase
let word_uppercase = word_is_uppercase_ascii(english_word);
let word_uppercase = word_is_uppercase(english_word);
//As a heuristic, we consider Y to be a vowel when it is not at the start of the word
let first_letter_was_vowel: bool = is_vowel_ascii(english_word[0]);//Not including y
let first_letter_was_vowel: bool = is_vowel(english_word[0]);//Not including y
//Clear the starting_consonants buffer we were given
starting_consonants.truncate(0);
@ -182,7 +202,7 @@ fn translate_word_with_style_reuse_buffers (
//Grab all of the starting consonants, and push the first vowel we encounter to buffer_to_append_to
while index < english_word.len() {
let character: u8 = english_word[index];
if is_vowel_ascii(character) || is_y_ascii(character) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
if is_vowel(character) || is_y(character) {//As a herustic, we consider Y to be a vowel when it is not at the start of the word
//The vowel is the first letter of the word; we want it match the capitalization of the first letter of the original word
if first_char_was_upper {
buffer_to_append_to.push(character.to_ascii_uppercase());
@ -221,6 +241,33 @@ fn translate_word_with_style_reuse_buffers (
}
}
/// Reports whether `letter` is one of the ASCII vowels `a`, `e`, `i`, `o` or `u`, in either case.
///
/// `y` is never treated as a vowel by this function.
fn is_vowel(letter: u8) -> bool {
    matches!(letter.to_ascii_lowercase(), b'a' | b'e' | b'i' | b'o' | b'u')
}
/// Reports whether `letter` is an ASCII `y`, in either lowercase or uppercase.
pub(crate) fn is_y(letter: u8) -> bool {
    matches!(letter, b'y' | b'Y')
}
/// Reports whether an entire word should be treated as upper case.
///
/// Heuristic: only the last byte is inspected; if it is an ASCII uppercase letter, the whole
/// word is assumed to be uppercase. Callers are expected to pass a non-empty word, but an empty
/// slice is tolerated and yields `false` instead of panicking on an out-of-range index.
pub(crate) fn word_is_uppercase(english_word_bytes: &[u8]) -> bool {
    english_word_bytes
        .last()
        .map_or(false, |last_byte| last_byte.is_ascii_uppercase())
}
/// Appends a clone of every element of `slice` to the end of `vec`, preserving order.
///
/// Delegates to `Vec::extend_from_slice`, which reserves space once rather than growing the
/// vector element by element.
pub(crate) fn push_slice_to_vector<T: Clone>(vec: &mut Vec<T>, slice: &[T]) {
    vec.extend_from_slice(slice);
}
/* Tests */
#[cfg(test)]
@ -267,8 +314,6 @@ mod tests {
}
}
//TODO add generic versions
fn translate_word_with_style(english_word: &str, suffix_lower: &str, special_case_suffix_lower: &str) -> String {
let mut suffix_upper = String::with_capacity(suffix_lower.len());
for letter in suffix_lower.chars() {
@ -279,8 +324,8 @@ mod tests {
special_case_suffix_upper.push(letter.to_ascii_uppercase());
}
let mut pig_latin_word = Vec::<u8>::with_capacity(64 * 2);//Longer than all English words to avoid unneeded allocations, times 2 to leave room for whitespace, symbols, and the suffix
let mut starting_consonants_buffer = Vec::<u8>::with_capacity(64);//Longer than basically all English words to avoid unneeded allocations, plus the fact that this isn't the whole word
let mut pig_latin_word = Vec::<u8>::new();
let mut starting_consonants_buffer = Vec::<u8>::new();
translate_word_with_style_reuse_buffers (
english_word.as_bytes(),
suffix_lower.as_bytes(), special_case_suffix_lower.as_bytes(), &suffix_upper.as_bytes(), &special_case_suffix_upper.as_bytes(),

@ -73,16 +73,10 @@
//Only enabled if the relevant Cargo feature is
#![cfg_attr(feature = "nightly-features-benches", feature(test))]
#![cfg_attr(feature = "nightly-features-generics", feature(adt_const_params))]
#![cfg_attr(feature = "nightly-features-generics", feature(generic_const_exprs))]
/* Imports */
mod helpers;
//mod translate_strings;
//mod translate_words;
pub mod byte_string;
pub mod string;
mod string;
pub use string::translate;//For convinence; for other functions use either the string or byte_string modules
//pub use translate_strings::{translate_ascii, translate_way_ascii, translate_yay_ascii, translate_hay_ascii, translate_ferb_ascii, translate_with_style_ascii};
pub use string::{translate, translate_yay, translate_hay, translate_ferb, translate_with_style};

@ -1,7 +1,9 @@
/* NAME//TODO
/* string.rs
* By: John Jekel
* Copyright (C) 2022 John Jekel
* See the LICENSE file at the root of the project for licensing info.
*
* TODO description
* Translation functions operating on &str and String (the ones most users will want to use)
*
*/
@ -9,28 +11,10 @@
use crate::byte_string::translate_with_style as translate_byte_string_with_style;
/* Constants */
//TODO
/* Macros */
//TODO (also pub(crate) use the_macro statements here too)
/* Static Variables */
//TODO
/* Types */
//TODO
/* Associated Functions and Methods */
//TODO
/* Functions */
//TODO use byte_string::translate_with_style_lower_and_upper_suffixes for speed
/// Translates `english` into Pig Latin and returns the result as a new `String`.
///
/// This is a thin convenience wrapper that forwards to `translate_way`.
pub fn translate(english: &str) -> String {
    translate_way(english)
}
@ -52,10 +36,6 @@ pub fn translate_ferb(english: &str) -> String {
}
pub fn translate_with_style(english: &str, suffix_lower: &str, special_case_suffix_lower: &str) -> String {
if english.is_empty() {
return String::new();
}
//Convert the string slices to byte slices and translate those (only ASCII letters are affected, non-letters or UTF-8 are preserved)
let mut pig_latin_string_bytes = Vec::<u8>::with_capacity(english.len() * 2);//Plenty of headroom in case the words are very small or the suffixes are long
translate_byte_string_with_style(english.as_bytes(), suffix_lower.as_bytes(), special_case_suffix_lower.as_bytes(), &mut pig_latin_string_bytes);

Loading…
Cancel
Save