Skip to content

Commit

Permalink
Fix additional paste bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewliebenow committed Oct 8, 2024
1 parent f096a21 commit 5a54253
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 36 deletions.
73 changes: 37 additions & 36 deletions src/uu/paste/src/paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,66 +227,67 @@ fn paste(
Ok(())
}

/// Unescape all special characters
fn unescape(input: &str) -> String {
fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
/// A single backslash char
const BACKSLASH: char = '\\';

let mut string = String::with_capacity(input.len());
fn add_one_byte_single_char_delimiter(vec: &mut Vec<Box<[u8]>>, byte: u8) {
vec.push(Box::new([byte]));
}

// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];

let mut add_single_char_delimiter = |vec: &mut Vec<Box<[u8]>>, ch: char| {
let delimiter_encoded = ch.encode_utf8(&mut buffer);

vec.push(Box::from(delimiter_encoded.as_bytes()));
};

let mut chars = input.chars();
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters.len());

let mut chars = delimiters.chars();

// Unescape all special characters
while let Some(char) = chars.next() {
match char {
// Empty string (not a null character)
BACKSLASH => match chars.next() {
// Keep "\" if it is the last char
// "Empty string (not a null character)"
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
Some('0') => {
vec.push(Box::<[u8; 0]>::new([]));
}
// "\\" to "\"
None | Some(BACKSLASH) => {
string.push(BACKSLASH);
Some(BACKSLASH) => {
add_one_byte_single_char_delimiter(&mut vec, b'\\');
}
// "\n" to U+000A
Some('n') => {
string.push('\n');
add_one_byte_single_char_delimiter(&mut vec, b'\n');

Check warning on line 267 in src/uu/paste/src/paste.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/paste/src/paste.rs#L267

Added line #L267 was not covered by tests
}
// "\t" to U+0009
Some('t') => {
string.push('\t');
add_one_byte_single_char_delimiter(&mut vec, b'\t');

Check warning on line 271 in src/uu/paste/src/paste.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/paste/src/paste.rs#L271

Added line #L271 was not covered by tests
}
Some(other_char) => {
string.push(BACKSLASH);
string.push(other_char);
// "If any other characters follow the <backslash>, the results are unspecified."
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
// However, other implementations remove the backslash
// See "test_posix_unspecified_delimiter"
add_single_char_delimiter(&mut vec, other_char);
}
None => {
unreachable!("Delimiter list cannot end with an unescaped backslash");

Check warning on line 281 in src/uu/paste/src/paste.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/paste/src/paste.rs#L281

Added line #L281 was not covered by tests
}
},
non_backslash_char => {
string.push(non_backslash_char);
add_single_char_delimiter(&mut vec, non_backslash_char);
}
}
}

string
}

fn parse_delimiters(delimiters: &str) -> Box<[Box<[u8]>]> {
let delimiters_unescaped = unescape(delimiters).chars().collect::<Vec<_>>();

let delimiters_unescaped_len = delimiters_unescaped.len();

if delimiters_unescaped_len > 0 {
let mut vec = Vec::<Box<[u8]>>::with_capacity(delimiters_unescaped_len);

// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];

for delimiter in delimiters_unescaped {
let delimiter_encoded = delimiter.encode_utf8(&mut buffer);

vec.push(Box::from(delimiter_encoded.as_bytes()));
}

vec.into_boxed_slice()
} else {
Box::new([])
}
vec.into_boxed_slice()
}

enum DelimiterState<'a> {
Expand Down
53 changes: 53 additions & 0 deletions tests/by-util/test_paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,59 @@ fn test_three_trailing_backslashes_delimiter() {
}
}

// "If any other characters follow the <backslash>, the results are unspecified."
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
// However, other implementations remove the backslash
#[test]
fn test_posix_unspecified_delimiter() {
    // Four newline-terminated lines fed to the command on stdin.
    let piped_input = "\
1
2
3
4
";
    // With `-s`, the four lines are expected back as one line joined by a bare `z`,
    // i.e. the backslash in front of `z` is dropped.
    let expected_stdout = "\
1z2z3z4
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        // The delimiter argument is the two characters `\z` (the Rust literal
        // escapes the backslash), not the three characters `\\z`.
        let arguments = [flag, "\\z", "-s"];

        new_ucmd!()
            .args(&arguments)
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_stdout);
    }
}

// "Empty string (not a null character)"
// https://pubs.opengroup.org/onlinepubs/9799919799/utilities/paste.html
#[test]
fn test_backslash_zero_delimiter() {
    // Six newline-terminated lines fed to the command on stdin.
    let piped_input = "\
1
2
3
4
5
6
";
    // The delimiter list is cycled as: empty, `z`, empty, empty, `z`.
    // The lines are therefore expected to be joined as `12z345z6`.
    let expected_stdout = "\
12z345z6
";

    // Exercise both the short and the long spelling of the option.
    for flag in ["-d", "--delimiters"] {
        // The delimiter argument is `\0z\0`: an empty-string delimiter, then `z`,
        // then another empty-string delimiter (`\0` is not a NUL byte here).
        let arguments = [flag, "\\0z\\0", "-s"];

        new_ucmd!()
            .args(&arguments)
            .pipe_in(piped_input)
            .succeeds()
            .stdout_only(expected_stdout);
    }
}

#[test]
fn test_data() {
for example in EXAMPLE_DATA {
Expand Down

0 comments on commit 5a54253

Please sign in to comment.