diff --git a/Cargo.lock b/Cargo.lock index 7f412968..6e658990 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,7 @@ version = "0.2.0" dependencies = [ "assert_cmd", "chrono", + "clap", "filetime", "glob", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index f3eb7089..140a01af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ authors = ["uutils developers"] [dependencies] chrono = "0.4" +clap = "2.34" glob = "0.3" walkdir = "2.3" tempfile = "3" @@ -28,6 +29,10 @@ serial_test = "0.5" name = "find" path = "src/find/main.rs" +[[bin]] +name = "xargs" +path = "src/xargs/main.rs" + [[bin]] name = "testing-commandline" path = "src/testing/commandline/main.rs" diff --git a/src/lib.rs b/src/lib.rs index fd320a52..7a60d7d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,3 +12,4 @@ extern crate walkdir; extern crate tempfile; pub mod find; +pub mod xargs; diff --git a/src/testing/commandline/main.rs b/src/testing/commandline/main.rs index b28362e4..6308ce21 100644 --- a/src/testing/commandline/main.rs +++ b/src/testing/commandline/main.rs @@ -6,7 +6,7 @@ use std::env; use std::fs::{self, File, OpenOptions}; -use std::io::Write; +use std::io::{stdin, stdout, Read, Write}; use std::path::PathBuf; fn usage() -> ! { @@ -14,10 +14,19 @@ fn usage() -> ! { std::process::exit(2); } +enum ExitWith { + Failure, + UrgentFailure, + #[cfg(unix)] + Signal, +} + #[derive(Default)] struct Config { - exit_with_failure: bool, - destination_dir: String, + exit_with: Option, + print_stdin: bool, + no_print_cwd: bool, + destination_dir: Option, } fn open_file(destination_dir: &str) -> File { @@ -39,20 +48,61 @@ fn open_file(destination_dir: &str) -> File { } } +fn write_content(mut f: impl Write, config: &Config, args: &Vec) { + if !config.no_print_cwd { + write!(f, "cwd={}\n", env::current_dir().unwrap().to_string_lossy()) + .expect("failed to write to file"); + } + + if config.print_stdin { + let mut s = String::new(); + stdin() + .read_to_string(&mut s) + .expect("failed to read from stdin"); + write!(f, "stdin={}\n", s.trim()).expect("failed to write to file"); + } + + write!(f, "args=\n").expect("failed to write to file"); + + // first two args are going to be the path to this executable and + // the destination_dir we want to write to. Don't write either of those + // as they'll be non-deterministic. + for arg in &args[2..] { + write!(f, "{}\n", arg).expect("failed to write to file"); + } +} + fn main() { let args = env::args().collect::>(); if args.len() < 2 || args[1] == "-h" || args[1] == "--help" { usage(); } let mut config = Config { - destination_dir: args[1].clone(), + destination_dir: if args[1] != "-" { + Some(args[1].clone()) + } else { + None + }, ..Default::default() }; for arg in &args[2..] { if arg.starts_with("--") { match arg.as_ref() { "--exit_with_failure" => { - config.exit_with_failure = true; + config.exit_with = Some(ExitWith::Failure); + } + "--exit_with_urgent_failure" => { + config.exit_with = Some(ExitWith::UrgentFailure); + } + #[cfg(unix)] + "--exit_with_signal" => { + config.exit_with = Some(ExitWith::Signal); + } + "--no_print_cwd" => { + config.no_print_cwd = true; + } + "--print_stdin" => { + config.print_stdin = true; } _ => { usage(); @@ -61,20 +111,19 @@ fn main() { } } - { - let mut f = open_file(&config.destination_dir); - // first two args are going to be the path to this executable and - // the destination_dir we want to write to. Don't write either of those - // as they'll be non-deterministic. - f.write_fmt(format_args!( - "cwd={}\nargs=\n", - env::current_dir().unwrap().to_string_lossy() - )) - .expect("failed to write to file"); - for arg in &args[2..] { - f.write_fmt(format_args!("{}\n", arg)) - .expect("failed to write to file"); - } + if let Some(destination_dir) = &config.destination_dir { + write_content(open_file(destination_dir), &config, &args); + } else { + write_content(stdout(), &config, &args); + } + + match config.exit_with { + None => std::process::exit(0), + Some(ExitWith::Failure) => std::process::exit(2), + Some(ExitWith::UrgentFailure) => std::process::exit(255), + #[cfg(unix)] + Some(ExitWith::Signal) => unsafe { + uucore::libc::raise(uucore::libc::SIGINT); + }, } - std::process::exit(if config.exit_with_failure { 2 } else { 0 }); } diff --git a/src/xargs/main.rs b/src/xargs/main.rs new file mode 100644 index 00000000..0607801c --- /dev/null +++ b/src/xargs/main.rs @@ -0,0 +1,12 @@ +// Copyright 2021 Collabora, Ltd. +// +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +fn main() { + let args = std::env::args().collect::>(); + std::process::exit(findutils::xargs::xargs_main( + &args.iter().map(|s| s.as_ref()).collect::>(), + )) +} diff --git a/src/xargs/mod.rs b/src/xargs/mod.rs new file mode 100644 index 00000000..322683dd --- /dev/null +++ b/src/xargs/mod.rs @@ -0,0 +1,1202 @@ +// Copyright 2021 Collabora, Ltd. +// +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +use std::{ + collections::HashMap, + error::Error, + ffi::{OsStr, OsString}, + fmt::Display, + fs, + io::{self, BufRead, BufReader, Read}, + process::{Command, Stdio}, +}; + +use clap::{crate_version, App, AppSettings, Arg}; + +mod options { + pub const COMMAND: &str = "COMMAND"; + + pub const ARG_FILE: &str = "arg-file"; + pub const DELIMITER: &str = "delimiter"; + pub const EXIT: &str = "exit"; + pub const MAX_ARGS: &str = "max-args"; + pub const MAX_LINES: &str = "max-lines"; + pub const MAX_PROCS: &str = "max-procs"; + pub const NO_RUN_IF_EMPTY: &str = "no-run-if-empty"; + pub const NULL: &str = "null"; + pub const SIZE: &str = "size"; + pub const VERBOSE: &str = "verbose"; +} + +struct Options { + arg_file: Option, + delimiter: Option, + exit_if_pass_char_limit: bool, + max_args: Option, + max_lines: Option, + no_run_if_empty: bool, + null: bool, + size: Option, + verbose: bool, +} + +#[derive(Debug, PartialEq, Eq)] +enum ArgumentKind { + /// An argument provided as part of the initial command line. + Initial, + /// An argument that was terminated by a newline or custom delimiter. + HardTerminated, + /// An argument that was terminated by non-newline whitespace. + SoftTerminated, +} + +#[derive(Debug, PartialEq, Eq)] +struct Argument { + arg: OsString, + kind: ArgumentKind, +} + +struct ExhaustedCommandSpace { + arg: Argument, + out_of_chars: bool, +} + +/// A "limiter" to constrain the size of a single command line. Given a cursor +/// pointing to the next limiter that should be tried. +trait CommandSizeLimiter { + fn try_arg( + &mut self, + arg: Argument, + cursor: LimiterCursor<'_>, + ) -> Result; + fn dyn_clone(&self) -> Box; +} + +/// A pointer to the next limiter. A limiter should *always* call the cursor's +/// try_next *before* updating its own state, to ensure that all other limiters +/// are okay with the argument first. +struct LimiterCursor<'collection> { + limiters: &'collection mut [Box], +} + +impl LimiterCursor<'_> { + fn try_next(self, arg: Argument) -> Result { + if self.limiters.is_empty() { + Ok(arg) + } else { + let (current, remaining) = self.limiters.split_at_mut(1); + current[0].try_arg( + arg, + LimiterCursor { + limiters: remaining, + }, + ) + } + } +} + +struct LimiterCollection { + limiters: Vec>, +} + +impl LimiterCollection { + fn new() -> LimiterCollection { + LimiterCollection { limiters: vec![] } + } + + fn add(&mut self, limiter: impl CommandSizeLimiter + 'static) { + self.limiters.push(Box::new(limiter)); + } + + fn try_arg(&mut self, arg: Argument) -> Result { + let cursor = LimiterCursor { + limiters: &mut self.limiters[..], + }; + cursor.try_next(arg) + } +} + +impl Clone for LimiterCollection { + fn clone(&self) -> Self { + LimiterCollection { + limiters: self + .limiters + .iter() + .map(|limiter| limiter.dyn_clone()) + .collect(), + } + } +} + +#[cfg(windows)] +fn count_osstr_chars_for_exec(s: &OsStr) -> usize { + use std::os::windows::ffi::OsStrExt; + s.encode_wide().count() +} + +#[cfg(unix)] +fn count_osstr_chars_for_exec(s: &OsStr) -> usize { + use std::os::unix::ffi::OsStrExt; + // Include +1 for the null terminator. + s.as_bytes().len() + 1 +} + +#[derive(Clone)] +struct MaxCharsCommandSizeLimiter { + current_size: usize, + max_chars: usize, +} + +impl MaxCharsCommandSizeLimiter { + fn new(max_chars: usize) -> MaxCharsCommandSizeLimiter { + MaxCharsCommandSizeLimiter { + current_size: 0, + max_chars, + } + } + + #[cfg(windows)] + fn new_system(_env: &HashMap) -> MaxCharsCommandSizeLimiter { + // Taken from the CreateProcess docs, with 1 removed for the null + // terminator. + const MAX_CMDLINE: usize = 32767 - 1; + MaxCharsCommandSizeLimiter::new(MAX_CMDLINE) + } + + #[cfg(unix)] + fn new_system(env: &HashMap) -> MaxCharsCommandSizeLimiter { + // POSIX requires that we leave 2048 bytes of space so that the child processes + // can have room to set their own environment variables. + const ARG_HEADROOM: usize = 2048; + let arg_max = unsafe { uucore::libc::sysconf(uucore::libc::_SC_ARG_MAX) } as usize; + + let env_size: usize = env + .iter() + .map(|(var, value)| count_osstr_chars_for_exec(var) + count_osstr_chars_for_exec(value)) + .sum(); + + MaxCharsCommandSizeLimiter::new(arg_max - ARG_HEADROOM - env_size) + } +} + +impl CommandSizeLimiter for MaxCharsCommandSizeLimiter { + fn try_arg( + &mut self, + arg: Argument, + cursor: LimiterCursor<'_>, + ) -> Result { + let chars = count_osstr_chars_for_exec(&arg.arg); + if self.current_size + chars <= self.max_chars { + let arg = cursor.try_next(arg)?; + self.current_size += chars; + Ok(arg) + } else { + Err(ExhaustedCommandSpace { + arg, + out_of_chars: true, + }) + } + } + + fn dyn_clone(&self) -> Box { + Box::new(self.clone()) + } +} + +#[derive(Clone)] +struct MaxArgsCommandSizeLimiter { + current_args: usize, + max_args: usize, +} + +impl MaxArgsCommandSizeLimiter { + fn new(max_args: usize) -> MaxArgsCommandSizeLimiter { + MaxArgsCommandSizeLimiter { + current_args: 0, + max_args, + } + } +} + +impl CommandSizeLimiter for MaxArgsCommandSizeLimiter { + fn try_arg( + &mut self, + arg: Argument, + cursor: LimiterCursor<'_>, + ) -> Result { + if self.current_args + 1 <= self.max_args { + let arg = cursor.try_next(arg)?; + if arg.kind != ArgumentKind::Initial { + self.current_args += 1; + } + Ok(arg) + } else { + Err(ExhaustedCommandSpace { + arg, + out_of_chars: false, + }) + } + } + + fn dyn_clone(&self) -> Box { + Box::new(self.clone()) + } +} + +#[derive(Clone)] +struct MaxLinesCommandSizeLimiter { + current_line: usize, + max_lines: usize, +} + +impl MaxLinesCommandSizeLimiter { + fn new(max_lines: usize) -> MaxLinesCommandSizeLimiter { + MaxLinesCommandSizeLimiter { + current_line: 1, + max_lines, + } + } +} + +impl CommandSizeLimiter for MaxLinesCommandSizeLimiter { + fn try_arg( + &mut self, + arg: Argument, + cursor: LimiterCursor<'_>, + ) -> Result { + if self.current_line <= self.max_lines { + let arg = cursor.try_next(arg)?; + // The name of this limiter is a bit of a lie: although this limits + // by max "lines", if a custom delimiter is used, xargs uses that + // instead. So, this actually limits based on the max amount of hard + // terminations. + if arg.kind == ArgumentKind::HardTerminated { + self.current_line += 1; + } + Ok(arg) + } else { + Err(ExhaustedCommandSpace { + arg, + out_of_chars: false, + }) + } + } + + fn dyn_clone(&self) -> Box { + Box::new(self.clone()) + } +} + +enum CommandResult { + Success, + Failure, +} + +impl CommandResult { + fn combine(&mut self, other: CommandResult) { + if matches!(*self, CommandResult::Success) { + *self = other; + } + } +} + +#[derive(Debug)] +enum CommandExecutionError { + // exit code 255 + UrgentlyFailed, + Killed { signal: i32 }, + CannotRun(io::Error), + NotFound, + Unknown, +} + +impl Display for CommandExecutionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CommandExecutionError::UrgentlyFailed => write!(f, "Command exited with code 255"), + CommandExecutionError::Killed { signal } => { + write!(f, "Command was killed with signal {}", signal) + } + CommandExecutionError::CannotRun(err) => write!(f, "Command could not be run: {}", err), + CommandExecutionError::NotFound => write!(f, "Command not found"), + CommandExecutionError::Unknown => write!(f, "Unknown error running command"), + } + } +} + +impl Error for CommandExecutionError {} + +enum ExecAction { + Command(Vec), + Echo, +} + +struct CommandBuilderOptions { + action: ExecAction, + env: HashMap, + limiters: LimiterCollection, + verbose: bool, + close_stdin: bool, +} + +impl CommandBuilderOptions { + fn new( + action: ExecAction, + env: HashMap, + mut limiters: LimiterCollection, + ) -> Result { + let initial_args = match &action { + ExecAction::Command(args) => args.iter().map(|arg| arg.as_ref()).collect(), + ExecAction::Echo => vec![OsStr::new("echo")], + }; + + for arg in initial_args { + limiters.try_arg(Argument { + arg: arg.to_owned(), + kind: ArgumentKind::Initial, + })?; + } + + Ok(CommandBuilderOptions { + action, + env, + limiters, + verbose: false, + close_stdin: false, + }) + } +} + +struct CommandBuilder<'options> { + options: &'options CommandBuilderOptions, + extra_args: Vec, + limiters: LimiterCollection, +} + +impl CommandBuilder<'_> { + fn new(options: &CommandBuilderOptions) -> CommandBuilder<'_> { + CommandBuilder { + options, + extra_args: vec![], + limiters: options.limiters.clone(), + } + } + + fn add_arg(&mut self, arg: Argument) -> Result<(), ExhaustedCommandSpace> { + let arg = self.limiters.try_arg(arg)?; + self.extra_args.push(arg.arg); + Ok(()) + } + + fn execute(self) -> Result { + let (entry_point, initial_args): (&OsStr, &[OsString]) = match &self.options.action { + ExecAction::Command(args) => (&args[0], &args[1..]), + ExecAction::Echo => (OsStr::new("echo"), &[]), + }; + + let mut command = Command::new(entry_point); + command + .args(initial_args) + .args(&self.extra_args) + .env_clear() + .envs(&self.options.env); + if self.options.close_stdin { + command.stdin(Stdio::null()); + } + if self.options.verbose { + eprintln!("{:?}", command); + } + + match &self.options.action { + ExecAction::Command(_) => match command.status() { + Ok(status) => { + if status.success() { + Ok(CommandResult::Success) + } else if let Some(err) = status.code() { + if err == 255 { + Err(CommandExecutionError::UrgentlyFailed) + } else { + Ok(CommandResult::Failure) + } + } else { + #[cfg(unix)] + { + use std::os::unix::process::ExitStatusExt; + if let Some(signal) = status.signal() { + Err(CommandExecutionError::Killed { signal }) + } else { + Err(CommandExecutionError::Unknown) + } + } + + #[cfg(not(unix))] + Err(CommandExecutionError::Unknown) + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Err(CommandExecutionError::NotFound) + } + Err(e) => Err(CommandExecutionError::CannotRun(e)), + }, + ExecAction::Echo => { + println!( + "{}", + self.extra_args + .iter() + .map(|arg| arg.to_string_lossy()) + .collect::>() + .join(" ") + ); + Ok(CommandResult::Success) + } + } + } +} + +trait ArgumentReader { + fn next(&mut self) -> io::Result>; +} + +struct WhitespaceDelimitedArgumentReader { + rd: R, + pending: Vec, +} + +impl WhitespaceDelimitedArgumentReader +where + R: Read, +{ + fn new(rd: R) -> Self { + WhitespaceDelimitedArgumentReader { + rd, + pending: vec![], + } + } +} + +impl ArgumentReader for WhitespaceDelimitedArgumentReader +where + R: Read, +{ + fn next(&mut self) -> io::Result> { + let mut result = vec![]; + let mut terminated_by_newline = false; + + let mut pending = vec![]; + std::mem::swap(&mut pending, &mut self.pending); + + enum Escape { + Slash, + Quote(u8), + } + + let mut escape: Option = None; + let mut i = 0; + loop { + if i == pending.len() { + pending.resize(4096, 0); + // Already hit the end of our buffer, so read in some more data. + let bytes_read = loop { + match self.rd.read(&mut pending[..]) { + Ok(bytes_read) => break bytes_read, + Err(e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e), + } + }; + + if bytes_read == 0 { + if let Some(Escape::Quote(q)) = &escape { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("Unterminated quote: {}", q), + )); + } else if i == 0 { + return Ok(None); + } else { + pending.clear(); + break; + } + } + + pending.resize(bytes_read, 0); + i = 0; + } + + match (&escape, pending[i]) { + (Some(Escape::Quote(quote)), c) if c == *quote => escape = None, + (Some(Escape::Quote(_)), c) => result.push(c), + (Some(Escape::Slash), c) => { + result.push(c); + escape = None; + } + (None, c @ (b'"' | b'\'')) => escape = Some(Escape::Quote(c)), + (None, b'\\') => escape = Some(Escape::Slash), + (None, c) if c.is_ascii_whitespace() => { + if !result.is_empty() { + terminated_by_newline = c == b'\n'; + break; + } + } + (None, c) => result.push(c), + } + + i += 1; + } + + if i < pending.len() { + self.pending = pending.split_off(i + 1); + } + + Ok(Some(Argument { + arg: String::from_utf8_lossy(&result[..]).into_owned().into(), + kind: if terminated_by_newline { + ArgumentKind::HardTerminated + } else { + ArgumentKind::SoftTerminated + }, + })) + } +} + +struct ByteDelimitedArgumentReader { + rd: BufReader, + delimiter: u8, +} + +impl ByteDelimitedArgumentReader +where + R: Read, +{ + fn new(rd: R, delimiter: u8) -> Self { + ByteDelimitedArgumentReader { + rd: BufReader::new(rd), + delimiter, + } + } +} + +impl ArgumentReader for ByteDelimitedArgumentReader +where + R: Read, +{ + fn next(&mut self) -> io::Result> { + Ok(loop { + let mut buf = vec![]; + let bytes_read = self.rd.read_until(self.delimiter, &mut buf)?; + if bytes_read > 0 { + let need_to_trim_delimiter = buf[buf.len() - 1] == self.delimiter; + let bytes = if need_to_trim_delimiter { + if buf.len() == 1 { + // This was *only* a delimiter, so we didn't actually + // read anything interesting. Try again. + continue; + } + + &buf[..buf.len() - 1] + } else { + &buf[..] + }; + break Some(Argument { + arg: String::from_utf8_lossy(bytes).into_owned().into(), + kind: ArgumentKind::HardTerminated, + }); + } else { + break None; + } + }) + } +} + +#[derive(Debug)] +enum XargsError { + ArgumentTooLarge, + CommandExecution(CommandExecutionError), + Io(io::Error), + Untyped(String), +} + +impl Display for XargsError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + XargsError::ArgumentTooLarge => write!(f, "Argument too large"), + XargsError::CommandExecution(e) => write!(f, "{}", e), + XargsError::Io(e) => write!(f, "{}", e), + XargsError::Untyped(s) => write!(f, "{}", s), + } + } +} + +impl Error for XargsError {} + +impl From for XargsError { + fn from(s: String) -> Self { + XargsError::Untyped(s) + } +} + +impl From<&'_ str> for XargsError { + fn from(s: &'_ str) -> Self { + s.to_owned().into() + } +} + +impl From for XargsError { + fn from(e: CommandExecutionError) -> Self { + XargsError::CommandExecution(e) + } +} + +impl From for XargsError { + fn from(e: io::Error) -> Self { + XargsError::Io(e) + } +} + +fn process_input( + builder_options: CommandBuilderOptions, + mut args: Box, + options: &Options, +) -> Result { + let mut current_builder = CommandBuilder::new(&builder_options); + let mut have_pending_command = false; + let mut result = CommandResult::Success; + + while let Some(arg) = args.next()? { + if let Err(ExhaustedCommandSpace { arg, out_of_chars }) = current_builder.add_arg(arg) { + if out_of_chars + && options.exit_if_pass_char_limit + && (options.max_args.is_some() || options.max_lines.is_some()) + { + return Err(XargsError::ArgumentTooLarge); + } else if have_pending_command { + result.combine(current_builder.execute()?); + } + + current_builder = CommandBuilder::new(&builder_options); + if let Err(ExhaustedCommandSpace { .. }) = current_builder.add_arg(arg) { + return Err(XargsError::ArgumentTooLarge); + } + } + + have_pending_command = true; + } + + if !options.no_run_if_empty || have_pending_command { + result.combine(current_builder.execute()?); + } + + Ok(result) +} + +fn parse_delimiter(s: &str) -> Result { + if let Some(hex) = s.strip_prefix("\\x") { + u8::from_str_radix(hex, 16).map_err(|e| format!("Invalid hex sequence: {}", e)) + } else if let Some(oct) = s.strip_prefix("\\0") { + u8::from_str_radix(oct, 8).map_err(|e| format!("Invalid octal sequence: {}", e)) + } else if let Some(special) = s.strip_prefix("\\") { + match special { + "a" => Ok(b'\x07'), + "b" => Ok(b'\x08'), + "f" => Ok(b'\x0C'), + "n" => Ok(b'\n'), + "r" => Ok(b'\r'), + "t" => Ok(b'\t'), + "v" => Ok(b'\x0B'), + "0" => Ok(b'\0'), + "\\" => Ok(b'\\'), + _ => Err(format!("Invalid escape sequence: {}", s)), + } + } else { + let bytes = s.as_bytes(); + if bytes.len() == 1 { + Ok(bytes[0]) + } else { + Err("Delimiter must be one byte".to_owned()) + } + } +} + +fn validate_positive_usize(s: String) -> Result<(), String> { + match s.parse::() { + Ok(v) if v > 0 => Ok(()), + Ok(v) => Err(format!("Value must be > 0, not: {}", v)), + Err(e) => Err(e.to_string()), + } +} + +fn do_xargs(args: &[&str]) -> Result { + let matches = App::new("xargs") + .version(crate_version!()) + .about("Run commands using arguments derived from standard input") + .settings(&[AppSettings::TrailingVarArg]) + .arg( + Arg::with_name(options::COMMAND) + .takes_value(true) + .multiple(true) + .help("The command to run"), + ) + .arg( + Arg::with_name(options::ARG_FILE) + .short("a") + .long(options::ARG_FILE) + .takes_value(true) + .help("Read arguments from the given file instead of stdin"), + ) + .arg( + Arg::with_name(options::DELIMITER) + .short("d") + .long(options::DELIMITER) + .takes_value(true) + .validator(|s| parse_delimiter(&s).map(|_| ())) + .help("Use the given delimiter to split the input"), + ) + .arg( + Arg::with_name(options::EXIT) + .short("x") + .long(options::EXIT) + .help( + "Exit if the number of arguments allowed by -L or -n do not \ + fit into the number of allowed characters", + ), + ) + .arg( + Arg::with_name(options::MAX_ARGS) + .short("n") + .takes_value(true) + .long(options::MAX_ARGS) + .validator(validate_positive_usize) + .help( + "Set the max number of arguments read from stdin to be passed \ + to each command invocation (mutually exclusive with -L)", + ), + ) + .arg( + Arg::with_name(options::MAX_LINES) + .short("L") + .takes_value(true) + .validator(validate_positive_usize) + .help( + "Set the max number of lines from stdin to be passed to each \ + command invocation (mutually exclusive with -n)", + ), + ) + .arg( + Arg::with_name(options::MAX_PROCS) + .short("P") + .takes_value(true) + .long(options::MAX_PROCS) + .help("Run up to this many commands in parallel [NOT IMPLEMENTED]"), + ) + .arg( + Arg::with_name(options::NO_RUN_IF_EMPTY) + .short("r") + .long(options::NO_RUN_IF_EMPTY) + .help("If there are no input arguments, do not run the command at all"), + ) + .arg( + Arg::with_name(options::NULL) + .short("0") + .long(options::NULL) + .help("Split the input by null terminators rather than whitespace"), + ) + .arg( + Arg::with_name(options::SIZE) + .short("s") + .long(options::SIZE) + .takes_value(true) + .validator(validate_positive_usize) + .help( + "Set the max number of characters to be passed to each \ + invocation", + ), + ) + .arg( + Arg::with_name(options::VERBOSE) + .short("t") + .long(options::VERBOSE) + .help("Be verbose"), + ) + .get_matches_from(args); + + let options = Options { + arg_file: matches + .value_of(options::ARG_FILE) + .map(|value| value.to_owned()), + delimiter: matches + .value_of(options::DELIMITER) + .map(|value| parse_delimiter(value).unwrap()), + exit_if_pass_char_limit: matches.is_present(options::EXIT), + max_args: matches + .value_of(options::MAX_ARGS) + .map(|value| value.parse().unwrap()), + max_lines: matches + .value_of(options::MAX_LINES) + .map(|value| value.parse().unwrap()), + no_run_if_empty: matches.is_present(options::NO_RUN_IF_EMPTY), + null: matches.is_present(options::NULL), + size: matches + .value_of(options::SIZE) + .map(|value| value.parse().unwrap()), + verbose: matches.is_present(options::VERBOSE), + }; + + let delimiter = match (options.delimiter, options.null) { + (Some(delimiter), true) => { + if matches.indices_of(options::NULL).unwrap().last() + > matches.indices_of(options::DELIMITER).unwrap().last() + { + Some(b'\0') + } else { + Some(delimiter) + } + } + (Some(delimiter), false) => Some(delimiter), + (None, true) => Some(b'\0'), + (None, false) => None, + }; + + let action = match matches.values_of_os(options::COMMAND) { + Some(args) if args.len() > 0 => { + ExecAction::Command(args.map(|arg| arg.to_owned()).collect()) + } + _ => ExecAction::Echo, + }; + + let env = std::env::vars_os().collect(); + + let mut limiters = LimiterCollection::new(); + + match (options.max_args, options.max_lines) { + (Some(max_args), Some(max_lines)) => { + eprintln!( + "WARNING: Both --{} and -L were given; last option will be used", + options::MAX_ARGS, + ); + if matches.indices_of(options::MAX_LINES).unwrap().last() + > matches.indices_of(options::MAX_ARGS).unwrap().last() + { + limiters.add(MaxLinesCommandSizeLimiter::new(max_lines)); + } else { + limiters.add(MaxArgsCommandSizeLimiter::new(max_args)); + } + } + (Some(max_args), None) => limiters.add(MaxArgsCommandSizeLimiter::new(max_args)), + (None, Some(max_lines)) => limiters.add(MaxLinesCommandSizeLimiter::new(max_lines)), + (None, None) => (), + } + + if let Some(max_chars) = options.size { + limiters.add(MaxCharsCommandSizeLimiter::new(max_chars)); + } + + limiters.add(MaxCharsCommandSizeLimiter::new_system(&env)); + + let mut builder_options = CommandBuilderOptions::new(action, env, limiters).map_err(|_| { + "Base command and environment are too large to fit into one command execution" + })?; + + builder_options.verbose = options.verbose; + builder_options.close_stdin = options.arg_file.is_none(); + + let args_file: Box = if let Some(path) = &options.arg_file { + Box::new(fs::File::open(path).map_err(|e| format!("Failed to open {}: {}", path, e))?) + } else { + Box::new(io::stdin()) + }; + + let args: Box = if let Some(delimiter) = delimiter { + Box::new(ByteDelimitedArgumentReader::new(args_file, delimiter)) + } else { + Box::new(WhitespaceDelimitedArgumentReader::new(args_file)) + }; + + let result = process_input(builder_options, args, &options)?; + Ok(result) +} + +pub fn xargs_main(args: &[&str]) -> i32 { + match do_xargs(args) { + Ok(CommandResult::Success) => 0, + Ok(CommandResult::Failure) => 123, + Err(e) => { + eprintln!("Error: {}", e); + if let XargsError::CommandExecution(cx) = e { + match cx { + CommandExecutionError::UrgentlyFailed => 124, + CommandExecutionError::Killed { .. } => 125, + CommandExecutionError::CannotRun(_) => 126, + CommandExecutionError::NotFound => 127, + CommandExecutionError::Unknown => 1, + } + } else { + 1 + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_arg_init(s: &str) -> Argument { + Argument { + arg: s.to_owned().into(), + kind: ArgumentKind::Initial, + } + } + + fn make_arg_hard(s: &str) -> Argument { + Argument { + arg: s.to_owned().into(), + kind: ArgumentKind::HardTerminated, + } + } + + fn make_arg_soft(s: &str) -> Argument { + Argument { + arg: s.to_owned().into(), + kind: ArgumentKind::SoftTerminated, + } + } + + #[derive(Clone)] + struct AlwaysRejectLimiter; + + impl CommandSizeLimiter for AlwaysRejectLimiter { + fn try_arg( + &mut self, + arg: Argument, + _cursor: LimiterCursor<'_>, + ) -> Result { + Err(ExhaustedCommandSpace { + arg, + out_of_chars: false, + }) + } + + fn dyn_clone(&self) -> Box { + Box::new(self.clone()) + } + } + + fn empty_cursor() -> LimiterCursor<'static> { + LimiterCursor { limiters: &mut [] } + } + + enum Chunk { + Data(&'static [u8]), + Error(io::ErrorKind), + } + + struct ChunkReader { + chunks: Vec, + current: usize, + } + + impl ChunkReader { + fn new(chunks: Vec) -> ChunkReader { + ChunkReader { chunks, current: 0 } + } + } + + impl Read for ChunkReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.current >= self.chunks.len() { + return Ok(0); + } + + match &mut self.chunks[self.current] { + Chunk::Data(data) => { + let byte_count = std::cmp::min(data.len(), buf.len()); + buf[..byte_count].copy_from_slice(&(*data)[..byte_count]); + if byte_count == data.len() { + self.current += 1; + } else { + *data = &(*data)[byte_count..]; + } + + Ok(byte_count) + } + Chunk::Error(kind) => { + self.current += 1; + Err(io::Error::new(*kind, "Synthesized error")) + } + } + } + } + + #[test] + fn test_chars_limiter() { + let mut limiter = MaxCharsCommandSizeLimiter::new(6); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_hard("abcd"), empty_cursor()) + .is_err()); + assert!(limiter.try_arg(make_arg_hard("a"), empty_cursor()).is_ok()); + } + + #[test] + fn test_chars_limiter_asks_cursor() { + let mut rejects: [Box; 1] = [Box::new(AlwaysRejectLimiter)]; + let reject_cursor = LimiterCursor { + limiters: &mut rejects, + }; + + let mut limiter = MaxCharsCommandSizeLimiter::new(5); + assert!(limiter + .try_arg(make_arg_hard("abc"), reject_cursor) + .is_err()); + // Ensure the limiter didn't update before trying the cursor. + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + } + + #[test] + fn test_args_limiter() { + let mut limiter = MaxArgsCommandSizeLimiter::new(2); + // Should not count initial arguments. + for _ in 1..3 { + assert!(limiter + .try_arg(make_arg_init("abc"), empty_cursor()) + .is_ok()); + } + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_err()); + } + + #[test] + fn test_args_limiter_asks_cursor() { + let mut rejects: [Box; 1] = [Box::new(AlwaysRejectLimiter)]; + let reject_cursor = LimiterCursor { + limiters: &mut rejects, + }; + + let mut limiter = MaxArgsCommandSizeLimiter::new(1); + assert!(limiter + .try_arg(make_arg_hard("abc"), reject_cursor) + .is_err()); + // Ensure the limiter didn't update before trying the cursor. + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + } + + #[test] + fn test_lines_limiter() { + let mut limiter = MaxLinesCommandSizeLimiter::new(2); + assert!(limiter + .try_arg(make_arg_soft("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_soft("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_soft("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_soft("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + assert!(limiter + .try_arg(make_arg_soft("abc"), empty_cursor()) + .is_err()); + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_err()); + } + + #[test] + fn test_lines_limiter_asks_cursor() { + let mut rejects: [Box; 1] = [Box::new(AlwaysRejectLimiter)]; + let reject_cursor = LimiterCursor { + limiters: &mut rejects, + }; + + let mut limiter = MaxLinesCommandSizeLimiter::new(1); + assert!(limiter + .try_arg(make_arg_hard("abc"), reject_cursor) + .is_err()); + // Ensure the limiter didn't update before trying the cursor. + assert!(limiter + .try_arg(make_arg_hard("abc"), empty_cursor()) + .is_ok()); + } + + #[test] + fn test_whitespace_delimited_reader() { + let mut reader = WhitespaceDelimitedArgumentReader::new(ChunkReader::new(vec![ + Chunk::Data(b"abc "), + Chunk::Data(b" def"), + Chunk::Data(b"\nghi\t\tj"), + Chunk::Data(b"kl\n"), + Chunk::Data(b"mn"), + Chunk::Error(io::ErrorKind::Interrupted), + Chunk::Data(b"\\\t\\ o 'ab"), + Chunk::Data(b" \"' \"xy' z\""), + ])); + + assert_eq!(reader.next().unwrap().unwrap(), make_arg_soft("abc")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("def")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_soft("ghi")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("jkl")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_soft("mn\t o")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_soft("ab \"")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_soft("xy' z")); + assert_eq!(reader.next().unwrap(), None); + } + + #[test] + fn test_byte_delimited_reader() { + let mut reader = ByteDelimitedArgumentReader::new( + ChunkReader::new(vec![ + Chunk::Data(b"ab"), + Chunk::Error(io::ErrorKind::Interrupted), + Chunk::Data(b"c!de!"), + Chunk::Data(b"!ef!!gh"), + Chunk::Data(b"!ij"), + ]), + b'!', + ); + + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("abc")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("de")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("ef")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("gh")); + assert_eq!(reader.next().unwrap().unwrap(), make_arg_hard("ij")); + assert_eq!(reader.next().unwrap(), None); + } + + #[test] + fn test_delimiter_parsing() { + assert_eq!(parse_delimiter("a").unwrap(), b'a'); + assert_eq!(parse_delimiter("\\x61").unwrap(), b'a'); + assert_eq!(parse_delimiter("\\x00061").unwrap(), b'a'); + assert_eq!(parse_delimiter("\\0141").unwrap(), b'a'); + assert_eq!(parse_delimiter("\\0000141").unwrap(), b'a'); + assert_eq!(parse_delimiter("\\n").unwrap(), b'\n'); + + assert!(parse_delimiter("\\0").is_err()); + assert!(parse_delimiter("\\x").is_err()); + assert!(parse_delimiter("\\").is_err()); + assert!(parse_delimiter("abc").is_err()); + } +} diff --git a/tests/xargs_tests.rs b/tests/xargs_tests.rs new file mode 100644 index 00000000..44b6012a --- /dev/null +++ b/tests/xargs_tests.rs @@ -0,0 +1,325 @@ +// Copyright 2021 Collabora, Ltd. +// +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +/// ! This file contains integration tests for xargs, separate from the unit +/// ! tests so that testing-commandline can be built first. +extern crate findutils; +extern crate tempfile; + +use std::io::{Seek, SeekFrom, Write}; + +use assert_cmd::Command; +use predicates::prelude::*; + +use common::test_helpers::*; + +mod common; + +#[test] +fn xargs_basics() { + Command::cargo_bin("xargs") + .expect("found binary") + .write_stdin("abc\ndef g\\hi 'i j \"k'") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("abc def ghi i j \"k\n")); +} + +#[test] +fn xargs_null() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["-0n1"]) + .write_stdin("ab c\0d\tef\0") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab c\nd\tef\n")); +} + +#[test] +fn xargs_delim() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["-d1"]) + .write_stdin("ab1cd1ef") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd ef\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["-d\\t", "-n1"]) + .write_stdin("a\nb\td e\tfg") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("a\nb\nd e\nfg\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["-dabc"]) + .assert() + .failure() + .code(1) + .stderr(predicate::str::contains("Invalid")) + .stdout(predicate::str::is_empty()); +} + +#[test] +fn xargs_null_conflict() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["-d\t", "-0n1"]) + .write_stdin("ab c\0d\tef\0") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab c\nd\tef\n")); +} + +#[test] +fn xargs_if_empty() { + Command::cargo_bin("xargs") + .expect("found binary") + .assert() + .success() + .stderr(predicate::str::is_empty()) + // Should echo at least once still. + .stdout(predicate::eq("\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["--no-run-if-empty"]) + .assert() + .success() + .stderr(predicate::str::is_empty()) + // Should never echo. + .stdout(predicate::str::is_empty()); +} + +#[test] +fn xargs_max_args() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-n2"]) + .write_stdin("ab cd ef\ngh i") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd\nef gh\ni\n")); +} + +#[test] +fn xargs_max_lines() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-L2"]) + .write_stdin("ab cd\nef\ngh i\n\njkl\n") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd ef\ngh i jkl\n")); +} + +#[test] +fn xargs_max_args_lines_conflict() { + Command::cargo_bin("xargs") + .expect("found binary") + // -n2 is last, so it should be given priority. + .args(["-L2", "-n2"]) + .write_stdin("ab cd ef\ngh i") + .assert() + .success() + .stderr(predicate::str::contains("WARNING")) + .stdout(predicate::str::diff("ab cd\nef gh\ni\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + // -L2 is last, so it should be given priority. + .args(["-n2", "-L2"]) + .write_stdin("ab cd\nef\ngh i\n\njkl\n") + .assert() + .success() + .stderr(predicate::str::contains("WARNING")) + .stdout(predicate::str::diff("ab cd ef\ngh i jkl\n")); +} + +#[test] +fn xargs_max_chars() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-s11"]) + .write_stdin("ab cd efg") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd\nefg\n")); + + // Behavior should be the same with -x, which only takes effect with -L or + // -n. + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-xs11"]) + .write_stdin("ab cd efg") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd\nefg\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-s10"]) + .write_stdin("abcdefghijkl ab") + .assert() + .failure() + .code(1) + .stderr(predicate::str::contains("Error:")) + .stdout(predicate::str::is_empty()); +} + +#[test] +fn xargs_exit_on_large() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-xs11", "-n2"]) + .write_stdin("ab cd efg h i") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff("ab cd\nefg h\ni\n")); + + Command::cargo_bin("xargs") + .expect("found binary") + .args(["-xs11", "-n2"]) + .write_stdin("abcdefg hijklmn") + .assert() + .failure() + .code(1) + .stderr(predicate::str::contains("Error:")) + .stdout(predicate::str::is_empty()); +} + +#[test] +fn xargs_exec() { + Command::cargo_bin("xargs") + .expect("found binary") + .args([ + "-n2", + &path_to_testing_commandline(), + "-", + "--print_stdin", + "--no_print_cwd", + ]) + .write_stdin("a b c\nd") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff( + "stdin=\nargs=\n--print_stdin\n--no_print_cwd\na\nb\n\ + stdin=\nargs=\n--print_stdin\n--no_print_cwd\nc\nd\n", + )); +} + +#[test] +fn xargs_exec_stdin_open() { + let mut temp_file = tempfile::NamedTempFile::new().unwrap(); + + write!(temp_file, "a b c").unwrap(); + temp_file.seek(SeekFrom::Start(0)).unwrap(); + + Command::cargo_bin("xargs") + .expect("found binary") + .args([ + "-a", + &temp_file.path().to_string_lossy(), + &path_to_testing_commandline(), + "-", + "--print_stdin", + "--no_print_cwd", + ]) + .write_stdin("test") + .assert() + .success() + .stderr(predicate::str::is_empty()) + .stdout(predicate::str::diff( + "stdin=test\nargs=\n--print_stdin\n--no_print_cwd\na\nb\nc\n", + )); +} + +#[test] +fn xargs_exec_failure() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&[ + "-n1", + &path_to_testing_commandline(), + "-", + "--no_print_cwd", + "--exit_with_failure", + ]) + .write_stdin("a b") + .assert() + .failure() + .code(123) + .stderr(predicate::str::is_empty()) + .stdout( + "args=\n--no_print_cwd\n--exit_with_failure\na\n\ + args=\n--no_print_cwd\n--exit_with_failure\nb\n", + ); +} + +#[test] +fn xargs_exec_urgent_failure() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&[ + "-n1", + &path_to_testing_commandline(), + "-", + "--no_print_cwd", + "--exit_with_urgent_failure", + ]) + .write_stdin("a b") + .assert() + .failure() + .code(124) + .stderr(predicate::str::contains("Error:")) + .stdout("args=\n--no_print_cwd\n--exit_with_urgent_failure\na\n"); +} + +#[test] +fn xargs_exec_with_signal() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&[ + "-n1", + &path_to_testing_commandline(), + "-", + "--no_print_cwd", + "--exit_with_signal", + ]) + .write_stdin("a b") + .assert() + .failure() + .code(125) + .stderr(predicate::str::contains("Error:")) + .stdout("args=\n--no_print_cwd\n--exit_with_signal\na\n"); +} + +#[test] +fn xargs_exec_not_found() { + Command::cargo_bin("xargs") + .expect("found binary") + .args(&["this-file-does-not-exist"]) + .assert() + .failure() + .code(127) + .stderr(predicate::str::contains("Error:")) + .stdout(predicate::str::is_empty()); +}