Skip to content

Commit f3b7e0b

Browse files
andrewliebenow authored and sylvestre committed
printf: accept non-UTF-8 input in FORMAT and ARGUMENT arguments
Other implementations of `printf` permit arbitrary data to be passed to `printf`. The only restriction is that a null byte terminates FORMAT and ARGUMENT argument strings (since they are C strings).

The current implementation only accepts FORMAT and ARGUMENT arguments that are valid UTF-8 (this is enforced by clap). This commit removes the UTF-8 validation by switching to OsStr and OsString.

This allows users to use `printf` to transmit or reformat null-safe but not UTF-8-safe data, such as text encoded in an 8-bit text encoding. See the `non_utf_8_input` test for an example (ISO-8859-1 text).

[drinkcat: also squashed in this commit to ease rebase]

Author: Justin Tracey <[email protected]>

uucore, printf: improve non-UTF-8 format arguments

This fixes handling of format arguments, in part by eliminating duplicate implementations. Utilities with format arguments other than printf will no longer accept things like "'a" as numbers, etc.
1 parent dd1b315 commit f3b7e0b

File tree

10 files changed

+388
-223
lines changed

10 files changed

+388
-223
lines changed

src/uu/echo/src/echo.rs

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
use clap::builder::ValueParser;
77
use clap::{Arg, ArgAction, Command};
88
use std::env;
9-
use std::ffi::{OsStr, OsString};
9+
use std::ffi::OsString;
1010
use std::io::{self, StdoutLock, Write};
11-
use uucore::error::{UResult, USimpleError};
11+
use uucore::error::UResult;
1212
use uucore::format::{FormatChar, OctalParsing, parse_escape_only};
13-
use uucore::format_usage;
13+
use uucore::{format_usage, os_str_as_bytes};
1414

1515
use uucore::locale::get_message;
1616

@@ -176,10 +176,9 @@ fn execute(
176176
escaped: bool,
177177
) -> UResult<()> {
178178
for (i, input) in arguments_after_options.into_iter().enumerate() {
179-
let Some(bytes) = bytes_from_os_string(input.as_os_str()) else {
180-
return Err(USimpleError::new(1, get_message("echo-error-non-utf8")));
181-
};
179+
let bytes = os_str_as_bytes(&input)?;
182180

181+
// Don't print a space before the first argument
183182
if i > 0 {
184183
stdout_lock.write_all(b" ")?;
185184
}
@@ -201,19 +200,3 @@ fn execute(
201200

202201
Ok(())
203202
}
204-
205-
fn bytes_from_os_string(input: &OsStr) -> Option<&[u8]> {
206-
#[cfg(target_family = "unix")]
207-
{
208-
use std::os::unix::ffi::OsStrExt;
209-
210-
Some(input.as_bytes())
211-
}
212-
213-
#[cfg(not(target_family = "unix"))]
214-
{
215-
// TODO
216-
// Verify that this works correctly on these platforms
217-
input.to_str().map(|st| st.as_bytes())
218-
}
219-
}

src/uu/printf/src/printf.rs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// file that was distributed with this source code.
55
use clap::{Arg, ArgAction, Command};
66
use std::collections::HashMap;
7+
use std::ffi::OsString;
78
use std::io::stdout;
89
use std::ops::ControlFlow;
910
use uucore::error::{UResult, UUsageError};
@@ -18,21 +19,19 @@ mod options {
1819
pub const FORMAT: &str = "FORMAT";
1920
pub const ARGUMENT: &str = "ARGUMENT";
2021
}
22+
2123
#[uucore::main]
2224
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
2325
let matches = uu_app().get_matches_from(args);
2426

2527
let format = matches
26-
.get_one::<std::ffi::OsString>(options::FORMAT)
28+
.get_one::<OsString>(options::FORMAT)
2729
.ok_or_else(|| UUsageError::new(1, get_message("printf-error-missing-operand")))?;
2830
let format = os_str_as_bytes(format)?;
2931

30-
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
31-
// FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
32+
let values: Vec<_> = match matches.get_many::<OsString>(options::ARGUMENT) {
3233
Some(s) => s
33-
.map(|os_string| {
34-
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
35-
})
34+
.map(|os_string| FormatArgument::Unparsed(os_string.to_owned()))
3635
.collect(),
3736
None => vec![],
3837
};
@@ -62,7 +61,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
6261
"{}",
6362
get_message_with_args(
6463
"printf-warning-ignoring-excess-arguments",
65-
HashMap::from([("arg".to_string(), arg_str.to_string())])
64+
HashMap::from([("arg".to_string(), arg_str.to_string_lossy().to_string())])
6665
)
6766
);
6867
}
@@ -103,10 +102,10 @@ pub fn uu_app() -> Command {
103102
.help(get_message("printf-help-version"))
104103
.action(ArgAction::Version),
105104
)
106-
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
105+
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(OsString)))
107106
.arg(
108107
Arg::new(options::ARGUMENT)
109108
.action(ArgAction::Append)
110-
.value_parser(clap::value_parser!(std::ffi::OsString)),
109+
.value_parser(clap::value_parser!(OsString)),
111110
)
112111
}

src/uucore/src/lib/features/checksum.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -968,7 +968,7 @@ fn process_checksum_line(
968968
cached_line_format: &mut Option<LineFormat>,
969969
last_algo: &mut Option<String>,
970970
) -> Result<(), LineCheckError> {
971-
let line_bytes = os_str_as_bytes(line)?;
971+
let line_bytes = os_str_as_bytes(line).map_err(|e| LineCheckError::UError(Box::new(e)))?;
972972

973973
// Early return on empty or commented lines.
974974
if line.is_empty() || line_bytes.starts_with(b"#") {

src/uucore/src/lib/features/extendedbigdecimal.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,18 @@ impl From<f64> for ExtendedBigDecimal {
101101
}
102102
}
103103

104+
impl From<u8> for ExtendedBigDecimal {
105+
fn from(val: u8) -> Self {
106+
Self::BigDecimal(val.into())
107+
}
108+
}
109+
110+
impl From<u32> for ExtendedBigDecimal {
111+
fn from(val: u32) -> Self {
112+
Self::BigDecimal(val.into())
113+
}
114+
}
115+
104116
impl ExtendedBigDecimal {
105117
pub fn zero() -> Self {
106118
Self::BigDecimal(0.into())

0 commit comments

Comments
 (0)