Skip to content

Commit

Permalink
Handle str literals written with ' lexed as lifetime
Browse files Browse the repository at this point in the history
Given `'hello world'` and `'1 str'`, provide a structured suggestion for a valid string literal:

```
error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-3.rs:2:26
   |
LL |     println!('hello world');
   |                          ^^^^
   |
help: if you meant to write a `str` literal, use double quotes
   |
LL |     println!("hello world");
   |              ~           ~
```
```
error[E0762]: unterminated character literal
  --> $DIR/lex-bad-str-literal-as-char-1.rs:2:20
   |
LL |     println!('1 + 1');
   |                    ^^^^
   |
help: if you meant to write a `str` literal, use double quotes
   |
LL |     println!("1 + 1");
   |              ~     ~
```

Fix rust-lang#119685.
  • Loading branch information
estebank committed Mar 9, 2024
1 parent 46b180e commit b4de21f
Show file tree
Hide file tree
Showing 13 changed files with 135 additions and 5 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl<'a> Cursor<'a> {
/// If requested position doesn't exist, `EOF_CHAR` is returned.
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
/// it should be checked with `is_eof` method.
pub(crate) fn first(&self) -> char {
pub fn first(&self) -> char {
// `.next()` optimizes better than `.nth(0)`
self.chars.clone().next().unwrap_or(EOF_CHAR)
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,7 @@ parse_unknown_prefix = prefix `{$prefix}` is unknown
.label = unknown prefix
.note = prefixed identifiers and literals are reserved since Rust 2021
.suggestion_br = use `br` for a raw byte string
.suggestion_str = if you meant to write a `str` literal, use double quotes
.suggestion_whitespace = consider inserting whitespace here
parse_unknown_start_of_token = unknown start of token: {$escaped}
Expand Down
11 changes: 11 additions & 0 deletions compiler/rustc_parse/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1994,6 +1994,17 @@ pub enum UnknownPrefixSugg {
style = "verbose"
)]
Whitespace(#[primary_span] Span),
#[multipart_suggestion(
parse_suggestion_str,
applicability = "maybe-incorrect",
style = "verbose"
)]
MeantStr {
#[suggestion_part(code = "\"")]
start: Span,
#[suggestion_part(code = "\"")]
end: Span,
},
}

#[derive(Diagnostic)]
Expand Down
51 changes: 47 additions & 4 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ pub(crate) fn parse_token_trees<'psess, 'src>(
cursor,
override_span,
nbsp_is_whitespace: false,
last_lifetime: None,
};
let (stream, res, unmatched_delims) =
tokentrees::TokenTreesReader::parse_all_token_trees(string_reader);
Expand Down Expand Up @@ -105,6 +106,10 @@ struct StringReader<'psess, 'src> {
/// in this file, it's safe to treat further occurrences of the non-breaking
/// space character as whitespace.
nbsp_is_whitespace: bool,

/// Track the `Span` for the leading `'` of the last lifetime. Used for
/// diagnostics to detect a possible typo where `"` was meant.
last_lifetime: Option<Span>,
}

impl<'psess, 'src> StringReader<'psess, 'src> {
Expand All @@ -130,6 +135,23 @@ impl<'psess, 'src> StringReader<'psess, 'src> {

debug!("next_token: {:?}({:?})", token.kind, self.str_from(start));

if let rustc_lexer::TokenKind::Semi
| rustc_lexer::TokenKind::LineComment { .. }
| rustc_lexer::TokenKind::BlockComment { .. }
| rustc_lexer::TokenKind::Comma
| rustc_lexer::TokenKind::Dot
| rustc_lexer::TokenKind::OpenParen
| rustc_lexer::TokenKind::CloseParen
| rustc_lexer::TokenKind::OpenBrace
| rustc_lexer::TokenKind::CloseBrace
| rustc_lexer::TokenKind::OpenBracket
| rustc_lexer::TokenKind::CloseBracket = token.kind
{
// Heuristic: we assume that it is unlikely we're dealing with an unterminated
// string surrounded by single quotes.
self.last_lifetime = None;
}

// Now "cook" the token, converting the simple `rustc_lexer::TokenKind` enum into a
// rich `rustc_ast::TokenKind`. This turns strings into interned symbols and runs
// additional validation.
Expand Down Expand Up @@ -247,6 +269,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
if starts_with_number {
let span = self.mk_sp(start, self.pos);
self.dcx().struct_err("lifetimes cannot start with a number")
Expand Down Expand Up @@ -395,10 +418,21 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
match kind {
rustc_lexer::LiteralKind::Char { terminated } => {
if !terminated {
self.dcx()
let mut err = self
.dcx()
.struct_span_fatal(self.mk_sp(start, end), "unterminated character literal")
.with_code(E0762)
.emit()
.with_code(E0762);
if let Some(lt_sp) = self.last_lifetime {
err.multipart_suggestion(
"if you meant to write a `str` literal, use double quotes",
vec![
(lt_sp, "\"".to_string()),
(self.mk_sp(start, start + BytePos(1)), "\"".to_string()),
],
Applicability::MaybeIncorrect,
);
}
err.emit()
}
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
}
Expand Down Expand Up @@ -673,7 +707,16 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
let sugg = if prefix == "rb" {
Some(errors::UnknownPrefixSugg::UseBr(prefix_span))
} else if expn_data.is_root() {
Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
if self.cursor.first() == '\''
&& let Some(start) = self.last_lifetime
{
Some(errors::UnknownPrefixSugg::MeantStr {
start,
end: self.mk_sp(self.pos, self.pos + BytePos(1)),
})
} else {
Some(errors::UnknownPrefixSugg::Whitespace(prefix_span.shrink_to_hi()))
}
} else {
None
};
Expand Down
6 changes: 6 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-1.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//@ run-rustfix
fn main() {
println!("1 + 1");
//~^ ERROR unterminated character literal
//~| ERROR lifetimes cannot start with a number
}
6 changes: 6 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//@ run-rustfix
fn main() {
println!('1 + 1');
//~^ ERROR unterminated character literal
//~| ERROR lifetimes cannot start with a number
}
20 changes: 20 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
error[E0762]: unterminated character literal
--> $DIR/lex-bad-str-literal-as-char-1.rs:3:20
|
LL | println!('1 + 1');
| ^^^
|
help: if you meant to write a `str` literal, use double quotes
|
LL | println!("1 + 1");
| ~ ~

error: lifetimes cannot start with a number
--> $DIR/lex-bad-str-literal-as-char-1.rs:3:14
|
LL | println!('1 + 1');
| ^^

error: aborting due to 2 previous errors

For more information about this error, try `rustc --explain E0762`.
4 changes: 4 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-2.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//@ run-rustfix
fn main() {
println!(" 1 + 1"); //~ ERROR character literal may only contain one codepoint
}
4 changes: 4 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//@ run-rustfix
fn main() {
println!(' 1 + 1'); //~ ERROR character literal may only contain one codepoint
}
13 changes: 13 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-2.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: character literal may only contain one codepoint
--> $DIR/lex-bad-str-literal-as-char-2.rs:3:14
|
LL | println!(' 1 + 1');
| ^^^^^^^^
|
help: if you meant to write a `str` literal, use double quotes
|
LL | println!(" 1 + 1");
| ~~~~~~~~

error: aborting due to 1 previous error

4 changes: 4 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-3.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//@ run-rustfix
fn main() {
println!("hello world"); //~ ERROR unterminated character literal
}
4 changes: 4 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//@ run-rustfix
fn main() {
println!('hello world'); //~ ERROR unterminated character literal
}
14 changes: 14 additions & 0 deletions tests/ui/lexer/lex-bad-str-literal-as-char-3.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
error[E0762]: unterminated character literal
--> $DIR/lex-bad-str-literal-as-char-3.rs:3:26
|
LL | println!('hello world');
| ^^^^
|
help: if you meant to write a `str` literal, use double quotes
|
LL | println!("hello world");
| ~ ~

error: aborting due to 1 previous error

For more information about this error, try `rustc --explain E0762`.

0 comments on commit b4de21f

Please sign in to comment.