
rustdoc: Replaces fn main search and extern crate search with proper parsing during doctests. #54861

Merged 8 commits on Nov 4, 2018
135 changes: 118 additions & 17 deletions src/librustdoc/test.rs
@@ -378,7 +378,7 @@ pub fn make_test(s: &str,
dont_insert_main: bool,
opts: &TestOptions)
-> (String, usize) {
let (crate_attrs, everything_else) = partition_source(s);
let (crate_attrs, everything_else, crates) = partition_source(s);
let everything_else = everything_else.trim();
let mut line_offset = 0;
let mut prog = String::new();
@@ -402,30 +402,91 @@ pub fn make_test(s: &str,
// are intended to be crate attributes.
prog.push_str(&crate_attrs);

// Uses libsyntax to parse the doctest and find whether it already has a `fn main` and the
// relevant `extern crate` declaration.
let (already_has_main, already_has_extern_crate) = crate::syntax::with_globals(|| {
use crate::syntax::{ast, parse::{self, ParseSess}, source_map::FilePathMapping};
use crate::syntax_pos::FileName;
use errors::emitter::EmitterWriter;
use errors::Handler;

let filename = FileName::Anon;
let source = crates + &everything_else;

// any errors in parsing should also appear when the doctest is compiled for real, so just
// send all the errors that libsyntax emits directly into a Sink instead of stderr
let cm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let emitter = EmitterWriter::new(box io::sink(), None, false, false);
let handler = Handler::with_emitter(false, false, box emitter);
let sess = ParseSess::with_span_handler(handler, cm);

debug!("about to parse: \n{}", source);

let mut found_main = false;
let mut found_extern_crate = cratename.is_none();

let mut parser = match parse::maybe_new_parser_from_source_str(&sess, filename, source) {
Ok(p) => p,
Err(errs) => {
for mut err in errs {
err.cancel();
}

return (found_main, found_extern_crate);
}
};

loop {
match parser.parse_item() {
Ok(Some(item)) => {
if !found_main {
if let ast::ItemKind::Fn(..) = item.node {
if item.ident.as_str() == "main" {
found_main = true;
}
}
}

if !found_extern_crate {
if let ast::ItemKind::ExternCrate(original) = item.node {
// This code will never be reached if `cratename` is none because
// `found_extern_crate` is initialized to `true` if it is none.
let cratename = cratename.unwrap();

match original {
Some(name) => found_extern_crate = name.as_str() == cratename,
None => found_extern_crate = item.ident.as_str() == cratename,
}
}
}

if found_main && found_extern_crate {
break;
}
}
Ok(None) => break,
Err(mut e) => {
e.cancel();
break;
}
}
}

(found_main, found_extern_crate)
});

// Don't inject `extern crate std` because it's already injected by the
// compiler.
if !s.contains("extern crate") && !opts.no_crate_inject && cratename != Some("std") {
if !already_has_extern_crate && !opts.no_crate_inject && cratename != Some("std") {
if let Some(cratename) = cratename {
// Make sure it's actually used if not included.
if s.contains(cratename) {
prog.push_str(&format!("extern crate {};\n", cratename));
line_offset += 1;
}
}
}

// FIXME (#21299): prefer libsyntax or some other actual parser over this
// best-effort ad hoc approach
let already_has_main = s.lines()
.map(|line| {
let comment = line.find("//");
if let Some(comment_begins) = comment {
&line[0..comment_begins]
} else {
line
}
})
.any(|code| code.contains("fn main"));

if dont_insert_main || already_has_main {
prog.push_str(everything_else);
} else {
@@ -441,9 +502,10 @@ pub fn make_test(s: &str,
}

// FIXME(aburka): use a real parser to deal with multiline attributes
fn partition_source(s: &str) -> (String, String) {
fn partition_source(s: &str) -> (String, String, String) {
let mut after_header = false;
let mut before = String::new();
let mut crates = String::new();
let mut after = String::new();

for line in s.lines() {
@@ -457,12 +519,17 @@ fn partition_source(s: &str) -> (String, String) {
after.push_str(line);
after.push_str("\n");
} else {
if trimline.starts_with("#[macro_use] extern crate")
|| trimline.starts_with("extern crate") {
crates.push_str(line);
crates.push_str("\n");
}
before.push_str(line);
before.push_str("\n");
}
}

(before, after)
(before, after, crates)
}

pub trait Tester {
@@ -1014,4 +1081,38 @@ assert_eq!(2+2, 4);
let output = make_test(input, None, false, &opts);
assert_eq!(output, (expected, 1));
}

#[test]
fn make_test_issues_21299_33731() {
let opts = TestOptions::default();

let input =
"// fn main
assert_eq!(2+2, 4);";

let expected =
"#![allow(unused)]
fn main() {
// fn main
assert_eq!(2+2, 4);
}".to_string();

let output = make_test(input, None, false, &opts);
assert_eq!(output, (expected, 2));

let input =
"extern crate hella_qwop;
assert_eq!(asdf::foo, 4);";

let expected =
"#![allow(unused)]
extern crate hella_qwop;
extern crate asdf;
fn main() {
assert_eq!(asdf::foo, 4);
}".to_string();

let output = make_test(input, Some("asdf"), false, &opts);
assert_eq!(output, (expected, 3));
}
}
23 changes: 22 additions & 1 deletion src/libsyntax/parse/lexer/mod.rs
@@ -11,7 +11,7 @@
use ast::{self, Ident};
use syntax_pos::{self, BytePos, CharPos, Pos, Span, NO_EXPANSION};
use source_map::{SourceMap, FilePathMapping};
use errors::{Applicability, FatalError, DiagnosticBuilder};
use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder};
use parse::{token, ParseSess};
use str::char_at;
use symbol::{Symbol, keywords};
@@ -175,6 +175,16 @@ impl<'a> StringReader<'a> {
self.fatal_errs.clear();
}

pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
Contributor: Was this the only API addition you needed?

Member: This and the new_or_buffered_errs constructor. I needed a way to "handle" the errors without prematurely emitting them (in this case, to cancel them), and this felt like a natural way to get the content of self.fatal_errs without just exposing the Vec directly. Since both new and new_without_err emitted the errors directly, I added this instead.

Contributor: I'm not against it, I'm surprised at how little you needed to add :)

Member: Yup! I just needed a version that wouldn't immediately crash the process if it ran into a problem in the first token. One thing I will ask is whether the current implementation with buffer() is appropriate, since plain Diagnostics don't have the destructor bomb that full DiagnosticBuilders have. (Since they need a Handler to be emitted, they don't have the same ability to forcibly print themselves.) The alternative would be to swap out the Vec entirely and return the full DiagnosticBuilder, and since the ParseSess isn't local the lifetimes would work out.

let mut buffer = Vec::new();

for err in self.fatal_errs.drain(..) {
err.buffer(&mut buffer);
}

buffer
}
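
The review thread above refers to the "destructor bomb" that DiagnosticBuilder carries but a plain Diagnostic does not. As a rough, self-contained illustration of that distinction (not rustc code; every name below is invented for the example), a builder can panic if dropped without being emitted, cancelled, or buffered, while the buffered diagnostics are inert data that can sit in a Vec:

struct Diagnostic(String); // plain data: safe to buffer, move around, or drop

struct DiagnosticBuilder {
    diag: Option<Diagnostic>,
}

impl DiagnosticBuilder {
    fn new(msg: &str) -> Self {
        DiagnosticBuilder { diag: Some(Diagnostic(msg.to_string())) }
    }
    // Print the error and defuse the bomb.
    fn emit(mut self) {
        let d = self.diag.take().unwrap();
        eprintln!("error: {}", d.0);
    }
    // Defuse without printing anything.
    fn cancel(mut self) {
        self.diag.take();
    }
    // Defuse by handing the plain data off to a caller-owned buffer.
    fn buffer(mut self, out: &mut Vec<Diagnostic>) {
        out.push(self.diag.take().unwrap());
    }
}

impl Drop for DiagnosticBuilder {
    fn drop(&mut self) {
        if self.diag.is_some() {
            panic!("diagnostic dropped without being emitted, cancelled, or buffered");
        }
    }
}

fn main() {
    let mut buffered = Vec::new();
    DiagnosticBuilder::new("buffered, not printed").buffer(&mut buffered);
    DiagnosticBuilder::new("cancelled").cancel();
    DiagnosticBuilder::new("emitted to stderr").emit();
    assert_eq!(buffered.len(), 1);
}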

pub fn peek(&self) -> TokenAndSpan {
// FIXME(pcwalton): Bad copy!
TokenAndSpan {
@@ -251,6 +261,17 @@ impl<'a> StringReader<'a> {
Ok(sr)
}

pub fn new_or_buffered_errs(sess: &'a ParseSess,
source_file: Lrc<syntax_pos::SourceFile>,
override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
let mut sr = StringReader::new_raw(sess, source_file, override_span);
if sr.advance_token().is_err() {
Err(sr.buffer_fatal_errors())
} else {
Ok(sr)
}
}

pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
let begin = sess.source_map().lookup_byte_offset(span.lo());
let end = sess.source_map().lookup_byte_offset(span.hi());
48 changes: 46 additions & 2 deletions src/libsyntax/parse/mod.rs
@@ -15,7 +15,7 @@ use ast::{self, CrateConfig, NodeId};
use early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
use source_map::{SourceMap, FilePathMapping};
use syntax_pos::{Span, SourceFile, FileName, MultiSpan};
use errors::{Handler, ColorConfig, DiagnosticBuilder};
use errors::{Handler, ColorConfig, Diagnostic, DiagnosticBuilder};
use feature_gate::UnstableFeatures;
use parse::parser::Parser;
use ptr::P;
@@ -174,14 +174,25 @@ pub fn parse_stream_from_source_str(name: FileName, source: String, sess: &Parse
source_file_to_stream(sess, sess.source_map().new_source_file(name, source), override_span)
}

// Create a new parser from a source string
/// Create a new parser from a source string
Contributor: Shouldn't this use maybe_new_parser_from_source_str and emit the buffered errors? That would also allow removing source_file_to_parser, I think.

Member: How deep do you want me to go? Should I reimplement each existing function in terms of its new maybe_ variant?

Contributor: If you have the time, that'd be great. I just want to minimize code duplication. Whenever I add a fallible version of an existing method, I rewrite the callers if under a dozen, or I rewrite the infallible version to call the fallible version.

Member: Makes sense. I've got the change written locally, I'll push once I finish my current build.

pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String)
-> Parser {
let mut parser = source_file_to_parser(sess, sess.source_map().new_source_file(name, source));
parser.recurse_into_file_modules = false;
parser
}

/// Create a new parser from a source string. Returns any buffered errors from lexing the initial
/// token stream.
pub fn maybe_new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String)
-> Result<Parser, Vec<Diagnostic>>
{
let mut parser = maybe_source_file_to_parser(sess,
sess.source_map().new_source_file(name, source))?;
parser.recurse_into_file_modules = false;
Ok(parser)
}
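
As a rough, self-contained sketch (not rustc code; the names below are invented for the example) of the refactoring pattern suggested in the review thread above: the fallible maybe_ variant owns the real logic, and the pre-existing infallible entry point becomes a thin wrapper around it, so the work lives in exactly one place.

#[derive(Debug)]
struct Diagnostic(String);

// The fallible version does the actual work and reports problems as buffered diagnostics.
fn maybe_parse(source: &str) -> Result<Vec<String>, Vec<Diagnostic>> {
    if source.contains('\0') {
        Err(vec![Diagnostic("null byte in source".to_string())])
    } else {
        Ok(source.split_whitespace().map(str::to_owned).collect())
    }
}

// The infallible entry point delegates to the fallible one instead of duplicating it.
fn parse(source: &str) -> Vec<String> {
    maybe_parse(source).unwrap_or_else(|errs| panic!("fatal parse errors: {:?}", errs))
}

fn main() {
    assert_eq!(parse("fn main"), vec!["fn", "main"]);
    assert!(maybe_parse("\0").is_err());
}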

/// Create a new parser, handling errors as appropriate
/// if the file doesn't exist
pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path) -> Parser<'a> {
@@ -214,6 +225,21 @@ fn source_file_to_parser(sess: & ParseSess, source_file: Lrc<SourceFile>) -> Par
parser
}

/// Given a source_file and config, return a parser. Returns any buffered errors from lexing the
/// initial token stream.
fn maybe_source_file_to_parser(sess: &ParseSess, source_file: Lrc<SourceFile>)
-> Result<Parser, Vec<Diagnostic>>
{
let end_pos = source_file.end_pos;
let mut parser = stream_to_parser(sess, maybe_file_to_stream(sess, source_file, None)?);

if parser.token == token::Eof && parser.span.is_dummy() {
parser.span = Span::new(end_pos, end_pos, parser.span.ctxt());
}

Ok(parser)
}

// must preserve old name for now, because quote! from the *existing*
// compiler expands into it
pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser {
@@ -248,6 +274,24 @@ pub fn source_file_to_stream(sess: &ParseSess,
panictry!(srdr.parse_all_token_trees())
}

/// Given a source file, produce a sequence of token-trees. Returns any buffered errors from
/// parsing the token stream.
pub fn maybe_file_to_stream(sess: &ParseSess,
source_file: Lrc<SourceFile>,
override_span: Option<Span>) -> Result<TokenStream, Vec<Diagnostic>> {
let mut srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?;
srdr.real_token();

match srdr.parse_all_token_trees() {
Ok(stream) => Ok(stream),
Err(err) => {
let mut buffer = Vec::with_capacity(1);
err.buffer(&mut buffer);
Err(buffer)
}
}
}

/// Given stream and the `ParseSess`, produce a parser
pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser {
Parser::new(sess, stream, None, true, false)
2 changes: 2 additions & 0 deletions src/tools/error_index_generator/main.rs
@@ -10,6 +10,7 @@

#![feature(rustc_private)]

extern crate env_logger;
extern crate syntax;
extern crate rustdoc;
extern crate serialize as rustc_serialize;
@@ -264,6 +265,7 @@ fn parse_args() -> (OutputFormat, PathBuf) {
}

fn main() {
env_logger::init();
PLAYGROUND.with(|slot| {
*slot.borrow_mut() = Some((None, String::from("https://play.rust-lang.org/")));
});