Skip to content

Commit

Permalink
Auto merge of #7368 - alexcrichton:canonical-urls-omg, r=ehuss
Browse files Browse the repository at this point in the history
Work with canonical URLs in `[patch]`

This commit addresses an issue with how the resolver processes `[patch]`
annotations in manifests and lock files. Previously the resolver would
use the raw `Url` coming out of a manifest, but the rest of resolution,
when comparing `SourceId`, uses a canonical form of a `Url` rather than
the actual raw `Url`. This ended up causing discrepancies like those
found in #7282.

To fix the issue all `patch` intermediate storage in the resolver uses a
newly-added `CanonicalUrl` type instead of a `Url`. This
`CanonicalUrl` is then also used throughout the codebase, and all
lookups in the resolver as switched to using `CanonicalUrl` instead of
`Url`, which...

Closes #7282
  • Loading branch information
bors committed Sep 17, 2019
2 parents 658bde1 + e545412 commit 35c55a9
Show file tree
Hide file tree
Showing 9 changed files with 200 additions and 91 deletions.
31 changes: 20 additions & 11 deletions src/cargo/core/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::core::PackageSet;
use crate::core::{Dependency, PackageId, Source, SourceId, SourceMap, Summary};
use crate::sources::config::SourceConfigMap;
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::{profile, Config};
use crate::util::{profile, CanonicalUrl, Config};

/// Source of information about a group of packages.
///
Expand Down Expand Up @@ -75,9 +75,9 @@ pub struct PackageRegistry<'cfg> {
yanked_whitelist: HashSet<PackageId>,
source_config: SourceConfigMap<'cfg>,

patches: HashMap<Url, Vec<Summary>>,
patches: HashMap<CanonicalUrl, Vec<Summary>>,
patches_locked: bool,
patches_available: HashMap<Url, Vec<PackageId>>,
patches_available: HashMap<CanonicalUrl, Vec<PackageId>>,
}

/// A map of all "locked packages" which is filled in when parsing a lock file
Expand Down Expand Up @@ -230,6 +230,8 @@ impl<'cfg> PackageRegistry<'cfg> {
/// `query` until `lock_patches` is called below, which should be called
/// once all patches have been added.
pub fn patch(&mut self, url: &Url, deps: &[Dependency]) -> CargoResult<()> {
let canonical = CanonicalUrl::new(url)?;

// First up we need to actually resolve each `deps` specification to
// precisely one summary. We're not using the `query` method below as it
// internally uses maps we're building up as part of this method
Expand Down Expand Up @@ -284,7 +286,7 @@ impl<'cfg> PackageRegistry<'cfg> {
url
)
}
if summary.package_id().source_id().url() == url {
if *summary.package_id().source_id().canonical_url() == canonical {
failure::bail!(
"patch for `{}` in `{}` points to the same source, but \
patches must point to different sources",
Expand Down Expand Up @@ -317,8 +319,8 @@ impl<'cfg> PackageRegistry<'cfg> {
// `lock` method) and otherwise store the unlocked summaries in
// `patches` to get locked in a future call to `lock_patches`.
let ids = unlocked_summaries.iter().map(|s| s.package_id()).collect();
self.patches_available.insert(url.clone(), ids);
self.patches.insert(url.clone(), unlocked_summaries);
self.patches_available.insert(canonical.clone(), ids);
self.patches.insert(canonical, unlocked_summaries);

Ok(())
}
Expand All @@ -340,8 +342,11 @@ impl<'cfg> PackageRegistry<'cfg> {
self.patches_locked = true;
}

pub fn patches(&self) -> &HashMap<Url, Vec<Summary>> {
&self.patches
pub fn patches(&self) -> Vec<Summary> {
self.patches
.values()
.flat_map(|v| v.iter().cloned())
.collect()
}

fn load(&mut self, source_id: SourceId, kind: Kind) -> CargoResult<()> {
Expand Down Expand Up @@ -472,7 +477,7 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
// This means that `dep.matches(..)` will always return false, when
// what we really care about is the name/version match.
let mut patches = Vec::<Summary>::new();
if let Some(extra) = self.patches.get(dep.source_id().url()) {
if let Some(extra) = self.patches.get(dep.source_id().canonical_url()) {
patches.extend(
extra
.iter()
Expand Down Expand Up @@ -605,7 +610,11 @@ impl<'cfg> Registry for PackageRegistry<'cfg> {
}
}

fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Summary) -> Summary {
fn lock(
locked: &LockedMap,
patches: &HashMap<CanonicalUrl, Vec<PackageId>>,
summary: Summary,
) -> Summary {
let pair = locked
.get(&summary.source_id())
.and_then(|map| map.get(&*summary.name()))
Expand Down Expand Up @@ -669,7 +678,7 @@ fn lock(locked: &LockedMap, patches: &HashMap<Url, Vec<PackageId>>, summary: Sum
// map, and we see if `id` is contained in the list of patches
// for that url. If it is then this lock is still valid,
// otherwise the lock is no longer valid.
match patches.get(dep.source_id().url()) {
match patches.get(dep.source_id().canonical_url()) {
Some(list) => list.contains(&id),
None => false,
}
Expand Down
6 changes: 2 additions & 4 deletions src/cargo/core/resolver/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use std::collections::{HashMap, HashSet};
use std::fmt;
use std::iter::FromIterator;

use url::Url;

use crate::core::dependency::Kind;
use crate::core::{Dependency, PackageId, PackageIdSpec, Summary, Target};
use crate::util::errors::CargoResult;
Expand Down Expand Up @@ -114,8 +112,8 @@ impl Resolve {
self.graph.path_to_top(pkg)
}

pub fn register_used_patches(&mut self, patches: &HashMap<Url, Vec<Summary>>) {
for summary in patches.values().flat_map(|v| v) {
pub fn register_used_patches(&mut self, patches: &[Summary]) {
for summary in patches {
if self.iter().any(|id| id == summary.package_id()) {
continue;
}
Expand Down
19 changes: 12 additions & 7 deletions src/cargo/core/source/source_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@ use url::Url;

use crate::core::PackageId;
use crate::ops;
use crate::sources::git;
use crate::sources::DirectorySource;
use crate::sources::{GitSource, PathSource, RegistrySource, CRATES_IO_INDEX};
use crate::util::{CargoResult, Config, IntoUrl};
use crate::util::{CanonicalUrl, CargoResult, Config, IntoUrl};

lazy_static::lazy_static! {
static ref SOURCE_ID_CACHE: Mutex<HashSet<&'static SourceIdInner>> = Mutex::new(HashSet::new());
Expand All @@ -34,8 +33,8 @@ pub struct SourceId {
struct SourceIdInner {
/// The source URL.
url: Url,
/// The result of `git::canonicalize_url()` on `url` field.
canonical_url: Url,
/// The canonical version of the above url
canonical_url: CanonicalUrl,
/// The source kind.
kind: Kind,
/// For example, the exact Git revision of the specified branch for a Git Source.
Expand Down Expand Up @@ -80,7 +79,7 @@ impl SourceId {
fn new(kind: Kind, url: Url) -> CargoResult<SourceId> {
let source_id = SourceId::wrap(SourceIdInner {
kind,
canonical_url: git::canonicalize_url(&url)?,
canonical_url: CanonicalUrl::new(&url)?,
url,
precise: None,
name: None,
Expand Down Expand Up @@ -216,7 +215,7 @@ impl SourceId {
let url = config.get_registry_index(key)?;
Ok(SourceId::wrap(SourceIdInner {
kind: Kind::Registry,
canonical_url: git::canonicalize_url(&url)?,
canonical_url: CanonicalUrl::new(&url)?,
url,
precise: None,
name: Some(key.to_string()),
Expand All @@ -228,6 +227,12 @@ impl SourceId {
&self.inner.url
}

/// Gets the canonical URL of this source, used for internal comparison
/// purposes.
pub fn canonical_url(&self) -> &CanonicalUrl {
&self.inner.canonical_url
}

pub fn display_index(self) -> String {
if self.is_default_registry() {
"crates.io index".to_string()
Expand Down Expand Up @@ -508,7 +513,7 @@ impl Hash for SourceId {
fn hash<S: hash::Hasher>(&self, into: &mut S) {
self.inner.kind.hash(into);
match self.inner.kind {
Kind::Git(_) => self.inner.canonical_url.as_str().hash(into),
Kind::Git(_) => self.inner.canonical_url.hash(into),
_ => self.inner.url.as_str().hash(into),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/ops/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ pub fn resolve_with_previous<'cfg>(
Some(ws.config()),
ws.features().require(Feature::public_dependency()).is_ok(),
)?;
resolved.register_used_patches(registry.patches());
resolved.register_used_patches(&registry.patches());
if register_patches {
// It would be good if this warning was more targeted and helpful
// (such as showing close candidates that failed to match). However,
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/sources/git/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pub use self::source::{canonicalize_url, GitSource};
pub use self::source::GitSource;
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote, GitRevision};
mod source;
mod utils;
91 changes: 24 additions & 67 deletions src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl<'cfg> GitSource<'cfg> {
assert!(source_id.is_git(), "id is not git, id={}", source_id);

let remote = GitRemote::new(source_id.url());
let ident = ident(source_id.url())?;
let ident = ident(&source_id);

let reference = match source_id.precise() {
Some(s) => GitReference::Rev(s.to_string()),
Expand Down Expand Up @@ -59,58 +59,17 @@ impl<'cfg> GitSource<'cfg> {
}
}

fn ident(url: &Url) -> CargoResult<String> {
let url = canonicalize_url(url)?;
let ident = url
fn ident(id: &SourceId) -> String {
let ident = id
.canonical_url()
.raw_canonicalized_url()
.path_segments()
.and_then(|mut s| s.next_back())
.and_then(|s| s.rev().next())
.unwrap_or("");

let ident = if ident == "" { "_empty" } else { ident };

Ok(format!("{}-{}", ident, short_hash(&url)))
}

// Some hacks and heuristics for making equivalent URLs hash the same.
pub fn canonicalize_url(url: &Url) -> CargoResult<Url> {
let mut url = url.clone();

// cannot-be-a-base-urls (e.g., `github.com:rust-lang-nursery/rustfmt.git`)
// are not supported.
if url.cannot_be_a_base() {
failure::bail!(
"invalid url `{}`: cannot-be-a-base-URLs are not supported",
url
)
}

// Strip a trailing slash.
if url.path().ends_with('/') {
url.path_segments_mut().unwrap().pop_if_empty();
}

// HACK: for GitHub URLs specifically, just lower-case
// everything. GitHub treats both the same, but they hash
// differently, and we're gonna be hashing them. This wants a more
// general solution, and also we're almost certainly not using the
// same case conversion rules that GitHub does. (See issue #84.)
if url.host_str() == Some("github.com") {
url.set_scheme("https").unwrap();
let path = url.path().to_lowercase();
url.set_path(&path);
}

// Repos can generally be accessed with or without `.git` extension.
let needs_chopping = url.path().ends_with(".git");
if needs_chopping {
let last = {
let last = url.path_segments().unwrap().next_back().unwrap();
last[..last.len() - 4].to_owned()
};
url.path_segments_mut().unwrap().pop().push(&last);
}

Ok(url)
format!("{}-{}", ident, short_hash(id.canonical_url()))
}

impl<'cfg> Debug for GitSource<'cfg> {
Expand Down Expand Up @@ -241,56 +200,54 @@ impl<'cfg> Source for GitSource<'cfg> {
#[cfg(test)]
mod test {
use super::ident;
use crate::core::{GitReference, SourceId};
use crate::util::IntoUrl;
use url::Url;

#[test]
pub fn test_url_to_path_ident_with_path() {
let ident = ident(&url("https://github.com/carlhuda/cargo")).unwrap();
let ident = ident(&src("https://github.com/carlhuda/cargo"));
assert!(ident.starts_with("cargo-"));
}

#[test]
pub fn test_url_to_path_ident_without_path() {
let ident = ident(&url("https://github.com")).unwrap();
let ident = ident(&src("https://github.com"));
assert!(ident.starts_with("_empty-"));
}

#[test]
fn test_canonicalize_idents_by_stripping_trailing_url_slash() {
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston/")).unwrap();
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston/"));
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston"));
assert_eq!(ident1, ident2);
}

#[test]
fn test_canonicalize_idents_by_lowercasing_github_urls() {
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
let ident2 = ident(&url("https://github.com/pistondevelopers/piston")).unwrap();
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
let ident2 = ident(&src("https://github.com/pistondevelopers/piston"));
assert_eq!(ident1, ident2);
}

#[test]
fn test_canonicalize_idents_by_stripping_dot_git() {
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
let ident2 = ident(&url("https://github.com/PistonDevelopers/piston.git")).unwrap();
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
let ident2 = ident(&src("https://github.com/PistonDevelopers/piston.git"));
assert_eq!(ident1, ident2);
}

#[test]
fn test_canonicalize_idents_different_protocols() {
let ident1 = ident(&url("https://github.com/PistonDevelopers/piston")).unwrap();
let ident2 = ident(&url("git://github.com/PistonDevelopers/piston")).unwrap();
let ident1 = ident(&src("https://github.com/PistonDevelopers/piston"));
let ident2 = ident(&src("git://github.com/PistonDevelopers/piston"));
assert_eq!(ident1, ident2);
}

#[test]
fn test_canonicalize_cannot_be_a_base_urls() {
assert!(ident(&url("github.com:PistonDevelopers/piston")).is_err());
assert!(ident(&url("google.com:PistonDevelopers/piston")).is_err());
}

fn url(s: &str) -> Url {
s.into_url().unwrap()
fn src(s: &str) -> SourceId {
SourceId::for_git(
&s.into_url().unwrap(),
GitReference::Branch("master".to_string()),
)
.unwrap()
}
}
Loading

0 comments on commit 35c55a9

Please sign in to comment.