push sheeet
Some checks failed
Periodic Merges (6h) / master → staging-nixos (push) Failing after 12m50s
Periodic Merges (6h) / master → staging-next (push) Failing after 12m54s
Periodic Merges (24h) / merge-base(master,staging) → haskell-updates (push) Failing after 11m54s
Periodic Merges (6h) / staging-next → staging (push) Failing after 12m13s
Periodic Merges (24h) / staging-next-25.05 → staging-25.05 (push) Failing after 13m24s
Periodic Merges (24h) / release-25.05 → staging-next-25.05 (push) Failing after 14m28s

This commit is contained in:
Dark Steveneq
2025-10-09 14:15:47 +02:00
commit 646b892680
49168 changed files with 5897842 additions and 0 deletions

View File

@@ -0,0 +1,128 @@
use data_encoding::BASE64;
use digest::{Digest, Update};
use serde::{Deserialize, Serialize};
use sha1::Sha1;
use sha2::{Sha256, Sha512};
use std::{
fmt::Write as FmtWrite,
fs::{self, File},
io::Write,
path::PathBuf,
};
use url::Url;
/// JSON record that `Cache::put` appends to an `index-v5` bucket file.
///
/// Fields are serialized in declaration order, so reordering them changes the
/// bytes (and the SHA-1 prefix) written to the index.
#[allow(clippy::struct_field_names)]
#[derive(Serialize, Deserialize)]
pub(super) struct Key {
/// Cache key; `Cache::put` hashes it with SHA-256 to choose the index file path.
pub(super) key: String,
/// SRI string (`<algo>-<base64>`) identifying the cached content.
pub(super) integrity: String,
/// Timestamp slot; `Cache::put` always writes `0`.
pub(super) time: u8,
/// Length of the cached content in bytes.
pub(super) size: usize,
/// URL and option metadata stored alongside the entry.
pub(super) metadata: Metadata,
}
/// Metadata nested inside a [`Key`] index record.
#[derive(Serialize, Deserialize)]
pub(super) struct Metadata {
/// URL the cached content was fetched from; `map_cache` uses it as the map key.
pub(super) url: Url,
/// Option flags recorded with the entry.
pub(super) options: Options,
}
/// Option flags nested inside [`Metadata`].
#[derive(Serialize, Deserialize)]
pub(super) struct Options {
/// Compression flag; `Cache::put` always writes `true`.
pub(super) compress: bool,
}
/// Handle to an on-disk cache; the wrapped path is the root under which the
/// `content-v2` and `index-v5` directories live.
pub struct Cache(PathBuf);
/// Appends `hash` to `path` as three nested components: its first two
/// characters, the next two, and the remainder (`abcdef` -> `ab/cd/ef`).
///
/// # Panics
///
/// Panics if `hash` is shorter than four bytes or if byte offsets 2 or 4 do
/// not fall on character boundaries.
fn push_hash_segments(path: &mut PathBuf, hash: &str) {
    let (first, rest) = hash.split_at(2);
    let (second, tail) = rest.split_at(2);
    for segment in [first, second, tail] {
        path.push(segment);
    }
}
impl Cache {
    /// Wraps `path` as the cache root. Nothing is created on disk.
    pub fn new(path: PathBuf) -> Cache {
        Cache(path)
    }

    /// Creates the `content-v2` and `index-v5` directories under the cache root.
    pub fn init(&self) -> anyhow::Result<()> {
        for dir in ["content-v2", "index-v5"] {
            fs::create_dir_all(self.0.join(dir))?;
        }

        Ok(())
    }

    /// Stores `data` in the cache and records an index entry for `key`.
    ///
    /// The content is written to `content-v2/<algo>/<hex digest split 2/2/rest>`.
    /// When `integrity` is given, its algorithm and decoded digest choose that
    /// path and the string is recorded as-is; otherwise a SHA-512 digest of
    /// `data` is computed. The index record (SHA-1 of the JSON, a tab, then the
    /// JSON itself) is appended to the `index-v5` file addressed by the SHA-256
    /// of `key`.
    ///
    /// # Errors
    ///
    /// Fails if the digest in `integrity` is not valid base64, or on any
    /// filesystem or serialization error.
    ///
    /// # Panics
    ///
    /// Panics if `integrity` contains no `-` separator.
    pub fn put(
        &self,
        key: String,
        url: Url,
        data: &[u8],
        integrity: Option<String>,
    ) -> anyhow::Result<()> {
        let (algo, digest, integrity) = match integrity {
            Some(sri) => {
                let (algo, encoded) = sri.split_once('-').expect("hash should be SRI format");
                let algo = algo.to_string();
                let digest = BASE64.decode(encoded.as_bytes())?;

                (algo, digest, sri)
            }
            None => {
                let digest = Sha512::new().chain(data).finalize();
                let sri = format!("sha512-{}", BASE64.encode(&digest));

                (String::from("sha512"), digest.to_vec(), sri)
            }
        };

        // Lowercase hex rendering of the digest; it names the content file.
        let mut hex = String::new();
        for byte in digest {
            let _ = write!(hex, "{byte:02x}");
        }

        let mut content_path = self.0.join("content-v2");
        content_path.push(algo);
        push_hash_segments(&mut content_path, &hex);

        fs::create_dir_all(content_path.parent().unwrap())?;
        fs::write(content_path, data)?;

        let key_digest = format!("{:x}", Sha256::new().chain(&key).finalize());
        let mut index_path = self.0.join("index-v5");
        push_hash_segments(&mut index_path, &key_digest);

        fs::create_dir_all(index_path.parent().unwrap())?;

        let entry = serde_json::to_string(&Key {
            key,
            integrity,
            time: 0,
            size: data.len(),
            metadata: Metadata {
                url,
                options: Options { compress: true },
            },
        })?;
        let checksum = Sha1::new().chain(&entry).finalize();

        let mut file = File::options().append(true).create(true).open(index_path)?;
        write!(file, "{checksum:x}\t{entry}")?;

        Ok(())
    }
}

View File

@@ -0,0 +1,427 @@
#![warn(clippy::pedantic)]
use crate::cacache::{Cache, Key};
use anyhow::{anyhow, bail};
use rayon::prelude::*;
use serde_json::{Map, Value};
use std::{
collections::HashMap,
env, fs,
path::{Path, PathBuf},
process,
};
use tempfile::tempdir;
use url::Url;
use walkdir::WalkDir;
mod cacache;
mod parse;
mod util;
/// Returns the path held in the `CACHE_MAP_PATH` environment variable, or
/// `None` when the variable is not set.
fn cache_map_path() -> Option<PathBuf> {
    let raw = env::var_os("CACHE_MAP_PATH")?;
    Some(PathBuf::from(raw))
}
/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies.
///
/// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1
/// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash
/// we cache as (which is the strongest available one).
///
/// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features.
/// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that
/// tarball into the lockfile.
///
/// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers.
///
/// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the
/// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data.
///
/// # Errors
///
/// Returns an error when `lockfileVersion` is missing, not an integer, or not 1, 2 or 3; when `dependencies` (v1) or
/// `packages` (v2/v3) has the wrong shape; or when a v2/v3 package's `resolved` URL has no entry in `cache`.
fn fixup_lockfile(
    mut lock: Map<String, Value>,
    cache: &Option<HashMap<String, String>>,
) -> anyhow::Result<Option<Map<String, Value>>> {
    let mut fixed = false;

    match lock
        .get("lockfileVersion")
        .ok_or_else(|| anyhow!("couldn't get lockfile version"))?
        .as_i64()
        .ok_or_else(|| anyhow!("lockfile version isn't an int"))?
    {
        1 => {
            // A lockfile without a `dependencies` key has nothing to fix up, so
            // treat it as a no-op instead of panicking.
            if let Some(dependencies) = lock.get_mut("dependencies") {
                fixup_v1_deps(
                    dependencies
                        .as_object_mut()
                        .ok_or_else(|| anyhow!("dependencies isn't a map"))?,
                    cache,
                    &mut fixed,
                );
            }
        }
        2 | 3 => {
            for package in lock
                .get_mut("packages")
                .ok_or_else(|| anyhow!("couldn't get packages"))?
                .as_object_mut()
                .ok_or_else(|| anyhow!("packages isn't a map"))?
                .values_mut()
            {
                if let Some(Value::String(resolved)) = package.get("resolved") {
                    if let Some(Value::String(integrity)) = package.get("integrity") {
                        if resolved.starts_with("git+") {
                            fixed = true;

                            package
                                .as_object_mut()
                                .ok_or_else(|| anyhow!("package isn't a map"))?
                                .remove("integrity");
                        } else if let Some(cache_hashes) = cache {
                            // Report the offending URL instead of panicking when the
                            // cache map and the lockfile disagree.
                            let cache_hash = cache_hashes.get(resolved).ok_or_else(|| {
                                anyhow!("dependency {resolved} should have a hash in the cache map")
                            })?;

                            if integrity != cache_hash {
                                fixed = true;

                                *package
                                    .as_object_mut()
                                    .ok_or_else(|| anyhow!("package isn't a map"))?
                                    .get_mut("integrity")
                                    .unwrap() = Value::String(cache_hash.clone());
                            }
                        }
                    }
                }
            }

            if fixed {
                lock.remove("dependencies");
            }
        }
        v => bail!("unsupported lockfile version {v}"),
    }

    Ok(if fixed { Some(lock) } else { None })
}
/// Recursive helper that applies the lockfile fixups to a v1 `dependencies` map.
///
/// For every dependency carrying both a string `resolved` and a string `integrity`:
/// - if `resolved` is a Git URL (any `git+` scheme, matching the v2/v3 handling in
///   `fixup_lockfile`), the `integrity` field is removed;
/// - otherwise, when a cache map is supplied, `integrity` is replaced by the cached
///   hash if the two differ.
///
/// `fixed` is set to `true` whenever something is changed. Nested `dependencies`
/// maps are processed recursively.
///
/// # Panics
///
/// Panics if a dependency is not a JSON object, or if a non-Git `resolved` URL has
/// no entry in `cache`.
fn fixup_v1_deps(
    dependencies: &mut Map<String, Value>,
    cache: &Option<HashMap<String, String>>,
    fixed: &mut bool,
) {
    for dep in dependencies.values_mut() {
        let dep = dep.as_object_mut().expect("v1 dep must be object");

        if let (Some(Value::String(resolved)), Some(Value::String(integrity))) =
            (dep.get("resolved"), dep.get("integrity"))
        {
            // Previously only `git+ssh://` was recognised here, so other Git schemes
            // (e.g. `git+https://`) fell through to the cache lookup below.
            if resolved.starts_with("git+") {
                *fixed = true;

                dep.remove("integrity");
            } else if let Some(cache_hashes) = cache {
                let cache_hash = cache_hashes
                    .get(resolved)
                    .expect("dependency should have a hash");

                if integrity != cache_hash {
                    *fixed = true;

                    *dep.get_mut("integrity").unwrap() = Value::String(cache_hash.clone());
                }
            }
        }

        if let Some(Value::Object(more_deps)) = dep.get_mut("dependencies") {
            fixup_v1_deps(more_deps, cache, fixed);
        }
    }
}
fn map_cache() -> anyhow::Result<HashMap<Url, String>> {
let mut hashes = HashMap::new();
let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5");
for entry in WalkDir::new(content_path) {
let entry = entry?;
if entry.file_type().is_file() {
let content = fs::read_to_string(entry.path())?;
let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?;
hashes.insert(key.metadata.url, key.integrity);
}
}
Ok(hashes)
}
/// Command-line entry point. Three modes are selected by the first argument:
///
/// - `--fixup-lockfile <lockfile>`: rewrite the lockfile in place via
///   `fixup_lockfile`, using the cache map at `CACHE_MAP_PATH` when that variable is set.
/// - `--map-cache`: write the URL-to-integrity map produced by `map_cache` to `CACHE_MAP_PATH`.
/// - `<lockfile> [out]`: fetch every package in the lockfile into `out/_cacache` and copy
///   the lockfile next to it. Without `out`, a temporary directory is used and its SRI
///   hash is printed.
///
/// `NIX_BUILD_CORES`, when set and non-empty, sizes the global rayon thread pool.
fn main() -> anyhow::Result<()> {
    env_logger::init();

    let args = env::args().collect::<Vec<_>>();

    if args.len() < 2 {
        println!("usage: {} <path/to/package-lock.json>", args[0]);
        println!();
        println!("Prefetches npm dependencies for usage by fetchNpmDeps.");

        process::exit(1);
    }

    if let Ok(jobs) = env::var("NIX_BUILD_CORES") {
        if !jobs.is_empty() {
            rayon::ThreadPoolBuilder::new()
                .num_threads(
                    jobs.parse()
                        .expect("NIX_BUILD_CORES must be a whole number"),
                )
                .build_global()
                .unwrap();
        }
    }

    if args[1] == "--fixup-lockfile" {
        // The lockfile path is a required operand; report its absence instead of
        // panicking on an out-of-bounds index.
        let lock_path = args.get(2).ok_or_else(|| {
            anyhow!(
                "usage: {} --fixup-lockfile <path/to/package-lock.json>",
                args[0]
            )
        })?;

        let lock = serde_json::from_str(&fs::read_to_string(lock_path)?)?;

        let cache = cache_map_path()
            .map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?))
            .transpose()?;

        if let Some(fixed) = fixup_lockfile(lock, &cache)? {
            println!("Fixing lockfile");

            fs::write(lock_path, serde_json::to_string(&fixed)?)?;
        }

        return Ok(());
    } else if args[1] == "--map-cache" {
        let map = map_cache()?;

        fs::write(
            cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"),
            serde_json::to_string(&map)?,
        )?;

        return Ok(());
    }

    let lock_content = fs::read_to_string(&args[1])?;

    // Declared outside the `if` so the temporary directory outlives `out`.
    let out_tempdir;

    let (out, print_hash) = if let Some(path) = args.get(2) {
        (Path::new(path), false)
    } else {
        out_tempdir = tempdir()?;

        (out_tempdir.path(), true)
    };

    let packages = parse::lockfile(
        &lock_content,
        env::var("FORCE_GIT_DEPS").is_ok(),
        env::var("FORCE_EMPTY_CACHE").is_ok(),
    )?;

    let cache = Cache::new(out.join("_cacache"));
    cache.init()?;

    packages.into_par_iter().try_for_each(|package| {
        let tarball = package
            .tarball()
            .map_err(|e| anyhow!("couldn't fetch {} at {}: {e:?}", package.name, package.url))?;
        let integrity = package.integrity().map(ToString::to_string);

        cache
            .put(
                format!("make-fetch-happen:request-cache:{}", package.url),
                package.url,
                &tarball,
                integrity,
            )
            .map_err(|e| anyhow!("couldn't insert cache entry for {}: {e:?}", package.name))?;

        Ok::<_, anyhow::Error>(())
    })?;

    fs::write(out.join("package-lock.json"), lock_content)?;

    if print_hash {
        println!("{}", util::make_sri_hash(out)?);
    }

    Ok(())
}
// Unit tests for `fixup_lockfile`, covering the v2 `packages` layout and the
// nested v1 `dependencies` layout.
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use super::fixup_lockfile;
use serde_json::json;
// v2 lockfile: the Git dependency loses its `integrity`, and every other entry
// ends up with the hash stored in the cache map for its `resolved` URL.
#[test]
fn lockfile_fixup() -> anyhow::Result<()> {
let input = json!({
"lockfileVersion": 2,
"name": "foo",
"packages": {
"": {
},
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "sha1-aaa"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "sha512-aaa"
},
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
}
});
let expected = json!({
"lockfileVersion": 2,
"name": "foo",
"packages": {
"": {
},
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": ""
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
},
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
}
});
// Cache map handed to `fixup_lockfile`: `resolved` URL -> integrity string.
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!(
fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
Some(expected.as_object().unwrap().clone())
);
Ok(())
}
// v1 lockfile: same expectations as above, including a dependency nested under
// the Git dependency's own `dependencies` map.
#[test]
fn lockfile_v1_fixup() -> anyhow::Result<()> {
let input = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "sha512-aaa"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "sha512-bbb",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
},
},
}
});
let expected = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": ""
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
},
},
}
});
// Cache map handed to `fixup_lockfile`: `resolved` URL -> integrity string.
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!(
fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
Some(expected.as_object().unwrap().clone())
);
Ok(())
}
}

View File

@@ -0,0 +1,370 @@
use anyhow::{anyhow, bail, Context};
use rayon::slice::ParallelSliceMut;
use serde::{
de::{self, Visitor},
Deserialize, Deserializer,
};
use std::{
cmp::Ordering,
collections::{HashMap, HashSet},
fmt,
};
use url::Url;
/// Parses `content` as a `package-lock.json` and returns its packages, sorted by
/// `resolved` and with at most one entry per `resolved` value.
///
/// Version 1 lockfiles are flattened through `to_new_packages`. For versions 2 and
/// 3, the entry with an empty name is skipped along with any entry whose `resolved`
/// is not a URL.
///
/// # Errors
///
/// Fails if `content` is not a valid lockfile or its version is unsupported.
///
/// # Panics
///
/// Panics if two `integrity` collections cannot be ordered (for example when one
/// of them is empty).
pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
    let lockfile: Lockfile = serde_json::from_str(content)?;

    let mut found: Vec<Package> = match lockfile.version {
        1 => {
            let initial_url = get_initial_url()?;

            to_new_packages(lockfile.dependencies.unwrap_or_default(), &initial_url)?
        }
        2 | 3 => {
            let mut kept = Vec::new();

            for (name, package) in lockfile.packages.unwrap_or_default() {
                let has_url = matches!(package.resolved, Some(UrlOrString::Url(_)));

                if !name.is_empty() && has_url {
                    kept.push(Package {
                        name: Some(name),
                        ..package
                    });
                }
            }

            kept
        }
        unsupported => bail!(
            "We don't support lockfile version {}, please file an issue.",
            unsupported
        ),
    };

    found.par_sort_by(|x, y| {
        let by_resolved = x
            .resolved
            .partial_cmp(&y.resolved)
            .expect("resolved should be comparable");

        // v1 lockfiles can contain multiple references to the same version of a package, with
        // different integrity values (e.g. a SHA-1 and a SHA-512 in one, but just a SHA-512 in another).
        // The operands are swapped so the stronger hash sorts first and survives the dedup below.
        let by_integrity = y
            .integrity
            .partial_cmp(&x.integrity)
            .expect("integrity should be comparable");

        by_resolved.then(by_integrity)
    });

    found.dedup_by(|x, y| x.resolved == y.resolved);

    Ok(found)
}
/// Top-level shape of a `package-lock.json`, limited to the fields `packages` reads.
#[derive(Deserialize)]
struct Lockfile {
/// Value of `lockfileVersion`; `packages` accepts 1, 2 and 3.
#[serde(rename = "lockfileVersion")]
version: u8,
/// Dependency tree consumed for version 1 lockfiles.
dependencies: Option<HashMap<String, OldPackage>>,
/// Package map consumed for version 2 and 3 lockfiles.
packages: Option<HashMap<String, Package>>,
}
/// A dependency entry as it appears in a version 1 lockfile's `dependencies` tree.
#[derive(Deserialize)]
struct OldPackage {
/// The `version` field, either a URL or a plain string; `to_new_packages`
/// prefers it over `resolved` when it is a URL.
version: UrlOrString,
/// Bundled dependencies are skipped by `to_new_packages`; defaults to `false` when absent.
#[serde(default)]
bundled: bool,
/// The `resolved` field, if present.
resolved: Option<UrlOrString>,
/// The `integrity` field, if present.
integrity: Option<HashCollection>,
/// Nested dependencies, flattened recursively by `to_new_packages`.
dependencies: Option<HashMap<String, OldPackage>>,
}
/// A lockfile package in the normalized form returned by `packages`.
#[derive(Debug, Deserialize, PartialEq, Eq)]
pub(super) struct Package {
/// Package name; defaults to `None` when deserialized and is filled in from the
/// map key by `packages` / `to_new_packages`.
#[serde(default)]
pub(super) name: Option<String>,
/// Where the package is fetched from, if known.
pub(super) resolved: Option<UrlOrString>,
/// Integrity hashes recorded for the package, if any.
pub(super) integrity: Option<HashCollection>,
}
/// A lockfile value that is kept as a parsed [`Url`] when possible and as the raw
/// string otherwise.
///
/// With `#[serde(untagged)]` the variants are tried in declaration order, so `Url`
/// is attempted first. The derived ordering also follows declaration order.
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(untagged)]
pub(super) enum UrlOrString {
/// The value parsed as a URL.
Url(Url),
/// Fallback for values that did not deserialize as a URL.
String(String),
}
/// Displays the wrapped URL or string exactly as the inner value would display itself.
impl fmt::Display for UrlOrString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the inner value, then delegate so formatter flags are honoured.
        let inner: &dyn fmt::Display = match self {
            UrlOrString::Url(url) => url,
            UrlOrString::String(string) => string,
        };

        fmt::Display::fmt(inner, f)
    }
}
/// The set of SRI hashes parsed from one whitespace-separated `integrity` string.
#[derive(Debug, PartialEq, Eq)]
pub struct HashCollection(HashSet<Hash>);
impl HashCollection {
pub fn from_str(s: impl AsRef<str>) -> anyhow::Result<HashCollection> {
let hashes = s
.as_ref()
.split_ascii_whitespace()
.map(Hash::new)
.collect::<anyhow::Result<_>>()?;
Ok(HashCollection(hashes))
}
pub fn into_best(self) -> Option<Hash> {
self.0.into_iter().max()
}
}
/// Orders two collections by their most preferred hashes; the result is `None`
/// when either collection is empty.
impl PartialOrd for HashCollection {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        self.0.iter().max().and_then(|lhs| {
            other
                .0
                .iter()
                .max()
                .and_then(|rhs| lhs.partial_cmp(rhs))
        })
    }
}
impl<'de> Deserialize<'de> for HashCollection {
fn deserialize<D>(deserializer: D) -> Result<HashCollection, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_string(HashCollectionVisitor)
}
}
struct HashCollectionVisitor;
impl<'de> Visitor<'de> for HashCollectionVisitor {
type Value = HashCollection;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a single SRI hash or a collection of them (separated by spaces)")
}
fn visit_str<E>(self, value: &str) -> Result<HashCollection, E>
where
E: de::Error,
{
HashCollection::from_str(value).map_err(E::custom)
}
}
/// A single SRI hash, stored as its full `<algo>-<digest>` string.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Hash(String);
// Hash algorithms, in ascending preference.
// The position in this array is the rank used when ordering `Hash` values, and
// `Hash::new` rejects any algorithm not listed here.
const ALGOS: &[&str] = &["sha1", "sha512"];
impl Hash {
    /// Validates `s` as an SRI hash and wraps it.
    ///
    /// # Errors
    ///
    /// Fails when `s` contains no `-` separator or names an algorithm that is
    /// not listed in `ALGOS`.
    fn new(s: impl AsRef<str>) -> anyhow::Result<Hash> {
        let sri = s.as_ref();

        let (algo, _) = sri
            .split_once('-')
            .ok_or_else(|| anyhow!("expected SRI hash, got {:?}", sri))?;

        if !ALGOS.contains(&algo) {
            return Err(anyhow!("unknown hash algorithm {algo:?}"));
        }

        Ok(Hash(sri.to_string()))
    }

    /// Returns the full SRI string.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
/// Displays the hash as its full SRI string.
impl fmt::Display for Hash {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self.0.as_str(), f)
    }
}
/// Orders hashes by the preference rank of their algorithm only; the digest part
/// is ignored. The result is `None` if either hash lacks a `-` separator or uses
/// an algorithm not listed in `ALGOS`.
#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd for Hash {
    fn partial_cmp(&self, other: &Hash) -> Option<Ordering> {
        // Index of a hash's algorithm within `ALGOS`.
        fn rank(hash: &Hash) -> Option<usize> {
            let (algo, _) = hash.0.split_once('-')?;

            ALGOS.iter().position(|&known| known == algo)
        }

        let lhs = rank(self)?;
        let rhs = rank(other)?;

        Some(lhs.cmp(&rhs))
    }
}
/// Total order built on `partial_cmp`.
///
/// # Panics
///
/// Panics if `partial_cmp` returns `None` for the two hashes.
impl Ord for Hash {
    fn cmp(&self, other: &Hash) -> Ordering {
        PartialOrd::partial_cmp(self, other).unwrap()
    }
}
/// Flattens a version 1 `dependencies` tree into a list of [`Package`]s.
///
/// Bundled entries are skipped together with their nested dependencies. For the
/// rest, `version` is normalized first:
/// - an `npm:` URL is replaced by `resolved` when `resolved` is a URL;
/// - a `github:`, `bitbucket:` or `gitlab:` shorthand is expanded onto a copy of
///   `initial_url`, with the matching host, a path ending in `.git`, and the
///   original fragment.
///
/// The resulting package uses `version` as `resolved` when it is a URL and falls
/// back to the entry's own `resolved` otherwise.
///
/// # Errors
///
/// Fails if the host cannot be set on the copy of `initial_url`.
#[allow(clippy::case_sensitive_file_extension_comparisons)]
fn to_new_packages(
    old_packages: HashMap<String, OldPackage>,
    initial_url: &Url,
) -> anyhow::Result<Vec<Package>> {
    let shorthand_hosts = [
        ("github", "github.com"),
        ("bitbucket", "bitbucket.org"),
        ("gitlab", "gitlab.com"),
    ];

    let mut new = Vec::new();

    for (name, mut package) in old_packages {
        // In some cases, a bundled dependency happens to have the same version as a non-bundled one, causing
        // the bundled one without a URL to override the entry for the non-bundled instance, which prevents the
        // dependency from being downloaded.
        if package.bundled {
            continue;
        }

        // Work out the replacement for `version` first, then assign it.
        let rewritten = match &package.version {
            UrlOrString::Url(v) if v.scheme() == "npm" => match &package.resolved {
                Some(UrlOrString::Url(url)) => Some(UrlOrString::Url(url.clone())),
                _ => None,
            },
            UrlOrString::Url(v) => {
                let host = shorthand_hosts
                    .iter()
                    .find(|(scheme, _)| v.scheme() == *scheme)
                    .map(|(_, host)| *host);

                match host {
                    Some(host) => {
                        let mut new_url = initial_url.clone();

                        new_url.set_host(Some(host))?;

                        if v.path().ends_with(".git") {
                            new_url.set_path(v.path());
                        } else {
                            new_url.set_path(&format!("{}.git", v.path()));
                        }

                        new_url.set_fragment(v.fragment());

                        Some(UrlOrString::Url(new_url))
                    }
                    None => None,
                }
            }
            UrlOrString::String(_) => None,
        };

        if let Some(version) = rewritten {
            package.version = version;
        }

        let resolved = if matches!(package.version, UrlOrString::Url(_)) {
            Some(package.version)
        } else {
            package.resolved
        };

        new.push(Package {
            name: Some(name),
            resolved,
            integrity: package.integrity,
        });

        if let Some(dependencies) = package.dependencies {
            new.extend(to_new_packages(dependencies, initial_url)?);
        }
    }

    Ok(new)
}
/// Returns the placeholder `git+ssh://git@a.b` URL that `to_new_packages` clones
/// and fills in when expanding Git shorthands.
fn get_initial_url() -> anyhow::Result<Url> {
    let parsed = Url::parse("git+ssh://git@a.b");

    parsed.context("initial url should be valid")
}
// Unit tests for lockfile parsing: Git shorthand expansion, hash preference, and
// `npm:` alias handling in v1 lockfiles.
#[cfg(test)]
mod tests {
use super::{
get_initial_url, packages, to_new_packages, Hash, HashCollection, OldPackage, Package,
UrlOrString,
};
use std::{
cmp::Ordering,
collections::{HashMap, HashSet},
};
use url::Url;
// A `github:` shorthand in `version` must expand to a `git+ssh://git@github.com/...git`
// URL that keeps the commit fragment.
#[test]
fn git_shorthand_v1() -> anyhow::Result<()> {
let old = {
let mut o = HashMap::new();
o.insert(
String::from("sqlite3"),
OldPackage {
version: UrlOrString::Url(
Url::parse(
"github:mapbox/node-sqlite3#593c9d498be2510d286349134537e3bf89401c4a",
)
.unwrap(),
),
bundled: false,
resolved: None,
integrity: None,
dependencies: None,
},
);
o
};
let initial_url = get_initial_url()?;
let new = to_new_packages(old, &initial_url)?;
assert_eq!(new.len(), 1, "new packages map should contain 1 value");
assert_eq!(new[0], Package {
name: Some(String::from("sqlite3")),
resolved: Some(UrlOrString::Url(Url::parse("git+ssh://git@github.com/mapbox/node-sqlite3.git#593c9d498be2510d286349134537e3bf89401c4a").unwrap())),
integrity: None
});
Ok(())
}
// SHA-1 must rank below SHA-512, and `into_best` must pick the SHA-512 entry.
#[test]
fn hash_preference() {
assert_eq!(
Hash(String::from("sha1-foo")).partial_cmp(&Hash(String::from("sha512-foo"))),
Some(Ordering::Less)
);
assert_eq!(
HashCollection({
let mut set = HashSet::new();
set.insert(Hash(String::from("sha512-foo")));
set.insert(Hash(String::from("sha1-bar")));
set
})
.into_best(),
Some(Hash(String::from("sha512-foo")))
);
}
// A v1 entry whose `version` is an `npm:` alias must end up with its `resolved`
// registry URL.
#[test]
fn parse_lockfile_correctly() {
let packages = packages(
r#"{
"name": "node-ddr",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"string-width-cjs": {
"version": "npm:string-width@4.2.3",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
"integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
"requires": {
"emoji-regex": "^8.0.0",
"is-fullwidth-code-point": "^3.0.0",
"strip-ansi": "^6.0.1"
}
}
}
}"#).unwrap();
assert_eq!(packages.len(), 1);
assert_eq!(
packages[0].resolved,
Some(UrlOrString::Url(
Url::parse("https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz")
.unwrap()
))
);
}
}

View File

@@ -0,0 +1,353 @@
use anyhow::{anyhow, bail, Context};
use lock::UrlOrString;
use log::{debug, info};
use rayon::prelude::*;
use serde_json::{Map, Value};
use std::{
fs,
io::Write,
process::{Command, Stdio},
};
use tempfile::{tempdir, TempDir};
use url::Url;
use crate::util;
pub mod lock;
/// Parses a lockfile into the list of packages to fetch, sorted by URL with at
/// most one entry per URL.
///
/// Every Git dependency is unpacked while being resolved. If its tree contains a
/// `package-lock.json`, that lockfile is parsed recursively (always allowing an
/// empty result) and its packages are added to the list.
///
/// # Errors
///
/// Fails when:
/// - the lockfile cannot be parsed or a package cannot be resolved;
/// - no packages were found and `force_empty_cache` is `false`;
/// - a Git dependency's `package.json` cannot be read or parsed;
/// - a Git dependency declares one of the install-time scripts but ships no
///   lockfile, and `force_git_deps` is `false`.
pub fn lockfile(
    content: &str,
    force_git_deps: bool,
    force_empty_cache: bool,
) -> anyhow::Result<Vec<Package>> {
    debug!("parsing lockfile with contents:\n{content}");

    let locked = lock::packages(content).context("failed to extract packages from lockfile")?;

    let mut packages = locked
        .into_par_iter()
        .map(|locked_pkg| {
            let name = locked_pkg.name.clone().unwrap();

            Package::from_lock(locked_pkg)
                .with_context(|| format!("failed to parse data for {name}"))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    if packages.is_empty() && !force_empty_cache {
        bail!("No cacheable dependencies were found. Please inspect the upstream `package-lock.json` file and ensure that remote dependencies have `resolved` URLs and `integrity` hashes. If the lockfile is missing this data, attempt to get upstream to fix it via a tool like <https://github.com/jeslie0/npm-lockfile-fix>. If generating an empty cache is intentional and you would like to do it anyways, set `forceEmptyCache = true`.");
    }

    // https://github.com/npm/pacote/blob/272edc1bac06991fc5f95d06342334bbacfbaa4b/lib/git.js#L166-L172
    let install_scripts = [
        "postinstall",
        "build",
        "preinstall",
        "install",
        "prepack",
        "prepare",
    ];

    let mut nested = Vec::new();

    for pkg in &packages {
        // Only Git dependencies have an unpacked tree to inspect.
        let workdir = match &pkg.specifics {
            Specifics::Git { workdir } => workdir,
            Specifics::Registry { .. } => continue,
        };

        let path = workdir.path().join("package");

        info!("recursively parsing lockfile for {} at {path:?}", pkg.name);

        let lockfile_contents = fs::read_to_string(path.join("package-lock.json"));

        let package_json: Map<String, Value> =
            serde_json::from_str(&fs::read_to_string(path.join("package.json"))?)?;

        if let Some(scripts) = package_json.get("scripts").and_then(Value::as_object) {
            let has_install_script = install_scripts
                .iter()
                .any(|typ| scripts.contains_key(*typ));

            if has_install_script && lockfile_contents.is_err() && !force_git_deps {
                bail!("Git dependency {} contains install scripts, but has no lockfile, which is something that will probably break. Open an issue if you can't feasibly patch this dependency out, and we'll come up with a workaround.\nIf you'd like to attempt to try to use this dependency anyways, set `forceGitDeps = true`.", pkg.name);
            }
        }

        if let Ok(lockfile_contents) = lockfile_contents {
            // force_empty_cache is turned on here since recursively parsed lockfiles should be
            // allowed to have an empty cache without erroring by default
            nested.extend(lockfile(&lockfile_contents, force_git_deps, true)?);
        }
    }

    packages.extend(nested);

    packages.par_sort_by(|x, y| {
        x.url
            .partial_cmp(&y.url)
            .expect("resolved should be comparable")
    });

    packages.dedup_by(|x, y| x.url == y.url);

    Ok(packages)
}
/// A package resolved from a lockfile and ready to be fetched into the cache.
#[derive(Debug)]
pub struct Package {
/// Package name taken from the lockfile entry.
pub name: String,
/// URL the tarball comes from; for hosted Git dependencies `from_lock` replaces
/// it with the provider's tarball URL.
pub url: Url,
/// Data that differs between registry and Git packages.
specifics: Specifics,
}
/// Source-specific state attached to a [`Package`].
#[derive(Debug)]
enum Specifics {
/// Registry tarball, identified by the best integrity hash from the lockfile.
Registry { integrity: lock::Hash },
/// Git dependency; `workdir` is the temporary directory holding the extracted
/// tree under `package/`.
Git { workdir: TempDir },
}
impl Package {
    /// Resolves a lockfile package into a fetchable [`Package`].
    ///
    /// When the URL maps to a hosted Git provider, the provider's tarball is
    /// downloaded and unpacked with `tar` into `<tempdir>/package`, and the
    /// package URL becomes that tarball URL. Otherwise the package is treated as a
    /// registry package and the best hash from its `integrity` field is kept.
    ///
    /// # Errors
    ///
    /// Fails if the Git URL is unsupported, the download fails, or `tar` cannot be
    /// run or exits unsuccessfully.
    ///
    /// # Panics
    ///
    /// Panics if `resolved` is missing or not a URL, if `name` is missing, or if a
    /// non-Git package has no usable `integrity`.
    fn from_lock(pkg: lock::Package) -> anyhow::Result<Package> {
        let mut resolved = match pkg
            .resolved
            .expect("at this point, packages should have URLs")
        {
            UrlOrString::Url(u) => u,
            UrlOrString::String(_) => panic!("at this point, all packages should have URLs"),
        };

        let specifics = match get_hosted_git_url(&resolved)? {
            Some(hosted) => {
                let body = util::get_url_body_with_retry(&hosted)?;

                let workdir = tempdir()?;

                let tar_path = workdir.path().join("package");

                fs::create_dir(&tar_path)?;

                let mut cmd = Command::new("tar")
                    .args(["--extract", "--gzip", "--strip-components=1", "-C"])
                    .arg(&tar_path)
                    .stdin(Stdio::piped())
                    .spawn()?;

                // The pipe is closed when the temporary handle is dropped at the end
                // of this statement, which lets tar see end-of-input.
                cmd.stdin.take().unwrap().write_all(&body)?;

                let exit = cmd.wait()?;

                if !exit.success() {
                    // `ExitStatus` is displayed as a whole because `code()` is `None`
                    // when the process was terminated by a signal.
                    bail!(
                        "failed to extract tarball for {}: tar failed ({})",
                        pkg.name.unwrap(),
                        exit
                    );
                }

                resolved = hosted;

                Specifics::Git { workdir }
            }
            None => Specifics::Registry {
                integrity: pkg
                    .integrity
                    .expect("non-git dependencies should have associated integrity")
                    .into_best()
                    .expect("non-git dependencies should have non-empty associated integrity"),
            },
        };

        Ok(Package {
            name: pkg.name.unwrap(),
            url: resolved,
            specifics,
        })
    }

    /// Returns the tarball bytes for this package.
    ///
    /// Registry packages are downloaded from `url`. Git packages are re-packed
    /// from the extracted work directory with fixed ordering, timestamps and
    /// ownership.
    ///
    /// # Errors
    ///
    /// Fails if the download fails, or if `tar` cannot be run or exits
    /// unsuccessfully (its stderr is included in the error).
    pub fn tarball(&self) -> anyhow::Result<Vec<u8>> {
        match &self.specifics {
            Specifics::Registry { .. } => Ok(util::get_url_body_with_retry(&self.url)?),
            Specifics::Git { workdir } => {
                let output = Command::new("tar")
                    .args([
                        "--sort=name",
                        "--mtime=@0",
                        "--owner=0",
                        "--group=0",
                        "--numeric-owner",
                        "--format=gnu",
                        "-I",
                        "gzip -n -9",
                        "--create",
                        "-C",
                    ])
                    .arg(workdir.path())
                    .arg("package")
                    .output()?;

                // Without this check a failed tar would hand back truncated or empty
                // output, which would then be cached as if it were the package.
                if !output.status.success() {
                    bail!(
                        "failed to create tarball for {}: tar failed ({}): {}",
                        self.name,
                        output.status,
                        String::from_utf8_lossy(&output.stderr).trim()
                    );
                }

                Ok(output.stdout)
            }
        }
    }

    /// Returns the integrity hash of a registry package, or `None` for Git packages.
    pub fn integrity(&self) -> Option<&lock::Hash> {
        match &self.specifics {
            Specifics::Registry { integrity } => Some(integrity),
            Specifics::Git { .. } => None,
        }
    }
}
/// Maps a Git dependency URL to the tarball download URL of its hosting provider.
///
/// Returns `Ok(None)` when the scheme is not `git`, `git+ssh`, `git+https` or
/// `ssh`, meaning `url` is not a Git dependency. For GitHub, Bitbucket and
/// SourceHut the matching archive URL is returned; a trailing `.git` on the
/// project name is dropped, and the commit is taken from the URL fragment (for
/// GitHub it may also come from a `/tree/<commit>` path).
///
/// # Errors
///
/// Fails when the URL has no path segments, the host is unsupported (GitLab is
/// deliberately rejected), or required parts such as the user, project or commit
/// are missing.
fn get_hosted_git_url(url: &Url) -> anyhow::Result<Option<Url>> {
    if !["git", "git+ssh", "git+https", "ssh"].contains(&url.scheme()) {
        return Ok(None);
    }

    let mut s = url
        .path_segments()
        .ok_or_else(|| anyhow!("bad URL: {url}"))?;

    let mut get_url = || match url.host_str()? {
        "github.com" => {
            let user = s.next()?;
            let project = s.next()?;

            let commit = match s.next() {
                None => url.fragment(),
                Some("tree") => s.next(),
                Some(_) => return None,
            };

            let project = project.strip_suffix(".git").unwrap_or(project);

            // A URL without a commit is malformed input: report it through the
            // error below instead of panicking.
            let commit = commit?;

            Some(
                Url::parse(&format!(
                    "https://codeload.github.com/{user}/{project}/tar.gz/{commit}"
                ))
                .ok()?,
            )
        }
        "bitbucket.org" => {
            let user = s.next()?;
            let project = s.next()?;

            if s.next() == Some("get") {
                return None;
            }

            let project = project.strip_suffix(".git").unwrap_or(project);

            let commit = url.fragment()?;

            Some(
                Url::parse(&format!(
                    "https://bitbucket.org/{user}/{project}/get/{commit}.tar.gz"
                ))
                .ok()?,
            )
        }
        "gitlab.com" => {
            /* let path = &url.path()[1..];

            if path.contains("/~/") || path.contains("/archive.tar.gz") {
                return None;
            }

            let user = s.next()?;
            let mut project = s.next()?;

            if project.ends_with(".git") {
                project = project.strip_suffix(".git")?;
            }

            let commit = url.fragment()?;

            Some(
                Url::parse(&format!(
                    "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}"
                ))
                .ok()?,
            ) */

            // lmao: https://github.com/npm/hosted-git-info/pull/109
            None
        }
        "git.sr.ht" => {
            let user = s.next()?;
            let project = s.next()?;

            if s.next() == Some("archive") {
                return None;
            }

            let project = project.strip_suffix(".git").unwrap_or(project);

            let commit = url.fragment()?;

            Some(
                Url::parse(&format!(
                    "https://git.sr.ht/{user}/{project}/archive/{commit}.tar.gz"
                ))
                .ok()?,
            )
        }
        _ => None,
    };

    get_url().map(Some).ok_or_else(|| {
        anyhow!("This lockfile either contains a Git dependency with an unsupported host, or a malformed URL in the lockfile: {url}")
    })
}
// Unit tests for `get_hosted_git_url`.
#[cfg(test)]
mod tests {
use super::get_hosted_git_url;
use url::Url;
// Each supported host must map to its tarball URL; GitLab must be rejected.
#[test]
fn hosted_git_urls() {
// (input Git URL, expected tarball URL)
for (input, expected) in [
(
"git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525",
Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"),
),
(
"git+ssh://bitbucket.org/foo/bar#branch",
Some("https://bitbucket.org/foo/bar/get/branch.tar.gz")
),
(
"git+ssh://git.sr.ht/~foo/bar#branch",
Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz")
),
] {
assert_eq!(
get_hosted_git_url(&Url::parse(input).unwrap()).unwrap(),
expected.map(|u| Url::parse(u).unwrap())
);
}
assert!(
get_hosted_git_url(&Url::parse("ssh://git@gitlab.com/foo/bar.git#fix/bug").unwrap())
.is_err(),
"GitLab URLs should be marked as invalid (lol)"
);
}
}

View File

@@ -0,0 +1,99 @@
use anyhow::bail;
use backoff::{retry, ExponentialBackoff};
use data_encoding::BASE64;
use digest::Digest;
use isahc::{
config::{CaCertificate, Configurable, RedirectPolicy, SslOption},
Body, Request, RequestExt,
};
use log::info;
use nix_nar::{Encoder, NarError};
use serde_json::{Map, Value};
use sha2::Sha256;
use std::{
env,
io::{self, Read},
path::Path,
};
use url::Url;
pub fn get_url(url: &Url) -> Result<Body, anyhow::Error> {
let mut request = Request::get(url.as_str()).redirect_policy(RedirectPolicy::Limit(10));
// Respect SSL_CERT_FILE if environment variable exists
if let Ok(ssl_cert_file) = env::var("SSL_CERT_FILE") {
if Path::new(&ssl_cert_file).exists() {
// When file exists, use it. NIX_SSL_CERT_FILE will still override.
request = request.ssl_ca_certificate(CaCertificate::file(ssl_cert_file));
} else if env::var("outputHash").is_ok() {
// When file does not exist, assume we are downloading in a FOD and
// therefore do not need to check certificates, since the output is
// already hashed.
request = request.ssl_options(SslOption::DANGER_ACCEPT_INVALID_CERTS);
}
}
// Respect NIX_NPM_TOKENS environment variable, which should be a JSON mapping in the shape of:
// `{ "registry.example.com": "example-registry-bearer-token", ... }`
if let Some(host) = url.host_str() {
if let Ok(npm_tokens) = env::var("NIX_NPM_TOKENS") {
if let Ok(tokens) = serde_json::from_str::<Map<String, Value>>(&npm_tokens) {
if let Some(token) = tokens.get(host).and_then(serde_json::Value::as_str) {
info!("Found NPM token for {}. Adding authorization header to request.", host);
request = request.header("Authorization", format!("Bearer {token}"));
}
}
}
}
let res = request.body(())?.send()?;
if !res.status().is_success() {
if res.status().is_client_error() {
bail!("Client error: {}", res.status());
}
if res.status().is_server_error() {
bail!("Server error: {}", res.status());
}
bail!("{}", res.status());
}
Ok(res.into_body())
}
pub fn get_url_body_with_retry(url: &Url) -> Result<Vec<u8>, anyhow::Error> {
retry(ExponentialBackoff::default(), || {
get_url(url)
.and_then(|mut body| {
let mut buf = Vec::new();
body.read_to_end(&mut buf)?;
Ok(buf)
})
.map_err(|err| match err.downcast_ref::<isahc::Error>() {
Some(isahc_err) => {
if isahc_err.is_network() || isahc_err.is_timeout() {
backoff::Error::transient(err)
} else {
backoff::Error::permanent(err)
}
}
None => backoff::Error::permanent(err),
})
})
.map_err(|backoff_err| match backoff_err {
backoff::Error::Permanent(err)
| backoff::Error::Transient {
err,
retry_after: _,
} => err,
})
}
/// Streams `path` through the NAR encoder into SHA-256 and returns the digest as
/// an SRI string of the form `sha256-<base64>`.
///
/// # Errors
///
/// Fails if the encoder cannot be created for `path` or reading from it fails.
pub fn make_sri_hash(path: &Path) -> Result<String, NarError> {
    let mut nar = Encoder::new(path)?;
    let mut hasher = Sha256::new();

    io::copy(&mut nar, &mut hasher)?;

    let digest = hasher.finalize();

    Ok(format!("sha256-{}", BASE64.encode(&digest)))
}