From 44dd11cd4cde06f184001de02b6a2830b697ab3e Mon Sep 17 00:00:00 2001 From: Hana Date: Fri, 16 Aug 2024 04:25:16 +0800 Subject: [PATCH] perf: cost-free conversion from paths to `&str` (#93) Background: When I was migrating `PathBuf` to `Utf8PathBuf`, etc, I found out some regression in our benchmarks. Then I found out `as_str` is actually not cost-free as in the older version of rustc there's no way to get the underlying bytes out of an `OsStr` until 1.74.0. In this PR, with the help of [`OsStr::as_encoded_bytes`](https://github.com/rust-lang/rust/pull/115443) was stabilized in 1.74.0, We can perform a cost-free conversion from `&OsStr` to `&str` with constraint of it's underlying bytes are `UTF-8` encoded. Benchmark: With the benchmark included in the PR, the time cost is a constant now. Result: ``` // String length of 10 osstr to_str/10 time: [5.9769 ns 5.9913 ns 6.0060 ns] osstr as_encoded_bytes/10 time: [554.90 ps 558.32 ps 562.19 ps] // String length of 100 osstr to_str/100 time: [6.6113 ns 6.6250 ns 6.6404 ns] osstr as_encoded_bytes/100 time: [553.18 ps 557.33 ps 561.68 ps] // String length of 1000 osstr to_str/1000 time: [26.990 ns 27.033 ns 27.086 ns] osstr as_encoded_bytes/1000 time: [553.66 ps 560.67 ps 570.42 ps] // String length of 10000 osstr to_str/10000 time: [310.17 ns 310.77 ns 311.32 ns] osstr as_encoded_bytes/10000 time: [550.98 ps 555.16 ps 559.53 ps] ``` --- .github/workflows/ci.yml | 19 +++++++++++++++++-- Cargo.toml | 2 +- build.rs | 6 ++++++ camino-examples/.gitignore | 1 + camino-examples/Cargo.toml | 12 ++++++++++++ camino-examples/benches/bench.rs | 24 ++++++++++++++++++++++++ src/lib.rs | 25 +++++++++++++++++++------ 7 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 camino-examples/.gitignore create mode 100644 camino-examples/benches/bench.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 59f576ea84..c9cc6994ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,8 +26,14 @@ jobs: components: rustfmt, clippy - name: Lint (clippy) run: cargo clippy --workspace --all-features --all-targets + - name: Lint (clippy in camino-examples) + run: | + cd camino-examples && cargo clippy --all-features --all-targets - name: Lint (rustfmt) run: cargo xfmt --check + - name: Lint (rustfmt in camino-examples) + run: | + cd camino-examples && cargo xfmt --check - name: Check for differences run: git diff --exit-code @@ -51,6 +57,7 @@ jobs: - 1.63 - nightly-2022-12-14 - 1.68 + - 1.74 - stable exclude: # These versions started failing with "archive member 'lib.rmeta' with length 26456 is not @@ -89,11 +96,19 @@ jobs: - name: Build all targets with all features # Some optional features are not compatible with earlier versions if: ${{ matrix.rust-version == 'stable' }} - run: cargo hack --feature-powerset build --workspace + run: cargo hack --feature-powerset build --workspace --all-targets - name: Test all targets with all features # Some optional features are not compatible with earlier versions if: ${{ matrix.rust-version == 'stable' }} - run: cargo hack --feature-powerset test --workspace + run: cargo hack --feature-powerset test --workspace --all-targets + - name: Build camino-examples + if: ${{ matrix.rust-version == 'stable' }} + run: | + cd camino-examples && cargo build + - name: Test camino-examples + if: ${{ matrix.rust-version == 'stable' }} + run: | + cd camino-examples && cargo test miri: name: Check unsafe code against miri diff --git a/Cargo.toml b/Cargo.toml index e2c1403045..c5f261384d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = [".", "camino-examples"] +members = ["."] [package] name = "camino" diff --git a/build.rs b/build.rs index 25cb382496..23e300d1c0 100644 --- a/build.rs +++ b/build.rs @@ -18,6 +18,7 @@ fn main() { println!("cargo:rustc-check-cfg=cfg(path_buf_capacity)"); println!("cargo:rustc-check-cfg=cfg(shrink_to)"); println!("cargo:rustc-check-cfg=cfg(try_reserve_2)"); + println!("cargo:rustc-check-cfg=cfg(os_str_bytes)"); let compiler = match rustc_version() { Some(compiler) => compiler, @@ -49,6 +50,11 @@ fn main() { { println!("cargo:rustc-cfg=path_buf_deref_mut"); } + // os_str_bytes was added in a 1.74 stable. + if (compiler.minor >= 74 && compiler.channel == ReleaseChannel::Stable) || compiler.minor >= 75 + { + println!("cargo:rustc-cfg=os_str_bytes"); + } // Catch situations where the actual features aren't enabled. Currently, they're only shown with // `-vv` output, but maybe that will be noticed. diff --git a/camino-examples/.gitignore b/camino-examples/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/camino-examples/.gitignore @@ -0,0 +1 @@ +/target diff --git a/camino-examples/Cargo.toml b/camino-examples/Cargo.toml index 58b7845c3b..2c9a07ae56 100644 --- a/camino-examples/Cargo.toml +++ b/camino-examples/Cargo.toml @@ -1,3 +1,8 @@ +[workspace] +# camino-examples pulls in crates that aren't supported by old versions of Rust, so make it its own +# workspace. +members = ["."] + [package] name = "camino-examples" description = "Examples for camino" @@ -12,5 +17,12 @@ clap = { version = "3.0.7", features = ["derive"] } serde = { version = "1", features = ["derive"] } serde_json = { version = "1.0.62" } +[dev-dependencies] +criterion = "0.5.1" + [[bin]] name = "serde" + +[[bench]] +name = "bench" +harness = false diff --git a/camino-examples/benches/bench.rs b/camino-examples/benches/bench.rs new file mode 100644 index 0000000000..1c924f1b86 --- /dev/null +++ b/camino-examples/benches/bench.rs @@ -0,0 +1,24 @@ +// Copyright (c) The camino Contributors +// SPDX-License-Identifier: MIT OR Apache-2.0 + +// This benchmark is here because criterion has a higher MSRV than camino -- camino-examples is only +// tested on stable, which is good enough. + +use camino::Utf8PathBuf; +use criterion::*; + +fn bench_path(c: &mut Criterion) { + let mut group = c.benchmark_group("Path"); + for i in [10, 100, 1000, 10000] { + let p = "i".repeat(i); + let buf = Utf8PathBuf::from(black_box(p)); + group.bench_with_input(BenchmarkId::new("Utf8PathBuf::as_str", i), &buf, |b, i| { + b.iter(|| { + let _ = black_box(&i).as_str(); + }) + }); + } +} + +criterion_group!(benches, bench_path); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index e9a115f20c..7d510bdf7d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -701,10 +701,10 @@ impl Utf8Path { /// the current directory. /// /// * On Unix, a path is absolute if it starts with the root, so - /// `is_absolute` and [`has_root`] are equivalent. + /// `is_absolute` and [`has_root`] are equivalent. /// /// * On Windows, a path is absolute if it has a prefix and starts with the - /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. + /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. /// /// # Examples /// @@ -3067,9 +3067,22 @@ impl_cmp_os_str!(&'a Utf8Path, OsString); // invariant: OsStr must be guaranteed to be utf8 data #[inline] unsafe fn str_assume_utf8(string: &OsStr) -> &str { - // Adapted from the source code for Option::unwrap_unchecked. - match string.to_str() { - Some(val) => val, - None => std::hint::unreachable_unchecked(), + #[cfg(os_str_bytes)] + { + // SAFETY: OsStr is guaranteed to be utf8 data from the invariant + unsafe { + std::str::from_utf8_unchecked( + #[allow(clippy::incompatible_msrv)] + string.as_encoded_bytes(), + ) + } + } + #[cfg(not(os_str_bytes))] + { + // Adapted from the source code for Option::unwrap_unchecked. + match string.to_str() { + Some(val) => val, + None => std::hint::unreachable_unchecked(), + } } }