From 0186822af25810f647a577ae8dcdb32a58b126a4 Mon Sep 17 00:00:00 2001 From: wcampbell Date: Tue, 3 Sep 2024 21:57:55 -0400 Subject: [PATCH] Fix benchmark, add more inlines * The benchmarks were not run before the last release; with recent stable rustc versions they had regressed to roughly 7 ms. This brings performance back to acceptable levels by forcing more functions to be inlined. --- CHANGELOG.md | 3 +++ README.md | 6 +++--- benches/demod_benchmark.rs | 42 +++++++------------------------------- src/crc.rs | 1 + src/demod_2400.rs | 7 +++++++ src/mode_s/mod.rs | 3 ++- 6 files changed, 23 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d19d7eb..d6d272b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +## [v0.8.1] 2024-09-03 +- Restore performance for recent rustc versions, force more functions to be inlined [!135](https://github.com/rsadsb/dump1090_rs/pull/135) + ## [v0.8.0] 2024-09-02 - Update MSRV to `1.74` to [!130](https://github.com/rsadsb/dump1090_rs/pull/130) - Properly decode short ADSB messages, thanks [@gariac](https://github.com/gariac) for finding.
[!130](https://github.com/rsadsb/dump1090_rs/pull/130) diff --git a/README.md b/README.md index a7e9f71..6043503 100644 --- a/README.md +++ b/README.md @@ -104,9 +104,9 @@ Reading from a 512KB iq sample to ADS-B bytes takes ~3.0 ms, but feel free to ru ### Intel i7-7700K CPU @ 4.20GHz ``` -01 time: [2.9850 ms 2.9859 ms 2.9868 ms] -02 time: [2.9260 ms 2.9279 ms 2.9308 ms] -03 time: [2.8449 ms 2.8525 ms 2.8653 ms] +01 time: [3.6691 ms 3.6950 ms 3.7264 ms] +02 time: [3.5941 ms 3.5987 ms 3.6040 ms] +03 time: [3.4930 ms 3.4961 ms 3.4994 ms] ``` # Changes diff --git a/benches/demod_benchmark.rs b/benches/demod_benchmark.rs index 57c5182..653ed5c 100644 --- a/benches/demod_benchmark.rs +++ b/benches/demod_benchmark.rs @@ -1,56 +1,28 @@ -// third-party -use assert_hex::assert_eq_hex; +use std::hint::black_box; + use criterion::{criterion_group, criterion_main, Criterion}; -use hexlit::hex; -// crate use libdump1090_rs::{demod_2400::demodulate2400, icao_filter::icao_flush, utils}; use num_complex::Complex; -fn routine(data: [Complex; 0x20000], expected_data: &Vec>) { +fn routine(data: [Complex; 0x20000]) { // make sure icao starts in a deterministic position icao_flush(); let outbuf = utils::to_mag(&data); - let data = demodulate2400(&outbuf).unwrap(); - for (a, b) in data.iter().zip(expected_data.iter()) { - assert_eq_hex!(a.buffer(), *b); - } + let _ = black_box(demodulate2400(&outbuf).unwrap()); } fn criterion_benchmark(c: &mut Criterion) { let filename = "test_iq/test_1641427457780.iq"; let data_01 = utils::read_test_data(filename); - let expected_data_01 = Vec::from([ - hex!("8dad929358b9c6273f002169c02e").to_vec(), - hex!("8daa2bc4f82100020049b8db9449").to_vec(), - hex!("8daa2bc4f82100020049b8db9449").to_vec(), - hex!("02e1971ce17c84").to_vec(), - hex!("8da0aaa058bf163fcf860013e840").to_vec(), - ]); let filename = "test_iq/test_1641428165033.iq"; let data_02 = utils::read_test_data(filename); - let expected_data_02 = Vec::from([ - 
hex!("8da79de99909932f780c9e2f2f8f").to_vec(), - hex!("8dac04d358a7820a86ac3709e689").to_vec(), - hex!("8dac04d3ea4288669b5c082751d4").to_vec(), - hex!("8da79de958bdf59c85104874adad").to_vec(), - hex!("5dad92936265f5").to_vec(), - hex!("5dad92936265f525be017735997b").to_vec(), - ]); let filename = "test_iq/test_1641428106243.iq"; let data_03 = utils::read_test_data(filename); - let expected_data_03 = Vec::from([ - hex!("8da8aac8990c30b51808aa24e573").to_vec(), - hex!("02e19838bff1d9").to_vec(), - hex!("8dada6b9990cf61e4848af2a8656").to_vec(), - hex!("8da4ba025885462008fa0a4a6eb2").to_vec(), - hex!("8da4ba025885462008fa0a4a6eb2").to_vec(), - hex!("8da4ba0299115f301074a72db6ff").to_vec(), - ]); - c.bench_function("01", |b| b.iter(|| routine(data_01, &expected_data_01))); - c.bench_function("02", |b| b.iter(|| routine(data_02, &expected_data_02))); - c.bench_function("03", |b| b.iter(|| routine(data_03, &expected_data_03))); + c.bench_function("01", |b| b.iter(|| routine(data_01))); + c.bench_function("02", |b| b.iter(|| routine(data_02))); + c.bench_function("03", |b| b.iter(|| routine(data_03))); } criterion_group!(benches, criterion_benchmark); diff --git a/src/crc.rs b/src/crc.rs index 329f4c6..329c639 100644 --- a/src/crc.rs +++ b/src/crc.rs @@ -259,6 +259,7 @@ pub const CRC_TABLE: [u32; 256] = [ 0x00fa_0480, ]; +#[inline] pub fn modes_checksum(message: &[u8], bits: usize) -> u32 { let mut rem: u32 = 0; let n = bits / 8; diff --git a/src/demod_2400.rs b/src/demod_2400.rs index 17a4c41..a9b013c 100644 --- a/src/demod_2400.rs +++ b/src/demod_2400.rs @@ -19,6 +19,7 @@ enum Phase { } impl From for Phase { + #[inline(always)] fn from(num: usize) -> Self { match num % 5 { 0 => Self::Zero, @@ -33,6 +34,7 @@ impl From for Phase { impl Phase { /// Increment from 0..4 for incrementing the starting phase + #[inline(always)] fn next_start(self) -> Self { match self { Self::Zero => Self::One, @@ -44,6 +46,7 @@ impl Phase { } /// Increment by expected next phase transition for 
bit denoting + #[inline(always)] fn next(self) -> Self { match self { Self::Zero => Self::Two, @@ -55,6 +58,7 @@ impl Phase { } /// Amount of mag indexs used, for adding to the next start index + #[inline(always)] fn increment_index(self, index: usize) -> usize { index + match self { @@ -98,6 +102,7 @@ pub struct ModeSMessage { } impl ModeSMessage { + #[inline(always)] pub fn buffer(&self) -> &[u8] { match self.msglen { MsgLen::Short => &self.msg[..MODES_SHORT_MSG_BYTES], @@ -106,6 +111,7 @@ impl ModeSMessage { } } +#[inline(always)] pub fn demodulate2400(mag: &MagnitudeBuffer) -> Result, &'static str> { let mut results = vec![]; @@ -205,6 +211,7 @@ pub fn demodulate2400(mag: &MagnitudeBuffer) -> Result, &'stat Ok(results) } +#[inline(always)] fn check_preamble(preamble: &[u16]) -> Option<(i32, u32, u32)> { // This gets rid of the 3 core::panicking::panic_bounds_check calls, // but doesn't look to improve performance diff --git a/src/mode_s/mod.rs b/src/mode_s/mod.rs index 0b58f4a..cd128a0 100644 --- a/src/mode_s/mod.rs +++ b/src/mode_s/mod.rs @@ -10,6 +10,7 @@ use super::{crc::modes_checksum, icao_filter::icao_filter_test}; // mode_s.c:215 #[must_use] +#[inline(always)] pub fn getbits(data: &[u8], firstbit_1idx: usize, lastbit_1idx: usize) -> usize { let mut ans: usize = 0; @@ -33,7 +34,7 @@ pub fn getbits(data: &[u8], firstbit_1idx: usize, lastbit_1idx: usize) -> usize pub fn score_modes_message(msg: &[u8]) -> Option<(MsgLen, i32)> { let validbits = msg.len() * 8; - if validbits < 56 { + if validbits < MODES_SHORT_MSG_BYTES * 8 { return None; }