criterion/
lib.rs

#![allow(unused)]
//! A statistics-driven micro-benchmarking library written in Rust.
//!
//! This crate is a microbenchmarking library which aims to provide strong
//! statistical confidence in detecting and estimating the size of performance
//! improvements and regressions, while also being easy to use.
//!
//! See
//! [the user guide](https://bheisler.github.io/criterion.rs/book/index.html)
//! for examples as well as details on the measurement and analysis process,
//! and the output.
//!
//! ## Features:
//! * Collects detailed statistics, providing strong confidence that changes
//!   to performance are real, not measurement noise.
//! * Produces detailed charts, providing thorough understanding of your code's
//!   performance behavior.
//!
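//! ## Example
//!
//! A minimal benchmark sketch (the `fibonacci` function here is only a stand-in for the
//! code under test; benchmarks are normally wired up with the `criterion_group!` and
//! `criterion_main!` macros described in the user guide):
//!
//! ```no_run
//! use criterion::Criterion;
//!
//! fn fibonacci(n: u64) -> u64 {
//!     match n {
//!         0 | 1 => 1,
//!         _ => fibonacci(n - 1) + fibonacci(n - 2),
//!     }
//! }
//!
//! fn bench_fib(c: &mut Criterion) {
//!     c.bench_function("fib 20", |b| b.iter(|| fibonacci(std::hint::black_box(20))));
//! }
//! ```
//!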
//! ## Feature flags
#![cfg_attr(feature = "document-features", doc = document_features::document_features!())]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
//!

#![allow(clippy::style, clippy::complexity)]
#![warn(bare_trait_objects)]
#![cfg_attr(feature = "codspeed", allow(unused))]

#[cfg(all(feature = "rayon", target_arch = "wasm32"))]
compile_error!("Rayon cannot be used when targeting wasm32. Try disabling default features.");

use serde::{Deserialize, Serialize};

// Needs to be declared before other modules
// in order to be usable there.
#[macro_use]
mod macros_private;
#[macro_use]
mod analysis;
mod benchmark;
#[macro_use]
mod benchmark_group;
#[cfg(feature = "codspeed")]
#[macro_use]
pub mod codspeed;
pub mod async_executor;
mod bencher;
mod cli;
mod connection;
mod criterion;
mod error;
mod estimate;
mod format;
mod fs;
mod kde;
pub mod measurement;
pub mod profiler;
mod report;
mod routine;
mod stats;

#[cfg(not(feature = "codspeed"))]
#[macro_use]
mod macros;
#[cfg(feature = "codspeed")]
#[macro_use]
mod macros_codspeed;

use std::{
    default::Default,
    env,
    net::TcpStream,
    path::PathBuf,
    process::Command,
    sync::{Mutex, OnceLock},
    time::Duration,
};

#[cfg(feature = "async")]
#[cfg(not(feature = "codspeed"))]
pub use crate::bencher::AsyncBencher;
#[cfg(not(feature = "codspeed"))]
pub use crate::bencher::Bencher;
#[cfg(not(feature = "codspeed"))]
pub use crate::benchmark_group::{BenchmarkGroup, BenchmarkId};
#[cfg(feature = "async")]
#[cfg(feature = "codspeed")]
pub use crate::codspeed::bencher::AsyncBencher;
#[cfg(feature = "codspeed")]
pub use crate::codspeed::bencher::Bencher;
#[cfg(feature = "codspeed")]
pub use crate::codspeed::benchmark_group::{BenchmarkGroup, BenchmarkId};
#[cfg(feature = "codspeed")]
pub use crate::codspeed::criterion::Criterion;
#[cfg(not(feature = "codspeed"))]
pub use crate::criterion::Criterion;
use crate::{
    benchmark::BenchmarkConfig,
    connection::{Connection, OutgoingMessage},
    measurement::{Measurement, WallTime},
    profiler::{ExternalProfiler, Profiler},
    report::{BencherReport, CliReport, CliVerbosity, Report, ReportContext, Reports},
};

fn cargo_criterion_connection() -> &'static Option<Mutex<Connection>> {
    static CARGO_CRITERION_CONNECTION: OnceLock<Option<Mutex<Connection>>> = OnceLock::new();

    CARGO_CRITERION_CONNECTION.get_or_init(|| match std::env::var("CARGO_CRITERION_PORT") {
        Ok(port_str) => {
            let port: u16 = port_str.parse().ok()?;
            let stream = TcpStream::connect(("localhost", port)).ok()?;
            Some(Mutex::new(Connection::new(stream).ok()?))
        }
        Err(_) => None,
    })
}

fn default_output_directory() -> &'static PathBuf {
    static DEFAULT_OUTPUT_DIRECTORY: OnceLock<PathBuf> = OnceLock::new();

    DEFAULT_OUTPUT_DIRECTORY.get_or_init(|| {
        // Set criterion home to (in descending order of preference):
        // - $CRITERION_HOME (cargo-criterion sets this, but other users could as well)
        // - $CARGO_TARGET_DIR/criterion
        // - the cargo target dir from `cargo metadata`
        // - ./target/criterion
        if let Some(value) = env::var_os("CRITERION_HOME") {
            PathBuf::from(value)
        } else if let Some(path) = cargo_target_directory() {
            path.join("criterion")
        } else {
            PathBuf::from("target/criterion")
        }
    })
}

fn debug_enabled() -> bool {
    static DEBUG_ENABLED: OnceLock<bool> = OnceLock::new();

    *DEBUG_ENABLED.get_or_init(|| std::env::var_os("CRITERION_DEBUG").is_some())
}

/// Wrapper around [std::hint::black_box].
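///
/// # Example
///
/// A minimal sketch of using `black_box` to keep a value and a computation from being
/// optimized away:
///
/// ```
/// use criterion::black_box;
///
/// let x = black_box(2u64);
/// let y = black_box(x + 2);
/// assert_eq!(y, 4);
/// ```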
#[inline]
pub fn black_box<T>(dummy: T) -> T {
    std::hint::black_box(dummy)
}

/// Argument to [`Bencher::iter_batched`] and [`Bencher::iter_batched_ref`] which controls the
/// batch size.
///
/// Generally speaking, almost all benchmarks should use `SmallInput`. If the input or the result
/// of the benchmark routine is large enough that `SmallInput` causes out-of-memory errors,
/// `LargeInput` can be used to reduce memory usage at the cost of increasing the measurement
/// overhead. If the input or the result is extremely large (or if it holds some
/// limited external resource like a file handle), `PerIteration` will set the number of iterations
/// per batch to exactly one. `PerIteration` can increase the measurement overhead substantially
/// and should be avoided wherever possible.
///
/// Each value lists an estimate of the measurement overhead. This is intended as a rough guide
/// to assist in choosing an option; it should not be relied upon. In particular, it is not valid
/// to subtract the listed overhead from the measurement and assume that the result represents the
/// true runtime of a function. The actual measurement overhead for your specific benchmark depends
/// on the details of the function you're benchmarking and the hardware and operating
/// system running the benchmark.
///
/// With that said, if the runtime of your function is small relative to the measurement overhead,
/// it will be difficult to take accurate measurements. In this situation, the best option is to use
/// [`Bencher::iter`], which has next-to-zero measurement overhead.
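///
/// # Example
///
/// A minimal sketch of a batched benchmark (the setup closure and the routine here are
/// placeholders for your own input construction and code under test):
///
/// ```no_run
/// use criterion::{BatchSize, Criterion};
///
/// fn bench(c: &mut Criterion) {
///     c.bench_function("sort", |b| {
///         b.iter_batched(
///             || (0..1_000u32).rev().collect::<Vec<_>>(), // setup: build a fresh input
///             |mut v| v.sort(),                           // routine: consume the input
///             BatchSize::SmallInput,
///         )
///     });
/// }
/// ```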
#[derive(Debug, Eq, PartialEq, Copy, Hash, Clone)]
pub enum BatchSize {
    /// `SmallInput` indicates that the input to the benchmark routine (the value returned from
    /// the setup routine) is small enough that millions of values can be safely held in memory.
    /// Always prefer `SmallInput` unless the benchmark is using too much memory.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `SmallInput` is on the
    /// order of 500 picoseconds. This is presented as a rough guide; your results may vary.
    SmallInput,

    /// `LargeInput` indicates that the input to the benchmark routine or the value returned from
    /// that routine is large. This will reduce the memory usage but increase the measurement
    /// overhead.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `LargeInput` is on the
    /// order of 750 picoseconds. This is presented as a rough guide; your results may vary.
    LargeInput,

    /// `PerIteration` indicates that the input to the benchmark routine or the value returned from
    /// that routine is extremely large or holds some limited resource, such that holding many values
    /// in memory at once is infeasible. This provides the worst measurement overhead, but the
    /// lowest memory usage.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `PerIteration` is on the
    /// order of 350 nanoseconds or 350,000 picoseconds. This is presented as a rough guide; your
    /// results may vary.
    PerIteration,

    /// `NumBatches` will attempt to divide the iterations up into a given number of batches.
    /// A larger number of batches (and thus smaller batches) will reduce memory usage but increase
    /// measurement overhead. This allows the user to choose their own tradeoff between memory usage
    /// and measurement overhead, but care must be taken in tuning the number of batches. Most
    /// benchmarks should use `SmallInput` or `LargeInput` instead.
    NumBatches(u64),

    /// `NumIterations` fixes the batch size to a constant number, specified by the user. This
    /// allows the user to choose their own tradeoff between overhead and memory usage, but care must
    /// be taken in tuning the batch size. In general, the measurement overhead of `NumIterations`
    /// will be larger than that of `NumBatches`. Most benchmarks should use `SmallInput` or
    /// `LargeInput` instead.
    NumIterations(u64),

    #[doc(hidden)]
    __NonExhaustive,
}
impl BatchSize {
    /// Convert to a number of iterations per batch.
    ///
    /// We try to do a constant number of batches regardless of the number of iterations in this
    /// sample. If the measurement overhead is roughly constant regardless of the number of
    /// iterations, the analysis of the results later will have an easier time separating the
    /// measurement overhead from the benchmark time.
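    ///
    /// For example (illustrative numbers only): with `SmallInput` and `iters = 95`, this
    /// returns `(95 + 9) / 10 = 10` iterations per batch, i.e. roughly ten batches.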
    fn iters_per_batch(self, iters: u64) -> u64 {
        match self {
            BatchSize::SmallInput => (iters + 10 - 1) / 10,
            BatchSize::LargeInput => (iters + 1000 - 1) / 1000,
            BatchSize::PerIteration => 1,
            BatchSize::NumBatches(batches) => (iters + batches - 1) / batches,
            BatchSize::NumIterations(size) => size,
            BatchSize::__NonExhaustive => panic!("__NonExhaustive is not a valid BatchSize."),
        }
    }
}

/// Baseline describes how the `baseline_directory` is handled.
#[derive(Debug, Clone, Copy)]
pub enum Baseline {
    /// `CompareLenient` compares against a previous saved version of the baseline.
    /// If a previous baseline does not exist, the benchmark is run as normal but no comparison occurs.
    CompareLenient,
    /// `CompareStrict` compares against a previous saved version of the baseline.
    /// If a previous baseline does not exist, a panic occurs.
    CompareStrict,
    /// `Save` writes the benchmark results to the baseline directory,
    /// overwriting any results that were previously there.
    Save,
    /// `Discard` discards the benchmark results without saving them.
    Discard,
}

#[derive(Debug, Clone)]
/// Enum representing the execution mode.
pub(crate) enum Mode {
    /// Run benchmarks normally.
    Benchmark,
    /// List all benchmarks but do not run them.
    List(ListFormat),
    /// Run benchmarks once to verify that they work, but otherwise do not measure them.
    Test,
    /// Iterate benchmarks for a given length of time but do not analyze or report on them.
    Profile(Duration),
}
impl Mode {
    pub fn is_benchmark(&self) -> bool {
        matches!(self, Mode::Benchmark)
    }

    pub fn is_terse(&self) -> bool {
        matches!(self, Mode::List(ListFormat::Terse))
    }
}

#[derive(Debug, Clone, Copy)]
/// Enum representing the list format.
pub(crate) enum ListFormat {
    /// The regular, default format.
    Pretty,
    /// The terse format, where nothing other than the name of the test and ": benchmark" at the end
    /// is printed out.
    Terse,
}

impl Default for ListFormat {
    fn default() -> Self {
        Self::Pretty
    }
}

/// Benchmark filtering support.
#[derive(Clone, Debug)]
pub enum BenchmarkFilter {
    /// Run all benchmarks.
    AcceptAll,
    /// Run the benchmark matching this string exactly.
    Exact(String),
    /// Run benchmarks whose names contain this substring.
    Substring(String),
    /// Do not run any benchmarks.
    RejectAll,
}

/// Returns the Cargo target directory, possibly calling `cargo metadata` to
/// figure it out.
fn cargo_target_directory() -> Option<PathBuf> {
    #[derive(Deserialize)]
    struct Metadata {
        target_directory: PathBuf,
    }

    env::var_os("CARGO_TARGET_DIR").map(PathBuf::from).or_else(|| {
        let output = Command::new(env::var_os("CARGO")?)
            .args(["metadata", "--format-version", "1"])
            .output()
            .ok()?;
        let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?;
        Some(metadata.target_directory)
    })
}

/// Enum representing different ways of measuring the throughput of benchmarked code.
/// If the throughput setting is configured for a benchmark then the estimated throughput will
/// be reported as well as the time per iteration.
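///
/// # Example
///
/// A minimal sketch of reporting byte throughput for a benchmark group (the input and the
/// routine here are placeholders for your own data and code under test):
///
/// ```no_run
/// use criterion::{Criterion, Throughput};
///
/// fn bench(c: &mut Criterion) {
///     let input = vec![0u8; 4096];
///     let mut group = c.benchmark_group("decode");
///     group.throughput(Throughput::Bytes(input.len() as u64));
///     group.bench_function("decode 4k", |b| {
///         b.iter(|| input.iter().map(|&byte| byte as u64).sum::<u64>())
///     });
///     group.finish();
/// }
/// ```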
// TODO: Remove serialize/deserialize from the public API.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum Throughput {
    /// Measure throughput in terms of bytes/second. The value should be the number of bytes
    /// processed by one iteration of the benchmarked code. Typically, this would be the length of
    /// an input string or `&[u8]`.
    Bytes(u64),

    /// Equivalent to Bytes, but the value will be reported in terms of
    /// kilobytes (1000 bytes) per second instead of kibibytes (1024 bytes) per
    /// second, megabytes instead of mebibytes, and gigabytes instead of gibibytes.
    BytesDecimal(u64),

    /// Measure throughput in terms of elements/second. The value should be the number of elements
    /// processed by one iteration of the benchmarked code. Typically, this would be the size of a
    /// collection, but could also be the number of lines of input text or the number of values to
    /// parse.
    Elements(u64),
}

/// Axis scaling type
#[derive(Debug, Clone, Copy)]
pub enum AxisScale {
    /// Axes scale linearly
    Linear,

    /// Axes scale logarithmically
    Logarithmic,
}

/// This enum allows the user to control how Criterion.rs chooses the iteration count when sampling.
/// The default is Auto, which will choose a method automatically based on the iteration time during
/// the warm-up phase.
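///
/// # Example
///
/// A minimal sketch of opting a long-running benchmark group into flat sampling (the
/// benchmark body here is a placeholder):
///
/// ```no_run
/// use criterion::{Criterion, SamplingMode};
///
/// fn bench(c: &mut Criterion) {
///     let mut group = c.benchmark_group("long-running");
///     group.sampling_mode(SamplingMode::Flat);
///     group.bench_function("sleepy", |b| {
///         b.iter(|| std::thread::sleep(std::time::Duration::from_millis(10)))
///     });
///     group.finish();
/// }
/// ```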
#[derive(Debug, Clone, Copy)]
pub enum SamplingMode {
    /// Criterion.rs should choose a sampling method automatically. This is the default, and is
    /// recommended for most users and most benchmarks.
    Auto,

    /// Scale the iteration count in each sample linearly. This is suitable for most benchmarks,
    /// but it tends to require many iterations which can make it very slow for very long benchmarks.
    Linear,

    /// Keep the iteration count the same for all samples. This is not recommended, as it affects
    /// the statistics that Criterion.rs can compute. However, it requires fewer iterations than
    /// the Linear method and therefore is more suitable for very long-running benchmarks where
    /// benchmark execution time is more of a problem and statistical precision is less important.
    Flat,
}
impl SamplingMode {
    pub(crate) fn choose_sampling_mode(
        &self,
        warmup_mean_execution_time: f64,
        sample_count: u64,
        target_time: f64,
    ) -> ActualSamplingMode {
        match self {
            SamplingMode::Linear => ActualSamplingMode::Linear,
            SamplingMode::Flat => ActualSamplingMode::Flat,
            SamplingMode::Auto => {
                // Estimate execution time with linear sampling
                let total_runs = sample_count * (sample_count + 1) / 2;
                let d =
                    (target_time / warmup_mean_execution_time / total_runs as f64).ceil() as u64;
                let expected_ns = total_runs as f64 * d as f64 * warmup_mean_execution_time;

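                // Linear sampling must run at least one iteration per sample, so with a slow
                // routine the estimate can overshoot the target time; if it would overshoot by
                // more than 2x, fall back to flat sampling.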
                if expected_ns > (2.0 * target_time) {
                    ActualSamplingMode::Flat
                } else {
                    ActualSamplingMode::Linear
                }
            }
        }
    }
}

/// Enum to represent the sampling mode without Auto.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub(crate) enum ActualSamplingMode {
    Linear,
    Flat,
}
impl ActualSamplingMode {
    pub(crate) fn iteration_counts(
        &self,
        warmup_mean_execution_time: f64,
        sample_count: u64,
        target_time: &Duration,
    ) -> Vec<u64> {
        match self {
            ActualSamplingMode::Linear => {
                let n = sample_count;
                let met = warmup_mean_execution_time;
                let m_ns = target_time.as_nanos();
                // Solve: [d + 2*d + 3*d + ... + n*d] * met = m_ns
                let total_runs = n * (n + 1) / 2;
                let d = ((m_ns as f64 / met / total_runs as f64).ceil() as u64).max(1);
                let expected_ns = total_runs as f64 * d as f64 * met;

                if d == 1 {
                    let recommended_sample_size =
                        ActualSamplingMode::recommend_linear_sample_size(m_ns as f64, met);
                    let actual_time = Duration::from_nanos(expected_ns as u64);
                    eprint!(
                        "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
                        n, target_time, actual_time
                    );

                    if recommended_sample_size != n {
                        eprintln!(
                            ", enable flat sampling, or reduce sample count to {}.",
                            recommended_sample_size
                        );
                    } else {
                        eprintln!(" or enable flat sampling.");
                    }
                }

                (1..(n + 1)).map(|a| a * d).collect::<Vec<u64>>()
            }
            ActualSamplingMode::Flat => {
                let n = sample_count;
                let met = warmup_mean_execution_time;
                let m_ns = target_time.as_nanos() as f64;
                let time_per_sample = m_ns / (n as f64);
                // This is pretty simplistic; we could do something smarter to fit into the allotted time.
                let iterations_per_sample = ((time_per_sample / met).ceil() as u64).max(1);

                let expected_ns = met * (iterations_per_sample * n) as f64;

                if iterations_per_sample == 1 {
                    let recommended_sample_size =
                        ActualSamplingMode::recommend_flat_sample_size(m_ns, met);
                    let actual_time = Duration::from_nanos(expected_ns as u64);
                    eprint!(
                        "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
                        n, target_time, actual_time
                    );

                    if recommended_sample_size != n {
                        eprintln!(", or reduce sample count to {}.", recommended_sample_size);
                    } else {
                        eprintln!(".");
                    }
                }

                vec![iterations_per_sample; n as usize]
            }
        }
    }

    fn is_linear(&self) -> bool {
        matches!(self, ActualSamplingMode::Linear)
    }

    fn recommend_linear_sample_size(target_time: f64, met: f64) -> u64 {
        // Some math shows that n(n+1)/2 * d * met = target_time. d = 1, so it can be ignored.
        // This leaves n(n+1) = (2*target_time)/met, or n^2 + n - (2*target_time)/met = 0,
        // which can be solved with the quadratic formula. Since A and B are both 1,
        // this simplifies to sample_size = (-1 +- sqrt(1 + 4*C))/2, where C = (2*target_time)/met.
        // We don't care about the negative solution. Experimentation shows that this actually tends to
        // result in twice the desired execution time (probably because of the ceil used to calculate
        // d), so instead we use c = target_time/met and drop the negligible 1 under the square root.
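        // For example (illustrative numbers): target_time = 5e9 ns and met = 1e6 ns give
        // c = 5000, so sample_size = (-1 + sqrt(20000)) / 2 ~= 70.2, which rounds down to 70.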
        let c = target_time / met;
        let sample_size = (-1.0 + (4.0 * c).sqrt()) / 2.0;
        let sample_size = sample_size as u64;

        // Round down to the nearest 10 to give a margin and avoid excessive precision
        let sample_size = (sample_size / 10) * 10;

        // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
        if sample_size < 10 { 10 } else { sample_size }
    }

    fn recommend_flat_sample_size(target_time: f64, met: f64) -> u64 {
        let sample_size = (target_time / met) as u64;

        // Round down to the nearest 10 to give a margin and avoid excessive precision
        let sample_size = (sample_size / 10) * 10;

        // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
        if sample_size < 10 { 10 } else { sample_size }
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct SavedSample {
    sampling_mode: ActualSamplingMode,
    iters: Vec<f64>,
    times: Vec<f64>,
}

/// Custom-test-framework runner. Should not be called directly.
#[doc(hidden)]
pub fn runner(benches: &[&dyn Fn()]) {
    for bench in benches {
        bench();
    }
    crate::criterion::Criterion::default().configure_from_args().final_summary();
}