criterion/lib.rs
#![allow(unused)]
//! A statistics-driven micro-benchmarking library written in Rust.
//!
//! This crate is a microbenchmarking library which aims to provide strong
//! statistical confidence in detecting and estimating the size of performance
//! improvements and regressions, while also being easy to use.
//!
//! See
//! [the user guide](https://bheisler.github.io/criterion.rs/book/index.html)
//! for examples as well as details on the measurement and analysis process,
//! and the output.
//!
//! ## Features:
//! * Collects detailed statistics, providing strong confidence that changes
//!   to performance are real, not measurement noise.
//! * Produces detailed charts, providing thorough understanding of your code's
//!   performance behavior.
//!
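//! ## Example
//!
//! A minimal benchmark might look like the sketch below (the `fibonacci`
//! function is just an illustrative workload, not part of this crate):
//!
//! ```no_run
//! use criterion::{criterion_group, criterion_main, Criterion};
//!
//! fn fibonacci(n: u64) -> u64 {
//!     match n {
//!         0 | 1 => 1,
//!         n => fibonacci(n - 1) + fibonacci(n - 2),
//!     }
//! }
//!
//! fn bench_fib(c: &mut Criterion) {
//!     // `black_box` hides the argument from the optimizer so the call is not folded away.
//!     c.bench_function("fib 20", |b| b.iter(|| fibonacci(std::hint::black_box(20))));
//! }
//!
//! criterion_group!(benches, bench_fib);
//! criterion_main!(benches);
//! ```
//!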
//! ## Feature flags
#![cfg_attr(feature = "document-features", doc = document_features::document_features!())]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
//!

#![allow(clippy::style, clippy::complexity)]
#![warn(bare_trait_objects)]
#![cfg_attr(feature = "codspeed", allow(unused))]

#[cfg(all(feature = "rayon", target_arch = "wasm32"))]
compile_error!("Rayon cannot be used when targeting wasm32. Try disabling default features.");

use serde::{Deserialize, Serialize};

// Needs to be declared before other modules
// in order to be usable there.
#[macro_use]
mod macros_private;
#[macro_use]
mod analysis;
mod benchmark;
#[macro_use]
mod benchmark_group;
#[cfg(feature = "codspeed")]
#[macro_use]
pub mod codspeed;
pub mod async_executor;
mod bencher;
mod cli;
mod connection;
mod criterion;
mod error;
mod estimate;
mod format;
mod fs;
mod kde;
pub mod measurement;
pub mod profiler;
mod report;
mod routine;
mod stats;

#[cfg(not(feature = "codspeed"))]
#[macro_use]
mod macros;
#[cfg(feature = "codspeed")]
#[macro_use]
mod macros_codspeed;

use std::{
    default::Default,
    env,
    net::TcpStream,
    path::PathBuf,
    process::Command,
    sync::{Mutex, OnceLock},
    time::Duration,
};

#[cfg(feature = "async")]
#[cfg(not(feature = "codspeed"))]
pub use crate::bencher::AsyncBencher;
#[cfg(not(feature = "codspeed"))]
pub use crate::bencher::Bencher;
#[cfg(not(feature = "codspeed"))]
pub use crate::benchmark_group::{BenchmarkGroup, BenchmarkId};
#[cfg(feature = "async")]
#[cfg(feature = "codspeed")]
pub use crate::codspeed::bencher::AsyncBencher;
#[cfg(feature = "codspeed")]
pub use crate::codspeed::bencher::Bencher;
#[cfg(feature = "codspeed")]
pub use crate::codspeed::benchmark_group::{BenchmarkGroup, BenchmarkId};
#[cfg(feature = "codspeed")]
pub use crate::codspeed::criterion::Criterion;
#[cfg(not(feature = "codspeed"))]
pub use crate::criterion::Criterion;
use crate::{
    benchmark::BenchmarkConfig,
    connection::{Connection, OutgoingMessage},
    measurement::{Measurement, WallTime},
    profiler::{ExternalProfiler, Profiler},
    report::{BencherReport, CliReport, CliVerbosity, Report, ReportContext, Reports},
};

fn cargo_criterion_connection() -> &'static Option<Mutex<Connection>> {
    static CARGO_CRITERION_CONNECTION: OnceLock<Option<Mutex<Connection>>> = OnceLock::new();

    CARGO_CRITERION_CONNECTION.get_or_init(|| match std::env::var("CARGO_CRITERION_PORT") {
        Ok(port_str) => {
            let port: u16 = port_str.parse().ok()?;
            let stream = TcpStream::connect(("localhost", port)).ok()?;
            Some(Mutex::new(Connection::new(stream).ok()?))
        }
        Err(_) => None,
    })
}

fn default_output_directory() -> &'static PathBuf {
    static DEFAULT_OUTPUT_DIRECTORY: OnceLock<PathBuf> = OnceLock::new();

    DEFAULT_OUTPUT_DIRECTORY.get_or_init(|| {
        // Set criterion home to (in descending order of preference):
        // - $CRITERION_HOME (cargo-criterion sets this, but other users could as well)
        // - $CARGO_TARGET_DIR/criterion
        // - the cargo target dir from `cargo metadata`
        // - ./target/criterion
        if let Some(value) = env::var_os("CRITERION_HOME") {
            PathBuf::from(value)
        } else if let Some(path) = cargo_target_directory() {
            path.join("criterion")
        } else {
            PathBuf::from("target/criterion")
        }
    })
}

fn debug_enabled() -> bool {
    static DEBUG_ENABLED: OnceLock<bool> = OnceLock::new();

    *DEBUG_ENABLED.get_or_init(|| std::env::var_os("CRITERION_DEBUG").is_some())
}

/// Wrapper around [`std::hint::black_box`].
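///
/// Passing a value through `black_box` makes it opaque to the optimizer, which helps
/// keep the benchmarked expression from being constant-folded away. A trivial sketch:
///
/// ```
/// use criterion::black_box;
///
/// // The optimizer is discouraged from folding this addition into a constant.
/// assert_eq!(black_box(2) + black_box(2), 4);
/// ```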
#[inline]
pub fn black_box<T>(dummy: T) -> T {
    std::hint::black_box(dummy)
}

/// Argument to [`Bencher::iter_batched`] and [`Bencher::iter_batched_ref`] which controls the
/// batch size.
///
/// Generally speaking, almost all benchmarks should use `SmallInput`. If the input or the result
/// of the benchmark routine is large enough that `SmallInput` causes out-of-memory errors,
/// `LargeInput` can be used to reduce memory usage at the cost of increasing the measurement
/// overhead. If the input or the result is extremely large (or if it holds some
/// limited external resource like a file handle), `PerIteration` will set the number of iterations
/// per batch to exactly one. `PerIteration` can increase the measurement overhead substantially
/// and should be avoided wherever possible.
///
/// Each value lists an estimate of the measurement overhead. This is intended as a rough guide
/// to assist in choosing an option; it should not be relied upon. In particular, it is not valid
/// to subtract the listed overhead from the measurement and assume that the result represents the
/// true runtime of a function. The actual measurement overhead for your specific benchmark depends
/// on the details of the function you're benchmarking and the hardware and operating
/// system running the benchmark.
///
/// With that said, if the runtime of your function is small relative to the measurement overhead
/// it will be difficult to take accurate measurements. In this situation, the best option is to use
/// [`Bencher::iter`] which has next-to-zero measurement overhead.
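///
/// For example, a batched benchmark of an in-place sort might be written as in the sketch
/// below (the input vector and benchmark name are purely illustrative):
///
/// ```no_run
/// use criterion::{BatchSize, Criterion};
///
/// fn bench(c: &mut Criterion) {
///     c.bench_function("sort", |b| {
///         b.iter_batched(
///             || vec![3, 1, 2],        // setup: build a fresh input for each batch element
///             |mut v| { v.sort(); v }, // routine: the code being measured; returning `v`
///                                      //          moves drop time out of the measurement
///             BatchSize::SmallInput,
///         )
///     });
/// }
/// ```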
#[derive(Debug, Eq, PartialEq, Copy, Hash, Clone)]
pub enum BatchSize {
    /// `SmallInput` indicates that the input to the benchmark routine (the value returned from
    /// the setup routine) is small enough that millions of values can be safely held in memory.
    /// Always prefer `SmallInput` unless the benchmark is using too much memory.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `SmallInput` is on the
    /// order of 500 picoseconds. This is presented as a rough guide; your results may vary.
    SmallInput,

    /// `LargeInput` indicates that the input to the benchmark routine or the value returned from
    /// that routine is large. This will reduce the memory usage but increase the measurement
    /// overhead.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `LargeInput` is on the
    /// order of 750 picoseconds. This is presented as a rough guide; your results may vary.
    LargeInput,

    /// `PerIteration` indicates that the input to the benchmark routine or the value returned from
    /// that routine is extremely large or holds some limited resource, such that holding many values
    /// in memory at once is infeasible. This provides the worst measurement overhead, but the
    /// lowest memory usage.
    ///
    /// In testing, the maximum measurement overhead from benchmarking with `PerIteration` is on the
    /// order of 350 nanoseconds or 350,000 picoseconds. This is presented as a rough guide; your
    /// results may vary.
    PerIteration,

    /// `NumBatches` will attempt to divide the iterations up into a given number of batches.
    /// A larger number of batches (and thus smaller batches) will reduce memory usage but increase
    /// measurement overhead. This allows the user to choose their own tradeoff between memory usage
    /// and measurement overhead, but care must be taken in tuning the number of batches. Most
    /// benchmarks should use `SmallInput` or `LargeInput` instead.
    NumBatches(u64),

    /// `NumIterations` fixes the batch size to a constant number, specified by the user. This
    /// allows the user to choose their own tradeoff between overhead and memory usage, but care must
    /// be taken in tuning the batch size. In general, the measurement overhead of `NumIterations`
    /// will be larger than that of `NumBatches`. Most benchmarks should use `SmallInput` or
    /// `LargeInput` instead.
    NumIterations(u64),

    #[doc(hidden)]
    __NonExhaustive,
}
impl BatchSize {
    /// Convert to a number of iterations per batch.
    ///
    /// We try to do a constant number of batches regardless of the number of iterations in this
    /// sample. If the measurement overhead is roughly constant regardless of the number of
    /// iterations, the analysis of the results later will have an easier time separating the
    /// measurement overhead from the benchmark time.
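    ///
    /// For example, `SmallInput` with `iters = 64` yields `(64 + 10 - 1) / 10 = 7` iterations
    /// per batch, i.e. roughly ten batches per sample.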
    fn iters_per_batch(self, iters: u64) -> u64 {
        match self {
            BatchSize::SmallInput => (iters + 10 - 1) / 10,
            BatchSize::LargeInput => (iters + 1000 - 1) / 1000,
            BatchSize::PerIteration => 1,
            BatchSize::NumBatches(batches) => (iters + batches - 1) / batches,
            BatchSize::NumIterations(size) => size,
            BatchSize::__NonExhaustive => panic!("__NonExhaustive is not a valid BatchSize."),
        }
    }
}

/// Baseline describes how the `baseline_directory` is handled.
#[derive(Debug, Clone, Copy)]
pub enum Baseline {
    /// `CompareLenient` compares against a previous saved version of the baseline.
    /// If a previous baseline does not exist, the benchmark is run as normal but no comparison occurs.
    CompareLenient,
    /// `CompareStrict` compares against a previous saved version of the baseline.
    /// If a previous baseline does not exist, a panic occurs.
    CompareStrict,
    /// `Save` writes the benchmark results to the baseline directory,
    /// overwriting any results that were previously there.
    Save,
    /// `Discard` discards the benchmark results instead of saving them.
    Discard,
}

#[derive(Debug, Clone)]
/// Enum representing the execution mode.
pub(crate) enum Mode {
    /// Run benchmarks normally.
    Benchmark,
    /// List all benchmarks but do not run them.
    List(ListFormat),
    /// Run benchmarks once to verify that they work, but otherwise do not measure them.
    Test,
    /// Iterate benchmarks for a given length of time but do not analyze or report on them.
    Profile(Duration),
}
impl Mode {
    pub fn is_benchmark(&self) -> bool {
        matches!(self, Mode::Benchmark)
    }

    pub fn is_terse(&self) -> bool {
        matches!(self, Mode::List(ListFormat::Terse))
    }
}

#[derive(Debug, Clone, Copy)]
/// Enum representing the list format.
pub(crate) enum ListFormat {
    /// The regular, default format.
    Pretty,
    /// The terse format, where nothing other than the name of the test and ": benchmark" at the end
    /// is printed out.
    Terse,
}

impl Default for ListFormat {
    fn default() -> Self {
        Self::Pretty
    }
}

/// Benchmark filtering support.
#[derive(Clone, Debug)]
pub enum BenchmarkFilter {
    /// Run all benchmarks.
    AcceptAll,
    /// Run the benchmark matching this string exactly.
    Exact(String),
    /// Run benchmarks whose names contain this substring.
    Substring(String),
    /// Do not run any benchmarks.
    RejectAll,
}

/// Returns the Cargo target directory, possibly calling `cargo metadata` to
/// figure it out.
fn cargo_target_directory() -> Option<PathBuf> {
    #[derive(Deserialize)]
    struct Metadata {
        target_directory: PathBuf,
    }

    env::var_os("CARGO_TARGET_DIR").map(PathBuf::from).or_else(|| {
        let output = Command::new(env::var_os("CARGO")?)
            .args(["metadata", "--format-version", "1"])
            .output()
            .ok()?;
        let metadata: Metadata = serde_json::from_slice(&output.stdout).ok()?;
        Some(metadata.target_directory)
    })
}

/// Enum representing different ways of measuring the throughput of benchmarked code.
/// If the throughput setting is configured for a benchmark then the estimated throughput will
/// be reported as well as the time per iteration.
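///
/// For example, to report bytes/second for a checksum benchmark, one might configure a
/// benchmark group as in this sketch (the group and benchmark names are illustrative):
///
/// ```no_run
/// use criterion::{Criterion, Throughput};
///
/// fn bench(c: &mut Criterion) {
///     let data = vec![0u8; 1024];
///     let mut group = c.benchmark_group("checksum");
///     // Report throughput based on the number of input bytes processed per iteration.
///     group.throughput(Throughput::Bytes(data.len() as u64));
///     group.bench_function("sum", |b| {
///         b.iter(|| data.iter().map(|&x| x as u64).sum::<u64>())
///     });
///     group.finish();
/// }
/// ```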
// TODO: Remove serialize/deserialize from the public API.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum Throughput {
    /// Measure throughput in terms of bytes/second. The value should be the number of bytes
    /// processed by one iteration of the benchmarked code. Typically, this would be the length of
    /// an input string or `&[u8]`.
    Bytes(u64),

    /// Equivalent to Bytes, but the value will be reported in terms of
    /// kilobytes (1000 bytes) per second instead of kibibytes (1024 bytes) per
    /// second, megabytes instead of mebibytes, and gigabytes instead of gibibytes.
    BytesDecimal(u64),

    /// Measure throughput in terms of elements/second. The value should be the number of elements
    /// processed by one iteration of the benchmarked code. Typically, this would be the size of a
    /// collection, but could also be the number of lines of input text or the number of values to
    /// parse.
    Elements(u64),
}

/// Axis scaling type
#[derive(Debug, Clone, Copy)]
pub enum AxisScale {
    /// Axes scale linearly
    Linear,

    /// Axes scale logarithmically
    Logarithmic,
}

/// This enum allows the user to control how Criterion.rs chooses the iteration count when sampling.
/// The default is Auto, which will choose a method automatically based on the iteration time during
/// the warm-up phase.
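///
/// For example, a benchmark group with long-running routines might opt into flat sampling,
/// roughly as sketched here (the group name and timing are illustrative):
///
/// ```no_run
/// use std::time::Duration;
/// use criterion::{Criterion, SamplingMode};
///
/// fn bench(c: &mut Criterion) {
///     let mut group = c.benchmark_group("slow-routines");
///     // Flat sampling keeps the iteration count constant across samples, which keeps very
///     // long benchmarks from blowing well past the target measurement time.
///     group.sampling_mode(SamplingMode::Flat);
///     group.measurement_time(Duration::from_secs(30));
///     // ... add benchmarks to the group here ...
///     group.finish();
/// }
/// ```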
#[derive(Debug, Clone, Copy)]
pub enum SamplingMode {
    /// Criterion.rs should choose a sampling method automatically. This is the default, and is
    /// recommended for most users and most benchmarks.
    Auto,

    /// Scale the iteration count in each sample linearly. This is suitable for most benchmarks,
    /// but it tends to require many iterations which can make it very slow for very long benchmarks.
    Linear,

    /// Keep the iteration count the same for all samples. This is not recommended, as it affects
    /// the statistics that Criterion.rs can compute. However, it requires fewer iterations than
    /// the Linear method and therefore is more suitable for very long-running benchmarks where
    /// benchmark execution time is more of a problem and statistical precision is less important.
    Flat,
}
impl SamplingMode {
    pub(crate) fn choose_sampling_mode(
        &self,
        warmup_mean_execution_time: f64,
        sample_count: u64,
        target_time: f64,
    ) -> ActualSamplingMode {
        match self {
            SamplingMode::Linear => ActualSamplingMode::Linear,
            SamplingMode::Flat => ActualSamplingMode::Flat,
            SamplingMode::Auto => {
                // Estimate execution time with linear sampling
                let total_runs = sample_count * (sample_count + 1) / 2;
                let d =
                    (target_time / warmup_mean_execution_time / total_runs as f64).ceil() as u64;
                let expected_ns = total_runs as f64 * d as f64 * warmup_mean_execution_time;

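                // If even the smallest linear schedule (d rounded up to a whole number of
                // iterations) would overshoot the target time by more than 2x, fall back to
                // flat sampling.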
                if expected_ns > (2.0 * target_time) {
                    ActualSamplingMode::Flat
                } else {
                    ActualSamplingMode::Linear
                }
            }
        }
    }
}

/// Enum to represent the sampling mode without Auto.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub(crate) enum ActualSamplingMode {
    Linear,
    Flat,
}
impl ActualSamplingMode {
    pub(crate) fn iteration_counts(
        &self,
        warmup_mean_execution_time: f64,
        sample_count: u64,
        target_time: &Duration,
    ) -> Vec<u64> {
        match self {
            ActualSamplingMode::Linear => {
                let n = sample_count;
                let met = warmup_mean_execution_time;
                let m_ns = target_time.as_nanos();
                // Solve: [d + 2*d + 3*d + ... + n*d] * met = m_ns
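                // For example, with n = 100 samples and met = 10 ns against a 5 s target,
                // total_runs = 100 * 101 / 2 = 5050, d = ceil(5e9 / 10 / 5050) = 99_010, and
                // the samples run d, 2*d, ..., n*d iterations respectively.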
                let total_runs = n * (n + 1) / 2;
                let d = ((m_ns as f64 / met / total_runs as f64).ceil() as u64).max(1);
                let expected_ns = total_runs as f64 * d as f64 * met;

                if d == 1 {
                    let recommended_sample_size =
                        ActualSamplingMode::recommend_linear_sample_size(m_ns as f64, met);
                    let actual_time = Duration::from_nanos(expected_ns as u64);
                    eprint!(
                        "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
                        n, target_time, actual_time
                    );

                    if recommended_sample_size != n {
                        eprintln!(
                            ", enable flat sampling, or reduce sample count to {}.",
                            recommended_sample_size
                        );
                    } else {
                        eprintln!(" or enable flat sampling.");
                    }
                }

                (1..(n + 1)).map(|a| a * d).collect::<Vec<u64>>()
            }
            ActualSamplingMode::Flat => {
                let n = sample_count;
                let met = warmup_mean_execution_time;
                let m_ns = target_time.as_nanos() as f64;
                let time_per_sample = m_ns / (n as f64);
                // This is pretty simplistic; we could do something smarter to fit into the allotted time.
                let iterations_per_sample = ((time_per_sample / met).ceil() as u64).max(1);

                let expected_ns = met * (iterations_per_sample * n) as f64;

                if iterations_per_sample == 1 {
                    let recommended_sample_size =
                        ActualSamplingMode::recommend_flat_sample_size(m_ns, met);
                    let actual_time = Duration::from_nanos(expected_ns as u64);
                    eprint!(
                        "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to increase target time to {:.1?}",
                        n, target_time, actual_time
                    );

                    if recommended_sample_size != n {
                        eprintln!(", or reduce sample count to {}.", recommended_sample_size);
                    } else {
                        eprintln!(".");
                    }
                }

                vec![iterations_per_sample; n as usize]
            }
        }
    }

    fn is_linear(&self) -> bool {
        matches!(self, ActualSamplingMode::Linear)
    }

    fn recommend_linear_sample_size(target_time: f64, met: f64) -> u64 {
        // Some math shows that n(n+1)/2 * d * met = target_time. d = 1, so it can be ignored.
        // This leaves n(n+1) = (2*target_time)/met, or n^2 + n - (2*target_time)/met = 0
        // Which can be solved with the quadratic formula. Since A and B are constant 1,
        // this simplifies to sample_size = (-1 +- sqrt(1 + 4C))/2, where C = (2*target_time)/met.
        // We don't care about the negative solution. Experimentation shows that this actually tends to
        // result in twice the desired execution time (probably because of the ceil used to calculate
        // d) so instead I use c = target_time/met.
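        // For example, target_time = 5e9 ns and met = 10 ns give c = 5e8,
        // so sample_size = (-1 + sqrt(2e9)) / 2 ≈ 22_360, then rounded down to a multiple of 10.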
        let c = target_time / met;
        let sample_size = (-1.0 + (4.0 * c).sqrt()) / 2.0;
        let sample_size = sample_size as u64;

        // Round down to the nearest 10 to give a margin and avoid excessive precision
        let sample_size = (sample_size / 10) * 10;

        // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
        if sample_size < 10 { 10 } else { sample_size }
    }

    fn recommend_flat_sample_size(target_time: f64, met: f64) -> u64 {
        let sample_size = (target_time / met) as u64;

        // Round down to the nearest 10 to give a margin and avoid excessive precision
        let sample_size = (sample_size / 10) * 10;

        // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes smaller than 10.
        if sample_size < 10 { 10 } else { sample_size }
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct SavedSample {
    sampling_mode: ActualSamplingMode,
    iters: Vec<f64>,
    times: Vec<f64>,
}

/// Custom-test-framework runner. Should not be called directly.
#[doc(hidden)]
pub fn runner(benches: &[&dyn Fn()]) {
    for bench in benches {
        bench();
    }
    crate::criterion::Criterion::default().configure_from_args().final_summary();
}