criterion/criterion.rs
use std::{
    cell::RefCell,
    collections::HashSet,
    io::{IsTerminal, stdout},
    path::{Path, PathBuf},
    sync::MutexGuard,
    time::Duration,
};

use crate::{
    Baseline, BencherReport, BenchmarkConfig, BenchmarkFilter, CliReport, CliVerbosity, Connection,
    ExternalProfiler, Measurement, Mode, OutgoingMessage, Profiler, Report, ReportContext, Reports,
    WallTime,
    bencher::Bencher,
    benchmark_group::{BenchmarkGroup, BenchmarkId},
    cargo_criterion_connection, debug_enabled, default_output_directory,
};

/// The benchmark manager
///
/// `Criterion` lets you configure and execute benchmarks
///
/// Each benchmark consists of four phases:
///
/// - **Warm-up**: The routine is repeatedly executed, to let the CPU/OS/JIT/interpreter adapt to
///   the new load
/// - **Measurement**: The routine is repeatedly executed, and timing information is collected into
///   a sample
/// - **Analysis**: The sample is analyzed and distilled into meaningful statistics that get
///   reported to stdout, stored in files, and plotted
/// - **Comparison**: The current sample is compared with the sample obtained in the previous
///   benchmark.
pub struct Criterion<M: Measurement = WallTime> {
    pub(crate) config: BenchmarkConfig,
    pub(crate) filter: BenchmarkFilter,
    pub(crate) report: Reports,
    pub(crate) output_directory: PathBuf,
    pub(crate) baseline_directory: String,
    pub(crate) baseline: Baseline,
    pub(crate) load_baseline: Option<String>,
    pub(crate) all_directories: HashSet<String>,
    pub(crate) all_titles: HashSet<String>,
    pub(crate) measurement: M,
    pub(crate) profiler: Box<RefCell<dyn Profiler>>,
    pub(crate) connection: Option<MutexGuard<'static, Connection>>,
    pub(crate) mode: Mode,
}

impl Default for Criterion {
    /// Creates a benchmark manager with the following default settings:
    ///
    /// - Sample size: 100 measurements
    /// - Warm-up time: 3 s
    /// - Measurement time: 5 s
    /// - Bootstrap size: 100 000 resamples
    /// - Noise threshold: 0.01 (1%)
    /// - Confidence level: 0.95
    /// - Significance level: 0.05
    /// - Plotting: enabled, using gnuplot if available or plotters if gnuplot is not available
    /// - No filter
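    ///
    /// # Example
    ///
    /// A minimal sketch of starting from the defaults and overriding a couple of settings
    /// (the values shown are arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    /// use std::time::Duration;
    ///
    /// let c = Criterion::default()
    ///     .sample_size(50)
    ///     .measurement_time(Duration::from_secs(10));
    /// ```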
    fn default() -> Criterion {
        let reports = Reports {
            cli_enabled: true,
            cli: CliReport::new(false, false, CliVerbosity::Normal),
            bencher_enabled: false,
            bencher: BencherReport,
        };

        let mut criterion = Criterion {
            config: BenchmarkConfig::default(),
            filter: BenchmarkFilter::AcceptAll,
            report: reports,
            baseline_directory: "base".to_owned(),
            baseline: Baseline::Save,
            load_baseline: None,
            output_directory: default_output_directory().clone(),
            all_directories: HashSet::new(),
            all_titles: HashSet::new(),
            measurement: WallTime,
            profiler: Box::new(RefCell::new(ExternalProfiler)),
            connection: cargo_criterion_connection().as_ref().map(|mtx| mtx.lock().unwrap()),
            mode: Mode::Benchmark,
        };

        if criterion.connection.is_some() {
            // disable all reports when connected to cargo-criterion; it will do the reporting.
            criterion.report.cli_enabled = false;
            criterion.report.bencher_enabled = false;
        }
        criterion
    }
}

impl<M: Measurement> Criterion<M> {
    /// Changes the measurement for the benchmarks run with this runner. See the
    /// Measurement trait for more details
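    ///
    /// # Example
    ///
    /// A minimal sketch. `WallTime` is already the default, so in practice you would pass a
    /// custom `Measurement` implementation here; this assumes `WallTime` is exported at the
    /// crate root.
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().with_measurement(WallTime);
    /// ```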
    pub fn with_measurement<M2: Measurement>(self, m: M2) -> Criterion<M2> {
        // Can't use struct update syntax here because they're technically different types.
        Criterion {
            config: self.config,
            filter: self.filter,
            report: self.report,
            baseline_directory: self.baseline_directory,
            baseline: self.baseline,
            load_baseline: self.load_baseline,
            output_directory: self.output_directory,
            all_directories: self.all_directories,
            all_titles: self.all_titles,
            measurement: m,
            profiler: self.profiler,
            connection: self.connection,
            mode: self.mode,
        }
    }

    #[must_use]
    /// Changes the internal profiler for benchmarks run with this runner. See
    /// the Profiler trait for more details.
    pub fn with_profiler<P: Profiler + 'static>(self, p: P) -> Criterion<M> {
        Criterion { profiler: Box::new(RefCell::new(p)), ..self }
    }

    #[must_use]
    /// Changes the default size of the sample for benchmarks run with this runner.
    ///
    /// A bigger sample should yield more accurate results if paired with a sufficiently large
    /// measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10
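    ///
    /// # Example
    ///
    /// A minimal sketch reducing the sample size for an expensive benchmark (the value is
    /// arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().sample_size(30);
    /// ```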
    pub fn sample_size(mut self, n: usize) -> Criterion<M> {
        assert!(n >= 10);

        self.config.sample_size = n;
        self
    }

    #[must_use]
    /// Changes the default warm up time for benchmarks run with this runner.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
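    ///
    /// # Example
    ///
    /// A minimal sketch shortening the warm-up phase (the duration is arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    /// use std::time::Duration;
    ///
    /// let c = Criterion::default().warm_up_time(Duration::from_millis(500));
    /// ```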
    pub fn warm_up_time(mut self, dur: Duration) -> Criterion<M> {
        assert!(dur.as_nanos() > 0);

        self.config.warm_up_time = dur;
        self
    }

    #[must_use]
    /// Changes the default measurement time for benchmarks run with this runner.
    ///
    /// With a longer time, the measurement will become more resilient to transitory peak loads
    /// caused by external programs.
    ///
    /// **Note**: If the measurement time is too low, Criterion will automatically increase it.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
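    ///
    /// # Example
    ///
    /// A minimal sketch giving each benchmark more measurement time (the duration is arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    /// use std::time::Duration;
    ///
    /// let c = Criterion::default().measurement_time(Duration::from_secs(10));
    /// ```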
    pub fn measurement_time(mut self, dur: Duration) -> Criterion<M> {
        assert!(dur.as_nanos() > 0);

        self.config.measurement_time = dur;
        self
    }

    #[must_use]
    /// Changes the default number of resamples for benchmarks run with this runner.
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors, which are inherent to the
    /// bootstrap method, but also increases the analysis time
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
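    ///
    /// # Example
    ///
    /// A minimal sketch increasing the resample count for a more stable analysis (the value is
    /// arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().nresamples(200_000);
    /// ```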
    pub fn nresamples(mut self, n: usize) -> Criterion<M> {
        assert!(n > 0);
        if n <= 1000 {
            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.config.nresamples = n;
        self
    }

    #[must_use]
    /// Changes the default noise threshold for benchmarks run with this runner. The noise threshold
    /// is used to filter out small changes in performance, even if they are statistically
    /// significant. Sometimes benchmarking the same code twice will result in small but
    /// statistically significant differences solely because of noise. This provides a way to filter
    /// out some of these false positives at the cost of making it harder to detect small changes
    /// to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
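    ///
    /// # Example
    ///
    /// A minimal sketch ignoring changes smaller than 5% (the value is arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().noise_threshold(0.05);
    /// ```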
    pub fn noise_threshold(mut self, threshold: f64) -> Criterion<M> {
        assert!(threshold >= 0.0);

        self.config.noise_threshold = threshold;
        self
    }

    #[must_use]
    /// Changes the default confidence level for benchmarks run with this runner. The confidence
    /// level is the desired probability that the true runtime lies within the estimated
    /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)` range
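    ///
    /// # Example
    ///
    /// A minimal sketch requesting wider, more conservative confidence intervals (the value is
    /// arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().confidence_level(0.99);
    /// ```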
    pub fn confidence_level(mut self, cl: f64) -> Criterion<M> {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
        }

        self.config.confidence_level = cl;
        self
    }

    #[must_use]
    /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks run with this runner. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured performance of the last run.
    /// The significance level is the desired probability that two measurements of identical code
    /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as different due to
    /// noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
    /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
    /// detect small but real changes in the performance. By setting the significance level
    /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also
    /// report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)` range
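    ///
    /// # Example
    ///
    /// A minimal sketch making the comparison more conservative about reporting changes (the
    /// value is arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().significance_level(0.01);
    /// ```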
    pub fn significance_level(mut self, sl: f64) -> Criterion<M> {
        assert!(sl > 0.0 && sl < 1.0);

        self.config.significance_level = sl;
        self
    }

    #[must_use]
    /// Names an explicit baseline and enables overwriting the previous results.
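    ///
    /// # Example
    ///
    /// A minimal sketch saving results under a named baseline (the name is arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().save_baseline("main".to_owned());
    /// ```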
    pub fn save_baseline(mut self, baseline: String) -> Criterion<M> {
        self.baseline_directory = baseline;
        self.baseline = Baseline::Save;
        self
    }

    #[must_use]
    /// Names an explicit baseline and disables overwriting the previous results.
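    ///
    /// # Example
    ///
    /// A minimal sketch comparing strictly against a previously saved baseline (the name is
    /// arbitrary):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().retain_baseline("main".to_owned(), true);
    /// ```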
    pub fn retain_baseline(mut self, baseline: String, strict: bool) -> Criterion<M> {
        self.baseline_directory = baseline;
        self.baseline = if strict { Baseline::CompareStrict } else { Baseline::CompareLenient };
        self
    }

    /// Only run benchmarks specified by the given filter.
    ///
    /// This overwrites [`Self::with_filter`].
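    ///
    /// # Example
    ///
    /// A minimal sketch keeping only benchmarks whose ID contains a substring; this assumes
    /// `BenchmarkFilter` is exported at the crate root.
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default()
    ///     .with_benchmark_filter(BenchmarkFilter::Substring("fib".to_owned()));
    /// ```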
    pub fn with_benchmark_filter(mut self, filter: BenchmarkFilter) -> Criterion<M> {
        self.filter = filter;

        self
    }

    #[must_use]
    /// Override whether the CLI output will be colored or not. Usually you would use the `--color`
    /// CLI argument, but this is available for programmatic use as well.
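    ///
    /// # Example
    ///
    /// A minimal sketch forcing colored output even when stdout is not a terminal:
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// let c = Criterion::default().with_output_color(true);
    /// ```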
    pub fn with_output_color(mut self, enabled: bool) -> Criterion<M> {
        self.report.cli.enable_text_coloring = enabled;
        self
    }

    /// Set the output directory (currently for testing only)
    #[must_use]
    #[doc(hidden)]
    pub fn output_directory(mut self, path: &Path) -> Criterion<M> {
        path.clone_into(&mut self.output_directory);

        self
    }

    /// Set the profile time (currently for testing only)
    #[must_use]
    #[doc(hidden)]
    pub fn profile_time(mut self, profile_time: Option<Duration>) -> Criterion<M> {
        match profile_time {
            Some(time) => self.mode = Mode::Profile(time),
            None => self.mode = Mode::Benchmark,
        }

        self
    }

    /// Generate the final summary at the end of a run.
    #[doc(hidden)]
    pub fn final_summary(&self) {
        if !self.mode.is_benchmark() {
            return;
        }

        let report_context = ReportContext { output_directory: self.output_directory.clone() };

        self.report.final_summary(&report_context);
    }

    /// Configure this criterion struct based on the command-line arguments to
    /// this process.
    #[must_use]
    pub fn configure_from_args(mut self) -> Criterion<M> {
        use crate::cli::*;

        let opts = options(&self.config).fallback_to_usage().run();

        if self.connection.is_some() {
            if opts.color != Color::Auto {
                eprintln!(
                    "Warning: --color will be ignored when running with cargo-criterion. Use `cargo criterion --color {} -- <args>` instead.",
                    opts.color
                );
            }

            // What about quiet?
            if opts.verbosity == CliVerbosity::Verbose {
                eprintln!(
                    "Warning: --verbose will be ignored when running with cargo-criterion. Use `cargo criterion --output-format verbose -- <args>` instead."
                );
            }
            if opts.output_format != OutputFormat::Criterion {
                eprintln!(
                    "Warning: --output-format will be ignored when running with cargo-criterion. Use `cargo criterion --output-format {} -- <args>` instead.",
                    opts.output_format
                );
            }

            // TODO - currently the baseline handling seems to be partially coupled with operations
            if matches!(opts.op, Op::LoadBaseline(_)) {
                eprintln!("Error: baselines are not supported when running with cargo-criterion.");
                std::process::exit(1);
            }
        }

        self.mode = match opts.op {
            Op::List => Mode::List(opts.format),
            Op::LoadBaseline(ref dir) => {
                self.load_baseline = Some(dir.to_owned());
                Mode::Benchmark
            }
            Op::ProfileTime(t) => Mode::Profile(t),
            Op::Test => Mode::Test,
            Op::Benchmark => Mode::Benchmark,
        };

        // This is kind of a hack, but disable the connection to the runner if we're not benchmarking.
        if !self.mode.is_benchmark() {
            self.connection = None;
        }

        let filter = if opts.ignored {
            // --ignored overwrites any name-based filters passed in.
            BenchmarkFilter::RejectAll
        } else if let Some(filter) = opts.filter.as_ref() {
            let filter = filter.to_owned();
            if opts.exact {
                BenchmarkFilter::Exact(filter)
            } else {
                BenchmarkFilter::Substring(filter)
            }
        } else {
            BenchmarkFilter::AcceptAll
        };
        self = self.with_benchmark_filter(filter);

        match opts.baseline {
            Baseline_::Save(ref dir) => {
                self.baseline = Baseline::Save;
                dir.clone_into(&mut self.baseline_directory)
            }
            Baseline_::Discard => {
                self.baseline = Baseline::Discard;
            }
            Baseline_::Lenient(ref dir) => {
                self.baseline = Baseline::CompareLenient;
                dir.clone_into(&mut self.baseline_directory);
            }
            Baseline_::Strict(ref dir) => {
                self.baseline = Baseline::CompareStrict;
                dir.clone_into(&mut self.baseline_directory);
            }
        }

        if self.connection.is_some() {
            // disable all reports when connected to cargo-criterion; it will do the reporting.
            self.report.cli_enabled = false;
            self.report.bencher_enabled = false;
        } else {
            match opts.output_format {
                OutputFormat::Bencher => {
                    self.report.bencher_enabled = true;
                    self.report.cli_enabled = false;
                }
                OutputFormat::Criterion => {
                    let verbosity = opts.verbosity;
                    let verbose = opts.verbosity == CliVerbosity::Verbose;

                    let stdout_isatty = stdout().is_terminal();
                    let mut enable_text_overwrite = stdout_isatty && !verbose && !debug_enabled();
                    let enable_text_coloring;
                    match opts.color {
                        Color::Auto => enable_text_coloring = stdout_isatty,
                        Color::Always => enable_text_coloring = true,
                        Color::Never => {
                            enable_text_coloring = false;
                            enable_text_overwrite = false;
                        }
                    }
                    self.report.bencher_enabled = false;
                    self.report.cli_enabled = true;
                    self.report.cli = CliReport::new(
                        enable_text_overwrite,
                        enable_text_coloring,
                        verbosity.into(),
                    );
                }
            }
        }

        match opts.sample {
            Sample::Specific(size) => {
                assert!(size >= 10);
                self.config.sample_size = size;
            }
            Sample::Quick => self.config.quick_mode = true,
        }

        assert!(opts.warm_up_time > Duration::from_secs(0));
        self.config.warm_up_time = opts.warm_up_time;

        assert!(opts.measurement_time > Duration::from_secs(0));
        self.config.measurement_time = opts.measurement_time;

        assert!(opts.nresamples > 0);
        self.config.nresamples = opts.nresamples;

        assert!(opts.noise_threshold > 0.0);
        self.config.noise_threshold = opts.noise_threshold;

        assert!(opts.confidence_level > 0.0 && opts.confidence_level < 1.0);
        self.config.confidence_level = opts.confidence_level;

        assert!(opts.significance_level > 0.0 && opts.significance_level < 1.0);
        self.config.significance_level = opts.significance_level;

        self
    }

    pub(crate) fn filter_matches(&self, id: &str) -> bool {
        match &self.filter {
            BenchmarkFilter::AcceptAll => true,
            BenchmarkFilter::Exact(exact) => id == exact,
            BenchmarkFilter::RejectAll => false,
            BenchmarkFilter::Substring(s) => id.contains(s),
        }
    }

    /// Returns true iff we should save the benchmark results in
    /// json files on the local disk.
    pub(crate) fn should_save_baseline(&self) -> bool {
        self.connection.is_none()
            && self.load_baseline.is_none()
            && !matches!(self.baseline, Baseline::Discard)
    }

    /// Return a benchmark group. All benchmarks performed using a benchmark group will be
    /// grouped together in the final report.
    ///
    /// # Examples:
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench_simple(c: &mut Criterion) {
    ///     let mut group = c.benchmark_group("My Group");
    ///
    ///     // Now we can perform benchmarks with this group
    ///     group.bench_function("Bench 1", |b| b.iter(|| 1 ));
    ///     group.bench_function("Bench 2", |b| b.iter(|| 2 ));
    ///
    ///     group.finish();
    /// }
    /// criterion_group!(benches, bench_simple);
    /// criterion_main!(benches);
    /// ```
    /// # Panics:
    /// Panics if the group name is empty
    pub fn benchmark_group<S: Into<String>>(&mut self, group_name: S) -> BenchmarkGroup<'_, M> {
        let group_name = group_name.into();
        assert!(!group_name.is_empty(), "Group name must not be empty.");

        if let Some(conn) = &self.connection {
            conn.send(&OutgoingMessage::BeginningBenchmarkGroup { group: &group_name }).unwrap();
        }

        BenchmarkGroup::new(self, group_name)
    }
}
impl<M> Criterion<M>
where
    M: Measurement + 'static,
{
    /// Benchmarks a function. For comparing multiple functions, see `benchmark_group`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     c.bench_function(
    ///         "function_name",
    ///         |b| b.iter(|| {
    ///             // Code to benchmark goes here
    ///         }),
    ///     );
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    pub fn bench_function<F>(&mut self, id: &str, f: F) -> &mut Criterion<M>
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        self.benchmark_group(id).bench_function(BenchmarkId::no_function(), f);
        self
    }

    /// Benchmarks a function with an input. For comparing multiple functions or multiple inputs,
    /// see `benchmark_group`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     let input = 5u64;
    ///     c.bench_with_input(
    ///         BenchmarkId::new("function_name", input), &input,
    ///         |b, i| b.iter(|| {
    ///             // Code to benchmark using input `i` goes here
    ///         }),
    ///     );
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    pub fn bench_with_input<F, I>(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion<M>
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
    {
        // It's possible to use BenchmarkId::from_parameter to create a benchmark ID with no function
        // name. That's intended for use with BenchmarkGroups where the function name isn't necessary,
        // but here it is.
        let group_name = id.function_name.expect(
            "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. \
                 Consider using a BenchmarkGroup or BenchmarkId::new instead.",
        );
        // Guaranteed safe because external callers can't create benchmark IDs without a parameter
        let parameter = id.parameter.unwrap();
        self.benchmark_group(group_name).bench_with_input(
            BenchmarkId::no_function_with_input(parameter),
            input,
            f,
        );
        self
    }
}