criterion/
criterion.rs

use std::{
    cell::RefCell,
    collections::HashSet,
    io::{IsTerminal, stdout},
    path::{Path, PathBuf},
    sync::MutexGuard,
    time::Duration,
};

use crate::{
    Baseline, BencherReport, BenchmarkConfig, BenchmarkFilter, CliReport, CliVerbosity, Connection,
    ExternalProfiler, Measurement, Mode, OutgoingMessage, Profiler, Report, ReportContext, Reports,
    WallTime,
    bencher::Bencher,
    benchmark_group::{BenchmarkGroup, BenchmarkId},
    cargo_criterion_connection, debug_enabled, default_output_directory,
};
/// The benchmark manager
///
/// `Criterion` lets you configure and execute benchmarks
///
/// Each benchmark consists of four phases:
///
/// - **Warm-up**: The routine is repeatedly executed, to let the CPU/OS/JIT/interpreter adapt to
///   the new load
/// - **Measurement**: The routine is repeatedly executed, and timing information is collected into
///   a sample
/// - **Analysis**: The sample is analyzed and distilled into meaningful statistics that get
///   reported to stdout, stored in files, and plotted
/// - **Comparison**: The current sample is compared with the sample obtained in the previous
///   benchmark.
pub struct Criterion<M: Measurement = WallTime> {
    pub(crate) config: BenchmarkConfig,
    pub(crate) filter: BenchmarkFilter,
    pub(crate) report: Reports,
    pub(crate) output_directory: PathBuf,
    pub(crate) baseline_directory: String,
    pub(crate) baseline: Baseline,
    pub(crate) load_baseline: Option<String>,
    pub(crate) all_directories: HashSet<String>,
    pub(crate) all_titles: HashSet<String>,
    pub(crate) measurement: M,
    pub(crate) profiler: Box<RefCell<dyn Profiler>>,
    pub(crate) connection: Option<MutexGuard<'static, Connection>>,
    pub(crate) mode: Mode,
}

impl Default for Criterion {
    /// Creates a benchmark manager with the following default settings:
    ///
    /// - Sample size: 100 measurements
    /// - Warm-up time: 3 s
    /// - Measurement time: 5 s
    /// - Bootstrap size: 100 000 resamples
    /// - Noise threshold: 0.01 (1%)
    /// - Confidence level: 0.95
    /// - Significance level: 0.05
    /// - Plotting: enabled, using gnuplot if available or plotters if gnuplot is not available
    /// - No filter
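    ///
    /// These defaults can be overridden with the builder methods on `Criterion`. A minimal
    /// sketch (illustrative only; `bench_defaults` is a placeholder benchmark function):
    ///
    /// ```rust
    /// use self::criterion::*;
    /// use std::time::Duration;
    ///
    /// fn bench_defaults(c: &mut Criterion) {
    ///     c.bench_function("placeholder", |b| b.iter(|| 1));
    /// }
    ///
    /// // Pass a customized `Criterion` as the group's config.
    /// criterion_group! {
    ///     name = benches;
    ///     config = Criterion::default()
    ///         .sample_size(200)
    ///         .measurement_time(Duration::from_secs(10));
    ///     targets = bench_defaults
    /// }
    /// criterion_main!(benches);
    /// ```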
    fn default() -> Criterion {
        let reports = Reports {
            cli_enabled: true,
            cli: CliReport::new(false, false, CliVerbosity::Normal),
            bencher_enabled: false,
            bencher: BencherReport,
        };

        let mut criterion = Criterion {
            config: BenchmarkConfig::default(),
            filter: BenchmarkFilter::AcceptAll,
            report: reports,
            baseline_directory: "base".to_owned(),
            baseline: Baseline::Save,
            load_baseline: None,
            output_directory: default_output_directory().clone(),
            all_directories: HashSet::new(),
            all_titles: HashSet::new(),
            measurement: WallTime,
            profiler: Box::new(RefCell::new(ExternalProfiler)),
            connection: cargo_criterion_connection().as_ref().map(|mtx| mtx.lock().unwrap()),
            mode: Mode::Benchmark,
        };

        if criterion.connection.is_some() {
            // disable all reports when connected to cargo-criterion; it will do the reporting.
            criterion.report.cli_enabled = false;
            criterion.report.bencher_enabled = false;
        }
        criterion
    }
}

impl<M: Measurement> Criterion<M> {
    /// Changes the measurement for the benchmarks run with this runner. See the
    /// Measurement trait for more details
    pub fn with_measurement<M2: Measurement>(self, m: M2) -> Criterion<M2> {
        // Can't use struct update syntax here because they're technically different types.
        Criterion {
            config: self.config,
            filter: self.filter,
            report: self.report,
            baseline_directory: self.baseline_directory,
            baseline: self.baseline,
            load_baseline: self.load_baseline,
            output_directory: self.output_directory,
            all_directories: self.all_directories,
            all_titles: self.all_titles,
            measurement: m,
            profiler: self.profiler,
            connection: self.connection,
            mode: self.mode,
        }
    }

    #[must_use]
    /// Changes the internal profiler for benchmarks run with this runner. See
    /// the Profiler trait for more details.
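    ///
    /// A minimal sketch of a custom profiler (illustrative only; it assumes the `Profiler`
    /// trait is publicly exported so `use self::criterion::*;` brings it in, and the hook
    /// bodies are left empty):
    ///
    /// ```rust
    /// use self::criterion::*;
    /// use std::path::Path;
    ///
    /// struct MyProfiler;
    ///
    /// impl Profiler for MyProfiler {
    ///     fn start_profiling(&mut self, _benchmark_id: &str, _benchmark_dir: &Path) {
    ///         // Start collecting profiling data for this benchmark here.
    ///     }
    ///     fn stop_profiling(&mut self, _benchmark_id: &str, _benchmark_dir: &Path) {
    ///         // Stop collecting and write any results here.
    ///     }
    /// }
    ///
    /// fn profiled() -> Criterion {
    ///     Criterion::default().with_profiler(MyProfiler)
    /// }
    /// ```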
    pub fn with_profiler<P: Profiler + 'static>(self, p: P) -> Criterion<M> {
        Criterion { profiler: Box::new(RefCell::new(p)), ..self }
    }

    #[must_use]
    /// Changes the default size of the sample for benchmarks run with this runner.
    ///
    /// A bigger sample should yield more accurate results if paired with a sufficiently large
    /// measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10
    pub fn sample_size(mut self, n: usize) -> Criterion<M> {
        assert!(n >= 10);

        self.config.sample_size = n;
        self
    }

    #[must_use]
    /// Changes the default warm up time for benchmarks run with this runner.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn warm_up_time(mut self, dur: Duration) -> Criterion<M> {
        assert!(dur.as_nanos() > 0);

        self.config.warm_up_time = dur;
        self
    }

    #[must_use]
    /// Changes the default measurement time for benchmarks run with this runner.
    ///
    /// With a longer time, the measurement will become more resilient to transitory peak loads
    /// caused by external programs
    ///
    /// **Note**: If the measurement time is too "low", Criterion will automatically increase it
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn measurement_time(mut self, dur: Duration) -> Criterion<M> {
        assert!(dur.as_nanos() > 0);

        self.config.measurement_time = dur;
        self
    }

    #[must_use]
    /// Changes the default number of resamples for benchmarks run with this runner.
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors, which are inherent to the
    /// bootstrap method, but also increases the analysis time
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
    pub fn nresamples(mut self, n: usize) -> Criterion<M> {
        assert!(n > 0);
        if n <= 1000 {
            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.config.nresamples = n;
        self
    }

    #[must_use]
    /// Changes the default noise threshold for benchmarks run with this runner. The noise threshold
    /// is used to filter out small changes in performance, even if they are statistically
    /// significant. Sometimes benchmarking the same code twice will result in small but
    /// statistically significant differences solely because of noise. This provides a way to filter
    /// out some of these false positives at the cost of making it harder to detect small changes
    /// to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
    pub fn noise_threshold(mut self, threshold: f64) -> Criterion<M> {
        assert!(threshold >= 0.0);

        self.config.noise_threshold = threshold;
        self
    }

    #[must_use]
    /// Changes the default confidence level for benchmarks run with this runner. The confidence
    /// level is the desired probability that the true runtime lies within the estimated
    /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)` range
    pub fn confidence_level(mut self, cl: f64) -> Criterion<M> {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
        }

        self.config.confidence_level = cl;
        self
    }

    #[must_use]
    /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks run with this runner. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured performance of the last run.
    /// The significance level is the desired probability that two measurements of identical code
    /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as different due to
    /// noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
    /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
    /// detect small but real changes in the performance. By setting the significance level
    /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also
    /// report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
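    /// # Example
    ///
    /// A small sketch (illustrative only; `configure` is a placeholder helper) that trades some
    /// sensitivity to small changes for fewer spurious "regression" reports by combining this
    /// setting with the noise threshold:
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn configure() -> Criterion {
    ///     Criterion::default()
    ///         .significance_level(0.01) // fewer false positives on identical code
    ///         .noise_threshold(0.03)    // ignore changes smaller than 3%
    /// }
    /// ```
    ///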
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)` range
    pub fn significance_level(mut self, sl: f64) -> Criterion<M> {
        assert!(sl > 0.0 && sl < 1.0);

        self.config.significance_level = sl;
        self
    }

    #[must_use]
    /// Names an explicit baseline and enables overwriting the previous results.
    pub fn save_baseline(mut self, baseline: String) -> Criterion<M> {
        self.baseline_directory = baseline;
        self.baseline = Baseline::Save;
        self
    }

    #[must_use]
    /// Names an explicit baseline and disables overwriting the previous results.
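    ///
    /// A minimal sketch (illustrative only; the baseline name "main" is arbitrary) of the
    /// intended workflow together with [`Criterion::save_baseline`]: record a named baseline in
    /// one run, then compare later runs against it without overwriting it.
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// // First run: record results under the "main" baseline.
    /// fn record() -> Criterion {
    ///     Criterion::default().save_baseline("main".to_owned())
    /// }
    ///
    /// // Later runs: compare against "main" without overwriting it (strict comparison).
    /// fn compare() -> Criterion {
    ///     Criterion::default().retain_baseline("main".to_owned(), true)
    /// }
    /// ```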
    pub fn retain_baseline(mut self, baseline: String, strict: bool) -> Criterion<M> {
        self.baseline_directory = baseline;
        self.baseline = if strict { Baseline::CompareStrict } else { Baseline::CompareLenient };
        self
    }

    /// Only run benchmarks specified by the given filter.
    ///
    /// This overwrites [`Self::with_filter`].
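    ///
    /// A short sketch (illustrative only; it assumes the `BenchmarkFilter` variants matched in
    /// `filter_matches` below can be constructed directly):
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// // Only run benchmarks whose ID contains the substring "fib".
    /// fn configure() -> Criterion {
    ///     Criterion::default().with_benchmark_filter(BenchmarkFilter::Substring("fib".to_owned()))
    /// }
    /// ```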
    pub fn with_benchmark_filter(mut self, filter: BenchmarkFilter) -> Criterion<M> {
        self.filter = filter;

        self
    }

    #[must_use]
    /// Override whether the CLI output will be colored or not. Usually you would use the `--color`
    /// CLI argument, but this is available for programmatic use as well.
    pub fn with_output_color(mut self, enabled: bool) -> Criterion<M> {
        self.report.cli.enable_text_coloring = enabled;
        self
    }

    /// Set the output directory (currently for testing only)
    #[must_use]
    #[doc(hidden)]
    pub fn output_directory(mut self, path: &Path) -> Criterion<M> {
        path.clone_into(&mut self.output_directory);

        self
    }

    /// Set the profile time (currently for testing only)
    #[must_use]
    #[doc(hidden)]
    pub fn profile_time(mut self, profile_time: Option<Duration>) -> Criterion<M> {
        match profile_time {
            Some(time) => self.mode = Mode::Profile(time),
            None => self.mode = Mode::Benchmark,
        }

        self
    }

    /// Generate the final summary at the end of a run.
    #[doc(hidden)]
    pub fn final_summary(&self) {
        if !self.mode.is_benchmark() {
            return;
        }

        let report_context = ReportContext { output_directory: self.output_directory.clone() };

        self.report.final_summary(&report_context);
    }

    /// Configure this criterion struct based on the command-line arguments to
    /// this process.
    #[must_use]
    pub fn configure_from_args(mut self) -> Criterion<M> {
        use crate::cli::*;

        let opts = options(&self.config).fallback_to_usage().run();

        if self.connection.is_some() {
            if opts.color != Color::Auto {
                eprintln!(
                    "Warning: --color will be ignored when running with cargo-criterion. Use `cargo criterion --color {} -- <args>` instead.",
                    opts.color
                );
            }

            // What about quiet?
            if opts.verbosity == CliVerbosity::Verbose {
                eprintln!(
                    "Warning: --verbose will be ignored when running with cargo-criterion. Use `cargo criterion --output-format verbose -- <args>` instead."
                );
            }
            if opts.output_format != OutputFormat::Criterion {
                eprintln!(
                    "Warning: --output-format will be ignored when running with cargo-criterion. Use `cargo criterion --output-format {} -- <args>` instead.",
                    opts.output_format
                );
            }

            // TODO - currently baseline stuff seems to be partially coupled with operations
            if matches!(opts.op, Op::LoadBaseline(_)) {
                eprintln!("Error: baselines are not supported when running with cargo-criterion.");
                std::process::exit(1);
            }
        }

        self.mode = match opts.op {
            Op::List => Mode::List(opts.format),
            Op::LoadBaseline(ref dir) => {
                self.load_baseline = Some(dir.to_owned());
                Mode::Benchmark
            }
            Op::ProfileTime(t) => Mode::Profile(t),
            Op::Test => Mode::Test,
            Op::Benchmark => Mode::Benchmark,
        };

        // This is kind of a hack, but disable the connection to the runner if we're not benchmarking.
        if !self.mode.is_benchmark() {
            self.connection = None;
        }

        let filter = if opts.ignored {
            // --ignored overwrites any name-based filters passed in.
            BenchmarkFilter::RejectAll
        } else if let Some(filter) = opts.filter.as_ref() {
            let filter = filter.to_owned();
            if opts.exact {
                BenchmarkFilter::Exact(filter)
            } else {
                BenchmarkFilter::Substring(filter)
            }
        } else {
            BenchmarkFilter::AcceptAll
        };
        self = self.with_benchmark_filter(filter);

        match opts.baseline {
            Baseline_::Save(ref dir) => {
                self.baseline = Baseline::Save;
                dir.clone_into(&mut self.baseline_directory)
            }
            Baseline_::Discard => {
                self.baseline = Baseline::Discard;
            }
            Baseline_::Lenient(ref dir) => {
                self.baseline = Baseline::CompareLenient;
                dir.clone_into(&mut self.baseline_directory);
            }
            Baseline_::Strict(ref dir) => {
                self.baseline = Baseline::CompareStrict;
                dir.clone_into(&mut self.baseline_directory);
            }
        }

        if self.connection.is_some() {
            // disable all reports when connected to cargo-criterion; it will do the reporting.
            self.report.cli_enabled = false;
            self.report.bencher_enabled = false;
        } else {
            match opts.output_format {
                OutputFormat::Bencher => {
                    self.report.bencher_enabled = true;
                    self.report.cli_enabled = false;
                }
                OutputFormat::Criterion => {
                    let verbosity = opts.verbosity;
                    let verbose = opts.verbosity == CliVerbosity::Verbose;

                    let stdout_isatty = stdout().is_terminal();
                    let mut enable_text_overwrite = stdout_isatty && !verbose && !debug_enabled();
                    let enable_text_coloring;
                    match opts.color {
                        Color::Auto => enable_text_coloring = stdout_isatty,
                        Color::Always => enable_text_coloring = true,
                        Color::Never => {
                            enable_text_coloring = false;
                            enable_text_overwrite = false;
                        }
                    }
                    self.report.bencher_enabled = false;
                    self.report.cli_enabled = true;
                    self.report.cli = CliReport::new(
                        enable_text_overwrite,
                        enable_text_coloring,
                        verbosity.into(),
                    );
                }
            }
        }

        match opts.sample {
            Sample::Specific(size) => {
                assert!(size >= 10);
                self.config.sample_size = size;
            }
            Sample::Quick => self.config.quick_mode = true,
        }

        assert!(opts.warm_up_time > Duration::from_secs(0));
        self.config.warm_up_time = opts.warm_up_time;

        assert!(opts.measurement_time > Duration::from_secs(0));
        self.config.measurement_time = opts.measurement_time;

        assert!(opts.nresamples > 0);
        self.config.nresamples = opts.nresamples;

        assert!(opts.noise_threshold > 0.0);
        self.config.noise_threshold = opts.noise_threshold;

        assert!(opts.confidence_level > 0.0 && opts.confidence_level < 1.0);
        self.config.confidence_level = opts.confidence_level;

        assert!(opts.significance_level > 0.0 && opts.significance_level < 1.0);
        self.config.significance_level = opts.significance_level;

        self
    }

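    /// Returns true if the given benchmark ID passes the benchmark filter configured for this
    /// `Criterion` instance.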
    pub(crate) fn filter_matches(&self, id: &str) -> bool {
        match &self.filter {
            BenchmarkFilter::AcceptAll => true,
            BenchmarkFilter::Exact(exact) => id == exact,
            BenchmarkFilter::RejectAll => false,
            BenchmarkFilter::Substring(s) => id.contains(s),
        }
    }

    /// Returns true iff we should save the benchmark results in
    /// json files on the local disk.
    pub(crate) fn should_save_baseline(&self) -> bool {
        self.connection.is_none()
            && self.load_baseline.is_none()
            && !matches!(self.baseline, Baseline::Discard)
    }

    /// Return a benchmark group. All benchmarks performed using a benchmark group will be
    /// grouped together in the final report.
    ///
    /// # Examples:
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench_simple(c: &mut Criterion) {
    ///     let mut group = c.benchmark_group("My Group");
    ///
    ///     // Now we can perform benchmarks with this group
    ///     group.bench_function("Bench 1", |b| b.iter(|| 1 ));
    ///     group.bench_function("Bench 2", |b| b.iter(|| 2 ));
    ///
    ///     group.finish();
    /// }
    /// criterion_group!(benches, bench_simple);
    /// criterion_main!(benches);
    /// ```
    /// # Panics:
    /// Panics if the group name is empty
    pub fn benchmark_group<S: Into<String>>(&mut self, group_name: S) -> BenchmarkGroup<'_, M> {
        let group_name = group_name.into();
        assert!(!group_name.is_empty(), "Group name must not be empty.");

        if let Some(conn) = &self.connection {
            conn.send(&OutgoingMessage::BeginningBenchmarkGroup { group: &group_name }).unwrap();
        }

        BenchmarkGroup::new(self, group_name)
    }
}

impl<M> Criterion<M>
where
    M: Measurement + 'static,
{
    /// Benchmarks a function. For comparing multiple functions, see `benchmark_group`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     c.bench_function(
    ///         "function_name",
    ///         |b| b.iter(|| {
    ///             // Code to benchmark goes here
    ///         }),
    ///     );
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    pub fn bench_function<F>(&mut self, id: &str, f: F) -> &mut Criterion<M>
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        self.benchmark_group(id).bench_function(BenchmarkId::no_function(), f);
        self
    }

    /// Benchmarks a function with an input. For comparing multiple functions or multiple inputs,
    /// see `benchmark_group`.
    ///
    /// # Example
    ///
    /// ```rust
    /// use self::criterion::*;
    ///
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     let input = 5u64;
    ///     c.bench_with_input(
    ///         BenchmarkId::new("function_name", input), &input,
    ///         |b, i| b.iter(|| {
    ///             // Code to benchmark using input `i` goes here
    ///         }),
    ///     );
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    pub fn bench_with_input<F, I>(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion<M>
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
    {
        // It's possible to use BenchmarkId::from_parameter to create a benchmark ID with no function
        // name. That's intended for use with BenchmarkGroups where the function name isn't necessary,
        // but here it is.
        let group_name = id.function_name.expect(
            "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. \
                 Consider using a BenchmarkGroup or BenchmarkId::new instead.",
        );
        // Guaranteed safe because external callers can't create benchmark IDs without a parameter
        let parameter = id.parameter.unwrap();
        self.benchmark_group(group_name).bench_with_input(
            BenchmarkId::no_function_with_input(parameter),
            input,
            f,
        );
        self
    }
}