criterion/benchmark_group.rs
1use std::time::Duration;
2
3use crate::{
4 Mode, SamplingMode, Throughput, analysis,
5 bencher::Bencher,
6 benchmark::PartialBenchmarkConfig,
7 connection::OutgoingMessage,
8 criterion::Criterion,
9 measurement::Measurement,
10 report::{BenchmarkId as InternalBenchmarkId, Report, ReportContext},
11 routine::{Function, Routine},
12};
13
14/// Structure used to group together a set of related benchmarks, along with custom configuration
15/// settings for groups of benchmarks. All benchmarks performed using a benchmark group will be
16/// grouped together in the final report.
17///
18/// # Examples:
19///
20/// ```no_run
21/// use self::criterion::*;
22/// use std::time::Duration;
23///
24/// fn bench_simple(c: &mut Criterion) {
25/// let mut group = c.benchmark_group("My Group");
26///
27/// // Now we can perform benchmarks with this group
28/// group.bench_function("Bench 1", |b| b.iter(|| 1 ));
29/// group.bench_function("Bench 2", |b| b.iter(|| 2 ));
30///
31/// // It's recommended to call group.finish() explicitly at the end, but if you don't it will
32/// // be called automatically when the group is dropped.
33/// group.finish();
34/// }
35///
36/// fn bench_nested(c: &mut Criterion) {
37/// let mut group = c.benchmark_group("My Second Group");
38/// // We can override the configuration on a per-group level
39/// group.measurement_time(Duration::from_secs(1));
40///
41/// // We can also use loops to define multiple benchmarks, even over multiple dimensions.
42/// for x in 0..3 {
43/// for y in 0..3 {
44/// let point = (x, y);
45/// let parameter_string = format!("{} * {}", x, y);
46/// group.bench_with_input(BenchmarkId::new("Multiply", parameter_string), &point,
47/// |b, (p_x, p_y)| b.iter(|| p_x * p_y));
48/// }
49/// }
50///
51/// group.finish();
52/// }
53///
54/// fn bench_throughput(c: &mut Criterion) {
55/// let mut group = c.benchmark_group("Summation");
56///
57/// for size in [1024, 2048, 4096].iter() {
58/// // Generate input of an appropriate size...
59/// let input = vec![1u64, *size];
60///
61/// // We can use the throughput function to tell Criterion.rs how large the input is
62/// // so it can calculate the overall throughput of the function. If we wanted, we could
63/// // even change the benchmark configuration for different inputs (eg. to reduce the
64/// // number of samples for extremely large and slow inputs) or even different functions.
65/// group.throughput(Throughput::Elements(*size as u64));
66///
67/// group.bench_with_input(BenchmarkId::new("sum", *size), &input,
68/// |b, i| b.iter(|| i.iter().sum::<u64>()));
69/// group.bench_with_input(BenchmarkId::new("fold", *size), &input,
70/// |b, i| b.iter(|| i.iter().fold(0u64, |a, b| a + b)));
71/// }
72///
73/// group.finish();
74/// }
75///
76/// criterion_group!(benches, bench_simple, bench_nested, bench_throughput);
77/// criterion_main!(benches);
78/// ```
79pub struct BenchmarkGroup<'a, M: Measurement> {
80 criterion: &'a mut Criterion<M>,
81 group_name: String,
82 all_ids: Vec<InternalBenchmarkId>,
83 any_matched: bool,
84 partial_config: PartialBenchmarkConfig,
85 throughput: Option<Throughput>,
86}
impl<'a, M: Measurement> BenchmarkGroup<'a, M> {
    /// Changes the size of the sample for this benchmark
    ///
    /// A bigger sample should yield more accurate results if paired with a sufficiently large
    /// measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10.
    pub fn sample_size(&mut self, n: usize) -> &mut Self {
        assert!(n >= 10);

        self.partial_config.sample_size = Some(n);
        self
    }

    /// Changes the warm up time for this benchmark
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn warm_up_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.warm_up_time = Some(dur);
        self
    }

    /// Changes the target measurement time for this benchmark group.
    ///
    /// Criterion will attempt to spent approximately this amount of time measuring each
    /// benchmark on a best-effort basis. If it is not possible to perform the measurement in
    /// the requested time (eg. because each iteration of the benchmark is long) then Criterion
    /// will spend as long as is needed to collect the desired number of samples. With a longer
    /// time, the measurement will become more resilient to interference from other programs.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    pub fn measurement_time(&mut self, dur: Duration) -> &mut Self {
        assert!(dur.as_nanos() > 0);

        self.partial_config.measurement_time = Some(dur);
        self
    }

    /// Changes the number of resamples for this benchmark group
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors which are inherent to the
    /// bootstrap method, but also increases the analysis time.
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
    pub fn nresamples(&mut self, n: usize) -> &mut Self {
        assert!(n > 0);
        // Low resample counts are accepted but warned about: they weaken the
        // statistical quality of the bootstrap without failing outright.
        if n <= 1000 {
            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.partial_config.nresamples = Some(n);
        self
    }

    /// Changes the noise threshold for benchmarks in this group. The noise threshold
    /// is used to filter out small changes in performance from one run to the next, even if they
    /// are statistically significant. Sometimes benchmarking the same code twice will result in
    /// small but statistically significant differences solely because of noise. This provides a way
    /// to filter out some of these false positives at the cost of making it harder to detect small
    /// changes to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
    pub fn noise_threshold(&mut self, threshold: f64) -> &mut Self {
        assert!(threshold >= 0.0);

        self.partial_config.noise_threshold = Some(threshold);
        self
    }

    /// Changes the confidence level for benchmarks in this group. The confidence
    /// level is the desired probability that the true runtime lies within the estimated
    /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)` range
    pub fn confidence_level(&mut self, cl: f64) -> &mut Self {
        assert!(cl > 0.0 && cl < 1.0);
        // Values below 0.5 are legal but almost certainly a mistake; warn
        // rather than panic.
        if cl < 0.5 {
            eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5.");
        }

        self.partial_config.confidence_level = Some(cl);
        self
    }

    /// Changes the [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks in this group. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured performance of the last run.
    /// The significance level is the desired probability that two measurements of identical code
    /// will be considered 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as different due to
    /// noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase
    /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to
    /// detect small but real changes in the performance. By setting the significance level
    /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also
    /// report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)` range
    pub fn significance_level(&mut self, sl: f64) -> &mut Self {
        assert!(sl > 0.0 && sl < 1.0);

        self.partial_config.significance_level = Some(sl);
        self
    }

    /// Set the input size for this benchmark group. Used for reporting the
    /// throughput.
    pub fn throughput(&mut self, throughput: Throughput) -> &mut Self {
        self.throughput = Some(throughput);
        self
    }

    /// Set the sampling mode for this benchmark group.
    pub fn sampling_mode(&mut self, new_mode: SamplingMode) -> &mut Self {
        self.partial_config.sampling_mode = Some(new_mode);
        self
    }

    /// Creates a new, empty benchmark group attached to the given Criterion
    /// instance. Crate-internal; users obtain groups via
    /// `Criterion::benchmark_group`.
    pub(crate) fn new(criterion: &mut Criterion<M>, group_name: String) -> BenchmarkGroup<'_, M> {
        BenchmarkGroup {
            criterion,
            group_name,
            all_ids: vec![],
            any_matched: false,
            partial_config: PartialBenchmarkConfig::default(),
            throughput: None,
        }
    }

    /// Benchmark the given parameterless function inside this benchmark group.
    pub fn bench_function<ID: IntoBenchmarkId, F>(&mut self, id: ID, mut f: F) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        // Adapt the parameterless closure to the parameterized path by
        // supplying a unit input that the wrapper ignores.
        self.run_bench(id.into_benchmark_id(), &(), |b, _| f(b));
        self
    }

    /// Benchmark the given parameterized function inside this benchmark group.
    pub fn bench_with_input<ID: IntoBenchmarkId, F, I>(
        &mut self,
        id: ID,
        input: &I,
        f: F,
    ) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        self.run_bench(id.into_benchmark_id(), input, f);
        self
    }

    /// Shared implementation behind `bench_function` and `bench_with_input`.
    ///
    /// Merges the group-level configuration over the global one, registers the
    /// benchmark ID for duplicate detection and later summarization, then
    /// dispatches on the current `Mode` (benchmark / list / test / profile).
    ///
    /// # Panics
    ///
    /// Panics if the resulting benchmark ID duplicates one already used in
    /// this group.
    fn run_bench<F, I>(&mut self, id: BenchmarkId, input: &I, f: F)
    where
        F: FnMut(&mut Bencher<'_, M>, &I),
        I: ?Sized,
    {
        // Group-level overrides take precedence over the global configuration.
        let config = self.partial_config.to_complete(&self.criterion.config);
        let report_context =
            ReportContext { output_directory: self.criterion.output_directory.clone() };

        // Expand the user-facing ID into the internal form, which also carries
        // the group name and the throughput setting in effect right now.
        let mut id = InternalBenchmarkId::new(
            self.group_name.clone(),
            id.function_name,
            id.parameter,
            self.throughput.clone(),
        );

        assert!(
            !self.all_ids.contains(&id),
            "Benchmark IDs must be unique within a group. Encountered duplicated benchmark ID {}",
            &id
        );

        // Directory names and report titles must be unique across the whole
        // run, not just within this group, so register them globally.
        id.ensure_directory_name_unique(&self.criterion.all_directories);
        self.criterion.all_directories.insert(id.as_directory_name().to_owned());
        id.ensure_title_unique(&self.criterion.all_titles);
        self.criterion.all_titles.insert(id.as_title().to_owned());

        // Honor the CLI filter; remember whether anything in this group
        // matched so Drop knows whether to emit summaries.
        let do_run = self.criterion.filter_matches(id.id());
        self.any_matched |= do_run;
        let mut func = Function::new(f);

        match &self.criterion.mode {
            Mode::Benchmark => {
                // Tell the attached runner (if any) whether this benchmark is
                // about to execute or be skipped, before doing any work.
                if let Some(conn) = &self.criterion.connection {
                    if do_run {
                        conn.send(&OutgoingMessage::BeginningBenchmark { id: (&id).into() })
                            .unwrap();
                    } else {
                        conn.send(&OutgoingMessage::SkippingBenchmark { id: (&id).into() })
                            .unwrap();
                    }
                }
                if do_run {
                    analysis::common(
                        &id,
                        &mut func,
                        &config,
                        self.criterion,
                        &report_context,
                        input,
                        self.throughput.clone(),
                    );
                }
            }
            Mode::List(_) => {
                if do_run {
                    println!("{}: benchmark", id);
                }
            }
            Mode::Test => {
                if do_run {
                    // In test mode, run the benchmark exactly once, then exit.
                    self.criterion.report.test_start(&id, &report_context);
                    func.test(&self.criterion.measurement, input);
                    self.criterion.report.test_pass(&id, &report_context);
                }
            }
            &Mode::Profile(duration) => {
                if do_run {
                    func.profile(
                        &self.criterion.measurement,
                        &id,
                        self.criterion,
                        &report_context,
                        duration,
                        input,
                    );
                }
            }
        }

        // Record the ID even when the benchmark was filtered out, so duplicate
        // detection and summary generation see every ID declared in the group.
        self.all_ids.push(id);
    }

    /// Consume the benchmark group and generate the summary reports for the group.
    ///
    /// It is recommended to call this explicitly, but if you forget it will be called when the
    /// group is dropped.
    pub fn finish(self) {
        // All of the real work happens in the Drop impl; finish() just makes
        // the end of the group explicit at the call site.
        ::std::mem::drop(self);
    }
}
360impl<'a, M: Measurement> Drop for BenchmarkGroup<'a, M> {
361 fn drop(&mut self) {
362 // I don't really like having a bunch of non-trivial code in drop, but this is the only way
363 // to really write linear types like this in Rust...
364 if let Some(conn) = &mut self.criterion.connection {
365 conn.send(&OutgoingMessage::FinishedBenchmarkGroup { group: &self.group_name })
366 .unwrap();
367
368 conn.serve_value_formatter(self.criterion.measurement.formatter()).unwrap();
369 }
370
371 if self.all_ids.len() > 1 && self.any_matched && self.criterion.mode.is_benchmark() {
372 let report_context =
373 ReportContext { output_directory: self.criterion.output_directory.clone() };
374
375 self.criterion.report.summarize(
376 &report_context,
377 &self.all_ids,
378 self.criterion.measurement.formatter(),
379 );
380 }
381 if self.any_matched && !self.criterion.mode.is_terse() {
382 self.criterion.report.group_separator();
383 }
384 }
385}
386
/// Simple structure representing an ID for a benchmark. The ID must be unique within a benchmark
/// group.
#[derive(Clone, Eq, PartialEq, Hash)]
pub struct BenchmarkId {
    // Name of the benchmarked function; None for parameter-only IDs
    // (see `from_parameter`).
    pub(crate) function_name: Option<String>,
    // Display-formatted parameter value; None when the ID has no parameter.
    pub(crate) parameter: Option<String>,
}
394impl BenchmarkId {
395 /// Construct a new benchmark ID from a string function name and a parameter value.
396 ///
397 /// Note that the parameter value need not be the same as the parameter passed to your
398 /// actual benchmark. For instance, you might have a benchmark that takes a 1MB string as
399 /// input. It would be impractical to embed the whole string in the benchmark ID, so instead
400 /// your parameter value might be a descriptive string like "1MB Alphanumeric".
401 ///
402 /// # Examples
403 /// ```
404 /// # use criterion::{BenchmarkId, Criterion};
405 /// // A basic benchmark ID is typically constructed from a constant string and a simple
406 /// // parameter
407 /// let basic_id = BenchmarkId::new("my_id", 5);
408 ///
409 /// // The function name can be a string
410 /// let function_name = "test_string".to_string();
411 /// let string_id = BenchmarkId::new(function_name, 12);
412 ///
413 /// // Benchmark IDs are passed to benchmark groups:
414 /// let mut criterion = Criterion::default();
415 /// let mut group = criterion.benchmark_group("My Group");
416 /// // Generate a very large input
417 /// let input : String = ::std::iter::repeat("X").take(1024 * 1024).collect();
418 ///
419 /// // Note that we don't have to use the input as the parameter in the ID
420 /// group.bench_with_input(BenchmarkId::new("Test long string", "1MB X's"), &input, |b, i| {
421 /// b.iter(|| i.len())
422 /// });
423 /// ```
424 pub fn new<S: Into<String>, P: ::std::fmt::Display>(
425 function_name: S,
426 parameter: P,
427 ) -> BenchmarkId {
428 BenchmarkId {
429 function_name: Some(function_name.into()),
430 parameter: Some(format!("{}", parameter)),
431 }
432 }
433
434 /// Construct a new benchmark ID from just a parameter value. Use this when benchmarking a
435 /// single function with a variety of different inputs.
436 pub fn from_parameter<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
437 BenchmarkId { function_name: None, parameter: Some(format!("{}", parameter)) }
438 }
439
440 pub(crate) fn no_function() -> BenchmarkId {
441 BenchmarkId { function_name: None, parameter: None }
442 }
443
444 pub(crate) fn no_function_with_input<P: ::std::fmt::Display>(parameter: P) -> BenchmarkId {
445 BenchmarkId { function_name: None, parameter: Some(format!("{}", parameter)) }
446 }
447}
448
// Sealed-trait pattern: `IntoBenchmarkId` requires `private::Sealed`, and since
// this module is private, downstream crates cannot implement `Sealed` (and thus
// cannot implement `IntoBenchmarkId`) for their own types.
mod private {
    pub trait Sealed {}
    impl Sealed for super::BenchmarkId {}
    impl<S: Into<String>> Sealed for S {}
}
454
/// Sealed trait which allows users to automatically convert strings to benchmark IDs.
pub trait IntoBenchmarkId: private::Sealed {
    // Converts the value into a `BenchmarkId` usable by a benchmark group.
    fn into_benchmark_id(self) -> BenchmarkId;
}
impl IntoBenchmarkId for BenchmarkId {
    // A BenchmarkId is already in its final form; this is the identity conversion.
    fn into_benchmark_id(self) -> BenchmarkId {
        self
    }
}
464impl<S: Into<String>> IntoBenchmarkId for S {
465 fn into_benchmark_id(self) -> BenchmarkId {
466 let function_name = self.into();
467 assert!(!function_name.is_empty(), "Function name must not be empty.");
468
469 BenchmarkId { function_name: Some(function_name), parameter: None }
470 }
471}