iai_callgrind/lib.rs
//! Iai-Callgrind is a benchmarking framework/harness which primarily uses [Valgrind's
//! Callgrind](https://valgrind.org/docs/manual/cl-manual.html) and the other Valgrind tools to
//! provide extremely accurate and consistent measurements of Rust code, making it perfectly suited
//! to run in environments like a CI.
//!
//! # Table of contents
//! - [Characteristics](#characteristics)
//! - [Benchmarking](#benchmarking)
//!   - [Library Benchmarks](#library-benchmarks)
//!     - [Important default behavior](#important-default-behavior)
//!     - [Quickstart](#quickstart-library-benchmarks)
//!     - [Configuration](#configuration-library-benchmarks)
//!   - [Binary Benchmarks](#binary-benchmarks)
//!     - [Important default behavior](#important-default-behavior-1)
//!     - [Quickstart](#quickstart-binary-benchmarks)
//!     - [Configuration](#configuration-binary-benchmarks)
//! - [Valgrind Tools](#valgrind-tools)
//! - [Client Requests](#client-requests)
//! - [Flamegraphs](#flamegraphs)
//!
//! ## Characteristics
//! - __Precision__: High-precision measurements allow you to reliably detect very small
//!   optimizations of your code
//! - __Consistency__: Iai-Callgrind can take accurate measurements even in virtualized CI
//!   environments
//! - __Performance__: Since Iai-Callgrind executes a benchmark only once, it is typically much
//!   faster to run than benchmarks which measure execution and wall-clock time
//! - __Regression__: Iai-Callgrind reports the difference between benchmark runs to make it easy
//!   to spot detailed performance regressions and improvements.
//! - __CPU and Cache Profiling__: Iai-Callgrind generates a Callgrind profile of your code while
//!   benchmarking, so you can use Callgrind-compatible tools like
//!   [callgrind_annotate](https://valgrind.org/docs/manual/cl-manual.html#cl-manual.callgrind_annotate-options)
//!   or the visualizer [kcachegrind](https://kcachegrind.github.io/html/Home.html) to analyze the
//!   results in detail.
//! - __Memory Profiling__: You can run other Valgrind tools like [DHAT: a dynamic heap analysis tool](https://valgrind.org/docs/manual/dh-manual.html)
//!   and [Massif: a heap profiler](https://valgrind.org/docs/manual/ms-manual.html) with the
//!   Iai-Callgrind benchmarking framework. Their profiles are stored next to the callgrind
//!   profiles and are ready to be examined with analyzing tools like `dh_view.html`, `ms_print`
//!   and others.
//! - __Visualization__: Iai-Callgrind is capable of creating regular and differential flamegraphs
//!   from the Callgrind output format.
//! - __Valgrind Client Requests__: Support for zero-overhead [Valgrind Client Requests](https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq)
//!   (compared to the overhead of native Valgrind client requests) on many targets
//! - __Stable-compatible__: Benchmark your code without installing nightly Rust
//!
//! ## Benchmarking
//!
//! `iai-callgrind` can be divided into two sections: benchmarking the library and
//! its public functions, and benchmarking the binaries of a crate.
//!
//! ### Library Benchmarks
//!
//! Use this scheme of the [`main`] macro if you want to benchmark functions of your
//! crate's library.
//!
//! #### Important default behavior
//!
//! The environment variables are cleared before running a library benchmark. See also the
//! Configuration section below if you need to change that behavior.
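//!
//! For example, a minimal sketch which keeps the environment variables for all library benchmarks
//! (assuming the `env_clear` setter of [`LibraryBenchmarkConfig`]):
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, LibraryBenchmarkConfig};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     // Don't clear the environment variables for any benchmark of `some_group`
//!     config = LibraryBenchmarkConfig::default().env_clear(false);
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```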
//!
//! #### Quickstart (library benchmarks)
//!
//! ```rust
//! use iai_callgrind::{
//!     library_benchmark, library_benchmark_group, main, LibraryBenchmarkConfig
//! };
//! use std::hint::black_box;
//!
//! // Our function we want to test. Just assume this is a public function in your
//! // library.
//! fn bubble_sort(mut array: Vec<i32>) -> Vec<i32> {
//!     for i in 0..array.len() {
//!         for j in 0..array.len() - i - 1 {
//!             if array[j + 1] < array[j] {
//!                 array.swap(j, j + 1);
//!             }
//!         }
//!     }
//!     array
//! }
//!
//! // This function is used to create a worst case array we want to sort with our
//! // implementation of bubble sort
//! fn setup_worst_case_array(start: i32) -> Vec<i32> {
//!     if start.is_negative() {
//!         (start..0).rev().collect()
//!     } else {
//!         (0..start).rev().collect()
//!     }
//! }
//!
//! // The #[library_benchmark] attribute lets you define a benchmark function which you
//! // can later use in the `library_benchmark_group!` macro.
//! #[library_benchmark]
//! fn bench_bubble_sort_empty() -> Vec<i32> {
//!     // The `black_box` is needed to tell the compiler to not optimize what's inside
//!     // black_box or else the benchmarks might return inaccurate results.
//!     black_box(bubble_sort(black_box(vec![])))
//! }
//!
//! // This benchmark uses the `bench` attribute to set up benchmarks with different
//! // setups. The big advantage is that the setup costs and event counts aren't
//! // attributed to the benchmark (and, as opposed to the old api, we don't have to deal
//! // with callgrind arguments, toggles, ...)
//! #[library_benchmark]
//! #[bench::empty(vec![])]
//! #[bench::worst_case_6(vec![6, 5, 4, 3, 2, 1])]
//! // Function calls are fine too
//! #[bench::worst_case_4000(setup_worst_case_array(4000))]
//! // The argument of the benchmark function defines the type of the argument from the
//! // `bench` cases.
//! fn bench_bubble_sort(array: Vec<i32>) -> Vec<i32> {
//!     // Note `array` is not put in a `black_box` because that's already done for you.
//!     black_box(bubble_sort(array))
//! }
//!
//! // You can use the `benches` attribute to specify multiple benchmark runs in one go. You can
//! // specify multiple `benches` attributes or mix the `benches` attribute with `bench`
//! // attributes.
//! #[library_benchmark]
//! // This is the simple form. Each `,`-separated element is another benchmark run and is
//! // passed to the benchmarking function as a parameter. So, this is the same as specifying
//! // the two `#[bench]` attributes `#[bench::multiple_0(vec![1])]` and
//! // `#[bench::multiple_1(vec![5])]`.
//! #[benches::multiple(vec![1], vec![5])]
//! // You can also use the `args` argument to achieve the same. Using `args` is necessary if you
//! // also want to specify a `config` or `setup` function.
//! #[benches::with_args(args = [vec![1], vec![5]], config = LibraryBenchmarkConfig::default())]
//! // Usually, each element in `args` is passed directly to the benchmarking function. You can
//! // instead reroute them to a `setup` function. In that case the (black boxed) return value of
//! // the setup function is passed as a parameter to the benchmarking function.
//! #[benches::with_setup(args = [1, 5], setup = setup_worst_case_array)]
//! fn bench_bubble_sort_with_benches_attribute(input: Vec<i32>) -> Vec<i32> {
//!     black_box(bubble_sort(input))
//! }
//!
//! // A benchmarking function with multiple parameters requires the elements to be specified as
//! // tuples.
//! #[library_benchmark]
//! #[benches::multiple((1, 2), (3, 4))]
//! fn bench_bubble_sort_with_multiple_parameters(a: i32, b: i32) -> Vec<i32> {
//!     black_box(bubble_sort(black_box(vec![a, b])))
//! }
//!
//! // A group in which we can put all our benchmark functions
//! library_benchmark_group!(
//!     name = bubble_sort_group;
//!     benchmarks =
//!         bench_bubble_sort_empty,
//!         bench_bubble_sort,
//!         bench_bubble_sort_with_benches_attribute,
//!         bench_bubble_sort_with_multiple_parameters
//! );
//!
//! # fn main() {
//! // Finally, the mandatory main! macro which collects all `library_benchmark_groups`.
//! // The main! macro creates a benchmarking harness and runs all the benchmarks defined
//! // in the groups and benches.
//! main!(library_benchmark_groups = bubble_sort_group);
//! # }
//! ```
//!
//! Note that it is important to annotate the benchmark functions with
//! [`#[library_benchmark]`](crate::library_benchmark).
//!
//! ### Configuration (library benchmarks)
//!
//! It's possible to configure some of the behavior of `iai-callgrind`. See the docs of
//! [`crate::LibraryBenchmarkConfig`] for more details. Configure library benchmarks at
//! top-level with the [`crate::main`] macro, at group level within the
//! [`crate::library_benchmark_group`], at [`crate::library_benchmark`] level and at `bench`
//! level:
//!
//! ```rust
//! # use iai_callgrind::{LibraryBenchmarkConfig, library_benchmark};
//! #[library_benchmark]
//! #[bench::some_id(args = (1, 2), config = LibraryBenchmarkConfig::default())]
//! // ...
//! # fn some_func(first: u8, second: u8) -> u8 {
//! #     first + second
//! # }
//! # fn main() {}
//! ```
//!
//! The config at `bench` level overwrites the config at `library_benchmark` level. The config at
//! `library_benchmark` level overwrites the config at group level and so on. Note that
//! configuration values like `envs` are additive and don't overwrite configuration values of
//! higher levels.
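//!
//! For example, a config at group level which applies to all benchmarks of the group unless it is
//! overwritten at a lower level (a minimal sketch, assuming the `config` argument of the group
//! macro and the `env` setter; the environment variable is just a placeholder):
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group, main};
//! use iai_callgrind::LibraryBenchmarkConfig;
//! # #[library_benchmark]
//! # fn some_func() {}
//! library_benchmark_group!(
//!     name = some_group;
//!     // Additive values like `envs` of configs at lower levels are merged with this one
//!     config = LibraryBenchmarkConfig::default().env("MY_VAR", "some value");
//!     benchmarks = some_func
//! );
//! # fn main() {
//! # main!(library_benchmark_groups = some_group);
//! # }
//! ```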
//!
//! See also the docs of [`crate::library_benchmark_group`]. The
//! [README](https://github.com/iai-callgrind/iai-callgrind) of this crate includes more
//! explanations, common recipes and some examples.
//!
//! ### Binary Benchmarks
//!
//! Use this scheme of the [`main`] macro to benchmark one or more binaries of your crate (or any
//! other executable). The documentation for setting up binary benchmarks with the
//! `binary_benchmark_group` macro can be found in the docs of [`crate::binary_benchmark_group`].
//!
//! #### Important default behavior
//!
//! By default, all binary benchmarks run with the environment variables cleared. See also
//! [`crate::BinaryBenchmarkConfig::env_clear`] for how to change this behavior.
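//!
//! For example, to keep the environment variables for all binary benchmarks of all groups (a
//! minimal sketch):
//!
//! ```rust
//! # use iai_callgrind::{binary_benchmark, binary_benchmark_group, main};
//! use iai_callgrind::BinaryBenchmarkConfig;
//! # #[binary_benchmark]
//! # fn bench_binary() -> iai_callgrind::Command { iai_callgrind::Command::new("some/path") }
//! # binary_benchmark_group!(name = my_group; benchmarks = bench_binary);
//! # fn main() {
//! main!(
//!     // Don't clear the environment variables when executing the benchmarked `Command`s
//!     config = BinaryBenchmarkConfig::default().env_clear(false);
//!     binary_benchmark_groups = my_group
//! );
//! # }
//! ```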
//!
//! #### Quickstart (binary benchmarks)
//!
//! There are two APIs to set up binary benchmarks, but we only describe the high-level API using
//! the [`#[binary_benchmark]`](`crate::binary_benchmark`) attribute here. See the docs of
//! [`binary_benchmark_group`] for more details about the low-level API. The `#[binary_benchmark]`
//! attribute works almost the same as the `#[library_benchmark]` attribute. You will find the
//! same parameters `setup`, `teardown`, `config`, etc. in `#[binary_benchmark]` as in
//! `#[library_benchmark]`, as well as the inner attributes `#[bench]` and `#[benches]`. But there
//! are also substantial [differences](#differences-to-library-benchmarks).
//!
//! Suppose your crate's binaries are named `my-foo` and `my-bar`:
//!
//! ```rust
//! # macro_rules! env { ($m:tt) => {{ "/some/path" }} }
//! use iai_callgrind::{
//!     main, binary_benchmark, binary_benchmark_group,
//! };
//! use std::path::PathBuf;
//! use std::ffi::OsString;
//!
//! // In binary benchmarks there's no need to return a value from the setup function
//! fn my_setup() {
//!     println!("Put code in here which will be run before the actual command");
//! }
//!
//! #[binary_benchmark]
//! #[bench::just_a_fixture("benches/fixture.json")]
//! // First big difference to library benchmarks! `my_setup` is not evaluated right away and the
//! // return value of `my_setup` is not used as input for the `bench_foo` function. Instead,
//! // `my_setup()` is executed before the execution of the `Command`.
//! #[bench::with_other_fixture_and_setup(args = ("benches/other_fixture.txt"), setup = my_setup())]
//! #[benches::multiple("benches/fix_1.txt", "benches/fix_2.txt")]
//! // All functions annotated with `#[binary_benchmark]` need to return an `iai_callgrind::Command`
//! fn bench_foo(path: &str) -> iai_callgrind::Command {
//!     let path: PathBuf = path.into();
//!     // We can put any code in here which is needed to configure the `Command`.
//!     let stdout = if path.extension().unwrap() == "txt" {
//!         iai_callgrind::Stdio::Inherit
//!     } else {
//!         iai_callgrind::Stdio::File(path.with_extension("out"))
//!     };
//!     // Configure the command depending on the arguments passed to this function and the code
//!     // above
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-foo"))
//!         .stdout(stdout)
//!         .arg(path)
//!         .build()
//! }
//!
//! #[binary_benchmark]
//! // The id just needs to be unique within the same `#[binary_benchmark]`, so we can reuse
//! // `just_a_fixture` if we want to
//! #[bench::just_a_fixture("benches/fixture.json")]
//! // The function can be generic, too.
//! fn bench_bar<P>(path: P) -> iai_callgrind::Command
//! where
//!     P: Into<OsString>
//! {
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-bar"))
//!         .arg(path)
//!         .build()
//! }
//!
//! // Put all `#[binary_benchmark]` annotated functions you want to benchmark into the
//! // `benchmarks` section of this macro
//! binary_benchmark_group!(
//!     name = my_group;
//!     benchmarks = bench_foo, bench_bar
//! );
//!
//! # fn main() {
//! // As the last step, specify all groups you want to benchmark in the macro argument
//! // `binary_benchmark_groups`. Like the binary_benchmark_group macro, the main macro is
//! // always needed and finally expands to a benchmarking harness
//! main!(binary_benchmark_groups = my_group);
//! # }
//! ```
//!
//! #### Differences to library benchmarks
//!
//! As opposed to library benchmarks, the function annotated with the `binary_benchmark` attribute
//! always returns an `iai_callgrind::Command`. More specifically, this function is not a benchmark
//! function, since we don't benchmark functions anymore but [`Command`]s instead, which are the
//! return value of the [`#[binary_benchmark]`](crate::binary_benchmark) function.
//!
//! This change has far-reaching consequences but also simplifies things. Since the function itself
//! is not benchmarked, you can put any code into this function, and it does not influence the
//! benchmark of the [`Command`] itself. However, this function is run only once to __build__ the
//! [`Command`] and to collect all commands and their configuration so that the benchmark runner
//! can actually __execute__ the [`Command`]s later. Whatever code you want to run before the
//! [`Command`] is executed has to go into the `setup`, and code you want to run after the
//! execution of the [`Command`] goes into the `teardown`.
//!
//! In library benchmarks, the `setup` argument only takes a path to a function, more specifically
//! a function pointer. In binary benchmarks, however, the `setup` (and `teardown`) parameters of
//! the [`#[binary_benchmark]`](crate::binary_benchmark), `#[bench]` and `#[benches]` attributes
//! take expressions, including function calls, for example `setup = my_setup()`. Only in the
//! special case that the expression is a function pointer do we pass the `args` of the `#[bench]`
//! and `#[benches]` attributes into the `setup`, `teardown` __and__ the function itself. Also,
//! these expressions are not executed right away but in a separate process before the [`Command`]
//! is executed. This is the main reason why the return value of the setup function is simply
//! ignored and not routed back into the benchmark function, as would be the case in library
//! benchmarks: we simply don't need it. To sum it up, put code you need to configure the
//! [`Command`] into the annotated function, and code you need to execute before (after) the
//! execution of the [`Command`] into the `setup` (`teardown`).
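//!
//! For example, if `setup` is a plain function pointer, the `args` are routed to the setup
//! function __and__ to the annotated function (a minimal sketch; `print_fixture` is a
//! hypothetical helper):
//!
//! ```rust
//! # macro_rules! env { ($m:tt) => {{ "/some/path" }} }
//! # use iai_callgrind::{binary_benchmark, binary_benchmark_group, main};
//! // A hypothetical setup function which takes the same argument as the annotated function
//! fn print_fixture(path: &str) {
//!     println!("The fixture used in this benchmark run: {path}");
//! }
//!
//! #[binary_benchmark]
//! // `setup` is a function pointer here, so "benches/fixture.json" is passed to
//! // `print_fixture` (run before the `Command` is executed) and to `bench_foo` (used to
//! // build the `Command`)
//! #[bench::fixture(args = ("benches/fixture.json"), setup = print_fixture)]
//! fn bench_foo(path: &str) -> iai_callgrind::Command {
//!     iai_callgrind::Command::new(env!("CARGO_BIN_EXE_my-foo"))
//!         .arg(path)
//!         .build()
//! }
//! # binary_benchmark_group!(name = my_group; benchmarks = bench_foo);
//! # fn main() {
//! # main!(binary_benchmark_groups = my_group);
//! # }
//! ```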
//!
//! #### Configuration (binary benchmarks)
//!
//! Much like the configuration of library benchmarks (see above), it's possible to configure
//! binary benchmarks at top-level in the `main!` macro and at group level in the
//! `binary_benchmark_group!` macro with the `config = ...;` argument. In contrast to library
//! benchmarks, binary benchmarks can also be configured at a lower and last level, in the
//! [`Command`] directly.
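//!
//! For example, a config at group level (a minimal sketch, assuming the `env` setter of
//! [`BinaryBenchmarkConfig`]; the environment variable is just a placeholder):
//!
//! ```rust
//! # use iai_callgrind::{binary_benchmark, binary_benchmark_group, main};
//! use iai_callgrind::BinaryBenchmarkConfig;
//! # #[binary_benchmark]
//! # fn bench_binary() -> iai_callgrind::Command { iai_callgrind::Command::new("some/path") }
//! binary_benchmark_group!(
//!     name = my_group;
//!     // This config applies to all benchmarks of this group unless overwritten at a lower level
//!     config = BinaryBenchmarkConfig::default().env("MY_VAR", "some value");
//!     benchmarks = bench_binary
//! );
//! # fn main() {
//! # main!(binary_benchmark_groups = my_group);
//! # }
//! ```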
//!
//! For further details, see the section about binary benchmarks of the [`crate::main`] docs, the
//! docs of [`crate::binary_benchmark_group`] and [`Command`]. Also, the
//! [README](https://github.com/iai-callgrind/iai-callgrind) of this crate includes some
//! introductory documentation with additional examples.
//!
//! ## Valgrind Tools
//!
//! In addition to the default benchmarks, you can use the Iai-Callgrind framework to run other
//! Valgrind profiling [`Tool`]s like `DHAT`, `Massif` and the experimental `BBV`, but also
//! `Memcheck`, `Helgrind` and `DRD` if you need to check memory and thread safety of benchmarked
//! code. See also the [Valgrind User Manual](https://valgrind.org/docs/manual/manual.html) for
//! details and command line arguments. The additional tools can be specified in a
//! [`LibraryBenchmarkConfig`] or [`BinaryBenchmarkConfig`]. For example, to run `DHAT` for
//! all library benchmarks:
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, LibraryBenchmarkConfig, Tool, ValgrindTool};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     config = LibraryBenchmarkConfig::default()
//!         .tool(Tool::new(ValgrindTool::DHAT));
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```
//!
//! ## Client requests
//!
//! `iai-callgrind` supports Valgrind client requests. See the documentation of the
//! [`client_requests`] module.
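//!
//! As a rough sketch, a client request can be placed directly into the benchmarked code. This
//! assumes the `client_requests` feature is enabled and uses the `callgrind::toggle_collect`
//! wrapper; see the [`client_requests`] module docs for the actually available requests:
//!
//! ```rust,ignore
//! use iai_callgrind::client_requests;
//!
//! fn calculate() -> u64 {
//!     // If callgrind runs with `--collect-atstart=no`, this toggles the collection on ...
//!     client_requests::callgrind::toggle_collect();
//!     let result = (1..=100u64).sum();
//!     // ... and this toggles it off again, so only the code in between is collected
//!     client_requests::callgrind::toggle_collect();
//!     result
//! }
//! ```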
//!
//! ## Flamegraphs
//!
//! Flamegraphs are opt-in and can be created if you pass a [`FlamegraphConfig`] to
//! [`BinaryBenchmarkConfig::flamegraph`] or [`LibraryBenchmarkConfig::flamegraph`]. Callgrind
//! flamegraphs are meant as a complement to Valgrind's visualization tools `callgrind_annotate`
//! and `kcachegrind`.
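//!
//! For example, to create flamegraphs for all library benchmarks (a minimal sketch using the
//! default [`FlamegraphConfig`]):
//!
//! ```rust
//! # use iai_callgrind::{library_benchmark, library_benchmark_group};
//! use iai_callgrind::{main, FlamegraphConfig, LibraryBenchmarkConfig};
//! # #[library_benchmark]
//! # fn some_func() {}
//! # library_benchmark_group!(name = some_group; benchmarks = some_func);
//! # fn main() {
//! main!(
//!     config = LibraryBenchmarkConfig::default()
//!         .flamegraph(FlamegraphConfig::default());
//!     library_benchmark_groups = some_group
//! );
//! # }
//! ```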
//!
//! Callgrind flamegraphs show the inclusive costs for functions and a specific event type, much
//! like `callgrind_annotate` does, but in a nicer (and clickable) way. In particular, differential
//! flamegraphs facilitate a deeper understanding of code sections which cause a bottleneck, a
//! performance regression, etc.
//!
//! The produced flamegraph SVG files are located next to the respective callgrind output file in
//! the `target/iai` directory.

#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(test(attr(warn(unused))))]
#![doc(test(attr(allow(unused_extern_crates))))]
#![warn(missing_docs)]
#![warn(clippy::pedantic)]
#![warn(clippy::default_numeric_fallback)]
#![warn(clippy::else_if_without_else)]
#![warn(clippy::fn_to_numeric_cast_any)]
#![warn(clippy::get_unwrap)]
#![warn(clippy::if_then_some_else_none)]
#![warn(clippy::mixed_read_write_in_expression)]
#![warn(clippy::partial_pub_fields)]
#![warn(clippy::rest_pat_in_fully_bound_structs)]
#![warn(clippy::str_to_string)]
#![warn(clippy::string_to_string)]
#![warn(clippy::todo)]
#![warn(clippy::try_err)]
#![warn(clippy::undocumented_unsafe_blocks)]
#![warn(clippy::unneeded_field_pattern)]
#![allow(clippy::must_use_candidate)]
#![allow(clippy::return_self_not_must_use)]
#![allow(clippy::enum_glob_use)]
#![allow(clippy::module_name_repetitions)]

#[cfg(feature = "default")]
mod bin_bench;
#[cfg(feature = "client_requests_defs")]
pub mod client_requests;
#[cfg(feature = "default")]
mod common;
#[cfg(feature = "default")]
#[doc(hidden)]
pub mod error;
#[cfg(feature = "default")]
#[doc(hidden)]
pub mod internal;
#[cfg(feature = "default")]
mod lib_bench;
#[cfg(feature = "default")]
mod macros;
#[cfg(feature = "default")]
pub use bin_bench::{
    Bench, BenchmarkId, BinaryBenchmark, BinaryBenchmarkConfig, BinaryBenchmarkGroup, Command,
    Delay, ExitWith, Sandbox,
};
#[cfg(feature = "default")]
pub use bincode;
#[cfg(feature = "default")]
pub use common::{black_box, FlamegraphConfig, OutputFormat, RegressionConfig, Tool};
#[cfg(feature = "client_requests_defs")]
pub use cty;
#[cfg(feature = "default")]
pub use iai_callgrind_macros::{binary_benchmark, library_benchmark};
#[cfg(feature = "default")]
pub use iai_callgrind_runner::api::{
    DelayKind, Direction, EntryPoint, EventKind, FlamegraphKind, Pipe, Stdin, Stdio, ValgrindTool,
};
#[cfg(feature = "default")]
pub use lib_bench::LibraryBenchmarkConfig;