ckb_metrics/
lib.rs

1#![allow(missing_docs)]
2//! A lightweight metrics facade used in CKB.
3//!
4//! The `ckb-metrics` crate is a set of tools for metrics.
5//! The crate [`ckb-metrics-service`] is the runtime which handles the metrics data in CKB.
6//!
7//! [`ckb-metrics-service`]: ../ckb_metrics_service/index.html
8
9use prometheus_static_metric::make_static_metric;
10use std::cell::Cell;
11
12pub use prometheus::*;
13
14pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
15    prometheus::gather()
16}
17
18make_static_metric! {
19    // Struct for the CKB sys mem process statistics type label
20    struct CkbSysMemProcessStatistics: IntGauge{
21        "type" => {
22            rss,
23            vms,
24        },
25    }
26
27    // Struct for the CKB sys mem jemalloc statistics type label
28    struct CkbSysMemJemallocStatistics: IntGauge{
29        "type" => {
30            allocated,
31            resident,
32            active,
33            mapped,
34            retained,
35            metadata,
36        },
37    }
38
39    // Struct for CKB tx-pool entry status statistics type label
40    struct CkbTxPoolEntryStatistics: IntGauge{
41        "type" => {
42            pending,
43            gap,
44            proposed,
45        },
46    }
47
48    struct CkbHeaderMapMemoryHitMissStatistics: IntCounter{
49        "type" => {
50            hit,
51            miss,
52        },
53    }
54}
55
56pub struct Metrics {
57    /// Gauge metric for CKB chain tip header number
58    pub ckb_chain_tip: IntGauge,
59    /// CKB chain unverified tip header number
60    pub ckb_chain_unverified_tip: IntGauge,
61    /// ckb_chain asynchronous_process duration (seconds)
62    pub ckb_chain_async_process_block_duration: Histogram,
63    /// ckb_chain consume_orphan thread's process_lonely_block duration (seconds)
64    pub ckb_chain_process_lonely_block_duration: Histogram,
65    /// ckb_chain consume_unverified thread's consume_unverified_block duration (seconds)
66    pub ckb_chain_consume_unverified_block_duration: Histogram,
67    /// ckb_chain consume_unverified thread's consume_unverified_block waiting for block duration (seconds)
68    pub ckb_chain_consume_unverified_block_waiting_block_duration: Histogram,
69    /// ckb_chain execute_callback duration (seconds)
70    pub ckb_chain_execute_callback_duration: Histogram,
71    /// ckb_chain orphan blocks count
72    pub ckb_chain_orphan_count: IntGauge,
73    pub ckb_chain_lonely_block_ch_len: IntGauge,
74    pub ckb_chain_unverified_block_ch_len: IntGauge,
75    pub ckb_chain_preload_unverified_block_ch_len: IntGauge,
76    pub ckb_chain_load_full_unverified_block: Histogram,
77    /// ckb_sync_msg_process duration (seconds)
78    pub ckb_sync_msg_process_duration: HistogramVec,
79    /// ckb_sync_block_fetch duraiton (seconds)
80    pub ckb_sync_block_fetch_duration: Histogram,
81    // ckb_header_map_limit_memory duration (seconds)
82    pub ckb_header_map_limit_memory_duration: Histogram,
83    // ckb_header_map_limit_memory operation duration (seconds)
84    pub ckb_header_map_ops_duration: HistogramVec,
85    // how many headers in the HeaderMap's memory map?
86    pub ckb_header_map_memory_count: IntGauge,
87    // how many times the HeaderMap's memory map is hit?
88    pub ckb_header_map_memory_hit_miss_count: CkbHeaderMapMemoryHitMissStatistics,
89    /// Gauge for tracking the size of all frozen data
90    pub ckb_freezer_size: IntGauge,
91    /// Counter for measuring the effective amount of data read
92    pub ckb_freezer_read: IntCounter,
93    /// Gauge for tracking the number of ckb_freezer
94    pub ckb_freezer_number: IntGauge,
95    /// Counter for relay transaction short id collide
96    pub ckb_relay_transaction_short_id_collide: IntCounter,
97    /// Histogram for relay compact block verify duration
98    pub ckb_relay_cb_verify_duration: Histogram,
99    /// Histogram for block process duration
100    pub ckb_block_process_duration: Histogram,
101    /// Histogram for sync process tx in txpool
102    pub ckb_tx_pool_sync_process: Histogram,
103    /// Histogram for async process tx in txpool
104    pub ckb_tx_pool_async_process: Histogram,
105    /// Counter for relay compact block transaction count
106    pub ckb_relay_cb_transaction_count: IntCounter,
107    /// Counter for relay compact block reconstruct ok
108    pub ckb_relay_cb_reconstruct_ok: IntCounter,
109    /// Counter for relay compact block fresh transaction count
110    pub ckb_relay_cb_fresh_tx_cnt: IntCounter,
111    /// Counter for relay compact block reconstruct fail
112    pub ckb_relay_cb_reconstruct_fail: IntCounter,
113    // Gauge for CKB shared best number
114    pub ckb_shared_best_number: IntGauge,
115    // GaugeVec for CKB system memory process statistics
116    pub ckb_sys_mem_process: CkbSysMemProcessStatistics,
117    // GaugeVec for CKB system memory jemalloc statistics
118    pub ckb_sys_mem_jemalloc: CkbSysMemJemallocStatistics,
119    // GaugeVec for CKB tx-pool tx entry status statistics
120    pub ckb_tx_pool_entry: CkbTxPoolEntryStatistics,
121    /// Histogram for CKB network connections
122    pub ckb_message_bytes: HistogramVec,
123    /// Gauge for CKB rocksdb statistics
124    pub ckb_sys_mem_rocksdb: IntGaugeVec,
125    /// Counter for CKB network ban peers
126    pub ckb_network_ban_peer: IntCounter,
127    pub ckb_inflight_blocks_count: IntGauge,
128    pub ckb_inflight_timeout_count: IntCounter,
129}
130
131static METRICS: std::sync::LazyLock<Metrics> = std::sync::LazyLock::new(|| {
132    Metrics {
133    ckb_chain_tip: register_int_gauge!("ckb_chain_tip", "The CKB chain tip header number").unwrap(),
134    ckb_chain_unverified_tip: register_int_gauge!(
135        "ckb_chain_unverified_tip",
136        "The CKB chain unverified tip header number"
137    )
138            .unwrap(),
139    ckb_chain_async_process_block_duration: register_histogram!(
140        "ckb_chain_async_process_block_duration",
141        "The CKB chain asynchronous_process_block duration (seconds)"
142    )
143            .unwrap(),
144    ckb_chain_process_lonely_block_duration: register_histogram!(
145        "ckb_chain_process_lonely_block_duration",
146        "The CKB chain consume_orphan thread's process_lonely_block duration (seconds)"
147    )
148            .unwrap(),
149    ckb_chain_consume_unverified_block_duration: register_histogram!(
150        "ckb_chain_consume_unverified_block_duration",
151        "The CKB chain consume_unverified thread's consume_unverified_block duration (seconds)"
152    )
153            .unwrap(),
154    ckb_chain_consume_unverified_block_waiting_block_duration: register_histogram!(
155        "ckb_chain_consume_unverified_block_waiting_block_duration",
156        "The CKB chain consume_unverified thread's consume_unverified_block waiting for block duration (seconds)"
157    ).unwrap(),
158    ckb_chain_execute_callback_duration: register_histogram!(
159            "ckb_chain_execute_callback_duration",
160            "The CKB chain execute_callback duration (seconds)"
161        ).unwrap(),
162    ckb_chain_orphan_count: register_int_gauge!(
163            "ckb_chain_orphan_count",
164            "The CKB chain orphan blocks count",
165        ).unwrap(),
166    ckb_chain_lonely_block_ch_len: register_int_gauge!(
167            "ckb_chain_lonely_block_ch_len",
168            "The CKB chain lonely block channel length",
169        ).unwrap(),
170    ckb_chain_unverified_block_ch_len: register_int_gauge!(
171            "ckb_chain_unverified_block_ch_len",
172            "The CKB chain unverified block channel length",
173        ).unwrap(),
174    ckb_chain_preload_unverified_block_ch_len: register_int_gauge!(
175            "ckb_chain_preload_unverified_block_ch_len",
176            "The CKB chain fill unverified block channel length",
177        ).unwrap(),
178    ckb_chain_load_full_unverified_block: register_histogram!(
179            "ckb_chain_load_full_unverified_block",
180            "The CKB chain load_full_unverified_block duration (seconds)"
181        ).unwrap(),
182    ckb_sync_msg_process_duration: register_histogram_vec!(
183            "ckb_sync_msg_process_duration",
184            "The CKB sync message process duration (seconds)",
185            &["msg_type"],
186        ).unwrap(),
187    ckb_sync_block_fetch_duration: register_histogram!(
188            "ckb_sync_block_fetch_duration",
189            "The CKB sync block fetch duration (seconds)"
190        ).unwrap(),
191    ckb_header_map_limit_memory_duration: register_histogram!(
192            "ckb_header_map_limit_memory_duration",
193            "The CKB header map limit_memory job duration (seconds)"
194        ).unwrap(),
195    ckb_header_map_ops_duration: register_histogram_vec!(
196            "ckb_header_map_ops_duration",
197            "The CKB header map operation duration (seconds)",
198            &["operation"],
199        ).unwrap(),
200    ckb_header_map_memory_count: register_int_gauge!(
201            "ckb_header_map_memory_count",
202            "The CKB HeaderMap memory count",
203        ).unwrap(),
204    ckb_header_map_memory_hit_miss_count: CkbHeaderMapMemoryHitMissStatistics::from(
205            &register_int_counter_vec!(
206            "ckb_header_map_memory_hit_miss_count",
207            "The CKB HeaderMap memory hit count",
208            &["type"]
209        )
210                .unwrap()
211        ),
212    ckb_freezer_size: register_int_gauge!("ckb_freezer_size", "The CKB freezer size").unwrap(),
213    ckb_freezer_read: register_int_counter!("ckb_freezer_read", "The CKB freezer read").unwrap(),
214    ckb_freezer_number: register_int_gauge!("ckb_freezer_number", "The CKB freezer number").unwrap(),
215    ckb_relay_transaction_short_id_collide: register_int_counter!(
216        "ckb_relay_transaction_short_id_collide",
217        "The CKB relay transaction short id collide"
218    )
219            .unwrap(),
220    ckb_relay_cb_verify_duration: register_histogram!(
221        "ckb_relay_cb_verify_duration",
222        "The CKB relay compact block verify duration"
223    )
224            .unwrap(),
225    ckb_block_process_duration: register_histogram!(
226        "ckb_block_process_duration",
227        "The CKB block process duration"
228    )
229    .unwrap(),
230    ckb_tx_pool_sync_process: register_histogram!(
231        "ckb_tx_pool_sync_process",
232        "The CKB tx_pool sync process tx duration"
233    )
234    .unwrap(),
235    ckb_tx_pool_async_process: register_histogram!(
236        "ckb_tx_pool_async_process",
237        "The CKB tx_pool async process tx duration"
238    )
239    .unwrap(),
240    ckb_relay_cb_transaction_count: register_int_counter!(
241        "ckb_relay_cb_transaction_count",
242        "The CKB relay compact block transaction count"
243    ).unwrap(),
244    ckb_relay_cb_reconstruct_ok: register_int_counter!(
245        "ckb_relay_cb_reconstruct_ok",
246        "The CKB relay compact block reconstruct ok count"
247    ).unwrap(),
248    ckb_relay_cb_fresh_tx_cnt: register_int_counter!(
249        "ckb_relay_cb_fresh_tx_cnt",
250        "The CKB relay compact block fresh tx count"
251    ).unwrap(),
252    ckb_relay_cb_reconstruct_fail: register_int_counter!(
253        "ckb_relay_cb_reconstruct_fail",
254        "The CKB relay compact block reconstruct fail count"
255    )
256            .unwrap(),
257    ckb_shared_best_number: register_int_gauge!(
258        "ckb_shared_best_number",
259        "The CKB shared best header number"
260    )
261            .unwrap(),
262    ckb_sys_mem_process: CkbSysMemProcessStatistics::from(
263            &register_int_gauge_vec!(
264            "ckb_sys_mem_process",
265            "CKB system memory for process statistics",
266            &["type"]
267        )
268                .unwrap(),
269        ),
270    ckb_sys_mem_jemalloc: CkbSysMemJemallocStatistics::from(
271            &register_int_gauge_vec!(
272            "ckb_sys_mem_jemalloc",
273            "CKB system memory for jemalloc statistics",
274            &["type"]
275        )
276                .unwrap(),
277        ),
278    ckb_tx_pool_entry: CkbTxPoolEntryStatistics::from(
279            &register_int_gauge_vec!(
280            "ckb_tx_pool_entry",
281            "CKB tx-pool entry status statistics",
282            &["type"]
283        )
284                .unwrap(),
285        ),
286    ckb_message_bytes: register_histogram_vec!(
287        "ckb_message_bytes",
288        "The CKB message bytes",
289        &["direction", "protocol_name", "msg_item_name", "status_code"],
290        vec![
291            500.0, 1000.0, 2000.0, 5000.0, 10000.0, 20000.0, 50000.0, 100000.0, 200000.0, 500000.0
292        ]
293    )
294            .unwrap(),
295    ckb_sys_mem_rocksdb: register_int_gauge_vec!(
296        "ckb_sys_mem_rocksdb",
297        "CKB system memory for rocksdb statistics",
298        &["type", "cf"]
299    )
300            .unwrap(),
301    ckb_network_ban_peer: register_int_counter!(
302        "ckb_network_ban_peer",
303        "CKB network baned peer count"
304    )
305            .unwrap(),
306    ckb_inflight_blocks_count: register_int_gauge!(
307            "ckb_inflight_blocks_count",
308            "The CKB inflight blocks count"
309    )
310            .unwrap(),
311    ckb_inflight_timeout_count: register_int_counter!(
312            "ckb_inflight_timeout_count",
313            "The CKB inflight timeout count"
314    ).unwrap(),
315    }
316});
317
/// Indicates whether the metrics service is enabled.
///
/// This value is set by `ckb-metrics-service`. While it is unset, [`handle`]
/// returns `None`.
pub static METRICS_SERVICE_ENABLED: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
321
thread_local! {
    // Per-thread copy of the metrics-enabled flag; stays `None` until the
    // current thread has stored a value in it.
    static ENABLE_COLLECT_METRICS: Cell<Option<bool>> = Cell::new(None);
}
325
326/// if metrics service is enabled, `handle()` will return `Some(&'static METRICS)`
327/// else will return `None`
328pub fn handle() -> Option<&'static Metrics> {
329    let enabled_collect_metrics: bool =
330        ENABLE_COLLECT_METRICS.with(
331            |enable_collect_metrics| match enable_collect_metrics.get() {
332                Some(enabled) => enabled,
333                None => match METRICS_SERVICE_ENABLED.get().copied() {
334                    Some(enabled) => {
335                        enable_collect_metrics.set(Some(enabled));
336                        enabled
337                    }
338                    None => false,
339                },
340            },
341        );
342
343    if enabled_collect_metrics {
344        Some(&METRICS)
345    } else {
346        None
347    }
348}
349
#[cfg(test)]
mod tests {
    use crate::METRICS;
    use std::ops::Deref;
    use std::panic::catch_unwind;

    // https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels
    // Metric names may contain ASCII letters and digits, as well as underscores and colons.
    // They must match the regex [a-zA-Z_:][a-zA-Z0-9_:]*.
    // Label names may contain ASCII letters, numbers, as well as underscores.
    // They must match the regex [a-zA-Z_][a-zA-Z0-9_]*. Label names beginning with __ are
    // reserved for internal use.

    /// Checks that all metrics have valid names and labels.
    ///
    /// Calling `.deref()` forces every metric in `METRICS` to be initialized;
    /// if a metric name or label is invalid, this test panics.
    #[test]
    fn test_metrics_name() {
        let _ = METRICS.deref();
    }

    /// Returns `true` when registering an int gauge called `name` is refused,
    /// whether the refusal surfaces as a panic or as an `Err` value.
    fn int_gauge_is_rejected(name: &'static str, help: &'static str) -> bool {
        match catch_unwind(|| prometheus::register_int_gauge!(name, help)) {
            Ok(registration) => registration.is_err(),
            Err(_panic) => true,
        }
    }

    /// Checks every bad name on its own.
    ///
    /// The previous `#[should_panic]` form stopped at the first panic, so the
    /// second name was never exercised.
    #[test]
    fn test_bad_metrics_name() {
        assert!(int_gauge_is_rejected(
            "ckb.chain.tip",
            "a bad metric which contains '.' in its name"
        ));
        assert!(int_gauge_is_rejected(
            "ckb-chain-tip",
            "a bad metric which contains '-' in its name"
        ));
    }
}