// iroh_net/discovery/pkarr/dht.rs
//! Pkarr based node discovery for iroh-net, supporting both relay servers and the DHT.
//!
//! This module contains pkarr-based node discovery for iroh-net which can use both pkarr
//! relay servers as well as the Mainline DHT directly.  See the [pkarr module] for an
//! overview of pkarr.
//!
//! [pkarr module]: super
use std::{
    sync::{Arc, Mutex},
    time::Duration,
};

use futures_lite::{stream::Boxed, StreamExt};
use genawaiter::sync::{Co, Gen};
use pkarr::{
    PkarrClient, PkarrClientAsync, PkarrRelayClient, PkarrRelayClientAsync, PublicKey,
    RelaySettings, SignedPacket,
};
use tokio_util::task::AbortOnDropHandle;
use url::Url;

use crate::{
    discovery::{
        pkarr::{DEFAULT_PKARR_TTL, N0_DNS_PKARR_RELAY_PROD},
        Discovery, DiscoveryItem,
    },
    dns::node_info::NodeInfo,
    key::SecretKey,
    AddrInfo, Endpoint, NodeId,
};

/// Default republish delay for the DHT (one hour).
///
/// This is only for when the info does not change.  If the info changes, it will be
/// published immediately.
///
/// Used as the default for [`Builder::republish_delay`].
const REPUBLISH_DELAY: Duration = Duration::from_secs(60 * 60);
/// Default initial publish delay (500 milliseconds).
///
/// This is to avoid spamming the DHT when there are frequent network changes at startup.
///
/// Used as the default for [`Builder::initial_publish_delay`].
const INITIAL_PUBLISH_DELAY: Duration = Duration::from_millis(500);

/// Pkarr Mainline DHT and relay server node discovery.
///
/// It stores node addresses in DNS records, signed by the node's private key, and publishes
/// them to the BitTorrent Mainline DHT and/or a pkarr relay server, depending on how it was
/// configured through the [`Builder`].  See the [pkarr module] for more details.
///
/// This implements the [`Discovery`] trait to be used as a node discovery service which can
/// be used as both a publisher and resolver.  Calling [`DhtDiscovery::publish`] will start
/// a background task that periodically publishes the node address.
///
/// The type is a cheap-to-clone handle: all clones share the same state through an [`Arc`].
///
/// [pkarr module]: super
#[derive(Debug, Clone)]
pub struct DhtDiscovery(Arc<Inner>);

impl Default for DhtDiscovery {
    fn default() -> Self {
        Self::builder().build().expect("valid builder")
    }
}

/// Shared state behind a [`DhtDiscovery`] handle.
///
/// All fields except `task` are set once when the discovery is built and are only read
/// afterwards.
#[derive(derive_more::Debug)]
struct Inner {
    /// Pkarr client for interacting with the DHT.
    pkarr: PkarrClientAsync,
    /// Pkarr client for interacting with a pkarr relay.
    ///
    /// This is `Some` exactly when a relay URL was configured on the builder.
    #[debug("Option<PkarrRelayClientAsync>")]
    pkarr_relay: Option<PkarrRelayClientAsync>,
    /// The background task that periodically publishes the node address.
    ///
    /// Each call to publish replaces the stored handle.  Because the handle is an
    /// [`AbortOnDropHandle`], dropping it (by replacement or by dropping this struct)
    /// aborts the corresponding task.
    task: Mutex<Option<AbortOnDropHandle<()>>>,
    /// Optional secret key for signing the DNS packets.
    ///
    /// If this is None, the node will not publish its address.
    secret_key: Option<SecretKey>,
    /// Optional pkarr relay URL to use.
    ///
    /// Kept alongside `pkarr_relay` so the URL can be included in log messages.
    relay_url: Option<Url>,
    /// Whether to publish to, and resolve from, the mainline DHT.
    dht: bool,
    /// Time-to-live value for the DNS packets.
    ttl: u32,
    /// True to include the direct addresses in the DNS packet.
    include_direct_addresses: bool,
    /// Initial delay before the first publish.
    initial_publish_delay: Duration,
    /// Delay between two consecutive publishes of the same packet.
    republish_delay: Duration,
}

/// Builder for [`DhtDiscovery`].
///
/// By default, publishing to the DHT is enabled, and relay publishing is disabled.
#[derive(Debug)]
pub struct Builder {
    /// Explicit pkarr client; when `None`, `build` creates one with default settings.
    client: Option<PkarrClient>,
    /// Secret key used to sign published packets; without it nothing is published.
    secret_key: Option<SecretKey>,
    /// Time-to-live for the DNS packets; `build` falls back to `DEFAULT_PKARR_TTL`.
    ttl: Option<u32>,
    /// URL of the pkarr relay to use, if any.
    pkarr_relay: Option<Url>,
    /// Whether the Mainline DHT is used.
    dht: bool,
    /// Whether direct addresses are included in the published packet.
    include_direct_addresses: bool,
    /// Delay before the first publish.
    initial_publish_delay: Duration,
    /// Delay between republishing an unchanged packet.
    republish_delay: Duration,
}

impl Default for Builder {
    /// Returns a builder with DHT publishing enabled and no relay configured.
    fn default() -> Self {
        Self {
            // Where to publish: the DHT only, unless a relay is configured later.
            dht: true,
            pkarr_relay: None,
            // Nothing is published until a secret key is provided.
            secret_key: None,
            // Client and TTL are resolved to their defaults when building.
            client: None,
            ttl: None,
            // Direct addresses are opt-in.
            include_direct_addresses: false,
            // Timing of the publish loop.
            initial_publish_delay: INITIAL_PUBLISH_DELAY,
            republish_delay: REPUBLISH_DELAY,
        }
    }
}

impl Builder {
    /// Explicitly sets the pkarr client to use.
    ///
    /// If no client is set, [`Builder::build`] creates one with default settings.
    pub fn client(mut self, client: PkarrClient) -> Self {
        self.client = Some(client);
        self
    }

    /// Sets the secret key to use for signing the DNS packets.
    ///
    /// Without a secret key, the node will not publish its address to the DHT.
    pub fn secret_key(mut self, secret_key: SecretKey) -> Self {
        self.secret_key = Some(secret_key);
        self
    }

    /// Sets the time-to-live value for the DNS packets.
    ///
    /// If unset, the pkarr module's default TTL is used.
    pub fn ttl(mut self, ttl: u32) -> Self {
        self.ttl = Some(ttl);
        self
    }

    /// Sets the pkarr relay URL to use.
    pub fn pkarr_relay(mut self, pkarr_relay: Url) -> Self {
        self.pkarr_relay = Some(pkarr_relay);
        self
    }

    /// Uses the default [number 0] pkarr relay URL.
    ///
    /// [number 0]: https://n0.computer
    pub fn n0_dns_pkarr_relay(mut self) -> Self {
        self.pkarr_relay = Some(N0_DNS_PKARR_RELAY_PROD.parse().expect("valid URL"));
        self
    }

    /// Sets whether to publish to the Mainline DHT.
    pub fn dht(mut self, dht: bool) -> Self {
        self.dht = dht;
        self
    }

    /// Sets whether to include the direct addresses in the DNS packet.
    pub fn include_direct_addresses(mut self, include_direct_addresses: bool) -> Self {
        self.include_direct_addresses = include_direct_addresses;
        self
    }

    /// Sets the initial delay before the first publish.
    pub fn initial_publish_delay(mut self, initial_publish_delay: Duration) -> Self {
        self.initial_publish_delay = initial_publish_delay;
        self
    }

    /// Sets the republish delay for the DHT.
    pub fn republish_delay(mut self, republish_delay: Duration) -> Self {
        self.republish_delay = republish_delay;
        self
    }

    /// Builds the discovery mechanism.
    ///
    /// # Errors
    ///
    /// Returns an error if
    /// - neither the DHT nor a relay is enabled,
    /// - no client was provided and the default pkarr client cannot be created, or
    /// - the pkarr relay client cannot be created.
    pub fn build(self) -> anyhow::Result<DhtDiscovery> {
        let Self {
            client,
            secret_key,
            ttl,
            pkarr_relay: relay_url,
            dht,
            include_direct_addresses,
            initial_publish_delay,
            republish_delay,
        } = self;

        // Validate the configuration first, before any client is created.
        anyhow::ensure!(
            dht || relay_url.is_some(),
            "at least one of DHT or relay must be enabled"
        );

        // Creating the default client can fail; report that to the caller
        // instead of panicking.
        let pkarr = match client {
            Some(client) => client,
            None => PkarrClient::new(Default::default())?,
        };
        let pkarr = pkarr.as_async();
        let ttl = ttl.unwrap_or(DEFAULT_PKARR_TTL);

        let pkarr_relay = match &relay_url {
            Some(url) => Some(
                PkarrRelayClient::new(RelaySettings {
                    relays: vec![url.to_string()],
                    ..RelaySettings::default()
                })?
                .as_async(),
            ),
            None => None,
        };

        Ok(DhtDiscovery(Arc::new(Inner {
            pkarr,
            pkarr_relay,
            ttl,
            relay_url,
            dht,
            include_direct_addresses,
            secret_key,
            initial_publish_delay,
            republish_delay,
            task: Default::default(),
        })))
    }
}

impl DhtDiscovery {
    /// Creates a new builder for [`DhtDiscovery`].
    pub fn builder() -> Builder {
        Builder::default()
    }

    /// Periodically publishes the node address to the DHT and/or relay.
    ///
    /// Waits for the configured initial delay, then publishes `signed_packet` to every
    /// enabled target and sleeps for the configured republish delay, forever.
    ///
    /// The loop only keeps a weak reference to the shared state while it is sleeping.
    /// Once every [`DhtDiscovery`] handle has been dropped the state is freed, which
    /// drops the stored task handle and aborts this task.  Holding a strong reference
    /// for the lifetime of the loop would keep the state, and therefore the task itself,
    /// alive forever.
    async fn publish_loop(self, keypair: SecretKey, signed_packet: SignedPacket) {
        let z32 = pkarr::PublicKey::try_from(keypair.public().as_bytes())
            .expect("valid public key")
            .to_z32();
        let initial_publish_delay = self.0.initial_publish_delay;
        let republish_delay = self.0.republish_delay;
        // Do not keep the shared state alive from within its own background task.
        let inner = Arc::downgrade(&self.0);
        drop(self);
        // initial delay. If the task gets aborted before this delay is over,
        // we have not published anything to the DHT yet.
        tokio::time::sleep(initial_publish_delay).await;
        loop {
            // All handles are gone: nobody is interested in publishing anymore.
            let Some(this) = inner.upgrade() else {
                return;
            };
            // publish to the DHT if enabled
            let dht_publish = async {
                if this.dht {
                    match this.pkarr.publish(&signed_packet).await {
                        Ok(()) => {
                            tracing::debug!("pkarr publish success. published under {z32}",);
                        }
                        Err(e) => {
                            // we could do a smaller delay here, but in general DHT publish
                            // not working is due to a network issue, and if the network changes
                            // the task will be restarted anyway.
                            //
                            // Being unable to publish to the DHT is something that is expected
                            // to happen from time to time, so this does not warrant a error log.
                            tracing::warn!("pkarr publish error: {}", e);
                        }
                    }
                }
            };
            // publish to the relay if enabled
            let relay_publish = async {
                if let Some(relay) = this.pkarr_relay.as_ref() {
                    // The URL is only needed for logging; do not panic if it is missing.
                    if let Some(url) = this.relay_url.as_ref() {
                        tracing::info!("publishing to relay: {}", url);
                    }
                    match relay.publish(&signed_packet).await {
                        Ok(_) => {
                            tracing::debug!("pkarr publish to relay success");
                        }
                        Err(e) => {
                            tracing::warn!("pkarr publish to relay error: {}", e);
                        }
                    }
                }
            };
            // do both at the same time
            tokio::join!(relay_publish, dht_publish);
            // Release the strong reference before sleeping so the state can be dropped.
            drop(this);
            tokio::time::sleep(republish_delay).await;
        }
    }

    /// Resolves a node id from the pkarr relay.
    ///
    /// Does nothing if no relay is configured.  A successfully parsed packet is yielded
    /// as a [`DiscoveryItem`] with provenance `"relay"`; a relay error is yielded as an
    /// error item.  A missing or unparsable packet is only logged.
    async fn resolve_relay(
        &self,
        pkarr_public_key: PublicKey,
        co: &Co<anyhow::Result<DiscoveryItem>>,
    ) {
        let (Some(relay), Some(url)) = (&self.0.pkarr_relay, &self.0.relay_url) else {
            return;
        };
        tracing::info!("resolving {} from relay {}", pkarr_public_key.to_z32(), url);
        let response = relay.resolve(&pkarr_public_key).await;
        match response {
            Ok(Some(signed_packet)) => {
                if let Ok(node_info) = NodeInfo::from_pkarr_signed_packet(&signed_packet) {
                    let node_id = node_info.node_id;
                    let addr_info = node_info.into();
                    tracing::info!("discovered node info from relay {:?}", addr_info);
                    co.yield_(Ok(DiscoveryItem {
                        node_id,
                        provenance: "relay",
                        last_updated: None,
                        addr_info,
                    }))
                    .await;
                } else {
                    tracing::debug!("failed to parse signed packet as node info");
                }
            }
            Ok(None) => {
                tracing::debug!("no signed packet found in relay");
            }
            Err(e) => {
                tracing::debug!("failed to get signed packet from relay: {}", e);
                co.yield_(Err(e.into())).await;
            }
        }
    }

    /// Resolves a node id from the DHT.
    ///
    /// Does nothing if the DHT is disabled.  A successfully parsed packet is yielded as a
    /// [`DiscoveryItem`] with provenance `"mainline"`; a DHT error is yielded as an error
    /// item.  A missing or unparsable packet is only logged.
    async fn resolve_dht(
        &self,
        pkarr_public_key: PublicKey,
        co: &Co<anyhow::Result<DiscoveryItem>>,
    ) {
        if !self.0.dht {
            return;
        };
        tracing::info!("resolving {} from DHT", pkarr_public_key.to_z32());
        let response = match self.0.pkarr.resolve(&pkarr_public_key).await {
            Ok(r) => r,
            Err(e) => {
                co.yield_(Err(e.into())).await;
                return;
            }
        };
        let Some(signed_packet) = response else {
            tracing::debug!("no signed packet found in DHT");
            return;
        };
        if let Ok(node_info) = NodeInfo::from_pkarr_signed_packet(&signed_packet) {
            let node_id = node_info.node_id;
            let addr_info = node_info.into();
            tracing::info!("discovered node info from DHT {:?}", addr_info);
            co.yield_(Ok(DiscoveryItem {
                node_id,
                provenance: "mainline",
                last_updated: None,
                addr_info,
            }))
            .await;
        } else {
            tracing::debug!("failed to parse signed packet as node info");
        }
    }

    /// Drives a resolve stream: queries the DHT and the relay concurrently.
    ///
    /// Results from both sources are yielded through `co`.
    async fn gen_resolve(self, node_id: NodeId, co: Co<anyhow::Result<DiscoveryItem>>) {
        let pkarr_public_key =
            pkarr::PublicKey::try_from(node_id.as_bytes()).expect("valid public key");
        tokio::join!(
            self.resolve_dht(pkarr_public_key.clone(), &co),
            self.resolve_relay(pkarr_public_key, &co)
        );
    }
}

impl Discovery for DhtDiscovery {
    /// Signs `info` and (re)starts the background task that publishes it.
    ///
    /// Does nothing if no secret key is configured or if the packet cannot be signed.
    /// The newly spawned task replaces the previously stored one.
    fn publish(&self, info: &AddrInfo) {
        let inner = &self.0;
        let Some(keypair) = inner.secret_key.as_ref() else {
            tracing::debug!("no keypair set, not publishing");
            return;
        };
        tracing::debug!("publishing {:?}", info);

        // Direct addresses are only published when explicitly enabled.
        let direct_addresses = if inner.include_direct_addresses {
            info.direct_addresses.clone()
        } else {
            Default::default()
        };
        let node_info = NodeInfo {
            node_id: keypair.public(),
            relay_url: info.relay_url.clone().map(Url::from),
            direct_addresses,
        };
        let signed_packet = match node_info.to_pkarr_signed_packet(keypair, inner.ttl) {
            Ok(packet) => packet,
            Err(_) => {
                tracing::warn!("failed to create signed packet");
                return;
            }
        };

        let handle = tokio::spawn(self.clone().publish_loop(keypair.clone(), signed_packet));
        // Storing the new handle drops the previous one.
        let mut slot = inner.task.lock().unwrap();
        *slot = Some(AbortOnDropHandle::new(handle));
    }

    /// Returns a stream of discovery results for `node_id`.
    ///
    /// The stream is driven by `gen_resolve`; this method always returns `Some`.
    fn resolve(
        &self,
        _endpoint: Endpoint,
        node_id: NodeId,
    ) -> Option<Boxed<anyhow::Result<DiscoveryItem>>> {
        let z32 = pkarr::PublicKey::try_from(node_id.as_bytes())
            .expect("valid public key")
            .to_z32();
        tracing::info!("resolving {} as {}", node_id, z32);
        let discovery = self.clone();
        let stream = Gen::new(|co| async move { discovery.gen_resolve(node_id, co).await });
        Some(stream.boxed())
    }
}

#[cfg(test)]
mod tests {
    use iroh_base::node_addr::RelayUrl;
    use mainline::dht::DhtSettings;
    use testresult::TestResult;

    use super::*;

    /// Publishes a relay URL to a local DHT testnet and polls until it can be resolved.
    #[tokio::test]
    #[ignore = "flaky"]
    async fn dht_discovery_smoke() -> TestResult {
        let _logging_guard = iroh_test::logging::setup();
        let ep = crate::Endpoint::builder().bind().await?;
        let secret = ep.secret_key().clone();

        // Point the pkarr client at a local two-node testnet.
        let testnet = mainline::dht::Testnet::new(2);
        let dht_settings = DhtSettings {
            bootstrap: Some(testnet.bootstrap.clone()),
            ..Default::default()
        };
        let client = PkarrClient::new(pkarr::Settings {
            dht: dht_settings,
            ..Default::default()
        })?;
        let discovery = DhtDiscovery::builder()
            .secret_key(secret.clone())
            .initial_publish_delay(Duration::ZERO)
            .client(client)
            .build()?;

        let expected_url: RelayUrl = Url::parse("https://example.com")?.into();
        let addr_info = AddrInfo {
            relay_url: Some(expected_url.clone()),
            direct_addresses: Default::default(),
        };
        discovery.publish(&addr_info);

        // publish is fire and forget, so we have no way to wait until it is done.
        // Poll the resolver until the published relay URL shows up.
        let wait_for_relay_url = async move {
            loop {
                tokio::time::sleep(Duration::from_millis(200)).await;
                let items = discovery
                    .resolve(ep.clone(), secret.public())
                    .unwrap()
                    .collect::<Vec<_>>()
                    .await;
                let found = items
                    .into_iter()
                    .flatten()
                    .any(|item| item.addr_info.relay_url.as_ref() == Some(&expected_url));
                if found {
                    break;
                }
            }
        };
        tokio::time::timeout(Duration::from_secs(30), wait_for_relay_url)
            .await
            .expect("timeout, relay_url not found on DHT");
        Ok(())
    }
}