probe_rs/debug/
source_instructions.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
use super::{
    canonical_path_eq,
    unit_info::{self, UnitInfo},
    ColumnType, DebugError, DebugInfo, GimliReader,
};
use gimli::LineSequence;
use serde::Serialize;
use std::{
    fmt::{Debug, Formatter},
    num::NonZeroU64,
    ops::Range,
};
use typed_path::{TypedPath, TypedPathBuf};

/// A verified breakpoint represents an instruction address, and the source location that it corresponds to it,
/// for locations in the target binary that comply with the DWARF standard terminology for "recommended breakpoint location".
/// This typically refers to instructions that are not part of the prologue or epilogue, and are part of the user code,
/// or are the final instruction in a sequence, before the processor begins the epilogue code.
/// The `probe-rs` debugger uses this information to identify valid halt locations for breakpoints and stepping.
#[derive(Clone, Debug)]
pub struct VerifiedBreakpoint {
    /// The address in target memory, where the breakpoint can be set.
    pub address: u64,
    /// If the breakpoint request was for a specific source location, then this field will contain the resolved source location.
    pub source_location: SourceLocation,
}

impl VerifiedBreakpoint {
    /// Return the first valid breakpoint location of the statement that is greater than OR equal to `address`.
    /// e.g., if the `address` is the current program counter, then the return value will be the next valid halt address
    /// in the current sequence.
    pub(crate) fn for_address(
        debug_info: &DebugInfo,
        address: u64,
    ) -> Result<VerifiedBreakpoint, DebugError> {
        let instruction_sequence = InstructionSequence::from_address(debug_info, address)?;

        // Cycle through various degrees of matching, to find the most relevant source location.
        if let Some(verified_breakpoint) = match_address(&instruction_sequence, address, debug_info)
        {
            tracing::debug!(
                "Found valid breakpoint for address: {:#010x} : {verified_breakpoint:?}",
                &address
            );
            return Ok(verified_breakpoint);
        }
        // If we get here, we have not found a valid breakpoint location.
        let message = format!("Could not identify a valid breakpoint for address: {address:#010x}. Please consider using instruction level stepping.");
        Err(DebugError::WarnAndContinue { message })
    }

    /// Identifying the breakpoint location for a specific location (path, line, colunmn) is a bit more complex,
    /// compared to the `for_address()` method, due to a few factors:
    /// - The correct program instructions, may be in any of the compilation units of the current program.
    /// - The debug information may not contain data for the "specific source" location requested:
    ///   - DWARFv5 standard, section 6.2, allows omissions based on certain conditions. In this case,
    ///     we need to find the closest "relevant" source location that has valid debug information.
    ///   - The requested location may not be a valid source location, e.g. when the
    ///     debug information has been optimized away. In this case we will return an appropriate error.
    ///
    /// #### The logic used to find the "most relevant" source location is as follows:
    /// 1. Filter  [`UnitInfo`], by using [`gimli::LineProgramHeader`] to match units that include
    ///    the requested path.
    /// 2. For each matching compilation unit, get the [`gimli::LineProgram`] and
    ///    [`Vec<LineSequence>`][LineSequence].
    /// 3. Filter the [`Vec<LineSequence>`][LineSequence] entries to only include sequences that match the requested path.
    /// 3. Convert remaining [`LineSequence`], to [`InstructionSequence`].
    /// 4. Return the first [`InstructionSequence`] that contains the requested source location.
    ///    4a. This may be an exact match on file/line/column, or,
    ///    4b. Failing an exact match, a match on file/line only.
    ///    4c. Failing that, a match on file only, where the line number is the "next" available instruction,
    ///        on the next available line of the specified file.
    pub(crate) fn for_source_location(
        debug_info: &DebugInfo,
        path: TypedPath,
        line: u64,
        column: Option<u64>,
    ) -> Result<Self, DebugError> {
        for program_unit in &debug_info.unit_infos {
            let Some(ref line_program) = program_unit.unit.line_program else {
                // Not all compilation units need to have debug line information, so we skip those.
                continue;
            };

            let mut num_files = line_program.header().file_names().len();

            // For DWARF version 5, the current compilation file is included in the file names, with index 0.
            //
            // For earlier versions, the current compilation file is not included in the file names, but index 0 still refers to it.
            // To get the correct number of files, we have to add 1 here.
            if program_unit.unit.header.version() <= 4 {
                num_files += 1;
            }

            // There can be multiple file indices which match, due to the inclusion of the current compilation file with index 0.
            //
            // At least for DWARF 4 there are cases where the current compilation file is also included in the file names with
            // a non-zero index.
            let matching_file_indices: Vec<_> = (0..num_files)
                .filter_map(|file_index| {
                    let file_index = file_index as u64;

                    debug_info
                        .get_path(&program_unit.unit, file_index)
                        .and_then(|combined_path: TypedPathBuf| {
                            if canonical_path_eq(path, combined_path.to_path()) {
                                tracing::debug!(
                                    "Found matching file index: {file_index} for path: {path}",
                                    file_index = file_index,
                                    path = path.display()
                                );
                                Some(file_index)
                            } else {
                                None
                            }
                        })
                })
                .collect();

            if matching_file_indices.is_empty() {
                continue;
            }

            let Ok((complete_line_program, line_sequences)) = line_program.clone().sequences()
            else {
                tracing::debug!("Failed to get line sequences for line program");
                continue;
            };

            for line_sequence in line_sequences {
                let instruction_sequence = InstructionSequence::from_line_sequence(
                    debug_info,
                    program_unit,
                    &complete_line_program,
                    &line_sequence,
                );

                for matching_file_index in &matching_file_indices {
                    // Cycle through various degrees of matching, to find the most relevant source location.
                    if let Some(verified_breakpoint) = match_file_line_column(
                        &instruction_sequence,
                        *matching_file_index,
                        line,
                        column,
                        debug_info,
                        program_unit,
                    ) {
                        return Ok(verified_breakpoint);
                    }

                    if let Some(verified_breakpoint) = match_file_line_first_available_column(
                        &instruction_sequence,
                        *matching_file_index,
                        line,
                        debug_info,
                        program_unit,
                    ) {
                        return Ok(verified_breakpoint);
                    }
                }
            }
        }
        // If we get here, we have not found a valid breakpoint location.
        Err(DebugError::Other(format!("No valid breakpoint information found for file: {}, line: {line:?}, column: {column:?}", path.display())))
    }
}

/// Find the valid halt instruction location that is equal to, or greater than, the address.
fn match_address(
    instruction_sequence: &InstructionSequence<'_>,
    address: u64,
    debug_info: &DebugInfo,
) -> Option<VerifiedBreakpoint> {
    if instruction_sequence.address_range.contains(&address) {
        let instruction_location =
            instruction_sequence
                .instructions
                .iter()
                .find(|instruction_location| {
                    instruction_location.instruction_type == InstructionType::HaltLocation
                        && instruction_location.address >= address
                })?;

        let source_location = SourceLocation::from_instruction_location(
            debug_info,
            instruction_sequence.program_unit,
            instruction_location,
        )?;

        Some(VerifiedBreakpoint {
            address: instruction_location.address,
            source_location,
        })
    } else {
        None
    }
}

/// Find the valid halt instruction location that matches the file, line and column.
fn match_file_line_column(
    instruction_sequence: &InstructionSequence<'_>,
    matching_file_index: u64,
    line: u64,
    column: Option<u64>,
    debug_info: &DebugInfo,
    program_unit: &UnitInfo,
) -> Option<VerifiedBreakpoint> {
    let instruction_location =
        instruction_sequence
            .instructions
            .iter()
            .find(|instruction_location| {
                instruction_location.instruction_type == InstructionType::HaltLocation
                    && matching_file_index == instruction_location.file_index
                    && NonZeroU64::new(line) == instruction_location.line
                    && column
                        .map(ColumnType::Column)
                        .is_some_and(|col| col == instruction_location.column)
            })?;

    let source_location =
        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;

    Some(VerifiedBreakpoint {
        address: instruction_location.address,
        source_location,
    })
}

/// Find the first valid halt instruction location that matches the file and line, ignoring column.
fn match_file_line_first_available_column(
    instruction_sequence: &InstructionSequence<'_>,
    matching_file_index: u64,
    line: u64,
    debug_info: &DebugInfo,
    program_unit: &UnitInfo,
) -> Option<VerifiedBreakpoint> {
    let instruction_location =
        instruction_sequence
            .instructions
            .iter()
            .find(|instruction_location| {
                instruction_location.instruction_type == InstructionType::HaltLocation
                    && matching_file_index == instruction_location.file_index
                    && NonZeroU64::new(line) == instruction_location.line
            })?;

    let source_location =
        SourceLocation::from_instruction_location(debug_info, program_unit, instruction_location)?;

    Some(VerifiedBreakpoint {
        address: instruction_location.address,
        source_location,
    })
}

fn serialize_typed_path<S>(path: &TypedPathBuf, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    serializer.serialize_str(&path.to_string_lossy())
}

/// A specific location in source code.
/// Each unique line, column, file and directory combination is a unique source location.
#[derive(Clone, PartialEq, Eq, Serialize)]
pub struct SourceLocation {
    /// The path to the source file
    #[serde(serialize_with = "serialize_typed_path")]
    pub path: TypedPathBuf,
    /// The line number in the source file with zero based indexing.
    pub line: Option<u64>,
    /// The column number in the source file.
    pub column: Option<ColumnType>,
}

impl Debug for SourceLocation {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}:{:?}:{:?}",
            self.path.to_path().display(),
            self.line,
            self.column
        )
    }
}

impl SourceLocation {
    /// Resolve debug information for a [`InstructionLocation`] and create a [`SourceLocation`].
    fn from_instruction_location(
        debug_info: &DebugInfo,
        program_unit: &unit_info::UnitInfo,
        instruction_location: &InstructionLocation,
    ) -> Option<SourceLocation> {
        debug_info
            .find_file_and_directory(&program_unit.unit, instruction_location.file_index)
            .map(|path| SourceLocation {
                line: instruction_location.line.map(std::num::NonZeroU64::get),
                column: Some(instruction_location.column),
                path,
            })
    }

    /// Get the file name of the source file
    pub fn file_name(&self) -> Option<String> {
        self.path
            .file_name()
            .map(|name| String::from_utf8_lossy(name).to_string())
    }
}

/// Keep track of all the instruction locations required to satisfy the operations of [`SteppingMode`][s].
/// This is a list of target instructions, belonging to a [`gimli::LineSequence`],
/// and filters it to only user code instructions (no prologue code, and no non-statement instructions),
/// so that we are left only with what DWARF terms as "recommended breakpoint location".
///
/// [s]: crate::debug::debug_step::SteppingMode
struct InstructionSequence<'debug_info> {
    /// The `address_range.start` is the starting address of the program counter for which this sequence is valid,
    /// and allows us to identify target instruction locations where the program counter lies inside the prologue.
    /// The `address_range.end` is the first address that is not covered by this sequence within the line number program,
    /// and allows us to identify when stepping over a instruction location would result in leaving a sequence.
    /// - This is typically the instruction address of the first instruction in the next sequence,
    ///   which may also be the first instruction in a new function.
    address_range: Range<u64>,
    // NOTE: Use Vec as a container, because we will have relatively few statements per sequence, and we need to maintain the order.
    instructions: Vec<InstructionLocation>,
    // The following private fields are required to resolve the source location information for
    // each instruction location.
    debug_info: &'debug_info DebugInfo,
    program_unit: &'debug_info UnitInfo,
}

impl Debug for InstructionSequence<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        writeln!(
            f,
            "Instruction Sequence with address range: {:#010x} - {:#010x}",
            self.address_range.start, self.address_range.end
        )?;
        for instruction_location in &self.instructions {
            writeln!(
                f,
                "\t{instruction_location:?} - {}",
                self.debug_info
                    .get_path(&self.program_unit.unit, instruction_location.file_index)
                    .map(|file_path| file_path.to_string_lossy().to_string())
                    .unwrap_or("<unknown file>".to_string())
            )?;
        }
        Ok(())
    }
}

impl<'debug_info> InstructionSequence<'debug_info> {
    /// Extract all the instruction locations, belonging to the active sequence (i.e. the sequence that contains the `address`).
    fn from_address(
        debug_info: &'debug_info DebugInfo,
        program_counter: u64,
    ) -> Result<Self, DebugError> {
        let program_unit = debug_info.compile_unit_info(program_counter)?;
        let (offset, address_size) = if let Some(line_program) =
            program_unit.unit.line_program.clone()
        {
            (
                line_program.header().offset(),
                line_program.header().address_size(),
            )
        } else {
            let message = "The specified source location does not have any line_program information available. Please consider using instruction level stepping.".to_string();
            return Err(DebugError::WarnAndContinue { message });
        };

        // Get the sequences of rows from the CompleteLineProgram at the given program_counter.
        let incomplete_line_program =
            debug_info
                .debug_line_section
                .program(offset, address_size, None, None)?;
        let (complete_line_program, line_sequences) = incomplete_line_program.sequences()?;

        // Get the sequence of rows that belongs to the program_counter.
        let Some(line_sequence) = line_sequences.iter().find(|line_sequence| {
            line_sequence.start <= program_counter && program_counter < line_sequence.end
        }) else {
            let message = "The specified source location does not have any line information available. Please consider using instruction level stepping.".to_string();
            return Err(DebugError::WarnAndContinue { message });
        };
        let instruction_sequence = Self::from_line_sequence(
            debug_info,
            program_unit,
            &complete_line_program,
            line_sequence,
        );

        if instruction_sequence.len() == 0 {
            let message = "Could not find valid instruction locations for this address. Consider using instruction level stepping.".to_string();
            Err(DebugError::WarnAndContinue { message })
        } else {
            tracing::trace!(
                "Instruction location for pc={:#010x}\n{:?}",
                program_counter,
                instruction_sequence
            );
            Ok(instruction_sequence)
        }
    }

    /// Build [`InstructionSequence`] from a [`gimli::LineSequence`], with all the markers we need to determine valid halt locations.
    fn from_line_sequence(
        debug_info: &'debug_info DebugInfo,
        program_unit: &'debug_info UnitInfo,
        complete_line_program: &gimli::CompleteLineProgram<GimliReader>,
        line_sequence: &LineSequence<GimliReader>,
    ) -> Self {
        let program_language = program_unit.get_language();
        let mut sequence_rows = complete_line_program.resume_from(line_sequence);

        // We have enough information to create the InstructionSequence.
        let mut instruction_sequence = InstructionSequence {
            address_range: line_sequence.start..line_sequence.end,
            instructions: Vec::new(),
            debug_info,
            program_unit,
        };
        let mut prologue_completed = false;
        let mut previous_row: Option<gimli::LineRow> = None;
        while let Ok(Some((_, row))) = sequence_rows.next_row() {
            // Don't do anything until we are at least at the prologue_end() of a function.
            if row.prologue_end() {
                prologue_completed = true;
            }

            // For GNU C, it is known that the `DW_LNS_set_prologue_end` is not set, so we employ the same heuristic as GDB to determine when the prologue is complete.
            // For other C compilers in the C99/11/17 standard, they will either set the `DW_LNS_set_prologue_end` or they will trigger this heuristic also.
            // See https://gcc.gnu.org/legacy-ml/gcc-patches/2011-03/msg02106.html
            if !prologue_completed
                && matches!(
                    program_language,
                    gimli::DW_LANG_C99 | gimli::DW_LANG_C11 | gimli::DW_LANG_C17
                )
            {
                if let Some(prev_row) = previous_row {
                    if row.end_sequence()
                        || (row.is_stmt()
                            && (row.file_index() == prev_row.file_index()
                                && (row.line() != prev_row.line() || row.line().is_none())))
                    {
                        prologue_completed = true;
                    }
                }
            }

            if !prologue_completed {
                log_row_eval(line_sequence, row, "  inside prologue>");
            } else {
                log_row_eval(line_sequence, row, "  after prologue>");
            }

            // The end of the sequence is not a valid halt location,
            // nor is it a valid instruction in the current sequence.
            if row.end_sequence() {
                break;
            }

            instruction_sequence.add(prologue_completed, row, previous_row.as_ref());
            previous_row = Some(*row);
        }
        instruction_sequence
    }

    /// Add a instruction location to the list.
    fn add(
        &mut self,
        prologue_completed: bool,
        row: &gimli::LineRow,
        previous_row: Option<&gimli::LineRow>,
    ) {
        // Workaround the line number issue (if recorded as 0 in the DWARF, then gimli reports it as None).
        // For debug purposes, it makes more sense to be the same as the previous line, which almost always
        // has the same file index and column value.
        // This prevents the debugger from jumping to the top of the file unexpectedly.
        let mut instruction_line = row.line();
        if let Some(prev_row) = previous_row {
            if row.line().is_none()
                && prev_row.line().is_some()
                && row.file_index() == prev_row.file_index()
                && prev_row.column() == row.column()
            {
                instruction_line = prev_row.line();
            }
        }

        let instruction_location = InstructionLocation {
            address: row.address(),
            file_index: row.file_index(),
            line: instruction_line,
            column: row.column().into(),
            instruction_type: if !prologue_completed {
                InstructionType::Prologue
            } else if row.epilogue_begin() || row.is_stmt() {
                InstructionType::HaltLocation
            } else {
                InstructionType::Unspecified
            },
        };

        self.instructions.push(instruction_location);
    }

    /// Get the number of instruction locations in the list.
    fn len(&self) -> usize {
        self.instructions.len()
    }
}

#[derive(Debug, Clone, PartialEq)]
/// The type of instruction, as defined by [`gimli::LineRow`] attributes and relative position in the sequence.
enum InstructionType {
    /// We need to keep track of source lines that signal function signatures,
    /// even if their program lines are not valid halt locations.
    Prologue,
    /// DWARF defined "recommended breakpoint location",
    /// typically marked with `is_stmt` or `epilogue_begin`.
    HaltLocation,
    /// Any other instruction that is not part of the prologue or epilogue, and is not a statement,
    /// is considered to be an unspecified instruction type.
    Unspecified,
}

#[derive(Clone)]
/// - A [`InstructionLocation`] filters and maps [`gimli::LineRow`] entries to be used for determining valid halt points.
///   - Each [`InstructionLocation`] maps to a single machine instruction on target.
///   - For establishing valid halt locations (breakpoint or stepping), we are only interested,
///     in the [`InstructionLocation`]'s that represent DWARF defined `statements`,
///     which are not part of the prologue or epilogue.
/// - A line of code in a source file may contain multiple instruction locations, in which case
///     a new [`InstructionLocation`] with unique `column` is created.
/// - A [`InstructionSequence`] is a series of contiguous [`InstructionLocation`]'s.
struct InstructionLocation {
    address: u64,
    file_index: u64,
    line: Option<NonZeroU64>,
    column: ColumnType,
    instruction_type: InstructionType,
}

impl Debug for InstructionLocation {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Instruction @ {:010x}, on line={:04}  col={:05}  f={:02}, type={:?}",
            &self.address,
            match &self.line {
                Some(line) => line.get(),
                None => 0,
            },
            match &self.column {
                ColumnType::LeftEdge => 0,
                ColumnType::Column(column) => column.to_owned(),
            },
            &self.file_index,
            &self.instruction_type,
        )?;
        Ok(())
    }
}

/// Helper function to avoid code duplication when logging of information during row evaluation.
fn log_row_eval(
    active_sequence: &LineSequence<super::GimliReader>,
    row: &gimli::LineRow,
    status: &str,
) {
    tracing::trace!("Sequence: line={:04} col={:05} f={:02} stmt={:5} ep={:5} es={:5} eb={:5} : {:#010X}<={:#010X}<{:#010X} : {}",
        match row.line() {
            Some(line) => line.get(),
            None => 0,
        },
        match row.column() {
            gimli::ColumnType::LeftEdge => 0,
            gimli::ColumnType::Column(column) => column.get(),
        },
        row.file_index(),
        row.is_stmt(),
        row.prologue_end(),
        row.end_sequence(),
        row.epilogue_begin(),
        active_sequence.start,
        row.address(),
        active_sequence.end,
        status);
}