nu_protocol/ir/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
use std::{fmt, sync::Arc};

use crate::{
    ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
    engine::EngineState,
    BlockId, DeclId, RegId, Span, Value, VarId,
};

use chrono::{DateTime, FixedOffset};
use serde::{Deserialize, Serialize};

mod call;
mod display;

pub use call::*;
pub use display::{FmtInstruction, FmtIrBlock};

/// A block of IR: a list of [`Instruction`]s plus the side tables that belong to them.
///
/// `Debug` is implemented by hand further down so that the verbose `ast` field can be left out of
/// the output.
#[derive(Clone, Serialize, Deserialize)]
pub struct IrBlock {
    /// The instructions that make up this block.
    pub instructions: Vec<Instruction>,
    /// Source spans for the block.
    // NOTE(review): presumably one span per instruction, parallel to `instructions` — confirm.
    pub spans: Vec<Span>,
    /// Raw bytes that [`DataSlice`] values index into. (De)serialized through the
    /// `serde_arc_u8_array` helper module because the field is an `Arc<[u8]>`.
    #[serde(with = "serde_arc_u8_array")]
    pub data: Arc<[u8]>,
    /// Optional references back into the AST (see [`IrAstRef`]).
    // NOTE(review): presumably parallel to `instructions`, `None` where no reference was
    // recorded — confirm.
    pub ast: Vec<Option<IrAstRef>>,
    /// Additional information that can be added to help with debugging
    pub comments: Vec<Box<str>>,
    // NOTE(review): presumably the number of registers this block needs — confirm.
    pub register_count: u32,
    // NOTE(review): presumably the number of redirection file slots this block needs (compare
    // the `file_num` operands of `OpenFile`/`WriteFile`/`CloseFile`) — confirm.
    pub file_count: u32,
}

impl fmt::Debug for IrBlock {
    /// Writes a struct-style debug listing of the block. The `ast` field is skipped on purpose
    /// and the output is marked as non-exhaustive to signal that.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            instructions,
            spans,
            data,
            ast: _, // too verbose and doesn't add much
            comments,
            register_count,
            file_count,
        } = self;

        let mut out = f.debug_struct("IrBlock");
        out.field("instructions", instructions);
        out.field("spans", spans);
        out.field("data", data);
        out.field("comments", comments);
        out.field("register_count", register_count);
        out.field("file_count", file_count);
        out.finish_non_exhaustive()
    }
}

impl IrBlock {
    /// Builds a [`FmtIrBlock`] borrowing this block and the given engine state. The result can be
    /// formatted with [`Display`](std::fmt::Display) to show a detailed listing of the
    /// instructions contained within this [`IrBlock`].
    pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> {
        let ir_block = self;
        FmtIrBlock {
            ir_block,
            engine_state,
        }
    }
}

/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
/// string data that a block uses.
///
/// Indexing a `[u8]` with a `DataSlice` (see the `Index<DataSlice>` impl in this file) yields the
/// bytes in `start..start + len`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DataSlice {
    /// Offset of the first byte of the slice within the indexed byte buffer.
    pub start: u32,
    /// Number of bytes covered by the slice.
    pub len: u32,
}

impl DataSlice {
    /// A data slice that contains no data. This slice is always valid.
    pub const fn empty() -> DataSlice {
        DataSlice { start: 0, len: 0 }
    }
}

impl std::ops::Index<DataSlice> for [u8] {
    type Output = [u8];

    fn index(&self, index: DataSlice) -> &Self::Output {
        &self[index.start as usize..(index.start as usize + index.len as usize)]
    }
}

/// A possible reference into the abstract syntax tree for an instruction. This is not present for
/// most instructions and is just added when needed.
///
/// The expression is held behind an [`Arc`]. The hand-written `Serialize`/`Deserialize` impls in
/// this file (de)serialize the wrapped [`Expression`] directly; deserializing allocates a new
/// `Arc` for the expression.
#[derive(Debug, Clone)]
pub struct IrAstRef(pub Arc<Expression>);

impl Serialize for IrAstRef {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        self.0.as_ref().serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for IrAstRef {
    /// Deserializes a plain [`Expression`] and wraps it in a freshly allocated `Arc`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let expr = Expression::deserialize(deserializer)?;
        Ok(IrAstRef(Arc::new(expr)))
    }
}

/// A single IR instruction, as stored in [`IrBlock::instructions`].
///
/// Operand conventions used by the variants below:
///
/// * [`RegId`] operands name registers. A field called `src_dst` (or `lhs_dst`) is a register that
///   is read as input and then overwritten with the result of the instruction.
/// * [`DataSlice`] operands refer to bytes stored in the `data` array of the owning block.
/// * `index` / `end_index` operands are branch targets: offsets within the same block.
///
/// The size of this type is checked by a compile-time assertion further down in this file, so be
/// careful about adding large inline payloads to a variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Instruction {
    /// Unreachable code path (error)
    Unreachable,
    /// Load a literal value into the `dst` register
    LoadLiteral { dst: RegId, lit: Literal },
    /// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation)
    LoadValue { dst: RegId, val: Box<Value> },
    /// Move a register. Value is taken from `src` (used by this instruction).
    Move { dst: RegId, src: RegId },
    /// Copy a register (must be a collected value). Value is still in `src` after this instruction.
    Clone { dst: RegId, src: RegId },
    /// Collect a stream in a register to a value
    Collect { src_dst: RegId },
    /// Change the span of the contents of a register to the span of this instruction.
    Span { src_dst: RegId },
    /// Drop the value/stream in a register, without draining
    Drop { src: RegId },
    /// Drain the value/stream in a register and discard (e.g. semicolon).
    ///
    /// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit
    /// code, and invokes any available error handler with Empty, or if not available, returns an
    /// exit-code-only stream, leaving the block.
    Drain { src: RegId },
    /// Drain the value/stream in a register and discard only if this is the last pipeline element.
    // TODO: see if it's possible to remove this
    DrainIfEnd { src: RegId },
    /// Load the value of a variable into the `dst` register
    LoadVariable { dst: RegId, var_id: VarId },
    /// Store the value of a variable from the `src` register
    StoreVariable { var_id: VarId, src: RegId },
    /// Remove a variable from the stack, freeing up whatever resources were associated with it
    DropVariable { var_id: VarId },
    /// Load the value of an environment variable into the `dst` register
    LoadEnv { dst: RegId, key: DataSlice },
    /// Load the value of an environment variable into the `dst` register, or `Nothing` if it
    /// doesn't exist
    LoadEnvOpt { dst: RegId, key: DataSlice },
    /// Store the value of an environment variable from the `src` register
    StoreEnv { key: DataSlice, src: RegId },
    /// Add a positional arg to the next (internal) call.
    PushPositional { src: RegId },
    /// Add a list of args to the next (internal) call (spread/rest).
    AppendRest { src: RegId },
    /// Add a named arg with no value to the next (internal) call.
    PushFlag { name: DataSlice },
    /// Add a short named arg with no value to the next (internal) call.
    PushShortFlag { short: DataSlice },
    /// Add a named arg with a value to the next (internal) call.
    PushNamed { name: DataSlice, src: RegId },
    /// Add a short named arg with a value to the next (internal) call.
    PushShortNamed { short: DataSlice, src: RegId },
    /// Add parser info to the next (internal) call.
    PushParserInfo {
        name: DataSlice,
        info: Box<Expression>,
    },
    /// Set the redirection for stdout for the next call (only).
    ///
    /// The register for a file redirection is not consumed.
    RedirectOut { mode: RedirectMode },
    /// Set the redirection for stderr for the next call (only).
    ///
    /// The register for a file redirection is not consumed.
    RedirectErr { mode: RedirectMode },
    /// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved.
    CheckErrRedirected { src: RegId },
    /// Open a file for redirection, pushing it onto the file stack.
    OpenFile {
        file_num: u32,
        path: RegId,
        append: bool,
    },
    /// Write data from the register to a file. This is done to finish a file redirection, in case
    /// an internal command or expression was evaluated rather than an external one.
    WriteFile { file_num: u32, src: RegId },
    /// Pop a file used for redirection from the file stack.
    CloseFile { file_num: u32 },
    /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
    /// overwriting it. The argument stack is used implicitly and cleared when the call ends.
    Call { decl_id: DeclId, src_dst: RegId },
    /// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value.
    /// Used for string interpolation literals. Not the same thing as the `++` operator.
    StringAppend { src_dst: RegId, val: RegId },
    /// Convert a string into a glob. Used for glob interpolation and setting glob variables. If the
    /// value is already a glob, it won't be modified (`no_expand` will have no effect).
    GlobFrom { src_dst: RegId, no_expand: bool },
    /// Push a value onto the end of a list. Used to construct list literals.
    ListPush { src_dst: RegId, item: RegId },
    /// Spread a value onto the end of a list. Used to construct list literals.
    ListSpread { src_dst: RegId, items: RegId },
    /// Insert a key-value pair into a record. Used to construct record literals. Raises an error if
    /// the key already existed in the record.
    RecordInsert {
        src_dst: RegId,
        key: RegId,
        val: RegId,
    },
    /// Spread a record onto a record. Used to construct record literals. Any existing value for the
    /// key is overwritten.
    RecordSpread { src_dst: RegId, items: RegId },
    /// Negate a boolean.
    Not { src_dst: RegId },
    /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
    /// `lhs_dst`.
    BinaryOp {
        lhs_dst: RegId,
        op: Operator,
        rhs: RegId,
    },
    /// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst`
    FollowCellPath { src_dst: RegId, path: RegId },
    /// Clone the value at a cell path in `src`, storing the result to `dst`. The original value
    /// remains in `src`. Must be a collected value.
    CloneCellPath { dst: RegId, src: RegId, path: RegId },
    /// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified
    /// value back to `src_dst`
    UpsertCellPath {
        src_dst: RegId,
        path: RegId,
        new_value: RegId,
    },
    /// Jump to an offset in this block
    Jump { index: usize },
    /// Branch to an offset in this block if the value of the `cond` register is a true boolean,
    /// otherwise continue execution
    BranchIf { cond: RegId, index: usize },
    /// Branch to an offset in this block if the value of the `src` register is Empty or Nothing,
    /// otherwise continue execution. The original value in `src` is preserved.
    BranchIfEmpty { src: RegId, index: usize },
    /// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
    /// variables captured by the pattern. If the pattern doesn't match, continue execution. The
    /// original value is preserved in `src` through this instruction.
    Match {
        pattern: Box<Pattern>,
        src: RegId,
        index: usize,
    },
    /// Check that a match guard is a boolean, throwing
    /// [`MatchGuardNotBool`](crate::ShellError::MatchGuardNotBool) if it isn't. Preserves `src`.
    CheckMatchGuard { src: RegId },
    /// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
    /// `end_index` if the iterator is finished
    Iterate {
        dst: RegId,
        stream: RegId,
        end_index: usize,
    },
    /// Push an error handler, without capturing the error value
    OnError { index: usize },
    /// Push an error handler, capturing the error value into `dst`. If the error handler is not
    /// called, the register should be freed manually.
    OnErrorInto { index: usize, dst: RegId },
    /// Pop an error handler. This is not necessary when control flow is directed to the error
    /// handler due to an error.
    PopErrorHandler,
    /// Return early from the block, raising a `ShellError::Return` instead.
    ///
    /// Collecting the value is unavoidable.
    ReturnEarly { src: RegId },
    /// Return from the block with the value in the register
    Return { src: RegId },
}

impl Instruction {
    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
    /// listing of the instruction.
    pub fn display<'a>(
        &'a self,
        engine_state: &'a EngineState,
        data: &'a [u8],
    ) -> FmtInstruction<'a> {
        FmtInstruction {
            engine_state,
            instruction: self,
            data,
        }
    }

    /// Get the output register, for instructions that produce some kind of immediate result.
    pub fn output_register(&self) -> Option<RegId> {
        match *self {
            Instruction::Unreachable => None,
            Instruction::LoadLiteral { dst, .. } => Some(dst),
            Instruction::LoadValue { dst, .. } => Some(dst),
            Instruction::Move { dst, .. } => Some(dst),
            Instruction::Clone { dst, .. } => Some(dst),
            Instruction::Collect { src_dst } => Some(src_dst),
            Instruction::Span { src_dst } => Some(src_dst),
            Instruction::Drop { .. } => None,
            Instruction::Drain { .. } => None,
            Instruction::DrainIfEnd { .. } => None,
            Instruction::LoadVariable { dst, .. } => Some(dst),
            Instruction::StoreVariable { .. } => None,
            Instruction::DropVariable { .. } => None,
            Instruction::LoadEnv { dst, .. } => Some(dst),
            Instruction::LoadEnvOpt { dst, .. } => Some(dst),
            Instruction::StoreEnv { .. } => None,
            Instruction::PushPositional { .. } => None,
            Instruction::AppendRest { .. } => None,
            Instruction::PushFlag { .. } => None,
            Instruction::PushShortFlag { .. } => None,
            Instruction::PushNamed { .. } => None,
            Instruction::PushShortNamed { .. } => None,
            Instruction::PushParserInfo { .. } => None,
            Instruction::RedirectOut { .. } => None,
            Instruction::RedirectErr { .. } => None,
            Instruction::CheckErrRedirected { .. } => None,
            Instruction::OpenFile { .. } => None,
            Instruction::WriteFile { .. } => None,
            Instruction::CloseFile { .. } => None,
            Instruction::Call { src_dst, .. } => Some(src_dst),
            Instruction::StringAppend { src_dst, .. } => Some(src_dst),
            Instruction::GlobFrom { src_dst, .. } => Some(src_dst),
            Instruction::ListPush { src_dst, .. } => Some(src_dst),
            Instruction::ListSpread { src_dst, .. } => Some(src_dst),
            Instruction::RecordInsert { src_dst, .. } => Some(src_dst),
            Instruction::RecordSpread { src_dst, .. } => Some(src_dst),
            Instruction::Not { src_dst } => Some(src_dst),
            Instruction::BinaryOp { lhs_dst, .. } => Some(lhs_dst),
            Instruction::FollowCellPath { src_dst, .. } => Some(src_dst),
            Instruction::CloneCellPath { dst, .. } => Some(dst),
            Instruction::UpsertCellPath { src_dst, .. } => Some(src_dst),
            Instruction::Jump { .. } => None,
            Instruction::BranchIf { .. } => None,
            Instruction::BranchIfEmpty { .. } => None,
            Instruction::Match { .. } => None,
            Instruction::CheckMatchGuard { .. } => None,
            Instruction::Iterate { dst, .. } => Some(dst),
            Instruction::OnError { .. } => None,
            Instruction::OnErrorInto { .. } => None,
            Instruction::PopErrorHandler => None,
            Instruction::ReturnEarly { .. } => None,
            Instruction::Return { .. } => None,
        }
    }

    /// Returns the branch target index of the instruction if this is a branching instruction.
    pub fn branch_target(&self) -> Option<usize> {
        match self {
            Instruction::Jump { index } => Some(*index),
            Instruction::BranchIf { cond: _, index } => Some(*index),
            Instruction::BranchIfEmpty { src: _, index } => Some(*index),
            Instruction::Match {
                pattern: _,
                src: _,
                index,
            } => Some(*index),

            Instruction::Iterate {
                dst: _,
                stream: _,
                end_index,
            } => Some(*end_index),
            Instruction::OnError { index } => Some(*index),
            Instruction::OnErrorInto { index, dst: _ } => Some(*index),
            _ => None,
        }
    }

    /// Sets the branch target of the instruction if this is a branching instruction.
    ///
    /// Returns `Err(target_index)` if it isn't a branching instruction.
    pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
        match self {
            Instruction::Jump { index } => *index = target_index,
            Instruction::BranchIf { cond: _, index } => *index = target_index,
            Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
            Instruction::Match {
                pattern: _,
                src: _,
                index,
            } => *index = target_index,

            Instruction::Iterate {
                dst: _,
                stream: _,
                end_index,
            } => *end_index = target_index,
            Instruction::OnError { index } => *index = target_index,
            Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
            _ => return Err(target_index),
        }
        Ok(())
    }
}

// Compile-time guard that documents/enforces the size of `Instruction` in bytes.
// We should try to avoid increasing the size of `Instruction`,
// and PRs that do so will have to change the number below so that it's noted in review.
const _: () = {
    const MAX_INSTRUCTION_SIZE: usize = 24;
    assert!(std::mem::size_of::<Instruction>() <= MAX_INSTRUCTION_SIZE);
};

/// A literal value that can be embedded in an instruction.
///
/// This is the payload of [`Instruction::LoadLiteral`]. Variants that carry a [`DataSlice`] keep
/// their bytes in the `data` array of the owning block rather than inline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Literal {
    /// A boolean.
    Bool(bool),
    /// A 64-bit signed integer.
    Int(i64),
    /// A 64-bit floating point number.
    Float(f64),
    /// A file size.
    // NOTE(review): the unit is presumably bytes — confirm.
    Filesize(i64),
    /// A duration.
    // NOTE(review): the unit is presumably nanoseconds — confirm.
    Duration(i64),
    /// Binary data, referenced as a slice of the block's data.
    Binary(DataSlice),
    /// A block, referenced by its [`BlockId`].
    Block(BlockId),
    /// A closure, referenced by the [`BlockId`] of its body.
    Closure(BlockId),
    /// A row condition, referenced by its [`BlockId`].
    RowCondition(BlockId),
    /// A range. `start`, `step` and `end` name registers rather than holding values directly.
    Range {
        start: RegId,
        step: RegId,
        end: RegId,
        inclusion: RangeInclusion,
    },
    /// A list.
    // NOTE(review): `capacity` is presumably a preallocation hint, with the items added
    // afterwards by `Instruction::ListPush` / `ListSpread` — confirm.
    List {
        capacity: usize,
    },
    /// A record.
    // NOTE(review): `capacity` is presumably a preallocation hint, with the entries added
    // afterwards by `Instruction::RecordInsert` / `RecordSpread` — confirm.
    Record {
        capacity: usize,
    },
    /// A file path whose text is stored in the block's data.
    // NOTE(review): `no_expand` presumably disables path expansion — confirm.
    Filepath {
        val: DataSlice,
        no_expand: bool,
    },
    /// A directory path whose text is stored in the block's data.
    // NOTE(review): `no_expand` presumably disables path expansion — confirm.
    Directory {
        val: DataSlice,
        no_expand: bool,
    },
    /// A glob pattern whose text is stored in the block's data.
    // NOTE(review): `no_expand` presumably disables glob expansion — confirm.
    GlobPattern {
        val: DataSlice,
        no_expand: bool,
    },
    /// A string whose bytes are stored in the block's data.
    String(DataSlice),
    /// A raw string whose bytes are stored in the block's data.
    RawString(DataSlice),
    /// A cell path, stored boxed.
    CellPath(Box<CellPath>),
    /// A date/time with a fixed UTC offset, stored boxed.
    Date(Box<DateTime<FixedOffset>>),
    /// The `Nothing` value.
    Nothing,
}

/// A redirection mode for the next call. See [`OutDest`](crate::OutDest).
///
/// This is generated by:
///
/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or
/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being
///    piped into.
///
/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack).
///
/// It is the operand of [`Instruction::RedirectOut`] and [`Instruction::RedirectErr`].
// NOTE(review): the undocumented unit variants below presumably correspond to the same-named
// `OutDest` variants — confirm against `OutDest`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RedirectMode {
    Pipe,
    PipeSeparate,
    Value,
    Null,
    Inherit,
    Print,
    /// Use the given numbered file.
    // NOTE(review): `file_num` presumably uses the same numbering as the `file_num` operands of
    // `Instruction::OpenFile` / `WriteFile` / `CloseFile` — confirm.
    File {
        file_num: u32,
    },
    /// Use the redirection mode requested by the caller, for a pre-return call.
    Caller,
}

/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized. Meant to be plugged into a
/// field with `#[serde(with = "serde_arc_u8_array")]`.
mod serde_arc_u8_array {
    use serde::{Deserialize, Serialize};
    use std::sync::Arc;

    /// Serializes the shared buffer the same way a plain `[u8]` slice is serialized.
    pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let bytes: &[u8] = data;
        bytes.serialize(ser)
    }

    /// Deserializes into a `Vec<u8>` first, then converts the vector into an `Arc<[u8]>`.
    pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let bytes = Vec::<u8>::deserialize(de)?;
        Ok(Arc::from(bytes))
    }
}