sway_ir/analysis/
memory_utils.rs

1//! An analysis to compute symbols that escape out from a function.
2//! This could be into another function, or via `ptr_to_int` etc.
3//! Any transformations involving such symbols are unsafe.
4
5use indexmap::IndexSet;
6use rustc_hash::FxHashSet;
7use sway_types::{FxIndexMap, FxIndexSet};
8
9use crate::{
10    AnalysisResult, AnalysisResultT, AnalysisResults, BlockArgument, Context, FuelVmInstruction,
11    Function, InstOp, Instruction, IrError, LocalVar, Pass, PassMutability, ScopedPass, Type,
12    Value, ValueDatum,
13};
14
15pub const ESCAPED_SYMBOLS_NAME: &str = "escaped-symbols";
16
17pub fn create_escaped_symbols_pass() -> Pass {
18    Pass {
19        name: ESCAPED_SYMBOLS_NAME,
20        descr: "Symbols that escape or cannot be analyzed",
21        deps: vec![],
22        runner: ScopedPass::FunctionPass(PassMutability::Analysis(compute_escaped_symbols_pass)),
23    }
24}
25
26#[derive(Debug, Eq, PartialEq, Copy, Clone, Hash)]
27pub enum Symbol {
28    Local(LocalVar),
29    Arg(BlockArgument),
30}
31
32impl Symbol {
33    pub fn get_type(&self, context: &Context) -> Type {
34        match self {
35            Symbol::Local(l) => l.get_type(context),
36            Symbol::Arg(ba) => ba.ty,
37        }
38    }
39
40    pub fn _get_name(&self, context: &Context, function: Function) -> String {
41        match self {
42            Symbol::Local(l) => function.lookup_local_name(context, l).unwrap().clone(),
43            Symbol::Arg(ba) => format!("{}[{}]", ba.block.get_label(context), ba.idx),
44        }
45    }
46}
47
48/// Get [Symbol]s, both [Symbol::Local]s and [Symbol::Arg]s, reachable
49/// from the `val` via chain of [InstOp::GetElemPtr] (GEP) instructions.
50/// A `val` can, via GEP instructions, refer indirectly to none, or one
51/// or more symbols.
52///
53/// If the `val` is not a pointer, an empty set is returned.
54///
55/// Note that this function does not return [Symbol]s potentially reachable
56/// via referencing (`&`), dereferencing (`*`), and raw pointers (`__addr_of`)
57/// and is thus suitable for all IR analysis and manipulation that deals
58/// strictly with GEP access.
59///
60/// To acquire all [Symbol]s reachable from the `val`, use [get_referred_symbols] instead.
61pub fn get_gep_referred_symbols(context: &Context, val: Value) -> FxIndexSet<Symbol> {
62    match get_symbols(context, val, true) {
63        ReferredSymbols::Complete(symbols) => symbols,
64        _ => unreachable!(
65            "In the case of GEP access, the set of returned symbols is always complete."
66        ),
67    }
68}
69
70/// Provides [Symbol]s, both [Symbol::Local]s and [Symbol::Arg]s, reachable
71/// from a certain [Value] via chain of [InstOp::GetElemPtr] (GEP) instructions
72/// or via [InstOp::IntToPtr] and [InstOp::PtrToInt] instruction patterns
73/// specific to references, both referencing (`&`) and dereferencing (`*`),
74/// and raw pointers, via `__addr_of`.
75pub enum ReferredSymbols {
76    /// Guarantees that all [Symbol]s reachable from the particular [Value]
77    /// are collected, thus, that there are no escapes or pointer accesses
78    /// in the scope that _might_ result in symbols indirectly related to
79    /// the [Value] but not reachable only via GEP, or references, or
80    /// raw pointers only.
81    Complete(FxIndexSet<Symbol>),
82    /// Denotes that there _might_ be [Symbol]s out of returned ones that
83    /// are related to the particular [Value], but not reachable only via GEP,
84    /// or references, or raw pointers.
85    Incomplete(FxIndexSet<Symbol>),
86}
87
88impl ReferredSymbols {
89    pub fn new(is_complete: bool, symbols: FxIndexSet<Symbol>) -> Self {
90        if is_complete {
91            Self::Complete(symbols)
92        } else {
93            Self::Incomplete(symbols)
94        }
95    }
96
97    /// Returns the referred [Symbol]s and the information if they are
98    /// complete (true) or incomplete (false).
99    pub fn consume(self) -> (bool, FxIndexSet<Symbol>) {
100        let is_complete = matches!(self, ReferredSymbols::Complete(_));
101        let syms = match self {
102            ReferredSymbols::Complete(syms) | ReferredSymbols::Incomplete(syms) => syms,
103        };
104
105        (is_complete, syms)
106    }
107}
108
109/// Get [Symbol]s, both [Symbol::Local]s and [Symbol::Arg]s, reachable
110/// from the `val` via chain of [InstOp::GetElemPtr] (GEP) instructions
111/// or via [InstOp::IntToPtr] and [InstOp::PtrToInt] instruction patterns
112/// specific to references, both referencing (`&`) and dereferencing (`*`),
113/// and raw pointers, via `__addr_of`.
114/// A `val` can, via these instructions, refer indirectly to none, or one
115/// or more symbols.
116///
117/// Note that *this function does not perform any escape analysis*. E.g., if a
118/// local symbol gets passed by `raw_ptr` or `&T` to a function and returned
119/// back from the function via the same `raw_ptr` or `&T` the value returned
120/// from the function will not be tracked back to the original symbol and the
121/// symbol will not be collected as referred.
122///
123/// This means that, even if the result contains [Symbol]s, it _might_ be that
124/// there are still other [Symbol]s in scope related to the `val`. E.g., in case
125/// of branching, where the first branch directly returns `& local_var_a`
126/// and the second branch, indirectly over a function call as explained above,
127/// `& local_var_b`, only the `local_var_a` will be returned as a result.
128///
129/// Therefore, the function returns the [ReferredSymbols] enum to denote
130/// if the returned set of symbols is guaranteed to be complete, or if it is
131/// incomplete.
132///
133/// If the `val` is not a pointer, an empty set is returned and marked as
134/// [ReferredSymbols::Complete].
135pub fn get_referred_symbols(context: &Context, val: Value) -> ReferredSymbols {
136    get_symbols(context, val, false)
137}
138
139/// Get [Symbol]s, both [Symbol::Local]s and [Symbol::Arg]s, reachable
140/// from the `val`.
141///
142/// If `gep_only` is `true` only the [Symbol]s reachable via GEP instructions
143/// are returned. Otherwise, the result also contains [Symbol]s reachable
144/// via referencing (`&`) and dereferencing (`*`).
145///
146/// If the `val` is not a pointer, an empty set is returned and marked as
147/// [ReferredSymbols::Complete].
148fn get_symbols(context: &Context, val: Value, gep_only: bool) -> ReferredSymbols {
149    // The input to this recursive function is always a pointer.
150    // The function tracks backwards where the pointer is coming from.
151    fn get_symbols_rec(
152        context: &Context,
153        symbols: &mut FxIndexSet<Symbol>,
154        visited: &mut FxHashSet<Value>,
155        ptr: Value,
156        gep_only: bool,
157        is_complete: &mut bool,
158    ) {
159        fn get_argument_symbols(
160            context: &Context,
161            symbols: &mut FxIndexSet<Symbol>,
162            visited: &mut FxHashSet<Value>,
163            arg: BlockArgument,
164            gep_only: bool,
165            is_complete: &mut bool,
166        ) {
167            if arg.block.get_label(context) == "entry" {
168                symbols.insert(Symbol::Arg(arg));
169            } else {
170                arg.block
171                    .pred_iter(context)
172                    .map(|pred| arg.get_val_coming_from(context, pred).unwrap())
173                    .for_each(|v| {
174                        get_symbols_rec(context, symbols, visited, v, gep_only, is_complete)
175                    })
176            }
177        }
178
179        fn get_symbols_from_u64_address_argument(
180            context: &Context,
181            symbols: &mut FxIndexSet<Symbol>,
182            visited: &mut FxHashSet<Value>,
183            u64_address_arg: BlockArgument,
184            is_complete: &mut bool,
185        ) {
186            if u64_address_arg.block.get_label(context) == "entry" {
187                // The u64 address is coming from a function argument.
188                // Same as in the case of a pointer coming from a function argument,
189                // we collect it.
190                symbols.insert(Symbol::Arg(u64_address_arg));
191            } else {
192                u64_address_arg
193                    .block
194                    .pred_iter(context)
195                    .map(|pred| u64_address_arg.get_val_coming_from(context, pred).unwrap())
196                    .for_each(|v| {
197                        get_symbols_from_u64_address_rec(context, symbols, visited, v, is_complete)
198                    })
199            }
200        }
201
202        // The input to this recursive function is always a `u64` holding an address.
203        // The below chain of instructions are specific to patterns where pointers
204        // are obtained from `u64` addresses and vice versa. This includes:
205        //  - referencing and dereferencing
206        //  - raw pointers (`__addr_of`)
207        //  - GTF intrinsic
208        fn get_symbols_from_u64_address_rec(
209            context: &Context,
210            symbols: &mut FxIndexSet<Symbol>,
211            visited: &mut FxHashSet<Value>,
212            u64_address: Value,
213            is_complete: &mut bool,
214        ) {
215            match context.values[u64_address.0].value {
216                // Follow the sources of the address, and for every source address,
217                // recursively come back to this function.
218                ValueDatum::Argument(arg) => get_symbols_from_u64_address_argument(
219                    context,
220                    symbols,
221                    visited,
222                    arg,
223                    is_complete,
224                ),
225                // 1. Patterns related to references and raw pointers.
226                ValueDatum::Instruction(Instruction {
227                    // The address is coming from a `raw_pointer` or `&T` variable.
228                    op: InstOp::Load(_loaded_from),
229                    ..
230                }) => {
231                    // TODO: https://github.com/FuelLabs/sway/issues/6065
232                    //       We want to track sources of loaded addresses.
233                    //       Currently we don't and simply mark the result as incomplete.
234                    *is_complete = false;
235                }
236                ValueDatum::Instruction(Instruction {
237                    op: InstOp::PtrToInt(ptr_value, _),
238                    ..
239                }) => get_symbols_rec(context, symbols, visited, ptr_value, false, is_complete),
240                // 2. The address is coming from a GTF instruction.
241                ValueDatum::Instruction(Instruction {
242                    // There cannot be a symbol behind it, and so the returned set is complete.
243                    op: InstOp::FuelVm(FuelVmInstruction::Gtf { .. }),
244                    ..
245                }) => (),
246                // In other cases, e.g., getting the integer address from an unsafe pointer
247                // arithmetic, or as a function result, etc. we bail out and mark the
248                // collection as not being guaranteed to be a complete set of all referred symbols.
249                _ => {
250                    *is_complete = false;
251                }
252            }
253        }
254
255        if visited.contains(&ptr) {
256            return;
257        }
258        visited.insert(ptr);
259        match context.values[ptr.0].value {
260            ValueDatum::Instruction(Instruction {
261                op: InstOp::GetLocal(local),
262                ..
263            }) => {
264                symbols.insert(Symbol::Local(local));
265            }
266            ValueDatum::Instruction(Instruction {
267                op: InstOp::GetElemPtr { base, .. },
268                ..
269            }) => get_symbols_rec(context, symbols, visited, base, gep_only, is_complete),
270            ValueDatum::Instruction(Instruction {
271                op: InstOp::IntToPtr(u64_address, _),
272                ..
273            }) if !gep_only => get_symbols_from_u64_address_rec(
274                context,
275                symbols,
276                visited,
277                u64_address,
278                is_complete,
279            ),
280            // We've reached a configurable at the top of the chain.
281            // There cannot be a symbol behind it, and so the returned set is complete.
282            ValueDatum::Instruction(Instruction {
283                op: InstOp::GetConfig(_, _),
284                ..
285            }) if !gep_only => (),
286            // Note that in this case, the pointer itself is coming from a `Load`,
287            // and not an address. So, we just continue following the pointer.
288            ValueDatum::Instruction(Instruction {
289                op: InstOp::Load(loaded_from),
290                ..
291            }) if !gep_only => get_symbols_rec(
292                context,
293                symbols,
294                visited,
295                loaded_from,
296                gep_only,
297                is_complete,
298            ),
299            ValueDatum::Instruction(Instruction {
300                op: InstOp::CastPtr(ptr_to_cast, _),
301                ..
302            }) if !gep_only => get_symbols_rec(
303                context,
304                symbols,
305                visited,
306                ptr_to_cast,
307                gep_only,
308                is_complete,
309            ),
310            ValueDatum::Argument(arg) => {
311                get_argument_symbols(context, symbols, visited, arg, gep_only, is_complete)
312            }
313            // We've reached a constant at the top of the chain.
314            // There cannot be a symbol behind it, and so the returned set is complete.
315            ValueDatum::Constant(_) if !gep_only => (),
316            _ if !gep_only => {
317                // In other cases, e.g., getting the pointer from an ASM block,
318                // or as a function result, etc., we cannot track the value up the chain
319                // and cannot guarantee that the value is not coming from some of the symbols.
320                // So, we bail out and mark the collection as not being guaranteed to be
321                // a complete set of all referred symbols.
322                *is_complete = false;
323            }
324            // In the case of GEP only access, the returned set is always complete.
325            _ => (),
326        }
327    }
328
329    if !val.get_type(context).is_some_and(|t| t.is_ptr(context)) {
330        return ReferredSymbols::new(true, IndexSet::default());
331    }
332
333    let mut visited = FxHashSet::default();
334    let mut symbols = IndexSet::default();
335    let mut is_complete = true;
336
337    get_symbols_rec(
338        context,
339        &mut symbols,
340        &mut visited,
341        val,
342        gep_only,
343        &mut is_complete,
344    );
345
346    ReferredSymbols::new(is_complete, symbols)
347}
348
349pub fn get_gep_symbol(context: &Context, val: Value) -> Option<Symbol> {
350    let syms = get_gep_referred_symbols(context, val);
351    (syms.len() == 1)
352        .then(|| syms.iter().next().cloned())
353        .flatten()
354}
355
356/// Return [Symbol] referred by `val` if there is _exactly one_ symbol referred,
357/// or `None` if there are no [Symbol]s referred or if there is more then one
358/// referred.
359pub fn get_referred_symbol(context: &Context, val: Value) -> Option<Symbol> {
360    let syms = get_referred_symbols(context, val);
361    match syms {
362        ReferredSymbols::Complete(syms) => (syms.len() == 1)
363            .then(|| syms.iter().next().cloned())
364            .flatten(),
365        // It might be that we have more than one referred symbol here.
366        ReferredSymbols::Incomplete(_) => None,
367    }
368}
369
370pub enum EscapedSymbols {
371    /// Guarantees that all escaping [Symbol]s are collected.
372    Complete(FxHashSet<Symbol>),
373    /// Denotes that there _might_ be additional escaping [Symbol]s
374    /// out of the collected ones.
375    Incomplete(FxHashSet<Symbol>),
376}
377
378impl AnalysisResultT for EscapedSymbols {}
379
380pub fn compute_escaped_symbols_pass(
381    context: &Context,
382    _: &AnalysisResults,
383    function: Function,
384) -> Result<AnalysisResult, IrError> {
385    Ok(Box::new(compute_escaped_symbols(context, &function)))
386}
387
388pub fn compute_escaped_symbols(context: &Context, function: &Function) -> EscapedSymbols {
389    let add_from_val = |result: &mut FxHashSet<Symbol>, val: &Value, is_complete: &mut bool| {
390        let (complete, syms) = get_referred_symbols(context, *val).consume();
391
392        *is_complete &= complete;
393
394        syms.iter().for_each(|s| {
395            result.insert(*s);
396        });
397    };
398
399    let mut result = FxHashSet::default();
400    let mut is_complete = true;
401
402    for (_block, inst) in function.instruction_iter(context) {
403        match &inst.get_instruction(context).unwrap().op {
404            InstOp::AsmBlock(_, args) => {
405                for arg_init in args.iter().filter_map(|arg| arg.initializer) {
406                    add_from_val(&mut result, &arg_init, &mut is_complete)
407                }
408            }
409            InstOp::UnaryOp { .. } => (),
410            InstOp::BinaryOp { .. } => (),
411            InstOp::BitCast(_, _) => (),
412            InstOp::Branch(_) => (),
413            InstOp::Call(_, args) => args
414                .iter()
415                .for_each(|v| add_from_val(&mut result, v, &mut is_complete)),
416            InstOp::CastPtr(_, _) => (),
417            InstOp::Cmp(_, _, _) => (),
418            InstOp::ConditionalBranch { .. } => (),
419            InstOp::ContractCall { params, .. } => {
420                add_from_val(&mut result, params, &mut is_complete)
421            }
422            InstOp::FuelVm(_) => (),
423            InstOp::GetLocal(_) => (),
424            InstOp::GetGlobal(_) => (),
425            InstOp::GetConfig(_, _) => (),
426            InstOp::GetElemPtr { .. } => (),
427            InstOp::IntToPtr(_, _) => (),
428            InstOp::Load(_) => (),
429            InstOp::MemCopyBytes { .. } => (),
430            InstOp::MemCopyVal { .. } => (),
431            InstOp::Nop => (),
432            InstOp::PtrToInt(v, _) => add_from_val(&mut result, v, &mut is_complete),
433            InstOp::Ret(_, _) => (),
434            InstOp::Store { .. } => (),
435        }
436    }
437
438    if is_complete {
439        EscapedSymbols::Complete(result)
440    } else {
441        EscapedSymbols::Incomplete(result)
442    }
443}
444
445/// Pointers that may possibly be loaded from the instruction `inst`.
446pub fn get_loaded_ptr_values(context: &Context, inst: Value) -> Vec<Value> {
447    match &inst.get_instruction(context).unwrap().op {
448        InstOp::UnaryOp { .. }
449        | InstOp::BinaryOp { .. }
450        | InstOp::BitCast(_, _)
451        | InstOp::Branch(_)
452        | InstOp::ConditionalBranch { .. }
453        | InstOp::Cmp(_, _, _)
454        | InstOp::Nop
455        | InstOp::CastPtr(_, _)
456        | InstOp::GetLocal(_)
457        | InstOp::GetGlobal(_)
458        | InstOp::GetConfig(_, _)
459        | InstOp::GetElemPtr { .. }
460        | InstOp::IntToPtr(_, _) => vec![],
461        InstOp::PtrToInt(src_val_ptr, _) => vec![*src_val_ptr],
462        InstOp::ContractCall {
463            params,
464            coins,
465            asset_id,
466            ..
467        } => vec![*params, *coins, *asset_id],
468        InstOp::Call(_, args) => args.clone(),
469        InstOp::AsmBlock(_, args) => args.iter().filter_map(|val| val.initializer).collect(),
470        InstOp::MemCopyBytes { src_val_ptr, .. }
471        | InstOp::MemCopyVal { src_val_ptr, .. }
472        | InstOp::Ret(src_val_ptr, _)
473        | InstOp::Load(src_val_ptr)
474        | InstOp::FuelVm(FuelVmInstruction::Log {
475            log_val: src_val_ptr,
476            ..
477        })
478        | InstOp::FuelVm(FuelVmInstruction::StateLoadWord(src_val_ptr))
479        | InstOp::FuelVm(FuelVmInstruction::StateStoreWord {
480            key: src_val_ptr, ..
481        })
482        | InstOp::FuelVm(FuelVmInstruction::StateLoadQuadWord {
483            key: src_val_ptr, ..
484        })
485        | InstOp::FuelVm(FuelVmInstruction::StateClear {
486            key: src_val_ptr, ..
487        }) => vec![*src_val_ptr],
488        InstOp::FuelVm(FuelVmInstruction::StateStoreQuadWord {
489            stored_val: memopd1,
490            key: memopd2,
491            ..
492        })
493        | InstOp::FuelVm(FuelVmInstruction::Smo {
494            recipient: memopd1,
495            message: memopd2,
496            ..
497        }) => vec![*memopd1, *memopd2],
498        InstOp::Store { dst_val_ptr: _, .. } => vec![],
499        InstOp::FuelVm(FuelVmInstruction::Gtf { .. })
500        | InstOp::FuelVm(FuelVmInstruction::ReadRegister(_))
501        | InstOp::FuelVm(FuelVmInstruction::Revert(_) | FuelVmInstruction::JmpMem) => vec![],
502        InstOp::FuelVm(FuelVmInstruction::WideUnaryOp { arg, .. }) => vec![*arg],
503        InstOp::FuelVm(FuelVmInstruction::WideBinaryOp { arg1, arg2, .. })
504        | InstOp::FuelVm(FuelVmInstruction::WideCmpOp { arg1, arg2, .. }) => {
505            vec![*arg1, *arg2]
506        }
507        InstOp::FuelVm(FuelVmInstruction::WideModularOp {
508            arg1, arg2, arg3, ..
509        }) => vec![*arg1, *arg2, *arg3],
510        InstOp::FuelVm(FuelVmInstruction::Retd { ptr, .. }) => vec![*ptr],
511    }
512}
513
514/// [Symbol]s that may possibly, directly or indirectly, be loaded from the instruction `inst`.
515pub fn get_loaded_symbols(context: &Context, inst: Value) -> ReferredSymbols {
516    let mut res = IndexSet::default();
517    let mut is_complete = true;
518    for val in get_loaded_ptr_values(context, inst) {
519        let (complete, syms) = get_referred_symbols(context, val).consume();
520
521        is_complete &= complete;
522
523        for sym in syms {
524            res.insert(sym);
525        }
526    }
527
528    ReferredSymbols::new(is_complete, res)
529}
530
531/// Pointers that may possibly be stored to the instruction `inst`.
532pub fn get_stored_ptr_values(context: &Context, inst: Value) -> Vec<Value> {
533    match &inst.get_instruction(context).unwrap().op {
534        InstOp::UnaryOp { .. }
535        | InstOp::BinaryOp { .. }
536        | InstOp::BitCast(_, _)
537        | InstOp::Branch(_)
538        | InstOp::ConditionalBranch { .. }
539        | InstOp::Cmp(_, _, _)
540        | InstOp::Nop
541        | InstOp::PtrToInt(_, _)
542        | InstOp::Ret(_, _)
543        | InstOp::CastPtr(_, _)
544        | InstOp::GetLocal(_)
545        | InstOp::GetGlobal(_)
546        | InstOp::GetConfig(_, _)
547        | InstOp::GetElemPtr { .. }
548        | InstOp::IntToPtr(_, _) => vec![],
549        InstOp::ContractCall { params, .. } => vec![*params],
550        InstOp::Call(_, args) => args.clone(),
551        InstOp::AsmBlock(_, args) => args.iter().filter_map(|val| val.initializer).collect(),
552        InstOp::MemCopyBytes { dst_val_ptr, .. }
553        | InstOp::MemCopyVal { dst_val_ptr, .. }
554        | InstOp::Store { dst_val_ptr, .. } => vec![*dst_val_ptr],
555        InstOp::Load(_) => vec![],
556        InstOp::FuelVm(vmop) => match vmop {
557            FuelVmInstruction::Gtf { .. }
558            | FuelVmInstruction::Log { .. }
559            | FuelVmInstruction::ReadRegister(_)
560            | FuelVmInstruction::Revert(_)
561            | FuelVmInstruction::JmpMem
562            | FuelVmInstruction::Smo { .. }
563            | FuelVmInstruction::StateClear { .. } => vec![],
564            FuelVmInstruction::StateLoadQuadWord { load_val, .. } => vec![*load_val],
565            FuelVmInstruction::StateLoadWord(_) | FuelVmInstruction::StateStoreWord { .. } => {
566                vec![]
567            }
568            FuelVmInstruction::StateStoreQuadWord { stored_val: _, .. } => vec![],
569            FuelVmInstruction::WideUnaryOp { result, .. }
570            | FuelVmInstruction::WideBinaryOp { result, .. }
571            | FuelVmInstruction::WideModularOp { result, .. } => vec![*result],
572            FuelVmInstruction::WideCmpOp { .. } => vec![],
573            _ => vec![],
574        },
575    }
576}
577
578/// [Symbol]s that may possibly, directly or indirectly, be stored to the instruction `inst`.
579pub fn get_stored_symbols(context: &Context, inst: Value) -> ReferredSymbols {
580    let mut res = IndexSet::default();
581    let mut is_complete = true;
582    for val in get_stored_ptr_values(context, inst) {
583        let (complete, syms) = get_referred_symbols(context, val).consume();
584
585        is_complete &= complete;
586
587        for sym in syms {
588            res.insert(sym);
589        }
590    }
591
592    ReferredSymbols::new(is_complete, res)
593}
594
595/// Combine a series of GEPs into one.
596pub fn combine_indices(context: &Context, val: Value) -> Option<Vec<Value>> {
597    match &context.values[val.0].value {
598        ValueDatum::Instruction(Instruction {
599            op: InstOp::GetLocal(_),
600            ..
601        }) => Some(vec![]),
602        ValueDatum::Instruction(Instruction {
603            op:
604                InstOp::GetElemPtr {
605                    base,
606                    elem_ptr_ty: _,
607                    indices,
608                },
609            ..
610        }) => {
611            let mut base_indices = combine_indices(context, *base)?;
612            base_indices.append(&mut indices.clone());
613            Some(base_indices)
614        }
615        ValueDatum::Argument(_) => Some(vec![]),
616        _ => None,
617    }
618}
619
620/// Given a memory pointer instruction, compute the offset of indexed element,
621/// for each symbol that it may alias to.
622/// If for any symbol we can't compute it, return None.
623pub fn get_memory_offsets(context: &Context, val: Value) -> Option<FxIndexMap<Symbol, u64>> {
624    let syms = get_gep_referred_symbols(context, val);
625
626    let mut res: FxIndexMap<Symbol, u64> = FxIndexMap::default();
627    for sym in syms {
628        let offset = sym
629            .get_type(context)
630            .get_pointee_type(context)?
631            .get_value_indexed_offset(context, &combine_indices(context, val)?)?;
632        res.insert(sym, offset);
633    }
634    Some(res)
635}
636
637/// Can memory ranges [val1, val1+len1] and [val2, val2+len2] overlap?
638/// Conservatively returns true if cannot statically determine.
639pub fn may_alias(context: &Context, val1: Value, len1: u64, val2: Value, len2: u64) -> bool {
640    let (Some(mem_offsets_1), Some(mem_offsets_2)) = (
641        get_memory_offsets(context, val1),
642        get_memory_offsets(context, val2),
643    ) else {
644        return true;
645    };
646
647    for (sym1, off1) in mem_offsets_1 {
648        if let Some(off2) = mem_offsets_2.get(&sym1) {
649            // does off1 + len1 overlap with off2 + len2?
650            if (off1 <= *off2 && (off1 + len1 > *off2)) || (*off2 <= off1 && (*off2 + len2 > off1))
651            {
652                return true;
653            }
654        }
655    }
656    false
657}
658
659/// Are memory ranges [val1, val1+len1] and [val2, val2+len2] exactly the same?
660/// Conservatively returns false if cannot statically determine.
661pub fn must_alias(context: &Context, val1: Value, len1: u64, val2: Value, len2: u64) -> bool {
662    let (Some(mem_offsets_1), Some(mem_offsets_2)) = (
663        get_memory_offsets(context, val1),
664        get_memory_offsets(context, val2),
665    ) else {
666        return false;
667    };
668
669    if mem_offsets_1.len() != 1 || mem_offsets_2.len() != 1 {
670        return false;
671    }
672
673    let (sym1, off1) = mem_offsets_1.iter().next().unwrap();
674    let (sym2, off2) = mem_offsets_2.iter().next().unwrap();
675
676    // does off1 + len1 overlap with off2 + len2?
677    sym1 == sym2 && off1 == off2 && len1 == len2
678}
679
680/// For a pointer argument `ptr_val`, what's the size of its pointee.
681pub fn pointee_size(context: &Context, ptr_val: Value) -> u64 {
682    ptr_val
683        .get_type(context)
684        .unwrap()
685        .get_pointee_type(context)
686        .expect("Expected arg to be a pointer")
687        .size(context)
688        .in_bytes()
689}