pgrx_bindgen/
build.rs

1//LICENSE Portions Copyright 2019-2021 ZomboDB, LLC.
2//LICENSE
3//LICENSE Portions Copyright 2021-2023 Technology Concepts & Design, Inc.
4//LICENSE
5//LICENSE Portions Copyright 2023-2023 PgCentral Foundation, Inc. <contact@pgcentral.org>
6//LICENSE
7//LICENSE All rights reserved.
8//LICENSE
9//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file.
10use bindgen::callbacks::{DeriveTrait, EnumVariantValue, ImplementsTrait, MacroParsingBehavior};
11use bindgen::NonCopyUnionStyle;
12use eyre::{eyre, WrapErr};
13use pgrx_pg_config::{
14    is_supported_major_version, PgConfig, PgConfigSelector, PgMinorVersion, PgVersion, Pgrx,
15    SUPPORTED_VERSIONS,
16};
17use quote::{quote, ToTokens};
18use std::cell::RefCell;
19use std::cmp::Ordering;
20use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
21use std::fs;
22use std::path::{self, Path, PathBuf}; // disambiguate path::Path and syn::Type::Path
23use std::process::{Command, Output};
24use std::rc::Rc;
25use std::sync::OnceLock;
26use syn::{ForeignItem, Item, ItemConst};
27
/// Type names for which `BindingOverride::blocklisted_type_implements_trait` answers
/// bindgen's "does this type implement the trait?" query (`Copy` and `Debug`: yes,
/// anything else: no).
const BLOCKLISTED_TYPES: [&str; 3] = ["Datum", "NullableDatum", "Oid"];
29
// These postgres versions were effectively "yanked" by the community, even though they still
// exist in the wild.  pgrx will refuse to compile against them: `main` aborts the build when
// the selected `pg_config` reports one of these versions.
const YANKED_POSTGRES_VERSIONS: &[PgVersion] = &[
    // this set of releases introduced an ABI break in the [`pg_sys::ResultRelInfo`] struct
    // and was replaced by the community on 2024-11-21
    // https://www.postgresql.org/about/news/postgresql-172-166-1510-1415-1318-and-1222-released-2965/
    PgVersion::new(17, PgMinorVersion::Release(1), None),
    PgVersion::new(16, PgMinorVersion::Release(5), None),
    PgVersion::new(15, PgMinorVersion::Release(9), None),
    PgVersion::new(14, PgMinorVersion::Release(14), None),
    PgVersion::new(13, PgMinorVersion::Release(17), None),
    PgVersion::new(12, PgMinorVersion::Release(21), None),
];
43
// Provides `detect_include_paths_for`, which `run_bindgen` uses to locate clang's include paths.
pub(super) mod clang;
// NOTE(review): presumably the symbol blocklist applied by `add_blocklists` — confirm.
pub(super) mod sym_blocklist;
46
/// State behind the bindgen parse callbacks: the C macros to skip, plus a shared map
/// that accumulates the enum variants reported while bindgen parses the headers.
#[derive(Debug)]
struct BindingOverride {
    /// Macro names for which `will_parse_macro` answers `MacroParsingBehavior::Ignore`.
    ignore_macros: HashSet<&'static str>,
    /// Enum name -> recorded `(variant name, value)` pairs; filled in by
    /// `enum_variant_behavior` and shared with whoever passed it to `new_from`.
    enum_names: InnerMut<EnumMap>,
}
52
/// Shared ownership with interior mutability: a reference-counted `RefCell`.
type InnerMut<T> = Rc<RefCell<T>>;
/// Maps an enum's name to the `(variant name, variant value)` pairs recorded for it.
type EnumMap = BTreeMap<String, Vec<(String, EnumVariantValue)>>;
55
56impl BindingOverride {
57    fn new_from(enum_names: InnerMut<EnumMap>) -> Self {
58        // these cause duplicate definition problems on linux
59        // see: https://github.com/rust-lang/rust-bindgen/issues/687
60        BindingOverride {
61            ignore_macros: HashSet::from_iter([
62                "FP_INFINITE",
63                "FP_NAN",
64                "FP_NORMAL",
65                "FP_SUBNORMAL",
66                "FP_ZERO",
67                "IPPORT_RESERVED",
68                // These are just annoying due to clippy
69                "M_E",
70                "M_LOG2E",
71                "M_LOG10E",
72                "M_LN2",
73                "M_LN10",
74                "M_PI",
75                "M_PI_2",
76                "M_PI_4",
77                "M_1_PI",
78                "M_2_PI",
79                "M_SQRT2",
80                "M_SQRT1_2",
81                "M_2_SQRTPI",
82            ]),
83            enum_names,
84        }
85    }
86}
87
88impl bindgen::callbacks::ParseCallbacks for BindingOverride {
89    fn will_parse_macro(&self, name: &str) -> MacroParsingBehavior {
90        if self.ignore_macros.contains(name) {
91            bindgen::callbacks::MacroParsingBehavior::Ignore
92        } else {
93            bindgen::callbacks::MacroParsingBehavior::Default
94        }
95    }
96
97    fn blocklisted_type_implements_trait(
98        &self,
99        name: &str,
100        derive_trait: DeriveTrait,
101    ) -> Option<ImplementsTrait> {
102        if !BLOCKLISTED_TYPES.contains(&name) {
103            return None;
104        }
105
106        let implements_trait = match derive_trait {
107            DeriveTrait::Copy => ImplementsTrait::Yes,
108            DeriveTrait::Debug => ImplementsTrait::Yes,
109            _ => ImplementsTrait::No,
110        };
111        Some(implements_trait)
112    }
113
114    // FIXME: alter types on some int macros to the actually-used types so we can stop as-casting them
115    fn int_macro(&self, _name: &str, _value: i64) -> Option<bindgen::callbacks::IntKind> {
116        None
117    }
118
119    // FIXME: implement a... C compiler?
120    fn func_macro(&self, _name: &str, _value: &[&[u8]]) {}
121
122    /// Intentionally doesn't do anything, just updates internal state.
123    fn enum_variant_behavior(
124        &self,
125        enum_name: Option<&str>,
126        variant_name: &str,
127        variant_value: bindgen::callbacks::EnumVariantValue,
128    ) -> Option<bindgen::callbacks::EnumVariantCustomBehavior> {
129        enum_name.inspect(|name| match name.strip_prefix("enum").unwrap_or(name).trim() {
130            // specifically overridden enum
131            "NodeTag" => return,
132            name if name.contains("unnamed at") || name.contains("anonymous at") => return,
133            // to prevent problems with BuiltinOid
134            _ if variant_name.contains("OID") => return,
135            name => self
136                .enum_names
137                .borrow_mut()
138                .entry(name.to_string())
139                .or_insert(Vec::new())
140                .push((variant_name.to_string(), variant_value)),
141        });
142        None
143    }
144
145    // FIXME: hide nodetag fields and default them to appropriate values
146    fn field_visibility(
147        &self,
148        _info: bindgen::callbacks::FieldInfo<'_>,
149    ) -> Option<bindgen::FieldVisibilityKind> {
150        None
151    }
152}
153
154pub fn main() -> eyre::Result<()> {
155    if env_tracked("DOCS_RS").as_deref() == Some("1") {
156        return Ok(());
157    }
158
159    // dump the environment for debugging if asked
160    if env_tracked("PGRX_BUILD_VERBOSE").as_deref() == Some("true") {
161        for (k, v) in std::env::vars() {
162            eprintln!("{k}={v}");
163        }
164    }
165
166    let compile_cshim = env_tracked("CARGO_FEATURE_CSHIM").as_deref() == Some("1");
167
168    let is_for_release =
169        env_tracked("PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE").as_deref() == Some("1");
170
171    let build_paths = BuildPaths::from_env();
172
173    eprintln!("build_paths={build_paths:?}");
174
175    emit_rerun_if_changed();
176
177    let pg_configs: Vec<(u16, PgConfig)> = if is_for_release {
178        // This does not cross-check config.toml and Cargo.toml versions, as it is release infra.
179        Pgrx::from_config()?.iter(PgConfigSelector::All)
180            .map(|r| r.expect("invalid pg_config"))
181            .map(|c| (c.major_version().expect("invalid major version"), c))
182            .filter_map(|t| {
183                if is_supported_major_version(t.0) {
184                    Some(t)
185                } else {
186                    println!(
187                        "cargo:warning={} contains a configuration for pg{}, which pgrx does not support.",
188                        Pgrx::config_toml()
189                            .expect("Could not get PGRX configuration TOML")
190                            .to_string_lossy(),
191                        t.0
192                    );
193                    None
194                }
195            })
196            .collect()
197    } else {
198        let mut found = Vec::new();
199        for pgver in SUPPORTED_VERSIONS() {
200            if env_tracked(&format!("CARGO_FEATURE_PG{}", pgver.major)).is_some() {
201                found.push(pgver);
202            }
203        }
204        let found_ver = match &found[..] {
205            [ver] => ver,
206            [] => {
207                return Err(eyre!(
208                    "Did not find `pg$VERSION` feature. `pgrx-pg-sys` requires one of {} to be set",
209                    SUPPORTED_VERSIONS()
210                        .iter()
211                        .map(|pgver| format!("`pg{}`", pgver.major))
212                        .collect::<Vec<_>>()
213                        .join(", ")
214                ))
215            }
216            versions => {
217                return Err(eyre!(
218                    "Multiple `pg$VERSION` features found.\n`--no-default-features` may be required.\nFound: {}",
219                    versions
220                        .iter()
221                        .map(|version| format!("pg{}", version.major))
222                        .collect::<Vec<String>>()
223                        .join(", ")
224                ))
225            }
226        };
227
228        let found_major = found_ver.major;
229        if let Ok(pg_config) = PgConfig::from_env() {
230            let major_version = pg_config.major_version()?;
231
232            if major_version != found_major {
233                panic!("Feature flag `pg{found_major}` does not match version from the environment-described PgConfig (`{major_version}`)")
234            }
235            vec![(major_version, pg_config)]
236        } else {
237            let specific = Pgrx::from_config()?.get(&format!("pg{}", found_ver.major))?;
238            vec![(found_ver.major, specific)]
239        }
240    };
241
242    // make sure we're not trying to build any of the yanked postgres versions
243    for (_, pg_config) in &pg_configs {
244        let version = pg_config.get_version()?;
245        if YANKED_POSTGRES_VERSIONS.contains(&version) {
246            panic!("Postgres v{}{} is incompatible with \
247                    other versions in this major series and is not supported by pgrx.  Please upgrade \
248                    to the latest version in the v{} series.", version.major, version.minor, version.major);
249        }
250    }
251
252    std::thread::scope(|scope| {
253        // This is pretty much either always 1 (normally) or 5 (for releases),
254        // but in the future if we ever have way more, we should consider
255        // chunking `pg_configs` based on `thread::available_parallelism()`.
256        let threads = pg_configs
257            .iter()
258            .map(|(pg_major_ver, pg_config)| {
259                scope.spawn(|| {
260                    generate_bindings(
261                        *pg_major_ver,
262                        pg_config,
263                        &build_paths,
264                        is_for_release,
265                        compile_cshim,
266                    )
267                })
268            })
269            .collect::<Vec<_>>();
270        // Most of the rest of this is just for better error handling --
271        // `thread::scope` already joins the threads for us before it returns.
272        let results = threads
273            .into_iter()
274            .map(|thread| thread.join().expect("thread panicked while generating bindings"))
275            .collect::<Vec<eyre::Result<_>>>();
276        results.into_iter().try_for_each(|r| r)
277    })?;
278
279    if compile_cshim {
280        // compile the cshim for each binding
281        for (_version, pg_config) in pg_configs {
282            build_shim(&build_paths.shim_src, &build_paths.shim_dst, &pg_config)?;
283        }
284    }
285
286    Ok(())
287}
288
289fn emit_rerun_if_changed() {
290    // `pgrx-pg-config` doesn't emit one for this.
291    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_PATH");
292    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_AS_ENV");
293    // Bindgen's behavior depends on these vars, but it doesn't emit them
294    // directly because the output would cause issue with `bindgen-cli`. Do it
295    // on bindgen's behalf.
296    println!("cargo:rerun-if-env-changed=LLVM_CONFIG_PATH");
297    println!("cargo:rerun-if-env-changed=LIBCLANG_PATH");
298    println!("cargo:rerun-if-env-changed=LIBCLANG_STATIC_PATH");
299    // Follows the logic bindgen uses here, more or less.
300    // https://github.com/rust-lang/rust-bindgen/blob/e6dd2c636/bindgen/lib.rs#L2918
301    println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
302    if let Some(target) = env_tracked("TARGET") {
303        println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{target}");
304        println!(
305            "cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{}",
306            target.replace('-', "_"),
307        );
308    }
309
310    // don't want to get stuck always generating bindings
311    println!("cargo:rerun-if-env-changed=PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE");
312
313    println!("cargo:rerun-if-changed=include");
314    println!("cargo:rerun-if-changed=pgrx-cshim.c");
315
316    if let Ok(pgrx_config) = Pgrx::config_toml() {
317        println!("cargo:rerun-if-changed={}", pgrx_config.display());
318    }
319}
320
321fn generate_bindings(
322    major_version: u16,
323    pg_config: &PgConfig,
324    build_paths: &BuildPaths,
325    is_for_release: bool,
326    enable_cshim: bool,
327) -> eyre::Result<()> {
328    let mut include_h = build_paths.manifest_dir.clone();
329    include_h.push("include");
330    include_h.push(format!("pg{major_version}.h"));
331
332    let bindgen_output = get_bindings(major_version, pg_config, &include_h, enable_cshim)
333        .wrap_err_with(|| format!("bindgen failed for pg{major_version}"))?;
334
335    let oids = extract_oids(&bindgen_output);
336    let rewritten_items = rewrite_items(&bindgen_output, &oids)
337        .wrap_err_with(|| format!("failed to rewrite items for pg{major_version}"))?;
338    let oids = format_builtin_oid_impl(oids);
339
340    let dest_dirs = if is_for_release {
341        vec![build_paths.out_dir.clone(), build_paths.src_dir.clone()]
342    } else {
343        vec![build_paths.out_dir.clone()]
344    };
345    for dest_dir in dest_dirs {
346        let mut bindings_file = dest_dir.clone();
347        bindings_file.push(&format!("pg{major_version}.rs"));
348        write_rs_file(
349            rewritten_items.clone(),
350            &bindings_file,
351            quote! {
352                use crate as pg_sys;
353                use crate::{Datum, Oid, PgNode};
354            },
355            is_for_release,
356        )
357        .wrap_err_with(|| {
358            format!(
359                "Unable to write bindings file for pg{} to `{}`",
360                major_version,
361                bindings_file.display()
362            )
363        })?;
364
365        let mut oids_file = dest_dir.clone();
366        oids_file.push(&format!("pg{major_version}_oids.rs"));
367        write_rs_file(oids.clone(), &oids_file, quote! {}, is_for_release).wrap_err_with(|| {
368            format!(
369                "Unable to write oids file for pg{} to `{}`",
370                major_version,
371                oids_file.display()
372            )
373        })?;
374    }
375    Ok(())
376}
377
/// The filesystem locations the build reads from and writes to, derived from
/// Cargo's environment by `BuildPaths::from_env`.
#[derive(Debug, Clone)]
struct BuildPaths {
    /// CARGO_MANIFEST_DIR
    manifest_dir: PathBuf,
    /// OUT_DIR
    out_dir: PathBuf,
    /// {manifest_dir}/src/include
    src_dir: PathBuf,
    /// {manifest_dir}/pgrx-cshim.c
    shim_src: PathBuf,
    /// {out_dir}/pgrx-cshim.c
    shim_dst: PathBuf,
}
391
392impl BuildPaths {
393    fn from_env() -> Self {
394        // Cargo guarantees these are provided, so unwrap is fine.
395        let manifest_dir = env_tracked("CARGO_MANIFEST_DIR").map(PathBuf::from).unwrap();
396        let out_dir = env_tracked("OUT_DIR").map(PathBuf::from).unwrap();
397        Self {
398            src_dir: manifest_dir.join("src/include"),
399            shim_src: manifest_dir.join("pgrx-cshim.c"),
400            shim_dst: out_dir.join("pgrx-cshim.c"),
401            out_dir,
402            manifest_dir,
403        }
404    }
405}
406
407fn write_rs_file(
408    code: proc_macro2::TokenStream,
409    file_path: &Path,
410    header: proc_macro2::TokenStream,
411    is_for_release: bool,
412) -> eyre::Result<()> {
413    use std::io::Write;
414    let mut contents = header;
415    contents.extend(code);
416    let mut file = fs::File::create(file_path)?;
417    write!(file, "/* Automatically generated by bindgen. Do not hand-edit.")?;
418    if is_for_release {
419        write!(
420            file,
421            "\n
422        This code is generated for documentation purposes, so that it is
423        easy to reference on docs.rs. Bindings are regenerated for your
424        build of pgrx, and the values of your Postgres version may differ.
425        */"
426        )
427    } else {
428        write!(file, " */")
429    }?;
430    write!(file, "{contents}")?;
431    rust_fmt(file_path)
432}
433
434/// Given a token stream representing a file, apply a series of transformations to munge
435/// the bindgen generated code with some postgres specific enhancements
436fn rewrite_items(
437    file: &syn::File,
438    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
439) -> eyre::Result<proc_macro2::TokenStream> {
440    let items_vec = rewrite_oid_consts(&file.items, oids);
441    let mut items = apply_pg_guard(&items_vec)?;
442    let pgnode_impls = impl_pg_node(&items_vec)?;
443
444    // append the pgnodes to the set of items
445    items.extend(pgnode_impls);
446
447    Ok(items)
448}
449
450/// Find all the constants that represent Postgres type OID values.
451///
452/// These are constants of type `u32` whose name ends in the string "OID"
453fn extract_oids(code: &syn::File) -> BTreeMap<syn::Ident, Box<syn::Expr>> {
454    let mut oids = BTreeMap::new(); // we would like to have a nice sorted set
455    for item in &code.items {
456        let Item::Const(ItemConst { ident, ty, expr, .. }) = item else { continue };
457        // Retype as strings for easy comparison
458        let name = ident.to_string();
459        let ty_str = ty.to_token_stream().to_string();
460
461        // This heuristic identifies "OIDs"
462        // We're going to warp the const declarations to be our newtype Oid
463        if ty_str == "u32" && is_builtin_oid(&name) {
464            oids.insert(ident.clone(), expr.clone());
465        }
466    }
467    oids
468}
469
/// Decides from the name alone whether a constant is treated as a builtin OID.
///
/// Accepted: names ending in `OID` (except `HEAP_HASOID`), names ending in
/// `RelationId`, and the exact name `TemplateDbOid`.
fn is_builtin_oid(name: &str) -> bool {
    match name {
        "HEAP_HASOID" => false,
        "TemplateDbOid" => true,
        _ => name.ends_with("OID") || name.ends_with("RelationId"),
    }
}
475
476fn rewrite_oid_consts(
477    items: &[syn::Item],
478    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
479) -> Vec<syn::Item> {
480    items
481        .iter()
482        .map(|item| match item {
483            Item::Const(ItemConst { ident, ty, expr, .. })
484                if ty.to_token_stream().to_string() == "u32" && oids.get(ident) == Some(expr) =>
485            {
486                syn::parse2(quote! { pub const #ident : Oid = Oid(#expr); }).unwrap()
487            }
488            item => item.clone(),
489        })
490        .collect()
491}
492
493fn format_builtin_oid_impl(oids: BTreeMap<syn::Ident, Box<syn::Expr>>) -> proc_macro2::TokenStream {
494    let enum_variants: proc_macro2::TokenStream;
495    let from_impl: proc_macro2::TokenStream;
496    (enum_variants, from_impl) = oids
497        .iter()
498        .map(|(ident, expr)| {
499            (quote! { #ident = #expr, }, quote! { #expr => Ok(BuiltinOid::#ident), })
500        })
501        .unzip();
502
503    quote! {
504        use crate::{NotBuiltinOid};
505
506        #[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
507        pub enum BuiltinOid {
508            #enum_variants
509        }
510
511        impl BuiltinOid {
512            pub const fn from_u32(uint: u32) -> Result<BuiltinOid, NotBuiltinOid> {
513                match uint {
514                    0 => Err(NotBuiltinOid::Invalid),
515                    #from_impl
516                    _ => Err(NotBuiltinOid::Ambiguous),
517                }
518            }
519        }
520    }
521}
522
523/// Implement our `PgNode` marker trait for `pg_sys::Node` and its "subclasses"
524fn impl_pg_node(items: &[syn::Item]) -> eyre::Result<proc_macro2::TokenStream> {
525    let mut pgnode_impls = proc_macro2::TokenStream::new();
526
527    // we scope must of the computation so we can borrow `items` and then
528    // extend it at the very end.
529    let struct_graph: StructGraph = StructGraph::from(items);
530
531    // collect all the structs with `NodeTag` as their first member,
532    // these will serve as roots in our forest of `Node`s
533    let mut root_node_structs = Vec::new();
534    for descriptor in struct_graph.descriptors.iter() {
535        // grab the first field, if any
536        let first_field = match &descriptor.struct_.fields {
537            syn::Fields::Named(fields) => {
538                if let Some(first_field) = fields.named.first() {
539                    first_field
540                } else {
541                    continue;
542                }
543            }
544            syn::Fields::Unnamed(fields) => {
545                if let Some(first_field) = fields.unnamed.first() {
546                    first_field
547                } else {
548                    continue;
549                }
550            }
551            _ => continue,
552        };
553
554        // grab the type name of the first field
555        let ty_name = if let syn::Type::Path(p) = &first_field.ty {
556            if let Some(last_segment) = p.path.segments.last() {
557                last_segment.ident.to_string()
558            } else {
559                continue;
560            }
561        } else {
562            continue;
563        };
564
565        if ty_name == "NodeTag" {
566            root_node_structs.push(descriptor);
567        }
568    }
569
570    // the set of types which subclass `Node` according to postgres' object system
571    let mut node_set = BTreeSet::new();
572    // fill in any children of the roots with a recursive DFS
573    // (we are not operating on user input, so it is ok to just
574    //  use direct recursion rather than an explicit stack).
575    for root in root_node_structs.into_iter() {
576        dfs_find_nodes(root, &struct_graph, &mut node_set);
577    }
578
579    // now we can finally iterate the Nodes and emit out Display impl
580    for node_struct in node_set.into_iter() {
581        let struct_name = &node_struct.struct_.ident;
582
583        // impl the PgNode trait for all nodes
584        pgnode_impls.extend(quote! {
585            impl pg_sys::seal::Sealed for #struct_name {}
586            impl pg_sys::PgNode for #struct_name {}
587        });
588
589        // impl Rust's Display trait for all nodes
590        pgnode_impls.extend(quote! {
591            impl ::core::fmt::Display for #struct_name {
592                fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
593                    self.display_node().fmt(f)
594                }
595            }
596        });
597    }
598
599    Ok(pgnode_impls)
600}
601
602/// Given a root node, dfs_find_nodes adds all its children nodes to `node_set`.
603fn dfs_find_nodes<'graph>(
604    node: &'graph StructDescriptor<'graph>,
605    graph: &'graph StructGraph<'graph>,
606    node_set: &mut BTreeSet<StructDescriptor<'graph>>,
607) {
608    node_set.insert(node.clone());
609
610    for child in node.children(graph) {
611        if node_set.contains(child) {
612            continue;
613        }
614        dfs_find_nodes(child, graph, node_set);
615    }
616}
617
/// A graph describing the inheritance relationships between different nodes
/// according to postgres' object system.
///
/// A struct is recorded as a child of another struct when the type of its first field
/// names that other struct (see the `From<&[syn::Item]>` impl).
///
/// NOTE: the borrowed lifetime on a StructGraph should also ensure that the offsets
///       it stores into the underlying items slice are always correct.
#[derive(Clone, Debug)]
struct StructGraph<'a> {
    #[allow(dead_code)]
    /// A table mapping struct names to their offset in the descriptor table
    name_tab: HashMap<String, usize>,
    #[allow(dead_code)]
    /// A table mapping offsets into the underlying items table to offsets in the descriptor
    /// table (`None` for items that are not structs)
    item_offset_tab: Vec<Option<usize>>,
    /// A table of struct descriptors
    descriptors: Vec<StructDescriptor<'a>>,
}
634
635impl<'a> From<&'a [syn::Item]> for StructGraph<'a> {
636    fn from(items: &'a [syn::Item]) -> StructGraph<'a> {
637        let mut descriptors = Vec::new();
638
639        // a table mapping struct names to their offset in `descriptors`
640        let mut name_tab: HashMap<String, usize> = HashMap::new();
641        let mut item_offset_tab: Vec<Option<usize>> = vec![None; items.len()];
642        for (i, item) in items.iter().enumerate() {
643            if let &syn::Item::Struct(struct_) = &item {
644                let next_offset = descriptors.len();
645                descriptors.push(StructDescriptor {
646                    struct_,
647                    items_offset: i,
648                    parent: None,
649                    children: Vec::new(),
650                });
651                name_tab.insert(struct_.ident.to_string(), next_offset);
652                item_offset_tab[i] = Some(next_offset);
653            }
654        }
655
656        for item in items.iter() {
657            // grab the first field if it is struct
658            let (id, first_field) = match &item {
659                syn::Item::Struct(syn::ItemStruct {
660                    ident: id,
661                    fields: syn::Fields::Named(fields),
662                    ..
663                }) => {
664                    if let Some(first_field) = fields.named.first() {
665                        (id.to_string(), first_field)
666                    } else {
667                        continue;
668                    }
669                }
670                &syn::Item::Struct(syn::ItemStruct {
671                    ident: id,
672                    fields: syn::Fields::Unnamed(fields),
673                    ..
674                }) => {
675                    if let Some(first_field) = fields.unnamed.first() {
676                        (id.to_string(), first_field)
677                    } else {
678                        continue;
679                    }
680                }
681                _ => continue,
682            };
683
684            if let syn::Type::Path(p) = &first_field.ty {
685                // We should be guaranteed that just extracting the last path
686                // segment is ok because these structs are all from the same module.
687                // (also, they are all generated from C code, so collisions should be
688                //  impossible anyway thanks to C's single shared namespace).
689                if let Some(last_segment) = p.path.segments.last() {
690                    if let Some(parent_offset) = name_tab.get(&last_segment.ident.to_string()) {
691                        // establish the 2-way link
692                        let child_offset = name_tab[&id];
693                        descriptors[child_offset].parent = Some(*parent_offset);
694                        descriptors[*parent_offset].children.push(child_offset);
695                    }
696                }
697            }
698        }
699
700        StructGraph { name_tab, item_offset_tab, descriptors }
701    }
702}
703
704impl<'a> StructDescriptor<'a> {
705    /// children returns an iterator over the children of this node in the graph
706    fn children(&'a self, graph: &'a StructGraph) -> StructDescriptorChildren<'a> {
707        StructDescriptorChildren { offset: 0, descriptor: self, graph }
708    }
709}
710
/// An iterator over a StructDescriptor's children
struct StructDescriptorChildren<'a> {
    /// Position of the next entry to yield within `descriptor.children`.
    offset: usize,
    /// The descriptor whose children are being iterated.
    descriptor: &'a StructDescriptor<'a>,
    /// The graph whose descriptor table the child offsets index into.
    graph: &'a StructGraph<'a>,
}
717
718impl<'a> std::iter::Iterator for StructDescriptorChildren<'a> {
719    type Item = &'a StructDescriptor<'a>;
720    fn next(&mut self) -> Option<&'a StructDescriptor<'a>> {
721        if self.offset >= self.descriptor.children.len() {
722            None
723        } else {
724            let ret = Some(&self.graph.descriptors[self.descriptor.children[self.offset]]);
725            self.offset += 1;
726            ret
727        }
728    }
729}
730
/// A node in a StructGraph
///
/// Note that ordering (`Ord`/`PartialOrd`) looks only at the struct's identifier, while
/// the derived equality and hash consider every field.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
struct StructDescriptor<'a> {
    /// A reference to the underlying struct syntax node
    struct_: &'a syn::ItemStruct,
    /// An offset into the items slice that was used to construct the struct graph that
    /// this StructDescriptor is a part of
    items_offset: usize,
    /// The offset of the "parent" (first member) struct (if any).
    parent: Option<usize>,
    /// The offsets of the "children" structs (if any).
    children: Vec<usize>,
}
744
745impl PartialOrd for StructDescriptor<'_> {
746    #[inline]
747    fn partial_cmp(&self, other: &StructDescriptor) -> Option<Ordering> {
748        Some(self.cmp(other))
749    }
750}
751
752impl Ord for StructDescriptor<'_> {
753    #[inline]
754    fn cmp(&self, other: &StructDescriptor) -> Ordering {
755        self.struct_.ident.cmp(&other.struct_.ident)
756    }
757}
758
759fn get_bindings(
760    major_version: u16,
761    pg_config: &PgConfig,
762    include_h: &path::Path,
763    enable_cshim: bool,
764) -> eyre::Result<syn::File> {
765    let bindings = if let Some(info_dir) =
766        target_env_tracked(&format!("PGRX_TARGET_INFO_PATH_PG{major_version}"))
767    {
768        let bindings_file = format!("{info_dir}/pg{major_version}_raw_bindings.rs");
769        std::fs::read_to_string(&bindings_file)
770            .wrap_err_with(|| format!("failed to read raw bindings from {bindings_file}"))?
771    } else {
772        let bindings = run_bindgen(major_version, pg_config, include_h, enable_cshim)?;
773        if let Some(path) = env_tracked("PGRX_PG_SYS_EXTRA_OUTPUT_PATH") {
774            std::fs::write(path, &bindings)?;
775        }
776        bindings
777    };
778    syn::parse_file(bindings.as_str()).wrap_err_with(|| "failed to parse generated bindings")
779}
780
781/// Given a specific postgres version, `run_bindgen` generates bindings for the given
782/// postgres version and returns them as a token stream.
783fn run_bindgen(
784    major_version: u16,
785    pg_config: &PgConfig,
786    include_h: &path::Path,
787    enable_cshim: bool,
788) -> eyre::Result<String> {
789    eprintln!("Generating bindings for pg{major_version}");
790    let configure = pg_config.configure()?;
791    let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
792    eprintln!("pg_config --configure CLANG = {preferred_clang:?}");
793    let (autodetect, includes) = clang::detect_include_paths_for(preferred_clang);
794    let mut binder = bindgen::Builder::default();
795    binder = add_blocklists(binder);
796    binder = add_derives(binder);
797    if !autodetect {
798        let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
799        binder = binder.clang_args(builtin_includes);
800    };
801    let enum_names = Rc::new(RefCell::new(BTreeMap::new()));
802    let overrides = BindingOverride::new_from(Rc::clone(&enum_names));
803    let out_path = PathBuf::from(std::env::var("OUT_DIR").unwrap());
804    let bindings = binder
805        .header(include_h.display().to_string())
806        .clang_args(extra_bindgen_clang_args(pg_config)?)
807        .clang_args(pg_target_include_flags(major_version, pg_config)?)
808        .detect_include_paths(autodetect)
809        .parse_callbacks(Box::new(overrides))
810        .default_enum_style(bindgen::EnumVariation::ModuleConsts)
811        // The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
812        .rustified_non_exhaustive_enum("NodeTag")
813        .size_t_is_usize(true)
814        .merge_extern_blocks(true)
815        .wrap_unsafe_ops(true)
816        .use_core()
817        .generate_cstr(true)
818        .disable_nested_struct_naming()
819        .formatter(bindgen::Formatter::None)
820        .layout_tests(false)
821        .default_non_copy_union_style(NonCopyUnionStyle::ManuallyDrop)
822        .wrap_static_fns(enable_cshim)
823        .wrap_static_fns_path(out_path.join("pgrx-cshim-static"))
824        .wrap_static_fns_suffix("__pgrx_cshim")
825        .generate()
826        .wrap_err_with(|| format!("Unable to generate bindings for pg{major_version}"))?;
827    let mut binding_str = bindings.to_string();
828    drop(bindings); // So the Rc::into_inner can unwrap
829
830    // FIXME: do this for the Node graph instead of reparsing?
831    let enum_names: EnumMap = Rc::into_inner(enum_names).unwrap().into_inner();
832    binding_str.extend(enum_names.into_iter().flat_map(|(name, variants)| {
833        const MIN_I32: i64 = i32::MIN as _;
834        const MAX_I32: i64 = i32::MAX as _;
835        const MAX_U32: u64 = u32::MAX as _;
836        variants.into_iter().map(move |(variant, value)| {
837            let (ty, value) = match value {
838                EnumVariantValue::Boolean(b) => ("bool", b.to_string()),
839                EnumVariantValue::Signed(v @ MIN_I32..=MAX_I32) => ("i32", v.to_string()),
840                EnumVariantValue::Signed(v) => ("i64", v.to_string()),
841                EnumVariantValue::Unsigned(v @ 0..=MAX_U32) => ("u32", v.to_string()),
842                EnumVariantValue::Unsigned(v) => ("u64", v.to_string()),
843            };
844            format!(
845                r#"
846#[deprecated(since = "0.12.0", note = "you want pg_sys::{module}::{variant}")]
847pub const {module}_{variant}: {ty} = {value};"#,
848                module = &*name, // imprecise closure capture
849            )
850        })
851    }));
852
853    Ok(binding_str)
854}
855
856fn add_blocklists(bind: bindgen::Builder) -> bindgen::Builder {
857    bind.blocklist_type("Datum") // manually wrapping datum for correctness
858        .blocklist_type("Oid") // "Oid" is not just any u32
859        .blocklist_function("varsize_any") // pgrx converts the VARSIZE_ANY macro, so we don't want to also have this function, which is in heaptuple.c
860        .blocklist_function("(?:raw_)?(?:expression|query|query_or_expression)_tree_walker")
861        .blocklist_function("planstate_tree_walker")
862        .blocklist_function("range_table_(?:entry_)?walker")
863        .blocklist_function(".*(?:set|long)jmp")
864        .blocklist_function("pg_re_throw")
865        .blocklist_function("err(start|code|msg|detail|context_msg|hint|finish)")
866        .blocklist_var("CONFIGURE_ARGS") // configuration during build is hopefully irrelevant
867        .blocklist_var("_*(?:HAVE|have)_.*") // header tracking metadata
868        .blocklist_var("_[A-Z_]+_H") // more header metadata
869        .blocklist_item("__[A-Z].*") // these are reserved and unused by Postgres
870        .blocklist_item("__darwin.*") // this should always be Apple's names
871        .blocklist_function("pq(?:Strerror|Get.*)") // wrappers around platform functions: user can call those themselves
872        .blocklist_function("log")
873        .blocklist_item(".*pthread.*)") // shims for pthreads on non-pthread systems, just use std::thread
874        .blocklist_item(".*(?i:va)_(?i:list|start|end|copy).*") // do not need va_list anything!
875        .blocklist_function("(?:pg_|p)v(?:sn?|f)?printf")
876        .blocklist_function("appendStringInfoVA")
877        .blocklist_file("stdarg.h")
878        // these cause cause warnings, errors, or deprecations on some systems,
879        // and are not useful for us.
880        .blocklist_function("(?:sigstack|sigreturn|siggetmask|gets|vfork|te?mpnam(?:_r)?|mktemp)")
881        // Missing on some systems, despite being in their headers.
882        .blocklist_function("inet_net_pton.*")
883        // To make it work without `cshim`
884        .blocklist_function("heap_getattr")
885        .blocklist_function("BufferGetBlock")
886        .blocklist_function("BufferGetPage")
887        .blocklist_function("BufferIsLocal")
888        .blocklist_function("GetMemoryChunkContext")
889        .blocklist_function("GETSTRUCT")
890        .blocklist_function("MAXALIGN")
891        .blocklist_function("MemoryContextIsValid")
892        .blocklist_function("MemoryContextSwitchTo")
893        .blocklist_function("TYPEALIGN")
894        .blocklist_function("TransactionIdIsNormal")
895        .blocklist_function("expression_tree_walker")
896        .blocklist_function("get_pg_major_minor_version_string")
897        .blocklist_function("get_pg_major_version_num")
898        .blocklist_function("get_pg_major_version_string")
899        .blocklist_function("get_pg_version_string")
900        .blocklist_function("heap_tuple_get_struct")
901        .blocklist_function("planstate_tree_walker")
902        .blocklist_function("query_or_expression_tree_walker")
903        .blocklist_function("query_tree_walker")
904        .blocklist_function("range_table_entry_walker")
905        .blocklist_function("range_table_walker")
906        .blocklist_function("raw_expression_tree_walker")
907        .blocklist_function("type_is_array")
908        // These structs contains array that is larger than 32
909        // so that `derive(Default)` would fail.
910        .blocklist_type("tagMONITORINFOEXA")
911        .blocklist_type("MONITORINFOEXA")
912        .blocklist_type("LPMONITORINFOEXA")
913        .blocklist_type("MONITORINFOEX")
914        .blocklist_type("LPMONITORINFOEX")
915        .blocklist_function("ua_.*") // this should be Windows's names
916}
917
918fn add_derives(bind: bindgen::Builder) -> bindgen::Builder {
919    bind.derive_debug(true)
920        .derive_copy(true)
921        .derive_default(true)
922        .derive_eq(false)
923        .derive_partialeq(false)
924        .derive_hash(false)
925        .derive_ord(false)
926        .derive_partialord(false)
927}
928
/// Reads the environment variable `s` and, unless it is one that cargo manages,
/// prints a `cargo:rerun-if-env-changed` directive for it first.
///
/// Returns `None` when the variable is unset or its value is not valid Unicode.
fn env_tracked(s: &str) -> Option<String> {
    // Environment variable keys that cargo might set and that we don't need to track,
    // picked by hand from
    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
    // The list must stay **sorted**: it is searched with `binary_search` below.
    const CARGO_KEYS: &[&str] = &[
        "BROWSER",
        "DEBUG",
        "DOCS_RS",
        "HOST",
        "HTTP_PROXY",
        "HTTP_TIMEOUT",
        "NUM_JOBS",
        "OPT_LEVEL",
        "OUT_DIR",
        "PATH",
        "PROFILE",
        "TARGET",
        "TERM",
    ];

    let has_cargo_prefix = ["CARGO", "RUST"].iter().any(|&prefix| s.starts_with(prefix));
    let cargo_managed = has_cargo_prefix || CARGO_KEYS.binary_search(&s).is_ok();

    // For variables cargo hands us we deliberately emit nothing: cargo figures those out
    // for itself, and asking it to track them seems, depending on the key, to make it
    // rerun build.rs every time.
    if cargo_managed {
        return std::env::var(s).ok();
    }

    println!("cargo:rerun-if-env-changed={s}");
    std::env::var(s).ok()
}
959
960fn target_env_tracked(s: &str) -> Option<String> {
961    let target = env_tracked("TARGET").unwrap();
962    env_tracked(&format!("{s}_{target}")).or_else(|| env_tracked(s))
963}
964
965/// Returns `Err` if `pg_config` errored, `None` if we should
966fn pg_target_include_flags(pg_version: u16, pg_config: &PgConfig) -> eyre::Result<Option<String>> {
967    let var = "PGRX_INCLUDEDIR_SERVER";
968    let value =
969        target_env_tracked(&format!("{var}_PG{pg_version}")).or_else(|| target_env_tracked(var));
970    match value {
971        // No configured value: ask `pg_config`.
972        None => Ok(Some(format!("-I{}", pg_config.includedir_server()?.display()))),
973        // Configured to empty string: assume bindgen is getting it some other
974        // way, pass nothing.
975        Some(overridden) if overridden.is_empty() => Ok(None),
976        // Configured to non-empty string: pass to bindgen
977        Some(overridden) => Ok(Some(format!("-I{overridden}"))),
978    }
979}
980
981fn build_shim(
982    shim_src: &path::Path,
983    shim_dst: &path::Path,
984    pg_config: &PgConfig,
985) -> eyre::Result<()> {
986    let major_version = pg_config.major_version()?;
987
988    std::fs::copy(shim_src, shim_dst).unwrap();
989
990    let mut build = cc::Build::new();
991    if let Some(flag) = pg_target_include_flags(major_version, pg_config)? {
992        build.flag(&flag);
993    }
994    for flag in extra_bindgen_clang_args(pg_config)? {
995        build.flag(&flag);
996    }
997    build.file(shim_dst);
998    build.compile("pgrx-cshim");
999    Ok(())
1000}
1001
1002fn extra_bindgen_clang_args(pg_config: &PgConfig) -> eyre::Result<Vec<String>> {
1003    let mut out = vec![];
1004    if env_tracked("CARGO_CFG_TARGET_OS").as_deref() == Some("macos") {
1005        // On macOS, find the `-isysroot` arg out of the c preprocessor flags,
1006        // to handle the case where bindgen uses a libclang isn't provided by
1007        // the system.
1008        let flags = pg_config.cppflags()?;
1009        // In practice this will always be valid UTF-8 because of how the
1010        // `pgrx-pg-config` crate is implemented, but even if it were not, the
1011        // problem won't be with flags we are interested in.
1012        let flags = shlex::split(&flags.to_string_lossy()).unwrap_or_default();
1013        // Just give clang the full flag set, since presumably that's what we're
1014        // getting when we build the C shim anyway.
1015        out.extend(flags.iter().cloned());
1016
1017        // Find the `-isysroot` flags so we can warn about them, so something
1018        // reasonable shows up if/when the build fails.
1019        //
1020        // Eventually we should probably wrangle the sysroot for `cargo pgrx
1021        // init`-installed PGs a bit more aggressively, but for now, whatever.
1022        for pair in flags.windows(2) {
1023            if pair[0] == "-isysroot" {
1024                if !std::path::Path::new(&pair[1]).exists() {
1025                    // The SDK path doesn't exist. Emit a warning, which they'll
1026                    // see if the build ends up failing (it may not fail in all
1027                    // cases, so we don't panic here).
1028                    //
1029                    // There's a bunch of smarter things we can try here, but
1030                    // most of them either break things that currently work, or
1031                    // are very difficult to get right. If you try to fix this,
1032                    // be sure to consider cases like:
1033                    //
1034                    // - User may have CommandLineTools and not Xcode, vice
1035                    //   versa, or both installed.
1036                    // - User may using a newer SDK than their OS, or vice
1037                    //   versa.
1038                    // - User may be using a newer SDK than their XCode (updated
1039                    //   Command line tools, not OS), or vice versa.
1040                    // - And so on.
1041                    //
1042                    // These are all actually fairly common. Note that the code
1043                    // as-is is *not* broken in these cases (except on OS/SDK
1044                    // updates), so care should be taken to avoid changing that
1045                    // if possible.
1046                    //
1047                    // The logic we'd like ideally is for `cargo pgrx init` to
1048                    // choose a good SDK in the first place, and force postgres
1049                    // to use it. Then, the logic in this build script would
1050                    // Just Work without changes (since we are using its
1051                    // sysroot verbatim).
1052                    //
1053                    // The value of "Good" here is tricky, but the logic should
1054                    // probably:
1055                    //
1056                    // - prefer SDKs from the CLI tools to ones from XCode
1057                    //   (since they're guaranteed compatible with the user's OS
1058                    //   version)
1059                    //
1060                    // - prefer SDKs that specify only the major SDK version
1061                    //   (e.g. MacOSX12.sdk and not MacOSX12.4.sdk or
1062                    //   MacOSX.sdk), to avoid breaking too frequently (if we
1063                    //   have a minor version) or being totally unable to detect
1064                    //   what version of the SDK was used to build postgres (if
1065                    //   we have neither).
1066                    //
1067                    // - Avoid choosing an SDK newer than the user's OS version,
1068                    //   since postgres fails to detect that they are missing if
1069                    //   you do.
1070                    //
1071                    // This is surprisingly hard to implement, as the
1072                    // information is scattered across a dozen ini files.
1073                    // Presumably Apple assumes you'll use
1074                    // `MACOSX_DEPLOYMENT_TARGET`, rather than basing it off the
1075                    // SDK version, but it's not an option for postgres.
1076                    let major_version = pg_config.major_version()?;
1077                    println!(
1078                        "cargo:warning=postgres v{major_version} was compiled against an \
1079                         SDK Root which does not seem to exist on this machine ({}). You may \
1080                         need to re-run `cargo pgrx init` and/or update your command line tools.",
1081                        pair[1],
1082                    );
1083                };
1084                // Either way, we stop here.
1085                break;
1086            }
1087        }
1088    }
1089    Ok(out)
1090}
1091
1092fn run_command(mut command: &mut Command, version: &str) -> eyre::Result<Output> {
1093    let mut dbg = String::new();
1094
1095    command = command
1096        .env_remove("DEBUG")
1097        .env_remove("MAKEFLAGS")
1098        .env_remove("MAKELEVEL")
1099        .env_remove("MFLAGS")
1100        .env_remove("DYLD_FALLBACK_LIBRARY_PATH")
1101        .env_remove("OPT_LEVEL")
1102        .env_remove("PROFILE")
1103        .env_remove("OUT_DIR")
1104        .env_remove("NUM_JOBS");
1105
1106    eprintln!("[{version}] {command:?}");
1107    dbg.push_str(&format!("[{version}] -------- {command:?} -------- \n"));
1108
1109    let output = command.output()?;
1110    let rc = output.clone();
1111
1112    if !output.stdout.is_empty() {
1113        for line in String::from_utf8(output.stdout).unwrap().lines() {
1114            if line.starts_with("cargo:") {
1115                dbg.push_str(&format!("{line}\n"));
1116            } else {
1117                dbg.push_str(&format!("[{version}] [stdout] {line}\n"));
1118            }
1119        }
1120    }
1121
1122    if !output.stderr.is_empty() {
1123        for line in String::from_utf8(output.stderr).unwrap().lines() {
1124            dbg.push_str(&format!("[{version}] [stderr] {line}\n"));
1125        }
1126    }
1127    dbg.push_str(&format!("[{version}] /----------------------------------------\n"));
1128
1129    eprintln!("{dbg}");
1130    Ok(rc)
1131}
1132
1133// Plausibly it would be better to generate a regex to pass to bindgen for this,
1134// but this is less error-prone for now.
1135static BLOCKLISTED: OnceLock<BTreeSet<&'static str>> = OnceLock::new();
1136fn is_blocklisted_item(item: &ForeignItem) -> bool {
1137    let sym_name = match item {
1138        ForeignItem::Fn(f) => &f.sig.ident,
1139        // We don't *need* to filter statics too (only functions), but it
1140        // doesn't hurt.
1141        ForeignItem::Static(s) => &s.ident,
1142        _ => return false,
1143    };
1144    BLOCKLISTED
1145        .get_or_init(|| sym_blocklist::SYMBOLS.iter().copied().collect::<BTreeSet<&str>>())
1146        .contains(sym_name.to_string().as_str())
1147}
1148
1149fn apply_pg_guard(items: &Vec<syn::Item>) -> eyre::Result<proc_macro2::TokenStream> {
1150    let mut out = proc_macro2::TokenStream::new();
1151    for item in items {
1152        match item {
1153            Item::ForeignMod(block) => {
1154                let abi = &block.abi;
1155                let items = block.items.iter().filter(|&item| !is_blocklisted_item(item));
1156                out.extend(quote! {
1157                    #[pgrx_macros::pg_guard]
1158                    #abi { #(#items)* }
1159                });
1160            }
1161            _ => {
1162                out.extend(item.into_token_stream());
1163            }
1164        }
1165    }
1166
1167    Ok(out)
1168}
1169
1170fn rust_fmt(path: &Path) -> eyre::Result<()> {
1171    // We shouldn't hit this path in a case where we care about it, but... just
1172    // in case we probably should respect RUSTFMT.
1173    let rustfmt = env_tracked("RUSTFMT").unwrap_or_else(|| "rustfmt".into());
1174    let out = run_command(Command::new(rustfmt).arg(path).current_dir("."), "[bindings_diff]");
1175    match out {
1176        Ok(_) => Ok(()),
1177        Err(e)
1178            if e.downcast_ref::<std::io::Error>()
1179                .ok_or(eyre!("Couldn't downcast error ref"))?
1180                .kind()
1181                == std::io::ErrorKind::NotFound =>
1182        {
1183            Err(e).wrap_err("Failed to run `rustfmt`, is it installed?")
1184        }
1185        Err(e) => Err(e),
1186    }
1187}