pgrx_bindgen/
build.rs

1//LICENSE Portions Copyright 2019-2021 ZomboDB, LLC.
2//LICENSE
3//LICENSE Portions Copyright 2021-2023 Technology Concepts & Design, Inc.
4//LICENSE
5//LICENSE Portions Copyright 2023-2023 PgCentral Foundation, Inc. <contact@pgcentral.org>
6//LICENSE
7//LICENSE All rights reserved.
8//LICENSE
9//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file.
10use bindgen::callbacks::{DeriveTrait, EnumVariantValue, ImplementsTrait, MacroParsingBehavior};
11use bindgen::NonCopyUnionStyle;
12use eyre::{eyre, WrapErr};
13use pgrx_pg_config::{
14    is_supported_major_version, PgConfig, PgConfigSelector, PgMinorVersion, PgVersion, Pgrx,
15    SUPPORTED_VERSIONS,
16};
17use quote::{quote, ToTokens};
18use std::cell::RefCell;
19use std::cmp::Ordering;
20use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
21use std::fs;
22use std::path::{self, Path, PathBuf}; // disambiguate path::Path and syn::Type::Path
23use std::process::{Command, Output};
24use std::rc::Rc;
25use syn::{Item, ItemConst};
26
/// Type names that `BindingOverride::blocklisted_type_implements_trait` answers for.
/// Any name not listed here gets `None` from that callback.
const BLOCKLISTED_TYPES: [&str; 3] = ["Datum", "NullableDatum", "Oid"];
28
/// Postgres releases that pgrx refuses to compile against.
///
/// These versions were effectively "yanked" by the community, even though they still exist
/// in the wild. `main` panics when a selected `pg_config` reports one of them.
const YANKED_POSTGRES_VERSIONS: &[PgVersion] = &[
    // this set of releases introduced an ABI break in the [`pg_sys::ResultRelInfo`] struct
    // and was replaced by the community on 2024-11-21
    // https://www.postgresql.org/about/news/postgresql-172-166-1510-1415-1318-and-1222-released-2965/
    PgVersion::new(17, PgMinorVersion::Release(1), None),
    PgVersion::new(16, PgMinorVersion::Release(5), None),
    PgVersion::new(15, PgMinorVersion::Release(9), None),
    PgVersion::new(14, PgMinorVersion::Release(14), None),
    PgVersion::new(13, PgMinorVersion::Release(17), None),
    PgVersion::new(12, PgMinorVersion::Release(21), None),
];
42
43pub(super) mod clang;
44
/// State behind the bindgen `ParseCallbacks` implementation: a set of macro names to
/// skip and a shared map that collects enum variants as bindgen reports them.
#[derive(Debug)]
struct BindingOverride {
    /// Macro names for which `will_parse_macro` returns `MacroParsingBehavior::Ignore`.
    ignore_macros: HashSet<&'static str>,
    /// Shared map that `enum_variant_behavior` appends `(variant name, value)` pairs to,
    /// keyed by enum name.
    enum_names: InnerMut<EnumMap>,
}
50
/// Single-threaded shared, interior-mutable handle (`Rc<RefCell<T>>`).
type InnerMut<T> = Rc<RefCell<T>>;
/// Enum name mapped to the `(variant name, variant value)` pairs recorded for it.
type EnumMap = BTreeMap<String, Vec<(String, EnumVariantValue)>>;
53
54impl BindingOverride {
55    fn new_from(enum_names: InnerMut<EnumMap>) -> Self {
56        // these cause duplicate definition problems on linux
57        // see: https://github.com/rust-lang/rust-bindgen/issues/687
58        BindingOverride {
59            ignore_macros: HashSet::from_iter([
60                "FP_INFINITE",
61                "FP_NAN",
62                "FP_NORMAL",
63                "FP_SUBNORMAL",
64                "FP_ZERO",
65                "IPPORT_RESERVED",
66                // These are just annoying due to clippy
67                "M_E",
68                "M_LOG2E",
69                "M_LOG10E",
70                "M_LN2",
71                "M_LN10",
72                "M_PI",
73                "M_PI_2",
74                "M_PI_4",
75                "M_1_PI",
76                "M_2_PI",
77                "M_SQRT2",
78                "M_SQRT1_2",
79                "M_2_SQRTPI",
80            ]),
81            enum_names,
82        }
83    }
84}
85
86impl bindgen::callbacks::ParseCallbacks for BindingOverride {
87    fn will_parse_macro(&self, name: &str) -> MacroParsingBehavior {
88        if self.ignore_macros.contains(name) {
89            bindgen::callbacks::MacroParsingBehavior::Ignore
90        } else {
91            bindgen::callbacks::MacroParsingBehavior::Default
92        }
93    }
94
95    fn blocklisted_type_implements_trait(
96        &self,
97        name: &str,
98        derive_trait: DeriveTrait,
99    ) -> Option<ImplementsTrait> {
100        if !BLOCKLISTED_TYPES.contains(&name) {
101            return None;
102        }
103
104        let implements_trait = match derive_trait {
105            DeriveTrait::Copy => ImplementsTrait::Yes,
106            DeriveTrait::Debug => ImplementsTrait::Yes,
107            _ => ImplementsTrait::No,
108        };
109        Some(implements_trait)
110    }
111
112    // FIXME: alter types on some int macros to the actually-used types so we can stop as-casting them
113    fn int_macro(&self, _name: &str, _value: i64) -> Option<bindgen::callbacks::IntKind> {
114        None
115    }
116
117    // FIXME: implement a... C compiler?
118    fn func_macro(&self, _name: &str, _value: &[&[u8]]) {}
119
120    /// Intentionally doesn't do anything, just updates internal state.
121    fn enum_variant_behavior(
122        &self,
123        enum_name: Option<&str>,
124        variant_name: &str,
125        variant_value: bindgen::callbacks::EnumVariantValue,
126    ) -> Option<bindgen::callbacks::EnumVariantCustomBehavior> {
127        enum_name.inspect(|name| match name.strip_prefix("enum").unwrap_or(name).trim() {
128            // specifically overridden enum
129            "NodeTag" => return,
130            name if name.contains("unnamed at") || name.contains("anonymous at") => return,
131            // to prevent problems with BuiltinOid
132            _ if variant_name.contains("OID") => return,
133            name => self
134                .enum_names
135                .borrow_mut()
136                .entry(name.to_string())
137                .or_insert(Vec::new())
138                .push((variant_name.to_string(), variant_value)),
139        });
140        None
141    }
142
143    // FIXME: hide nodetag fields and default them to appropriate values
144    fn field_visibility(
145        &self,
146        _info: bindgen::callbacks::FieldInfo<'_>,
147    ) -> Option<bindgen::FieldVisibilityKind> {
148        None
149    }
150}
151
/// Build-script entry point.
///
/// Selects the Postgres configuration(s) to build against, refuses yanked Postgres
/// versions, generates bindings for each configuration on its own scoped thread and,
/// when the `cshim` feature is enabled, builds the C shim for each configuration.
///
/// Returns `Ok(())` immediately when `DOCS_RS` is `1`.
///
/// # Errors
/// Returns an error when zero or more than one `pg$VERSION` feature is set (outside of
/// release generation), or when configuration lookup, binding generation or the shim
/// build fails.
///
/// # Panics
/// Panics when the feature flag and the environment-described `PgConfig` disagree on the
/// major version, when a configured `pg_config` is invalid during release generation,
/// or when a selected Postgres version is listed in `YANKED_POSTGRES_VERSIONS`.
pub fn main() -> eyre::Result<()> {
    if env_tracked("DOCS_RS").as_deref() == Some("1") {
        return Ok(());
    }

    // dump the environment for debugging if asked
    if env_tracked("PGRX_BUILD_VERBOSE").as_deref() == Some("true") {
        for (k, v) in std::env::vars() {
            eprintln!("{k}={v}");
        }
    }

    let compile_cshim = env_tracked("CARGO_FEATURE_CSHIM").as_deref() == Some("1");

    let is_for_release =
        env_tracked("PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE").as_deref() == Some("1");

    let build_paths = BuildPaths::from_env();

    eprintln!("build_paths={build_paths:?}");

    emit_rerun_if_changed();

    // Release generation uses every supported configuration from the pgrx config;
    // a normal build uses exactly the one selected by a `pg$VERSION` cargo feature.
    let pg_configs: Vec<(u16, PgConfig)> = if is_for_release {
        // This does not cross-check config.toml and Cargo.toml versions, as it is release infra.
        Pgrx::from_config()?.iter(PgConfigSelector::All)
            .map(|r| r.expect("invalid pg_config"))
            .map(|c| (c.major_version().expect("invalid major version"), c))
            .filter_map(|t| {
                if is_supported_major_version(t.0) {
                    Some(t)
                } else {
                    println!(
                        "cargo:warning={} contains a configuration for pg{}, which pgrx does not support.",
                        Pgrx::config_toml()
                            .expect("Could not get PGRX configuration TOML")
                            .to_string_lossy(),
                        t.0
                    );
                    None
                }
            })
            .collect()
    } else {
        // collect every supported version whose cargo feature is enabled
        let mut found = Vec::new();
        for pgver in SUPPORTED_VERSIONS() {
            if env_tracked(&format!("CARGO_FEATURE_PG{}", pgver.major)).is_some() {
                found.push(pgver);
            }
        }
        // exactly one feature must be set; anything else is reported as an error
        let found_ver = match &found[..] {
            [ver] => ver,
            [] => {
                return Err(eyre!(
                    "Did not find `pg$VERSION` feature. `pgrx-pg-sys` requires one of {} to be set",
                    SUPPORTED_VERSIONS()
                        .iter()
                        .map(|pgver| format!("`pg{}`", pgver.major))
                        .collect::<Vec<_>>()
                        .join(", ")
                ))
            }
            versions => {
                return Err(eyre!(
                    "Multiple `pg$VERSION` features found.\n`--no-default-features` may be required.\nFound: {}",
                    versions
                        .iter()
                        .map(|version| format!("pg{}", version.major))
                        .collect::<Vec<String>>()
                        .join(", ")
                ))
            }
        };

        let found_major = found_ver.major;
        // prefer a PgConfig described by the environment; otherwise fall back to the
        // entry for this major version in the pgrx config
        if let Ok(pg_config) = PgConfig::from_env() {
            let major_version = pg_config.major_version()?;

            if major_version != found_major {
                panic!("Feature flag `pg{found_major}` does not match version from the environment-described PgConfig (`{major_version}`)")
            }
            vec![(major_version, pg_config)]
        } else {
            let specific = Pgrx::from_config()?.get(&format!("pg{}", found_ver.major))?;
            vec![(found_ver.major, specific)]
        }
    };

    // make sure we're not trying to build any of the yanked postgres versions
    for (_, pg_config) in &pg_configs {
        let version = pg_config.get_version()?;
        if YANKED_POSTGRES_VERSIONS.contains(&version) {
            panic!("Postgres v{}{} is incompatible with \
                    other versions in this major series and is not supported by pgrx.  Please upgrade \
                    to the latest version in the v{} series.", version.major, version.minor, version.major);
        }
    }

    std::thread::scope(|scope| {
        // This is pretty much either always 1 (normally) or 5 (for releases),
        // but in the future if we ever have way more, we should consider
        // chunking `pg_configs` based on `thread::available_parallelism()`.
        let threads = pg_configs
            .iter()
            .map(|(pg_major_ver, pg_config)| {
                scope.spawn(|| {
                    generate_bindings(
                        *pg_major_ver,
                        pg_config,
                        &build_paths,
                        is_for_release,
                        compile_cshim,
                    )
                })
            })
            .collect::<Vec<_>>();
        // Most of the rest of this is just for better error handling --
        // `thread::scope` already joins the threads for us before it returns.
        let results = threads
            .into_iter()
            .map(|thread| thread.join().expect("thread panicked while generating bindings"))
            .collect::<Vec<eyre::Result<_>>>();
        // surface the first failed generation, if any
        results.into_iter().try_for_each(|r| r)
    })?;

    if compile_cshim {
        // compile the cshim for each binding
        for (_version, pg_config) in pg_configs {
            build_shim(&build_paths.shim_src, &build_paths.shim_dst, &pg_config)?;
        }
    }

    Ok(())
}
286
287fn emit_rerun_if_changed() {
288    // `pgrx-pg-config` doesn't emit one for this.
289    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_PATH");
290    println!("cargo:rerun-if-env-changed=PGRX_PG_CONFIG_AS_ENV");
291    // Bindgen's behavior depends on these vars, but it doesn't emit them
292    // directly because the output would cause issue with `bindgen-cli`. Do it
293    // on bindgen's behalf.
294    println!("cargo:rerun-if-env-changed=LLVM_CONFIG_PATH");
295    println!("cargo:rerun-if-env-changed=LIBCLANG_PATH");
296    println!("cargo:rerun-if-env-changed=LIBCLANG_STATIC_PATH");
297    // Follows the logic bindgen uses here, more or less.
298    // https://github.com/rust-lang/rust-bindgen/blob/e6dd2c636/bindgen/lib.rs#L2918
299    println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS");
300    if let Some(target) = env_tracked("TARGET") {
301        println!("cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{target}");
302        println!(
303            "cargo:rerun-if-env-changed=BINDGEN_EXTRA_CLANG_ARGS_{}",
304            target.replace('-', "_"),
305        );
306    }
307
308    // don't want to get stuck always generating bindings
309    println!("cargo:rerun-if-env-changed=PGRX_PG_SYS_GENERATE_BINDINGS_FOR_RELEASE");
310
311    println!("cargo:rerun-if-changed=include");
312    println!("cargo:rerun-if-changed=pgrx-cshim.c");
313
314    if let Ok(pgrx_config) = Pgrx::config_toml() {
315        println!("cargo:rerun-if-changed={}", pgrx_config.display());
316    }
317}
318
319fn generate_bindings(
320    major_version: u16,
321    pg_config: &PgConfig,
322    build_paths: &BuildPaths,
323    is_for_release: bool,
324    enable_cshim: bool,
325) -> eyre::Result<()> {
326    let mut include_h = build_paths.manifest_dir.clone();
327    include_h.push("include");
328    include_h.push(format!("pg{major_version}.h"));
329
330    let bindgen_output = get_bindings(major_version, pg_config, &include_h, enable_cshim)
331        .wrap_err_with(|| format!("bindgen failed for pg{major_version}"))?;
332
333    let oids = extract_oids(&bindgen_output);
334    let rewritten_items = rewrite_items(&bindgen_output, &oids)
335        .wrap_err_with(|| format!("failed to rewrite items for pg{major_version}"))?;
336    let oids = format_builtin_oid_impl(oids);
337
338    let dest_dirs = if is_for_release {
339        vec![build_paths.out_dir.clone(), build_paths.src_dir.clone()]
340    } else {
341        vec![build_paths.out_dir.clone()]
342    };
343    for dest_dir in dest_dirs {
344        let mut bindings_file = dest_dir.clone();
345        bindings_file.push(format!("pg{major_version}.rs"));
346        write_rs_file(
347            rewritten_items.clone(),
348            &bindings_file,
349            quote! {
350                use crate as pg_sys;
351                use crate::{Datum, Oid, PgNode};
352            },
353            is_for_release,
354        )
355        .wrap_err_with(|| {
356            format!(
357                "Unable to write bindings file for pg{} to `{}`",
358                major_version,
359                bindings_file.display()
360            )
361        })?;
362
363        let mut oids_file = dest_dir.clone();
364        oids_file.push(format!("pg{major_version}_oids.rs"));
365        write_rs_file(oids.clone(), &oids_file, quote! {}, is_for_release).wrap_err_with(|| {
366            format!(
367                "Unable to write oids file for pg{} to `{}`",
368                major_version,
369                oids_file.display()
370            )
371        })?;
372    }
373
374    let lib_dir = pg_config.lib_dir()?;
375    println!(
376        "cargo:rustc-link-search={}",
377        lib_dir.to_str().ok_or(eyre!("{lib_dir:?} is not valid UTF-8 string"))?
378    );
379    Ok(())
380}
381
/// Filesystem locations used by the build script, derived from cargo's environment.
#[derive(Debug, Clone)]
struct BuildPaths {
    /// CARGO_MANIFEST_DIR
    manifest_dir: PathBuf,
    /// OUT_DIR
    out_dir: PathBuf,
    /// {manifest_dir}/src/include
    src_dir: PathBuf,
    /// {manifest_dir}/pgrx-cshim.c
    shim_src: PathBuf,
    /// {out_dir}/pgrx-cshim.c
    shim_dst: PathBuf,
}
395
396impl BuildPaths {
397    fn from_env() -> Self {
398        // Cargo guarantees these are provided, so unwrap is fine.
399        let manifest_dir = env_tracked("CARGO_MANIFEST_DIR").map(PathBuf::from).unwrap();
400        let out_dir = env_tracked("OUT_DIR").map(PathBuf::from).unwrap();
401        Self {
402            src_dir: manifest_dir.join("src/include"),
403            shim_src: manifest_dir.join("pgrx-cshim.c"),
404            shim_dst: out_dir.join("pgrx-cshim.c"),
405            out_dir,
406            manifest_dir,
407        }
408    }
409}
410
411fn write_rs_file(
412    code: proc_macro2::TokenStream,
413    file_path: &Path,
414    header: proc_macro2::TokenStream,
415    is_for_release: bool,
416) -> eyre::Result<()> {
417    use std::io::Write;
418    let mut contents = header;
419    contents.extend(code);
420    let mut file = fs::File::create(file_path)?;
421    write!(file, "/* Automatically generated by bindgen. Do not hand-edit.")?;
422    if is_for_release {
423        write!(
424            file,
425            "\n
426        This code is generated for documentation purposes, so that it is
427        easy to reference on docs.rs. Bindings are regenerated for your
428        build of pgrx, and the values of your Postgres version may differ.
429        */"
430        )
431    } else {
432        write!(file, " */")
433    }?;
434    write!(file, "{contents}")?;
435    rust_fmt(file_path)
436}
437
438/// Given a token stream representing a file, apply a series of transformations to munge
439/// the bindgen generated code with some postgres specific enhancements
440fn rewrite_items(
441    file: &syn::File,
442    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
443) -> eyre::Result<proc_macro2::TokenStream> {
444    let items_vec = rewrite_oid_consts(&file.items, oids);
445    let mut items = apply_pg_guard(&items_vec)?;
446    let pgnode_impls = impl_pg_node(&items_vec)?;
447
448    // append the pgnodes to the set of items
449    items.extend(pgnode_impls);
450
451    Ok(items)
452}
453
454/// Find all the constants that represent Postgres type OID values.
455///
456/// These are constants of type `u32` whose name ends in the string "OID"
457fn extract_oids(code: &syn::File) -> BTreeMap<syn::Ident, Box<syn::Expr>> {
458    let mut oids = BTreeMap::new(); // we would like to have a nice sorted set
459    for item in &code.items {
460        let Item::Const(ItemConst { ident, ty, expr, .. }) = item else { continue };
461        // Retype as strings for easy comparison
462        let name = ident.to_string();
463        let ty_str = ty.to_token_stream().to_string();
464
465        // This heuristic identifies "OIDs"
466        // We're going to warp the const declarations to be our newtype Oid
467        if ty_str == "u32" && is_builtin_oid(&name) {
468            oids.insert(ident.clone(), expr.clone());
469        }
470    }
471    oids
472}
473
/// Reports whether a constant name is treated as a built-in OID.
///
/// Accepted names are `TemplateDbOid`, anything ending in `RelationId`, and anything
/// ending in `OID` other than `HEAP_HASOID`.
fn is_builtin_oid(name: &str) -> bool {
    if name == "TemplateDbOid" || name.ends_with("RelationId") {
        return true;
    }
    name != "HEAP_HASOID" && name.ends_with("OID")
}
479
480fn rewrite_oid_consts(
481    items: &[syn::Item],
482    oids: &BTreeMap<syn::Ident, Box<syn::Expr>>,
483) -> Vec<syn::Item> {
484    items
485        .iter()
486        .map(|item| match item {
487            Item::Const(ItemConst { ident, ty, expr, .. })
488                if ty.to_token_stream().to_string() == "u32" && oids.get(ident) == Some(expr) =>
489            {
490                syn::parse2(quote! { pub const #ident : Oid = Oid(#expr); }).unwrap()
491            }
492            item => item.clone(),
493        })
494        .collect()
495}
496
497fn format_builtin_oid_impl(oids: BTreeMap<syn::Ident, Box<syn::Expr>>) -> proc_macro2::TokenStream {
498    let enum_variants: proc_macro2::TokenStream;
499    let from_impl: proc_macro2::TokenStream;
500    (enum_variants, from_impl) = oids
501        .iter()
502        .map(|(ident, expr)| {
503            (quote! { #ident = #expr, }, quote! { #expr => Ok(BuiltinOid::#ident), })
504        })
505        .unzip();
506
507    quote! {
508        use crate::{NotBuiltinOid};
509
510        #[derive(Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd, Debug)]
511        pub enum BuiltinOid {
512            #enum_variants
513        }
514
515        impl BuiltinOid {
516            pub const fn from_u32(uint: u32) -> Result<BuiltinOid, NotBuiltinOid> {
517                match uint {
518                    0 => Err(NotBuiltinOid::Invalid),
519                    #from_impl
520                    _ => Err(NotBuiltinOid::Ambiguous),
521                }
522            }
523        }
524    }
525}
526
527/// Implement our `PgNode` marker trait for `pg_sys::Node` and its "subclasses"
528fn impl_pg_node(items: &[syn::Item]) -> eyre::Result<proc_macro2::TokenStream> {
529    let mut pgnode_impls = proc_macro2::TokenStream::new();
530
531    // we scope must of the computation so we can borrow `items` and then
532    // extend it at the very end.
533    let struct_graph: StructGraph = StructGraph::from(items);
534
535    // collect all the structs with `NodeTag` as their first member,
536    // these will serve as roots in our forest of `Node`s
537    let mut root_node_structs = Vec::new();
538    for descriptor in struct_graph.descriptors.iter() {
539        // grab the first field, if any
540        let first_field = match &descriptor.struct_.fields {
541            syn::Fields::Named(fields) => {
542                if let Some(first_field) = fields.named.first() {
543                    first_field
544                } else {
545                    continue;
546                }
547            }
548            syn::Fields::Unnamed(fields) => {
549                if let Some(first_field) = fields.unnamed.first() {
550                    first_field
551                } else {
552                    continue;
553                }
554            }
555            _ => continue,
556        };
557
558        // grab the type name of the first field
559        let ty_name = if let syn::Type::Path(p) = &first_field.ty {
560            if let Some(last_segment) = p.path.segments.last() {
561                last_segment.ident.to_string()
562            } else {
563                continue;
564            }
565        } else {
566            continue;
567        };
568
569        if ty_name == "NodeTag" {
570            root_node_structs.push(descriptor);
571        }
572    }
573
574    // the set of types which subclass `Node` according to postgres' object system
575    let mut node_set = BTreeSet::new();
576    // fill in any children of the roots with a recursive DFS
577    // (we are not operating on user input, so it is ok to just
578    //  use direct recursion rather than an explicit stack).
579    for root in root_node_structs.into_iter() {
580        dfs_find_nodes(root, &struct_graph, &mut node_set);
581    }
582
583    // now we can finally iterate the Nodes and emit out Display impl
584    for node_struct in node_set.into_iter() {
585        let struct_name = &node_struct.struct_.ident;
586
587        // impl the PgNode trait for all nodes
588        pgnode_impls.extend(quote! {
589            impl pg_sys::seal::Sealed for #struct_name {}
590            impl pg_sys::PgNode for #struct_name {}
591        });
592
593        // impl Rust's Display trait for all nodes
594        pgnode_impls.extend(quote! {
595            impl ::core::fmt::Display for #struct_name {
596                fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
597                    self.display_node().fmt(f)
598                }
599            }
600        });
601    }
602
603    Ok(pgnode_impls)
604}
605
606/// Given a root node, dfs_find_nodes adds all its children nodes to `node_set`.
607fn dfs_find_nodes<'graph>(
608    node: &'graph StructDescriptor<'graph>,
609    graph: &'graph StructGraph<'graph>,
610    node_set: &mut BTreeSet<StructDescriptor<'graph>>,
611) {
612    node_set.insert(node.clone());
613
614    for child in node.children(graph) {
615        if node_set.contains(child) {
616            continue;
617        }
618        dfs_find_nodes(child, graph, node_set);
619    }
620}
621
/// A graph describing the inheritance relationships between different nodes
/// according to postgres' object system.
///
/// A struct is linked as a child of the struct named by the type of its first field.
///
/// NOTE: the borrowed lifetime on a StructGraph should also ensure that the offsets
///       it stores into the underlying items slice are always correct.
#[derive(Clone, Debug)]
struct StructGraph<'a> {
    #[allow(dead_code)]
    /// A table mapping struct names to their offset in the descriptor table
    name_tab: HashMap<String, usize>,
    #[allow(dead_code)]
    /// A table mapping offsets into the underlying items table to offsets in the descriptor table
    /// (`None` for items that are not structs)
    item_offset_tab: Vec<Option<usize>>,
    /// A table of struct descriptors
    descriptors: Vec<StructDescriptor<'a>>,
}
638
639impl<'a> From<&'a [syn::Item]> for StructGraph<'a> {
640    fn from(items: &'a [syn::Item]) -> StructGraph<'a> {
641        let mut descriptors = Vec::new();
642
643        // a table mapping struct names to their offset in `descriptors`
644        let mut name_tab: HashMap<String, usize> = HashMap::new();
645        let mut item_offset_tab: Vec<Option<usize>> = vec![None; items.len()];
646        for (i, item) in items.iter().enumerate() {
647            if let &syn::Item::Struct(struct_) = &item {
648                let next_offset = descriptors.len();
649                descriptors.push(StructDescriptor {
650                    struct_,
651                    items_offset: i,
652                    parent: None,
653                    children: Vec::new(),
654                });
655                name_tab.insert(struct_.ident.to_string(), next_offset);
656                item_offset_tab[i] = Some(next_offset);
657            }
658        }
659
660        for item in items.iter() {
661            // grab the first field if it is struct
662            let (id, first_field) = match &item {
663                syn::Item::Struct(syn::ItemStruct {
664                    ident: id,
665                    fields: syn::Fields::Named(fields),
666                    ..
667                }) => {
668                    if let Some(first_field) = fields.named.first() {
669                        (id.to_string(), first_field)
670                    } else {
671                        continue;
672                    }
673                }
674                &syn::Item::Struct(syn::ItemStruct {
675                    ident: id,
676                    fields: syn::Fields::Unnamed(fields),
677                    ..
678                }) => {
679                    if let Some(first_field) = fields.unnamed.first() {
680                        (id.to_string(), first_field)
681                    } else {
682                        continue;
683                    }
684                }
685                _ => continue,
686            };
687
688            if let syn::Type::Path(p) = &first_field.ty {
689                // We should be guaranteed that just extracting the last path
690                // segment is ok because these structs are all from the same module.
691                // (also, they are all generated from C code, so collisions should be
692                //  impossible anyway thanks to C's single shared namespace).
693                if let Some(last_segment) = p.path.segments.last() {
694                    if let Some(parent_offset) = name_tab.get(&last_segment.ident.to_string()) {
695                        // establish the 2-way link
696                        let child_offset = name_tab[&id];
697                        descriptors[child_offset].parent = Some(*parent_offset);
698                        descriptors[*parent_offset].children.push(child_offset);
699                    }
700                }
701            }
702        }
703
704        StructGraph { name_tab, item_offset_tab, descriptors }
705    }
706}
707
708impl<'a> StructDescriptor<'a> {
709    /// children returns an iterator over the children of this node in the graph
710    fn children(&'a self, graph: &'a StructGraph) -> StructDescriptorChildren<'a> {
711        StructDescriptorChildren { offset: 0, descriptor: self, graph }
712    }
713}
714
/// An iterator over a StructDescriptor's children
struct StructDescriptorChildren<'a> {
    /// Position of the next entry to yield within `descriptor.children`
    offset: usize,
    /// The descriptor whose children are being iterated
    descriptor: &'a StructDescriptor<'a>,
    /// The graph whose descriptor table the child offsets index into
    graph: &'a StructGraph<'a>,
}
721
722impl<'a> std::iter::Iterator for StructDescriptorChildren<'a> {
723    type Item = &'a StructDescriptor<'a>;
724    fn next(&mut self) -> Option<&'a StructDescriptor<'a>> {
725        if self.offset >= self.descriptor.children.len() {
726            None
727        } else {
728            let ret = Some(&self.graph.descriptors[self.descriptor.children[self.offset]]);
729            self.offset += 1;
730            ret
731        }
732    }
733}
734
/// A node in a StructGraph
///
/// Note that the `Ord`/`PartialOrd` impls for this type compare only the struct's
/// identifier, while the derived `Eq`/`Hash` consider every field.
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
struct StructDescriptor<'a> {
    /// A reference to the underlying struct syntax node
    struct_: &'a syn::ItemStruct,
    /// An offset into the items slice that was used to construct the struct graph that
    /// this StructDescriptor is a part of
    items_offset: usize,
    /// The offset of the "parent" (first member) struct (if any).
    parent: Option<usize>,
    /// The offsets of the "children" structs (if any).
    children: Vec<usize>,
}
748
749impl PartialOrd for StructDescriptor<'_> {
750    #[inline]
751    fn partial_cmp(&self, other: &StructDescriptor) -> Option<Ordering> {
752        Some(self.cmp(other))
753    }
754}
755
756impl Ord for StructDescriptor<'_> {
757    #[inline]
758    fn cmp(&self, other: &StructDescriptor) -> Ordering {
759        self.struct_.ident.cmp(&other.struct_.ident)
760    }
761}
762
763fn get_bindings(
764    major_version: u16,
765    pg_config: &PgConfig,
766    include_h: &path::Path,
767    enable_cshim: bool,
768) -> eyre::Result<syn::File> {
769    let bindings = if let Some(info_dir) =
770        target_env_tracked(&format!("PGRX_TARGET_INFO_PATH_PG{major_version}"))
771    {
772        let bindings_file = format!("{info_dir}/pg{major_version}_raw_bindings.rs");
773        std::fs::read_to_string(&bindings_file)
774            .wrap_err_with(|| format!("failed to read raw bindings from {bindings_file}"))?
775    } else {
776        let bindings = run_bindgen(major_version, pg_config, include_h, enable_cshim)?;
777        if let Some(path) = env_tracked("PGRX_PG_SYS_EXTRA_OUTPUT_PATH") {
778            std::fs::write(path, &bindings)?;
779        }
780        bindings
781    };
782    syn::parse_file(bindings.as_str()).wrap_err_with(|| "failed to parse generated bindings")
783}
784
785/// Given a specific postgres version, `run_bindgen` generates bindings for the given
786/// postgres version and returns them as a token stream.
787fn run_bindgen(
788    major_version: u16,
789    pg_config: &PgConfig,
790    include_h: &path::Path,
791    enable_cshim: bool,
792) -> eyre::Result<String> {
793    eprintln!("Generating bindings for pg{major_version}");
794    let configure = pg_config.configure()?;
795    let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
796    eprintln!("pg_config --configure CLANG = {preferred_clang:?}");
797    let (autodetect, includes) = clang::detect_include_paths_for(preferred_clang);
798    let mut binder = bindgen::Builder::default();
799    binder = add_blocklists(binder);
800    binder = binder
801        .allowlist_file(format!("{}.*", pg_target_include(major_version, pg_config)?))
802        .allowlist_item("PGERROR")
803        .allowlist_item("SIG.*");
804    binder = add_derives(binder);
805    if !autodetect {
806        let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
807        binder = binder.clang_args(builtin_includes);
808    };
809    let enum_names = Rc::new(RefCell::new(BTreeMap::new()));
810    let overrides = BindingOverride::new_from(Rc::clone(&enum_names));
811    let out_path = PathBuf::from(std::env::var("OUT_DIR").unwrap());
812    let bindings = binder
813        .header(include_h.display().to_string())
814        .clang_args(extra_bindgen_clang_args(pg_config)?)
815        .clang_arg(format!("-I{}", pg_target_include(major_version, pg_config)?))
816        .detect_include_paths(autodetect)
817        .parse_callbacks(Box::new(overrides))
818        .default_enum_style(bindgen::EnumVariation::ModuleConsts)
819        // The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
820        .rustified_non_exhaustive_enum("NodeTag")
821        .size_t_is_usize(true)
822        .merge_extern_blocks(true)
823        .wrap_unsafe_ops(true)
824        .use_core()
825        .generate_cstr(true)
826        .disable_nested_struct_naming()
827        .formatter(bindgen::Formatter::None)
828        .layout_tests(false)
829        .default_non_copy_union_style(NonCopyUnionStyle::ManuallyDrop)
830        .wrap_static_fns(enable_cshim)
831        .wrap_static_fns_path(out_path.join("pgrx-cshim-static"))
832        .wrap_static_fns_suffix("__pgrx_cshim")
833        .generate()
834        .wrap_err_with(|| format!("Unable to generate bindings for pg{major_version}"))?;
835    let mut binding_str = bindings.to_string();
836    drop(bindings); // So the Rc::into_inner can unwrap
837
838    // FIXME: do this for the Node graph instead of reparsing?
839    let enum_names: EnumMap = Rc::into_inner(enum_names).unwrap().into_inner();
840    binding_str.extend(enum_names.into_iter().flat_map(|(name, variants)| {
841        const MIN_I32: i64 = i32::MIN as _;
842        const MAX_I32: i64 = i32::MAX as _;
843        const MAX_U32: u64 = u32::MAX as _;
844        variants.into_iter().map(move |(variant, value)| {
845            let (ty, value) = match value {
846                EnumVariantValue::Boolean(b) => ("bool", b.to_string()),
847                EnumVariantValue::Signed(v @ MIN_I32..=MAX_I32) => ("i32", v.to_string()),
848                EnumVariantValue::Signed(v) => ("i64", v.to_string()),
849                EnumVariantValue::Unsigned(v @ 0..=MAX_U32) => ("u32", v.to_string()),
850                EnumVariantValue::Unsigned(v) => ("u64", v.to_string()),
851            };
852            format!(
853                r#"
854#[deprecated(since = "0.12.0", note = "you want pg_sys::{module}::{variant}")]
855pub const {module}_{variant}: {ty} = {value};"#,
856                module = &*name, // imprecise closure capture
857            )
858        })
859    }));
860
861    Ok(binding_str)
862}
863
864fn add_blocklists(bind: bindgen::Builder) -> bindgen::Builder {
865    bind.blocklist_type("Datum") // manually wrapping datum for correctness
866        .blocklist_type("Oid") // "Oid" is not just any u32
867        .blocklist_var("CONFIGURE_ARGS") // configuration during build is hopefully irrelevant
868        .blocklist_var("_*(?:HAVE|have)_.*") // header tracking metadata
869        .blocklist_var("_[A-Z_]+_H") // more header metadata
870        // It's used by explict `extern "C"`
871        .blocklist_function("pg_re_throw")
872        .blocklist_function("err(start|code|msg|detail|context_msg|hint|finish)")
873        // These functions are already ported in Rust
874        .blocklist_function("heap_getattr")
875        .blocklist_function("BufferGetBlock")
876        .blocklist_function("BufferGetPage")
877        .blocklist_function("BufferIsLocal")
878        .blocklist_function("GetMemoryChunkContext")
879        .blocklist_function("GETSTRUCT")
880        .blocklist_function("MAXALIGN")
881        .blocklist_function("MemoryContextIsValid")
882        .blocklist_function("MemoryContextSwitchTo")
883        .blocklist_function("TYPEALIGN")
884        .blocklist_function("TransactionIdIsNormal")
885        .blocklist_function("expression_tree_walker")
886        .blocklist_function("get_pg_major_minor_version_string")
887        .blocklist_function("get_pg_major_version_num")
888        .blocklist_function("get_pg_major_version_string")
889        .blocklist_function("get_pg_version_string")
890        .blocklist_function("heap_tuple_get_struct")
891        .blocklist_function("planstate_tree_walker")
892        .blocklist_function("query_or_expression_tree_walker")
893        .blocklist_function("query_tree_walker")
894        .blocklist_function("range_table_entry_walker")
895        .blocklist_function("range_table_walker")
896        .blocklist_function("raw_expression_tree_walker")
897        .blocklist_function("type_is_array")
898        .blocklist_function("varsize_any")
899        // it's defined twice on Windows, so use PGERROR instead
900        .blocklist_item("ERROR")
901}
902
903fn add_derives(bind: bindgen::Builder) -> bindgen::Builder {
904    bind.derive_debug(true)
905        .derive_copy(true)
906        .derive_default(true)
907        .derive_eq(false)
908        .derive_partialeq(false)
909        .derive_hash(false)
910        .derive_ord(false)
911        .derive_partialord(false)
912}
913
/// Reads the environment variable `s`, first asking cargo to rerun the build script
/// whenever that variable changes.
///
/// Variables that cargo itself provides (anything starting with `CARGO` or `RUST`, plus
/// the keys listed in `CARGO_KEYS`) are read without emitting the rerun directive.
///
/// Returns `None` when the variable is unset or its value is not valid Unicode.
fn env_tracked(s: &str) -> Option<String> {
    // Environment variable keys that cargo might set and that we don't need to track.
    // Picked out, by hand, from:
    // https://doc.rust-lang.org/cargo/reference/environment-variables.html
    //
    // This list MUST stay **sorted**: it is searched with `binary_search`.
    const CARGO_KEYS: &[&str] = &[
        "BROWSER",
        "DEBUG",
        "DOCS_RS",
        "HOST",
        "HTTP_PROXY",
        "HTTP_TIMEOUT",
        "NUM_JOBS",
        "OPT_LEVEL",
        "OUT_DIR",
        "PATH",
        "PROFILE",
        "TARGET",
        "TERM",
    ];
    const CARGO_PREFIXES: [&str; 2] = ["CARGO", "RUST"];

    let has_cargo_prefix = CARGO_PREFIXES.iter().any(|&prefix| s.starts_with(prefix));
    let provided_by_cargo = has_cargo_prefix || CARGO_KEYS.binary_search(&s).is_ok();

    // For variables that cargo hands us we deliberately stay quiet: cargo works out for
    // itself when they change, and asking it to track them, depending on the key, seems
    // to make it rerun build.rs every single time, which is terrible.
    if !provided_by_cargo {
        println!("cargo:rerun-if-env-changed={s}");
    }

    match std::env::var(s) {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
944
945fn target_env_tracked(s: &str) -> Option<String> {
946    let target = env_tracked("TARGET").unwrap();
947    env_tracked(&format!("{s}_{target}")).or_else(|| env_tracked(s))
948}
949
950fn pg_target_include(pg_version: u16, pg_config: &PgConfig) -> eyre::Result<String> {
951    let var = "PGRX_INCLUDEDIR_SERVER";
952    let value =
953        target_env_tracked(&format!("{var}_PG{pg_version}")).or_else(|| target_env_tracked(var));
954    let path = match value {
955        // No configured value: ask `pg_config`.
956        None => pg_config.includedir_server()?,
957        // Configured to non-empty string: pass to bindgen
958        Some(overridden) => Path::new(&overridden).to_path_buf(),
959    };
960    let path = std::fs::canonicalize(&path)
961        .wrap_err(format!("cannot find {path:?} for C header files"))?
962        .join("") // returning a `/`-ending path
963        .to_str()
964        .ok_or(eyre!("{path:?} is not valid UTF-8 string"))?
965        .to_string();
966    Ok(path)
967}
968
969fn build_shim(
970    shim_src: &path::Path,
971    shim_dst: &path::Path,
972    pg_config: &PgConfig,
973) -> eyre::Result<()> {
974    let major_version = pg_config.major_version()?;
975
976    std::fs::copy(shim_src, shim_dst).unwrap();
977
978    let mut build = cc::Build::new();
979    build.flag(&format!("-I{}", pg_target_include(major_version, pg_config)?));
980    for flag in extra_bindgen_clang_args(pg_config)? {
981        build.flag(&flag);
982    }
983    build.file(shim_dst);
984    build.compile("pgrx-cshim");
985    Ok(())
986}
987
988fn extra_bindgen_clang_args(pg_config: &PgConfig) -> eyre::Result<Vec<String>> {
989    let mut out = vec![];
990    let flags = shlex::split(&pg_config.cppflags()?.to_string_lossy()).unwrap_or_default();
991    // Just give clang the full flag set, since presumably that's what we're
992    // getting when we build the C shim anyway.
993    out.extend(flags.iter().cloned());
994    if env_tracked("CARGO_CFG_TARGET_OS").as_deref() == Some("macos") {
995        // Find the `-isysroot` flags so we can warn about them, so something
996        // reasonable shows up if/when the build fails.
997        //
998        // TODO(thom): Could probably fix some brew/xcode issues here in the
999        // Find the `-isysroot` flags so we can warn about them, so something
1000        // reasonable shows up if/when the build fails.
1001        //
1002        // - Handle homebrew packages initially linked against as keg-only, but
1003        //   which have had their version bumped.
1004        for pair in flags.windows(2) {
1005            if pair[0] == "-isysroot" {
1006                if !std::path::Path::new(&pair[1]).exists() {
1007                    // The SDK path doesn't exist. Emit a warning, which they'll
1008                    // see if the build ends up failing (it may not fail in all
1009                    // cases, so we don't panic here).
1010                    //
1011                    // There's a bunch of smarter things we can try here, but
1012                    // most of them either break things that currently work, or
1013                    // are very difficult to get right. If you try to fix this,
1014                    // be sure to consider cases like:
1015                    //
1016                    // - User may have CommandLineTools and not Xcode, vice
1017                    //   versa, or both installed.
1018                    // - User may using a newer SDK than their OS, or vice
1019                    //   versa.
1020                    // - User may be using a newer SDK than their XCode (updated
1021                    //   Command line tools, not OS), or vice versa.
1022                    // - And so on.
1023                    //
1024                    // These are all actually fairly common. Note that the code
1025                    // as-is is *not* broken in these cases (except on OS/SDK
1026                    // updates), so care should be taken to avoid changing that
1027                    // if possible.
1028                    //
1029                    // The logic we'd like ideally is for `cargo pgrx init` to
1030                    // choose a good SDK in the first place, and force postgres
1031                    // to use it. Then, the logic in this build script would
1032                    // Just Work without changes (since we are using its
1033                    // sysroot verbatim).
1034                    //
1035                    // The value of "Good" here is tricky, but the logic should
1036                    // probably:
1037                    //
1038                    // - prefer SDKs from the CLI tools to ones from XCode
1039                    //   (since they're guaranteed compatible with the user's OS
1040                    //   version)
1041                    //
1042                    // - prefer SDKs that specify only the major SDK version
1043                    //   (e.g. MacOSX12.sdk and not MacOSX12.4.sdk or
1044                    //   MacOSX.sdk), to avoid breaking too frequently (if we
1045                    //   have a minor version) or being totally unable to detect
1046                    //   what version of the SDK was used to build postgres (if
1047                    //   we have neither).
1048                    //
1049                    // - Avoid choosing an SDK newer than the user's OS version,
1050                    //   since postgres fails to detect that they are missing if
1051                    //   you do.
1052                    //
1053                    // This is surprisingly hard to implement, as the
1054                    // information is scattered across a dozen ini files.
1055                    // Presumably Apple assumes you'll use
1056                    // `MACOSX_DEPLOYMENT_TARGET`, rather than basing it off the
1057                    // SDK version, but it's not an option for postgres.
1058                    let major_version = pg_config.major_version()?;
1059                    println!(
1060                        "cargo:warning=postgres v{major_version} was compiled against an \
1061                         SDK Root which does not seem to exist on this machine ({}). You may \
1062                         need to re-run `cargo pgrx init` and/or update your command line tools.",
1063                        pair[1],
1064                    );
1065                };
1066                // Either way, we stop here.
1067                break;
1068            }
1069        }
1070    }
1071    Ok(out)
1072}
1073
1074fn run_command(mut command: &mut Command, version: &str) -> eyre::Result<Output> {
1075    let mut dbg = String::new();
1076
1077    command = command
1078        .env_remove("DEBUG")
1079        .env_remove("MAKEFLAGS")
1080        .env_remove("MAKELEVEL")
1081        .env_remove("MFLAGS")
1082        .env_remove("DYLD_FALLBACK_LIBRARY_PATH")
1083        .env_remove("OPT_LEVEL")
1084        .env_remove("PROFILE")
1085        .env_remove("OUT_DIR")
1086        .env_remove("NUM_JOBS");
1087
1088    eprintln!("[{version}] {command:?}");
1089    dbg.push_str(&format!("[{version}] -------- {command:?} -------- \n"));
1090
1091    let output = command.output()?;
1092    let rc = output.clone();
1093
1094    if !output.stdout.is_empty() {
1095        for line in String::from_utf8(output.stdout).unwrap().lines() {
1096            if line.starts_with("cargo:") {
1097                dbg.push_str(&format!("{line}\n"));
1098            } else {
1099                dbg.push_str(&format!("[{version}] [stdout] {line}\n"));
1100            }
1101        }
1102    }
1103
1104    if !output.stderr.is_empty() {
1105        for line in String::from_utf8(output.stderr).unwrap().lines() {
1106            dbg.push_str(&format!("[{version}] [stderr] {line}\n"));
1107        }
1108    }
1109    dbg.push_str(&format!("[{version}] /----------------------------------------\n"));
1110
1111    eprintln!("{dbg}");
1112    Ok(rc)
1113}
1114
1115fn apply_pg_guard(items: &Vec<syn::Item>) -> eyre::Result<proc_macro2::TokenStream> {
1116    let mut out = proc_macro2::TokenStream::new();
1117    for item in items {
1118        match item {
1119            Item::ForeignMod(block) => {
1120                out.extend(quote! {
1121                    #[pgrx_macros::pg_guard]
1122                    #block
1123                });
1124            }
1125            _ => {
1126                out.extend(item.into_token_stream());
1127            }
1128        }
1129    }
1130
1131    Ok(out)
1132}
1133
1134fn rust_fmt(path: &Path) -> eyre::Result<()> {
1135    // We shouldn't hit this path in a case where we care about it, but... just
1136    // in case we probably should respect RUSTFMT.
1137    let rustfmt = env_tracked("RUSTFMT").unwrap_or_else(|| "rustfmt".into());
1138    let out = run_command(Command::new(rustfmt).arg(path).current_dir("."), "[bindings_diff]");
1139    match out {
1140        Ok(_) => Ok(()),
1141        Err(e)
1142            if e.downcast_ref::<std::io::Error>()
1143                .ok_or(eyre!("Couldn't downcast error ref"))?
1144                .kind()
1145                == std::io::ErrorKind::NotFound =>
1146        {
1147            Err(e).wrap_err("Failed to run `rustfmt`, is it installed?")
1148        }
1149        Err(e) => Err(e),
1150    }
1151}