#[repr(C)]
pub struct common_params {
pub n_predict: i32,
pub n_ctx: i32,
pub n_batch: i32,
pub n_ubatch: i32,
pub n_keep: i32,
pub n_draft: i32,
pub n_chunks: i32,
pub n_parallel: i32,
pub n_sequences: i32,
pub p_split: f32,
pub n_gpu_layers: i32,
pub n_gpu_layers_draft: i32,
pub main_gpu: i32,
pub grp_attn_n: i32,
pub grp_attn_w: i32,
pub n_print: i32,
pub rope_freq_base: f32,
pub rope_freq_scale: f32,
pub yarn_ext_factor: f32,
pub yarn_attn_factor: f32,
pub yarn_beta_fast: f32,
pub yarn_beta_slow: f32,
pub yarn_orig_ctx: i32,
pub defrag_thold: f32,
pub numa: ggml_numa_strategy,
pub split_mode: llama_split_mode,
pub rope_scaling_type: llama_rope_scaling_type,
pub pooling_type: llama_pooling_type,
pub attention_type: llama_attention_type,
pub sparams: common_sampler_params,
}
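A minimal construction sketch in Rust, under stated assumptions: the struct is plain #[repr(C)] data with no constructor shown on this page, so the sketch zero-initializes it and fills a few fields by hand. Zeroing assumes each enum field treats all-zero bytes as a valid "disabled/unspecified" variant, which these docs do not guarantee; prefer a Default impl or helper from the bindings if one exists.

use std::mem;

fn make_params() -> common_params {
    // SAFETY: assumes all-zero bytes form a valid common_params
    // (true for the scalar fields; an assumption for the enum fields).
    let mut p: common_params = unsafe { mem::zeroed() };
    p.n_predict = 128;    // new tokens to predict
    p.n_ctx = 4096;       // context size
    p.n_batch = 2048;     // logical batch size for prompt processing
    p.n_ubatch = 512;     // physical batch size for prompt processing
    p.n_gpu_layers = -1;  // -1 - use default
    p
}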
Fields
n_predict: i32
new tokens to predict
n_ctx: i32
context size
n_batch: i32
logical batch size for prompt processing (must be >=32 to use BLAS)
n_ubatch: i32
physical batch size for prompt processing (must be >=32 to use BLAS)
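The two batch sizes are related: n_ubatch is the physical micro-batch actually submitted to the backend and cannot usefully exceed n_batch. A small sketch of that relationship; the clamping rule mirrors upstream llama.cpp behavior and is an assumption about these bindings, not documented API:

// Keep the physical batch within the logical batch; both should stay
// >= 32 if BLAS is to be used, per the field docs above.
fn effective_ubatch(n_batch: i32, n_ubatch: i32) -> i32 {
    if n_ubatch <= 0 { n_batch } else { n_ubatch.min(n_batch) }
}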
n_keep: i32
number of tokens to keep from initial prompt
n_draft: i32
number of tokens to draft during speculative decoding
n_chunks: i32
max number of chunks to process (-1 = unlimited)
n_parallel: i32
number of parallel sequences to decode
n_sequences: i32
number of sequences to decode
p_split: f32
speculative decoding split probability
n_gpu_layers: i32
number of layers to store in VRAM (-1 - use default)
n_gpu_layers_draft: i32
number of layers to store in VRAM for the draft model (-1 - use default)
main_gpu: i32
the GPU that is used for scratch and small tensors
grp_attn_n: i32
group-attention factor
grp_attn_w: i32
group-attention width
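When group attention (self-extend) is enabled (grp_attn_n > 1), upstream llama.cpp's main example requires the width to be a multiple of the factor. A hedged validation sketch; the ga_w % ga_n == 0 rule comes from that example and may not be enforced by these bindings themselves:

fn validate_group_attention(grp_attn_n: i32, grp_attn_w: i32) -> Result<(), String> {
    if grp_attn_n > 1 && grp_attn_w % grp_attn_n != 0 {
        return Err(format!(
            "grp_attn_w ({grp_attn_w}) must be a multiple of grp_attn_n ({grp_attn_n})"
        ));
    }
    Ok(())
}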
n_print: i32
print token count every n tokens (-1 = disabled)
rope_freq_base: f32
RoPE base frequency
rope_freq_scale: f32
RoPE frequency scaling factor
yarn_ext_factor: f32
YaRN extrapolation mix factor
yarn_attn_factor: f32
YaRN magnitude scaling factor
yarn_beta_fast: f32
YaRN low correction dim
yarn_beta_slow: f32
YaRN high correction dim
yarn_orig_ctx: i32
YaRN original context length
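For plain linear RoPE scaling, rope_freq_scale is the reciprocal of the context-stretch factor: running a model trained at 4096 tokens with an 8192-token context uses a scale of 0.5. An illustrative helper; this is the standard linear-scaling relationship, not an API of this crate:

// rope_freq_scale for linear scaling = trained context / target context,
// e.g. linear_rope_freq_scale(4096, 8192) == 0.5.
fn linear_rope_freq_scale(n_ctx_train: u32, n_ctx_target: u32) -> f32 {
    n_ctx_train as f32 / n_ctx_target as f32
}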
defrag_thold: f32
KV cache defragmentation threshold
numa: ggml_numa_strategy
attempt optimizations that help on some NUMA systems
split_mode: llama_split_mode
how split tensors should be distributed across GPUs
rope_scaling_type: llama_rope_scaling_type
RoPE frequency scaling method
pooling_type: llama_pooling_type
pooling type for embeddings
attention_type: llama_attention_type
attention type for embeddings
sparams: common_sampler_params
sampling parameters