Crate llama_cpp_sys_2

See llama-cpp-2 for a documented and safe API.

Structs

_IO_FILE
_IO_codecvt
_IO_marker
_IO_wide_data
ggml_backend
ggml_backend_buffer
ggml_backend_buffer_type
ggml_backend_dev_caps
ggml_backend_dev_props
ggml_backend_device
ggml_backend_event
ggml_backend_feature
ggml_backend_graph_copy
ggml_backend_reg
ggml_backend_sched
ggml_bf16_t
ggml_cgraph
ggml_context
ggml_cplan
ggml_gallocr
ggml_init_params
ggml_object
ggml_tallocr
ggml_tensor
ggml_threadpool
ggml_threadpool_params
ggml_type_traits
ggml_type_traits_cpu
llama_adapter_lora
llama_batch
llama_chat_message
llama_context
llama_context_params
llama_kv_cache_view
llama_kv_cache_view_cell
llama_logit_bias
llama_model
llama_model_kv_override
llama_model_params
llama_model_quantize_params
llama_perf_context_data
llama_perf_sampler_data
llama_sampler
llama_sampler_chain_params
llama_sampler_i
llama_token_data
llama_token_data_array
llama_vocab

Constants

GGML_BACKEND_BUFFER_USAGE_ANY
GGML_BACKEND_BUFFER_USAGE_COMPUTE
GGML_BACKEND_BUFFER_USAGE_WEIGHTS
GGML_BACKEND_DEVICE_TYPE_ACCEL
GGML_BACKEND_DEVICE_TYPE_CPU
GGML_BACKEND_DEVICE_TYPE_GPU
GGML_FTYPE_ALL_F32
GGML_FTYPE_MOSTLY_BF16
GGML_FTYPE_MOSTLY_F16
GGML_FTYPE_MOSTLY_IQ1_M
GGML_FTYPE_MOSTLY_IQ1_S
GGML_FTYPE_MOSTLY_IQ2_S
GGML_FTYPE_MOSTLY_IQ2_XS
GGML_FTYPE_MOSTLY_IQ2_XXS
GGML_FTYPE_MOSTLY_IQ3_S
GGML_FTYPE_MOSTLY_IQ3_XXS
GGML_FTYPE_MOSTLY_IQ4_NL
GGML_FTYPE_MOSTLY_IQ4_XS
GGML_FTYPE_MOSTLY_Q2_K
GGML_FTYPE_MOSTLY_Q3_K
GGML_FTYPE_MOSTLY_Q4_0
GGML_FTYPE_MOSTLY_Q4_1
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16
GGML_FTYPE_MOSTLY_Q4_K
GGML_FTYPE_MOSTLY_Q5_0
GGML_FTYPE_MOSTLY_Q5_1
GGML_FTYPE_MOSTLY_Q5_K
GGML_FTYPE_MOSTLY_Q6_K
GGML_FTYPE_MOSTLY_Q8_0
GGML_FTYPE_UNKNOWN
GGML_LOG_LEVEL_CONT
GGML_LOG_LEVEL_DEBUG
GGML_LOG_LEVEL_ERROR
GGML_LOG_LEVEL_INFO
GGML_LOG_LEVEL_NONE
GGML_LOG_LEVEL_WARN
GGML_NUMA_STRATEGY_COUNT
GGML_NUMA_STRATEGY_DISABLED
GGML_NUMA_STRATEGY_DISTRIBUTE
GGML_NUMA_STRATEGY_ISOLATE
GGML_NUMA_STRATEGY_MIRROR
GGML_NUMA_STRATEGY_NUMACTL
GGML_OBJECT_TYPE_GRAPH
GGML_OBJECT_TYPE_TENSOR
GGML_OBJECT_TYPE_WORK_BUFFER
GGML_OP_ACC
GGML_OP_ADD
GGML_OP_ADD1
GGML_OP_ADD_REL_POS
GGML_OP_ARANGE
GGML_OP_ARGMAX
GGML_OP_ARGSORT
GGML_OP_CLAMP
GGML_OP_CONCAT
GGML_OP_CONT
GGML_OP_CONV_TRANSPOSE_1D
GGML_OP_CONV_TRANSPOSE_2D
GGML_OP_COS
GGML_OP_COUNT
GGML_OP_COUNT_EQUAL
GGML_OP_CPY
GGML_OP_CROSS_ENTROPY_LOSS
GGML_OP_CROSS_ENTROPY_LOSS_BACK
GGML_OP_DIAG
GGML_OP_DIAG_MASK_INF
GGML_OP_DIAG_MASK_ZERO
GGML_OP_DIV
GGML_OP_DUP
GGML_OP_FLASH_ATTN_BACK
GGML_OP_FLASH_ATTN_EXT
GGML_OP_GATED_LINEAR_ATTN
GGML_OP_GET_REL_POS
GGML_OP_GET_ROWS
GGML_OP_GET_ROWS_BACK
GGML_OP_GROUP_NORM
GGML_OP_IM2COL
GGML_OP_IM2COL_BACK
GGML_OP_LEAKY_RELU
GGML_OP_LOG
GGML_OP_MAP_BINARY
GGML_OP_MAP_CUSTOM1
GGML_OP_MAP_CUSTOM2
GGML_OP_MAP_CUSTOM3
GGML_OP_MAP_CUSTOM1_F32
GGML_OP_MAP_CUSTOM2_F32
GGML_OP_MAP_CUSTOM3_F32
GGML_OP_MAP_UNARY
GGML_OP_MEAN
GGML_OP_MUL
GGML_OP_MUL_MAT
GGML_OP_MUL_MAT_ID
GGML_OP_NONE
GGML_OP_NORM
GGML_OP_OPT_STEP_ADAMW
GGML_OP_OUT_PROD
GGML_OP_PAD
GGML_OP_PAD_REFLECT_1D
GGML_OP_PERMUTE
GGML_OP_POOL_1D
GGML_OP_POOL_2D
GGML_OP_POOL_2D_BACK
GGML_OP_POOL_AVG
GGML_OP_POOL_COUNT
GGML_OP_POOL_MAX
GGML_OP_REPEAT
GGML_OP_REPEAT_BACK
GGML_OP_RESHAPE
GGML_OP_RMS_NORM
GGML_OP_RMS_NORM_BACK
GGML_OP_ROPE
GGML_OP_ROPE_BACK
GGML_OP_RWKV_WKV6
GGML_OP_SCALE
GGML_OP_SET
GGML_OP_SILU_BACK
GGML_OP_SIN
GGML_OP_SOFT_MAX
GGML_OP_SOFT_MAX_BACK
GGML_OP_SQR
GGML_OP_SQRT
GGML_OP_SSM_CONV
GGML_OP_SSM_SCAN
GGML_OP_SUB
GGML_OP_SUM
GGML_OP_SUM_ROWS
GGML_OP_TIMESTEP_EMBEDDING
GGML_OP_TRANSPOSE
GGML_OP_UNARY
GGML_OP_UPSCALE
GGML_OP_VIEW
GGML_OP_WIN_PART
GGML_OP_WIN_UNPART
GGML_PREC_DEFAULT
GGML_PREC_F32
GGML_SCHED_PRIO_HIGH
GGML_SCHED_PRIO_MEDIUM
GGML_SCHED_PRIO_NORMAL
GGML_SCHED_PRIO_REALTIME
GGML_SORT_ORDER_ASC
GGML_SORT_ORDER_DESC
GGML_STATUS_ABORTED
GGML_STATUS_ALLOC_FAILED
GGML_STATUS_FAILED
GGML_STATUS_SUCCESS
GGML_TENSOR_FLAG_INPUT
GGML_TENSOR_FLAG_LOSS
GGML_TENSOR_FLAG_OUTPUT
GGML_TENSOR_FLAG_PARAM
GGML_TYPE_BF16
GGML_TYPE_COUNT
GGML_TYPE_F16
GGML_TYPE_F32
GGML_TYPE_F64
GGML_TYPE_I8
GGML_TYPE_I16
GGML_TYPE_I32
GGML_TYPE_I64
GGML_TYPE_IQ1_M
GGML_TYPE_IQ1_S
GGML_TYPE_IQ2_S
GGML_TYPE_IQ2_XS
GGML_TYPE_IQ2_XXS
GGML_TYPE_IQ3_S
GGML_TYPE_IQ3_XXS
GGML_TYPE_IQ4_NL
GGML_TYPE_IQ4_XS
GGML_TYPE_Q2_K
GGML_TYPE_Q3_K
GGML_TYPE_Q4_0
GGML_TYPE_Q4_1
GGML_TYPE_Q4_K
GGML_TYPE_Q5_0
GGML_TYPE_Q5_1
GGML_TYPE_Q5_K
GGML_TYPE_Q6_K
GGML_TYPE_Q8_0
GGML_TYPE_Q8_1
GGML_TYPE_Q8_K
GGML_TYPE_TQ1_0
GGML_TYPE_TQ2_0
GGML_UNARY_OP_ABS
GGML_UNARY_OP_COUNT
GGML_UNARY_OP_ELU
GGML_UNARY_OP_EXP
GGML_UNARY_OP_GELU
GGML_UNARY_OP_GELU_QUICK
GGML_UNARY_OP_HARDSIGMOID
GGML_UNARY_OP_HARDSWISH
GGML_UNARY_OP_NEG
GGML_UNARY_OP_RELU
GGML_UNARY_OP_SGN
GGML_UNARY_OP_SIGMOID
GGML_UNARY_OP_SILU
GGML_UNARY_OP_STEP
GGML_UNARY_OP_TANH
LLAMA_ATTENTION_TYPE_CAUSAL
LLAMA_ATTENTION_TYPE_NON_CAUSAL
LLAMA_ATTENTION_TYPE_UNSPECIFIED
LLAMA_FTYPE_ALL_F32
LLAMA_FTYPE_GUESSED
LLAMA_FTYPE_MOSTLY_BF16
LLAMA_FTYPE_MOSTLY_F16
LLAMA_FTYPE_MOSTLY_IQ1_M
LLAMA_FTYPE_MOSTLY_IQ1_S
LLAMA_FTYPE_MOSTLY_IQ2_M
LLAMA_FTYPE_MOSTLY_IQ2_S
LLAMA_FTYPE_MOSTLY_IQ2_XS
LLAMA_FTYPE_MOSTLY_IQ2_XXS
LLAMA_FTYPE_MOSTLY_IQ3_M
LLAMA_FTYPE_MOSTLY_IQ3_S
LLAMA_FTYPE_MOSTLY_IQ3_XS
LLAMA_FTYPE_MOSTLY_IQ3_XXS
LLAMA_FTYPE_MOSTLY_IQ4_NL
LLAMA_FTYPE_MOSTLY_IQ4_XS
LLAMA_FTYPE_MOSTLY_Q2_K
LLAMA_FTYPE_MOSTLY_Q2_K_S
LLAMA_FTYPE_MOSTLY_Q3_K_L
LLAMA_FTYPE_MOSTLY_Q3_K_M
LLAMA_FTYPE_MOSTLY_Q3_K_S
LLAMA_FTYPE_MOSTLY_Q4_0
LLAMA_FTYPE_MOSTLY_Q4_1
LLAMA_FTYPE_MOSTLY_Q4_K_M
LLAMA_FTYPE_MOSTLY_Q4_K_S
LLAMA_FTYPE_MOSTLY_Q5_0
LLAMA_FTYPE_MOSTLY_Q5_1
LLAMA_FTYPE_MOSTLY_Q5_K_M
LLAMA_FTYPE_MOSTLY_Q5_K_S
LLAMA_FTYPE_MOSTLY_Q6_K
LLAMA_FTYPE_MOSTLY_Q8_0
LLAMA_FTYPE_MOSTLY_TQ1_0
LLAMA_FTYPE_MOSTLY_TQ2_0
LLAMA_KV_OVERRIDE_TYPE_BOOL
LLAMA_KV_OVERRIDE_TYPE_FLOAT
LLAMA_KV_OVERRIDE_TYPE_INT
LLAMA_KV_OVERRIDE_TYPE_STR
LLAMA_POOLING_TYPE_CLS
LLAMA_POOLING_TYPE_LAST
LLAMA_POOLING_TYPE_MEAN
LLAMA_POOLING_TYPE_NONE
LLAMA_POOLING_TYPE_RANK
LLAMA_POOLING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_LINEAR
LLAMA_ROPE_SCALING_TYPE_LONGROPE
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE
LLAMA_ROPE_SCALING_TYPE_NONE
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_YARN
LLAMA_ROPE_TYPE_MROPE
LLAMA_ROPE_TYPE_NEOX
LLAMA_ROPE_TYPE_NONE
LLAMA_ROPE_TYPE_NORM
LLAMA_ROPE_TYPE_VISION
LLAMA_SPLIT_MODE_LAYER
LLAMA_SPLIT_MODE_NONE
LLAMA_SPLIT_MODE_ROW
LLAMA_TOKEN_ATTR_BYTE
LLAMA_TOKEN_ATTR_CONTROL
LLAMA_TOKEN_ATTR_LSTRIP
LLAMA_TOKEN_ATTR_NORMAL
LLAMA_TOKEN_ATTR_NORMALIZED
LLAMA_TOKEN_ATTR_RSTRIP
LLAMA_TOKEN_ATTR_SINGLE_WORD
LLAMA_TOKEN_ATTR_UNDEFINED
LLAMA_TOKEN_ATTR_UNKNOWN
LLAMA_TOKEN_ATTR_UNUSED
LLAMA_TOKEN_ATTR_USER_DEFINED
LLAMA_TOKEN_TYPE_BYTE
LLAMA_TOKEN_TYPE_CONTROL
LLAMA_TOKEN_TYPE_NORMAL
LLAMA_TOKEN_TYPE_UNDEFINED
LLAMA_TOKEN_TYPE_UNKNOWN
LLAMA_TOKEN_TYPE_UNUSED
LLAMA_TOKEN_TYPE_USER_DEFINED
LLAMA_VOCAB_PRE_TYPE_BLOOM
LLAMA_VOCAB_PRE_TYPE_CHAMELEON
LLAMA_VOCAB_PRE_TYPE_CHATGLM3
LLAMA_VOCAB_PRE_TYPE_CHATGLM4
LLAMA_VOCAB_PRE_TYPE_CODESHELL
LLAMA_VOCAB_PRE_TYPE_COMMAND_R
LLAMA_VOCAB_PRE_TYPE_DBRX
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM
LLAMA_VOCAB_PRE_TYPE_DEFAULT
LLAMA_VOCAB_PRE_TYPE_EXAONE
LLAMA_VOCAB_PRE_TYPE_FALCON
LLAMA_VOCAB_PRE_TYPE_GPT2
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH
LLAMA_VOCAB_PRE_TYPE_JAIS
LLAMA_VOCAB_PRE_TYPE_LLAMA3
LLAMA_VOCAB_PRE_TYPE_MINERVA
LLAMA_VOCAB_PRE_TYPE_MPT
LLAMA_VOCAB_PRE_TYPE_OLMO
LLAMA_VOCAB_PRE_TYPE_PORO
LLAMA_VOCAB_PRE_TYPE_QWEN2
LLAMA_VOCAB_PRE_TYPE_REFACT
LLAMA_VOCAB_PRE_TYPE_SMAUG
LLAMA_VOCAB_PRE_TYPE_SMOLLM
LLAMA_VOCAB_PRE_TYPE_STABLELM2
LLAMA_VOCAB_PRE_TYPE_STARCODER
LLAMA_VOCAB_PRE_TYPE_TEKKEN
LLAMA_VOCAB_PRE_TYPE_VIKING
LLAMA_VOCAB_TYPE_BPE
LLAMA_VOCAB_TYPE_NONE
LLAMA_VOCAB_TYPE_RWKV
LLAMA_VOCAB_TYPE_SPM
LLAMA_VOCAB_TYPE_UGM
LLAMA_VOCAB_TYPE_WPM

Functions

ggml_abort
ggml_abs
ggml_abs_inplace
ggml_acc
ggml_acc_inplace
ggml_add
ggml_add1
ggml_add1_inplace
ggml_add_cast
ggml_add_inplace
ggml_add_rel_pos
ggml_add_rel_pos_inplace
ggml_arange
ggml_are_same_shape
ggml_are_same_stride
ggml_argmax
ggml_argsort
ggml_backend_alloc_buffer
ggml_backend_alloc_ctx_tensors
ggml_backend_alloc_ctx_tensors_from_buft
ggml_backend_buffer_clear
ggml_backend_buffer_free
ggml_backend_buffer_get_alignment
ggml_backend_buffer_get_alloc_size
ggml_backend_buffer_get_base
ggml_backend_buffer_get_max_size
ggml_backend_buffer_get_size
ggml_backend_buffer_get_type
ggml_backend_buffer_get_usage
ggml_backend_buffer_init_tensor
ggml_backend_buffer_is_host
ggml_backend_buffer_name
ggml_backend_buffer_reset
ggml_backend_buffer_set_usage
ggml_backend_buft_alloc_buffer
ggml_backend_buft_get_alignment
ggml_backend_buft_get_alloc_size
ggml_backend_buft_get_device
ggml_backend_buft_get_max_size
ggml_backend_buft_is_host
ggml_backend_buft_name
ggml_backend_compare_graph_backend
ggml_backend_cpu_buffer_from_ptr
ggml_backend_cpu_buffer_type
ggml_backend_cpu_init
ggml_backend_cpu_reg
ggml_backend_cpu_set_abort_callback
ggml_backend_cpu_set_n_threads
ggml_backend_cpu_set_threadpool
ggml_backend_dev_backend_reg
ggml_backend_dev_buffer_from_host_ptr
ggml_backend_dev_buffer_type
ggml_backend_dev_by_name
ggml_backend_dev_by_type
ggml_backend_dev_count
ggml_backend_dev_description
ggml_backend_dev_get
ggml_backend_dev_get_props
ggml_backend_dev_host_buffer_type
ggml_backend_dev_init
ggml_backend_dev_memory
ggml_backend_dev_name
ggml_backend_dev_offload_op
ggml_backend_dev_supports_buft
ggml_backend_dev_supports_op
ggml_backend_dev_type
ggml_backend_device_register
ggml_backend_event_free
ggml_backend_event_new
ggml_backend_event_record
ggml_backend_event_synchronize
ggml_backend_event_wait
ggml_backend_free
ggml_backend_get_alignment
ggml_backend_get_default_buffer_type
ggml_backend_get_device
ggml_backend_get_max_size
ggml_backend_graph_compute
ggml_backend_graph_compute_async
ggml_backend_graph_copy
ggml_backend_graph_copy_free
ggml_backend_graph_plan_compute
ggml_backend_graph_plan_create
ggml_backend_graph_plan_free
ggml_backend_guid
ggml_backend_init_best
ggml_backend_init_by_name
ggml_backend_init_by_type
ggml_backend_is_cpu
ggml_backend_load
ggml_backend_load_all
ggml_backend_load_all_from_path
ggml_backend_name
ggml_backend_offload_op
ggml_backend_reg_by_name
ggml_backend_reg_count
ggml_backend_reg_dev_count
ggml_backend_reg_dev_get
ggml_backend_reg_get
ggml_backend_reg_get_proc_address
ggml_backend_reg_name
ggml_backend_sched_alloc_graph
ggml_backend_sched_free
ggml_backend_sched_get_backend
ggml_backend_sched_get_buffer_size
ggml_backend_sched_get_n_backends
ggml_backend_sched_get_n_copies
ggml_backend_sched_get_n_splits
ggml_backend_sched_get_tensor_backend
ggml_backend_sched_graph_compute
ggml_backend_sched_graph_compute_async
ggml_backend_sched_new
ggml_backend_sched_reserve
ggml_backend_sched_reset
ggml_backend_sched_set_eval_callback
ggml_backend_sched_set_tensor_backend
ggml_backend_sched_synchronize
ggml_backend_supports_buft
ggml_backend_supports_op
ggml_backend_synchronize
ggml_backend_tensor_alloc
ggml_backend_tensor_copy
ggml_backend_tensor_copy_async
ggml_backend_tensor_get
ggml_backend_tensor_get_async
ggml_backend_tensor_memset
ggml_backend_tensor_set
ggml_backend_tensor_set_async
ggml_backend_unload
ggml_backend_view_init
ggml_bf16_to_fp32
ggml_bf16_to_fp32_row
ggml_blck_size
ggml_build_backward_expand
ggml_build_forward_expand
ggml_can_repeat
ggml_cast
ggml_clamp
ggml_concat
ggml_cont
ggml_cont_1d
ggml_cont_2d
ggml_cont_3d
ggml_cont_4d
ggml_conv_1d
ggml_conv_1d_dw
ggml_conv_1d_dw_ph
ggml_conv_1d_ph
ggml_conv_2d
ggml_conv_2d_dw
ggml_conv_2d_s1_ph
ggml_conv_2d_sk_p0
ggml_conv_transpose_1d
ggml_conv_transpose_2d_p0
ggml_cos
ggml_cos_inplace
ggml_count_equal
ggml_cpu_get_sve_cnt
ggml_cpu_has_amx_int8
ggml_cpu_has_arm_fma
ggml_cpu_has_avx
ggml_cpu_has_avx2
ggml_cpu_has_avx512
ggml_cpu_has_avx512_bf16
ggml_cpu_has_avx512_vbmi
ggml_cpu_has_avx512_vnni
ggml_cpu_has_avx_vnni
ggml_cpu_has_dotprod
ggml_cpu_has_f16c
ggml_cpu_has_fma
ggml_cpu_has_fp16_va
ggml_cpu_has_llamafile
ggml_cpu_has_matmul_int8
ggml_cpu_has_neon
ggml_cpu_has_riscv_v
ggml_cpu_has_sse3
ggml_cpu_has_ssse3
ggml_cpu_has_sve
ggml_cpu_has_vsx
ggml_cpu_has_wasm_simd
ggml_cpu_init
ggml_cpy
ggml_cross_entropy_loss
ggml_cross_entropy_loss_back
ggml_cycles
ggml_cycles_per_ms
ggml_diag
ggml_diag_mask_inf
ggml_diag_mask_inf_inplace
ggml_diag_mask_zero
ggml_diag_mask_zero_inplace
ggml_div
ggml_div_inplace
ggml_dup
ggml_dup_inplace
ggml_dup_tensor
ggml_element_size
ggml_elu
ggml_elu_inplace
ggml_exp
ggml_exp_inplace
ggml_flash_attn_back
ggml_flash_attn_ext
ggml_flash_attn_ext_get_prec
ggml_flash_attn_ext_set_prec
ggml_fopen
ggml_format_name
ggml_fp16_to_fp32
ggml_fp16_to_fp32_row
ggml_fp32_to_bf16
ggml_fp32_to_bf16_row
ggml_fp32_to_bf16_row_ref
ggml_fp32_to_fp16
ggml_fp32_to_fp16_row
ggml_free
ggml_ftype_to_ggml_type
ggml_gallocr_alloc_graph
ggml_gallocr_free
ggml_gallocr_get_buffer_size
ggml_gallocr_new
ggml_gallocr_new_n
ggml_gallocr_reserve
ggml_gallocr_reserve_n
ggml_gated_linear_attn
ggml_gelu
ggml_gelu_inplace
ggml_gelu_quick
ggml_gelu_quick_inplace
ggml_get_data
ggml_get_data_f32
ggml_get_f32_1d
ggml_get_f32_nd
ggml_get_first_tensor
ggml_get_i32_1d
ggml_get_i32_nd
ggml_get_max_tensor_size
ggml_get_mem_buffer
ggml_get_mem_size
ggml_get_name
ggml_get_next_tensor
ggml_get_no_alloc
ggml_get_rel_pos
ggml_get_rows
ggml_get_rows_back
ggml_get_tensor
ggml_get_type_traits
ggml_get_type_traits_cpu
ggml_get_unary_op
ggml_graph_add_node
ggml_graph_clear
ggml_graph_compute
ggml_graph_compute_with_ctx
ggml_graph_cpy
ggml_graph_dump_dot
ggml_graph_dup
ggml_graph_export
ggml_graph_get_grad
ggml_graph_get_grad_acc
ggml_graph_get_tensor
ggml_graph_import
ggml_graph_n_nodes
ggml_graph_node
ggml_graph_nodes
ggml_graph_overhead
ggml_graph_overhead_custom
ggml_graph_plan
ggml_graph_print
ggml_graph_reset
ggml_graph_size
ggml_group_norm
ggml_group_norm_inplace
ggml_guid_matches
ggml_hardsigmoid
ggml_hardswish
ggml_im2col
ggml_im2col_back
ggml_init
ggml_is_3d
ggml_is_contiguous
ggml_is_contiguous_0
ggml_is_contiguous_1
ggml_is_contiguous_2
ggml_is_empty
ggml_is_matrix
ggml_is_numa
ggml_is_permuted
ggml_is_quantized
ggml_is_scalar
ggml_is_transposed
ggml_is_vector
ggml_leaky_relu
ggml_log
ggml_log_inplace
ggml_log_set
ggml_map_binary_f32
ggml_map_binary_inplace_f32
ggml_map_custom1
ggml_map_custom2
ggml_map_custom3
ggml_map_custom1_f32
ggml_map_custom1_inplace
ggml_map_custom1_inplace_f32
ggml_map_custom2_f32
ggml_map_custom2_inplace
ggml_map_custom2_inplace_f32
ggml_map_custom3_f32
ggml_map_custom3_inplace
ggml_map_custom3_inplace_f32
ggml_map_unary_f32
ggml_map_unary_inplace_f32
ggml_mean
ggml_mul
ggml_mul_inplace
ggml_mul_mat
ggml_mul_mat_id
ggml_mul_mat_set_prec
ggml_n_dims
ggml_nbytes
ggml_nbytes_pad
ggml_neg
ggml_neg_inplace
ggml_nelements
ggml_new_buffer
ggml_new_f32
ggml_new_graph
ggml_new_graph_custom
ggml_new_i32
ggml_new_tensor
ggml_new_tensor_1d
ggml_new_tensor_2d
ggml_new_tensor_3d
ggml_new_tensor_4d
ggml_norm
ggml_norm_inplace
ggml_nrows
ggml_numa_init
ggml_op_desc
ggml_op_name
ggml_op_symbol
ggml_opt_step_adamw
ggml_out_prod
ggml_pad
ggml_pad_reflect_1d
ggml_permute
ggml_pool_1d
ggml_pool_2d
ggml_pool_2d_back
ggml_print_object
ggml_print_objects
ggml_quantize_chunk
ggml_quantize_free
ggml_quantize_init
ggml_quantize_requires_imatrix
ggml_relu
ggml_relu_inplace
ggml_repeat
ggml_repeat_back
ggml_reset
ggml_reshape
ggml_reshape_1d
ggml_reshape_2d
ggml_reshape_3d
ggml_reshape_4d
ggml_rms_norm
ggml_rms_norm_back
ggml_rms_norm_inplace
ggml_rope
ggml_rope_custom
ggml_rope_custom_inplace
ggml_rope_ext
ggml_rope_ext_back
ggml_rope_ext_inplace
ggml_rope_inplace
ggml_rope_multi
ggml_rope_multi_back
ggml_rope_yarn_corr_dims
ggml_row_size
ggml_rwkv_wkv6
ggml_scale
ggml_scale_inplace
ggml_set
ggml_set_1d
ggml_set_1d_inplace
ggml_set_2d
ggml_set_2d_inplace
ggml_set_f32
ggml_set_f32_1d
ggml_set_f32_nd
ggml_set_i32
ggml_set_i32_1d
ggml_set_i32_nd
ggml_set_inplace
ggml_set_input
ggml_set_loss
ggml_set_name
ggml_set_no_alloc
ggml_set_output
ggml_set_param
ggml_set_zero
ggml_sgn
ggml_sgn_inplace
ggml_sigmoid
ggml_sigmoid_inplace
ggml_silu
ggml_silu_back
ggml_silu_inplace
ggml_sin
ggml_sin_inplace
ggml_soft_max
ggml_soft_max_ext
ggml_soft_max_ext_back
ggml_soft_max_ext_back_inplace
ggml_soft_max_inplace
ggml_sqr
ggml_sqr_inplace
ggml_sqrt
ggml_sqrt_inplace
ggml_ssm_conv
ggml_ssm_scan
ggml_status_to_string
ggml_step
ggml_step_inplace
ggml_sub
ggml_sub_inplace
ggml_sum
ggml_sum_rows
ggml_tallocr_alloc
ggml_tallocr_new
ggml_tanh
ggml_tanh_inplace
ggml_tensor_overhead
ggml_threadpool_free
ggml_threadpool_get_n_threads
ggml_threadpool_new
ggml_threadpool_params_default
ggml_threadpool_params_init
ggml_threadpool_params_match
ggml_threadpool_pause
ggml_threadpool_resume
ggml_time_init
ggml_time_ms
ggml_time_us
ggml_timestep_embedding
ggml_top_k
ggml_transpose
ggml_type_name
ggml_type_size
ggml_type_sizef
ggml_unary
ggml_unary_inplace
ggml_unary_op_name
ggml_unravel_index
ggml_upscale
ggml_upscale_ext
ggml_used_mem
ggml_validate_row_data
ggml_view_1d
ggml_view_2d
ggml_view_3d
ggml_view_4d
ggml_view_tensor
ggml_win_part
ggml_win_unpart
llama_adapter_lora_free
llama_adapter_lora_init
llama_add_bos_token
llama_add_eos_token
llama_apply_adapter_cvec
llama_attach_threadpool
llama_backend_free
llama_backend_init
llama_batch_free
llama_batch_get_one
llama_batch_init
llama_chat_apply_template
Apply chat template. Inspired by the Hugging Face apply_chat_template() in Python. Both “model” and “custom_template” are optional, but at least one is required; “custom_template” takes precedence over “model”. NOTE: This function does not use a Jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template is used instead. @param chat Pointer to a list of llama_chat_message @param n_msg Number of llama_chat_message in this chat @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. @param buf A buffer to hold the formatted prompt. The recommended allocation size is 2 * (total number of characters of all messages). @param length The size of the allocated buffer @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-allocate the buffer and re-apply the template.
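The return convention above implies a grow-and-retry loop. Below is a minimal, hedged sketch of that pattern against these raw bindings; it assumes the six-argument signature (tmpl, chat, n_msg, add_ass, buf, length) used by current llama.cpp, so check the generated binding in your build (older revisions also took a llama_model pointer). The format_chat helper is hypothetical, not part of this crate.

```rust
use std::ffi::CString;
use std::os::raw::c_char;

use llama_cpp_sys_2::{llama_chat_apply_template, llama_chat_message};

// Hypothetical helper: apply `tmpl` to `msgs`, growing the buffer when the
// returned length exceeds the current capacity.
unsafe fn format_chat(tmpl: &CString, msgs: &[llama_chat_message]) -> Vec<u8> {
    let mut buf = vec![0u8; 1024];
    loop {
        let n = llama_chat_apply_template(
            tmpl.as_ptr(),
            msgs.as_ptr(),
            msgs.len(),
            true, // add_ass: end the prompt with the assistant-start token(s)
            buf.as_mut_ptr() as *mut c_char,
            buf.len() as i32,
        );
        assert!(n >= 0, "chat template could not be applied");
        if n as usize <= buf.len() {
            buf.truncate(n as usize);
            return buf; // formatted prompt bytes
        }
        buf.resize(n as usize, 0); // too small: re-allocate and re-apply
    }
}
```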
llama_chat_builtin_templates
llama_clear_adapter_lora
llama_context_default_params
llama_copy_state_data
llama_decode
llama_detach_threadpool
llama_detokenize
@details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure: the magnitude is the number of chars/bytes that would have been returned. @param remove_special Allows removing BOS and EOS tokens if the model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
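A minimal sketch of the negative-return retry convention described above. It assumes the binding takes a llama_vocab pointer followed by the token slice, output buffer, and the two flags (older revisions passed a llama_model instead); the detokenize helper is hypothetical.

```rust
use std::os::raw::c_char;

use llama_cpp_sys_2::{llama_detokenize, llama_token, llama_vocab};

// Hypothetical helper: convert `tokens` back into UTF-8 text, growing the
// buffer when the negative return value reports the required size.
unsafe fn detokenize(vocab: *const llama_vocab, tokens: &[llama_token]) -> String {
    let mut text = vec![0u8; 256];
    loop {
        let n = llama_detokenize(
            vocab,
            tokens.as_ptr(),
            tokens.len() as i32,
            text.as_mut_ptr() as *mut c_char,
            text.len() as i32,
            true,  // remove_special: strip BOS/EOS if the model adds them
            false, // unparse_special: do not render special tokens
        );
        if n >= 0 {
            text.truncate(n as usize);
            return String::from_utf8_lossy(&text).into_owned();
        }
        text.resize((-n) as usize, 0); // grow to the reported size and retry
    }
}
```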
llama_encode
llama_free
llama_free_model
llama_get_embeddings
llama_get_embeddings_ith
llama_get_embeddings_seq
llama_get_kv_cache_token_count
llama_get_kv_cache_used_cells
llama_get_logits
llama_get_logits_ith
llama_get_model
llama_get_state_size
llama_init_from_model
llama_kv_cache_can_shift
llama_kv_cache_clear
llama_kv_cache_defrag
llama_kv_cache_seq_add
llama_kv_cache_seq_cp
llama_kv_cache_seq_div
llama_kv_cache_seq_keep
llama_kv_cache_seq_pos_max
llama_kv_cache_seq_rm
llama_kv_cache_update
llama_kv_cache_view_free
llama_kv_cache_view_init
llama_kv_cache_view_update
llama_load_model_from_file
llama_load_session_file
llama_log_set
llama_max_devices
llama_model_chat_template
llama_model_decoder_start_token
llama_model_default_params
llama_model_desc
llama_model_free
llama_model_get_vocab
llama_model_has_decoder
llama_model_has_encoder
llama_model_is_recurrent
llama_model_load_from_file
llama_model_load_from_splits
llama_model_meta_count
llama_model_meta_key_by_index
llama_model_meta_val_str
llama_model_meta_val_str_by_index
llama_model_n_ctx_train
llama_model_n_embd
llama_model_n_head
llama_model_n_layer
llama_model_n_params
llama_model_quantize
llama_model_quantize_default_params
llama_model_rope_freq_scale_train
llama_model_rope_type
llama_model_size
llama_n_batch
llama_n_ctx
llama_n_ctx_train
llama_n_embd
llama_n_head
llama_n_layer
llama_n_seq_max
llama_n_threads
llama_n_threads_batch
llama_n_ubatch
llama_n_vocab
llama_new_context_with_model
llama_numa_init
llama_perf_context
llama_perf_context_print
llama_perf_context_reset
llama_perf_sampler
llama_perf_sampler_print
llama_perf_sampler_reset
llama_pooling_type
llama_print_system_info
llama_rm_adapter_lora
llama_sampler_accept
llama_sampler_apply
llama_sampler_chain_add
llama_sampler_chain_default_params
llama_sampler_chain_get
llama_sampler_chain_init
llama_sampler_chain_n
llama_sampler_chain_remove
llama_sampler_clone
llama_sampler_free
llama_sampler_get_seed
llama_sampler_init_dist
llama_sampler_init_dry
@details DRY sampler, designed by p-e-w, as described in https://github.com/oobabooga/text-generation-webui/pull/5677, porting the Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_grammar
llama_sampler_init_greedy
llama_sampler_init_infill
llama_sampler_init_logit_bias
llama_sampler_init_min_p
@details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
llama_sampler_init_mirostat
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
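As a hedged illustration of how these parameters map onto the sampler API, the sketch below builds a one-entry sampler chain with Mirostat 2.0 and draws a single token. It assumes llama_sampler_init_mirostat_v2(seed, tau, eta) and llama_sampler_sample(chain, ctx, idx) keep their usual llama.cpp signatures; tau = 5.0 and eta = 0.1 are simply commonly used values, not mandated defaults.

```rust
use llama_cpp_sys_2::{
    llama_context, llama_sampler_chain_add, llama_sampler_chain_default_params,
    llama_sampler_chain_init, llama_sampler_free, llama_sampler_init_mirostat_v2,
    llama_sampler_sample, llama_token,
};

// Sample one token with Mirostat 2.0 after a llama_decode() call.
unsafe fn sample_once_mirostat(ctx: *mut llama_context) -> llama_token {
    let chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    // seed, tau (target surprise), eta (learning rate used to update mu)
    llama_sampler_chain_add(chain, llama_sampler_init_mirostat_v2(1234, 5.0, 0.1));
    let token = llama_sampler_sample(chain, ctx, -1); // -1: logits of the last position
    llama_sampler_free(chain); // the chain owns every sampler added to it
    token
}
```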
llama_sampler_init_penalties
NOTE: Avoid using this on the full vocabulary, as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_softmax
llama_sampler_init_temp
@details Updates the logits: l_i' = l_i / t. When t <= 0.0f, the maximum logit is kept at its original value and the rest are set to -inf.
llama_sampler_init_temp_ext
@details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k
@details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_top_p
@details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical
@details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc
@details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
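For the filtering samplers listed above (top-k, top-p, temperature, plus a final distribution sampler), a typical composition looks like the hedged sketch below. The signatures llama_sampler_init_top_k(k), llama_sampler_init_top_p(p, min_keep), llama_sampler_init_temp(t) and llama_sampler_init_dist(seed) are assumed to match upstream llama.cpp, and the cutoff values are purely illustrative.

```rust
use llama_cpp_sys_2::{
    llama_sampler, llama_sampler_chain_add, llama_sampler_chain_default_params,
    llama_sampler_chain_init, llama_sampler_init_dist, llama_sampler_init_temp,
    llama_sampler_init_top_k, llama_sampler_init_top_p,
};

// Build a conventional top-k -> top-p -> temperature -> dist chain.
// Samplers run in the order they are added; the final dist sampler is the one
// that actually draws a token from the filtered distribution.
unsafe fn build_chain() -> *mut llama_sampler {
    let chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));
    llama_sampler_chain_add(chain, llama_sampler_init_top_p(0.95, 1)); // min_keep = 1
    llama_sampler_chain_add(chain, llama_sampler_init_temp(0.8));
    llama_sampler_chain_add(chain, llama_sampler_init_dist(42)); // RNG seed
    chain // free later with llama_sampler_free
}
```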
llama_sampler_name
llama_sampler_reset
llama_sampler_sample
llama_save_session_file
llama_set_abort_callback
llama_set_adapter_lora
llama_set_causal_attn
llama_set_embeddings
llama_set_n_threads
llama_set_state_data
llama_split_path
@details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix
@details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
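A small hedged example of the path helper, reproducing the documented case above. It assumes the C-style argument order (out buffer, maxlen, path_prefix, split_no, split_count) and ignores the return value for brevity; the split_path_example helper is hypothetical.

```rust
use std::ffi::{CStr, CString};
use std::os::raw::c_char;

use llama_cpp_sys_2::llama_split_path;

// Reproduce the documented example: prefix "/models/ggml-model-q4_0" with
// split 2 of 4 is formatted as ".../ggml-model-q4_0-00002-of-00004.gguf".
unsafe fn split_path_example() -> String {
    let prefix = CString::new("/models/ggml-model-q4_0").unwrap();
    let mut buf = vec![0u8; 512];
    llama_split_path(buf.as_mut_ptr() as *mut c_char, buf.len(), prefix.as_ptr(), 2, 4);
    CStr::from_ptr(buf.as_ptr() as *const c_char).to_string_lossy().into_owned()
}
```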
llama_state_get_data
llama_state_get_size
llama_state_load_file
llama_state_save_file
llama_state_seq_get_data
llama_state_seq_get_size
llama_state_seq_load_file
llama_state_seq_save_file
llama_state_seq_set_data
llama_state_set_data
llama_supports_gpu_offload
llama_supports_mlock
llama_supports_mmap
llama_supports_rpc
llama_synchronize
llama_time_us
llama_token_bos
llama_token_cls
llama_token_eos
llama_token_eot
llama_token_fim_mid
llama_token_fim_pad
llama_token_fim_pre
llama_token_fim_rep
llama_token_fim_sep
llama_token_fim_suf
llama_token_get_attr
llama_token_get_score
llama_token_get_text
llama_token_is_control
llama_token_is_eog
llama_token_nl
llama_token_pad
llama_token_sep
llama_token_to_piece
llama_tokenize
@details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max. @return Returns a negative number on failure: the magnitude is the number of tokens that would have been returned. @param add_special Allows adding BOS and EOS tokens if the model is configured to do so. @param parse_special Allows tokenizing special and/or control tokens which otherwise are not exposed and are treated as plain text. Does not insert a leading space.
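The negative-return convention above supports a two-pass tokenization pattern. The sketch below assumes the vocab-based signature (vocab, text, text_len, tokens, n_tokens_max, add_special, parse_special); check the generated binding, since older revisions took a llama_model pointer, and note that the tokenize helper is hypothetical.

```rust
use std::os::raw::c_char;

use llama_cpp_sys_2::{llama_token, llama_tokenize, llama_vocab};

// Hypothetical helper: tokenize `text`, using the negative return value to
// size the buffer for a second pass when the first guess is too small.
unsafe fn tokenize(vocab: *const llama_vocab, text: &str, add_special: bool) -> Vec<llama_token> {
    let mut tokens: Vec<llama_token> = vec![0; text.len() + 2]; // rough upper bound
    let mut n = llama_tokenize(
        vocab,
        text.as_ptr() as *const c_char,
        text.len() as i32,
        tokens.as_mut_ptr(),
        tokens.len() as i32,
        add_special,
        false, // parse_special: keep special-token text as plain text
    );
    if n < 0 {
        // The magnitude of a negative return is the number of tokens required.
        tokens.resize((-n) as usize, 0);
        n = llama_tokenize(
            vocab,
            text.as_ptr() as *const c_char,
            text.len() as i32,
            tokens.as_mut_ptr(),
            tokens.len() as i32,
            add_special,
            false,
        );
    }
    tokens.truncate(n as usize);
    tokens
}
```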
llama_vocab_bos
llama_vocab_cls
llama_vocab_eos
llama_vocab_eot
llama_vocab_fim_mid
llama_vocab_fim_pad
llama_vocab_fim_pre
llama_vocab_fim_rep
llama_vocab_fim_sep
llama_vocab_fim_suf
llama_vocab_get_add_bos
llama_vocab_get_add_eos
llama_vocab_get_attr
llama_vocab_get_score
llama_vocab_get_text
llama_vocab_is_control
llama_vocab_is_eog
llama_vocab_n_tokens
llama_vocab_nl
llama_vocab_pad
llama_vocab_sep
llama_vocab_type

Type Aliases

FILE
_IO_lock_t
__off64_t
__off_t
ggml_abort_callback
ggml_backend_buffer_t
ggml_backend_buffer_type_t
ggml_backend_buffer_usage
ggml_backend_dev_get_extra_bufts_t
ggml_backend_dev_t
ggml_backend_dev_type
ggml_backend_eval_callback
ggml_backend_event_t
ggml_backend_get_features_t
ggml_backend_graph_plan_t
ggml_backend_reg_t
ggml_backend_sched_eval_callback
ggml_backend_sched_t
ggml_backend_set_abort_callback_t
ggml_backend_set_n_threads_t
ggml_backend_split_buffer_type_t
ggml_backend_t
ggml_binary_op_f32_t
ggml_custom1_op_f32_t
ggml_custom1_op_t
ggml_custom2_op_f32_t
ggml_custom2_op_t
ggml_custom3_op_f32_t
ggml_custom3_op_t
ggml_fp16_t
ggml_from_float_t
ggml_ftype
ggml_gallocr_t
ggml_guid
ggml_guid_t
ggml_log_callback
ggml_log_level
ggml_numa_strategy
ggml_object_type
ggml_op
ggml_op_pool
ggml_prec
ggml_sched_priority
ggml_sort_order
ggml_status
ggml_tensor_flag
ggml_threadpool_t
ggml_to_float_t
ggml_type
ggml_unary_op
ggml_unary_op_f32_t
ggml_vec_dot_t
llama_attention_type
llama_ftype
llama_model_kv_override_type
llama_pooling_type
llama_pos
llama_progress_callback
llama_rope_scaling_type
llama_rope_type
llama_sampler_context_t
llama_seq_id
llama_split_mode
llama_token
llama_token_attr
llama_token_type
llama_vocab_pre_type
llama_vocab_type

Unions

llama_model_kv_override__bindgen_ty_1