Crate llama_cpp_sys_4

Source

Expand description

See llama-cpp-4 for a documented and safe API.

Modules§

common
Manual wrapper for values in llama.cpp/common/common.h

Structs§

Constants§

Statics§

Functions§

common_token_to_piece^⚠
common_tokenize^⚠
common_tokenize1^⚠
ggml_abort^⚠
ggml_abs^⚠
ggml_abs_inplace^⚠
ggml_acc^⚠
ggml_acc_inplace^⚠
ggml_add^⚠
ggml_add1^⚠
ggml_add1_inplace^⚠
ggml_add_cast^⚠
ggml_add_inplace^⚠
ggml_add_rel_pos^⚠
ggml_add_rel_pos_inplace^⚠
ggml_arange^⚠
ggml_are_same_shape^⚠
ggml_are_same_stride^⚠
ggml_argmax^⚠
ggml_argsort^⚠
ggml_backend_alloc_buffer^⚠
ggml_backend_alloc_ctx_tensors^⚠
ggml_backend_alloc_ctx_tensors_from_buft^⚠
ggml_backend_buffer_clear^⚠
ggml_backend_buffer_free^⚠
ggml_backend_buffer_get_alignment^⚠
ggml_backend_buffer_get_alloc_size^⚠
ggml_backend_buffer_get_base^⚠
ggml_backend_buffer_get_max_size^⚠
ggml_backend_buffer_get_size^⚠
ggml_backend_buffer_get_type^⚠
ggml_backend_buffer_get_usage^⚠
ggml_backend_buffer_init_tensor^⚠
ggml_backend_buffer_is_host^⚠
ggml_backend_buffer_name^⚠
ggml_backend_buffer_reset^⚠
ggml_backend_buffer_set_usage^⚠
ggml_backend_buft_alloc_buffer^⚠
ggml_backend_buft_get_alignment^⚠
ggml_backend_buft_get_alloc_size^⚠
ggml_backend_buft_get_device^⚠
ggml_backend_buft_get_max_size^⚠
ggml_backend_buft_is_host^⚠
ggml_backend_buft_name^⚠
ggml_backend_compare_graph_backend^⚠
ggml_backend_cpu_buffer_from_ptr^⚠
ggml_backend_cpu_buffer_type^⚠
ggml_backend_cpu_init^⚠
ggml_backend_cpu_reg^⚠
ggml_backend_cpu_set_abort_callback^⚠
ggml_backend_cpu_set_n_threads^⚠
ggml_backend_cpu_set_threadpool^⚠
ggml_backend_dev_backend_reg^⚠
ggml_backend_dev_buffer_from_host_ptr^⚠
ggml_backend_dev_buffer_type^⚠
ggml_backend_dev_by_name^⚠
ggml_backend_dev_by_type^⚠
ggml_backend_dev_count^⚠
ggml_backend_dev_description^⚠
ggml_backend_dev_get^⚠
ggml_backend_dev_get_props^⚠
ggml_backend_dev_host_buffer_type^⚠
ggml_backend_dev_init^⚠
ggml_backend_dev_memory^⚠
ggml_backend_dev_name^⚠
ggml_backend_dev_offload_op^⚠
ggml_backend_dev_supports_buft^⚠
ggml_backend_dev_supports_op^⚠
ggml_backend_dev_type^⚠
ggml_backend_event_free^⚠
ggml_backend_event_new^⚠
ggml_backend_event_record^⚠
ggml_backend_event_synchronize^⚠
ggml_backend_event_wait^⚠
ggml_backend_free^⚠
ggml_backend_get_alignment^⚠
ggml_backend_get_default_buffer_type^⚠
ggml_backend_get_device^⚠
ggml_backend_get_max_size^⚠
ggml_backend_graph_compute^⚠
ggml_backend_graph_compute_async^⚠
ggml_backend_graph_copy^⚠
ggml_backend_graph_copy_free^⚠
ggml_backend_graph_plan_compute^⚠
ggml_backend_graph_plan_create^⚠
ggml_backend_graph_plan_free^⚠
ggml_backend_guid^⚠
ggml_backend_init_best^⚠
ggml_backend_init_by_name^⚠
ggml_backend_init_by_type^⚠
ggml_backend_is_cpu^⚠
ggml_backend_load^⚠
ggml_backend_load_all^⚠
ggml_backend_load_all_from_path^⚠
ggml_backend_name^⚠
ggml_backend_offload_op^⚠
ggml_backend_reg_by_name^⚠
ggml_backend_reg_count^⚠
ggml_backend_reg_dev_count^⚠
ggml_backend_reg_dev_get^⚠
ggml_backend_reg_get^⚠
ggml_backend_reg_get_proc_address^⚠
ggml_backend_reg_name^⚠
ggml_backend_sched_alloc_graph^⚠
ggml_backend_sched_free^⚠
ggml_backend_sched_get_backend^⚠
ggml_backend_sched_get_buffer_size^⚠
ggml_backend_sched_get_n_backends^⚠
ggml_backend_sched_get_n_copies^⚠
ggml_backend_sched_get_n_splits^⚠
ggml_backend_sched_get_tensor_backend^⚠
ggml_backend_sched_graph_compute^⚠
ggml_backend_sched_graph_compute_async^⚠
ggml_backend_sched_new^⚠
ggml_backend_sched_reserve^⚠
ggml_backend_sched_reset^⚠
ggml_backend_sched_set_eval_callback^⚠
ggml_backend_sched_set_tensor_backend^⚠
ggml_backend_sched_synchronize^⚠
ggml_backend_supports_buft^⚠
ggml_backend_supports_op^⚠
ggml_backend_synchronize^⚠
ggml_backend_tensor_alloc^⚠
ggml_backend_tensor_copy^⚠
ggml_backend_tensor_copy_async^⚠
ggml_backend_tensor_get^⚠
ggml_backend_tensor_get_async^⚠
ggml_backend_tensor_memset^⚠
ggml_backend_tensor_set^⚠
ggml_backend_tensor_set_async^⚠
ggml_backend_unload^⚠
ggml_backend_view_init^⚠
ggml_bf16_to_fp32^⚠
ggml_bf16_to_fp32_row^⚠
ggml_blck_size^⚠
ggml_build_backward_expand^⚠
ggml_build_forward_expand^⚠
ggml_can_repeat^⚠
ggml_cast^⚠
ggml_clamp^⚠
ggml_concat^⚠
ggml_cont^⚠
ggml_cont_1d^⚠
ggml_cont_2d^⚠
ggml_cont_3d^⚠
ggml_cont_4d^⚠
ggml_conv_1d^⚠
ggml_conv_1d_dw^⚠
ggml_conv_1d_dw_ph^⚠
ggml_conv_1d_ph^⚠
ggml_conv_2d^⚠
ggml_conv_2d_dw^⚠
ggml_conv_2d_s1_ph^⚠
ggml_conv_2d_sk_p0^⚠
ggml_conv_transpose_1d^⚠
ggml_conv_transpose_2d_p0^⚠
ggml_cos^⚠
ggml_cos_inplace^⚠
ggml_count_equal^⚠
ggml_cpu_get_sve_cnt^⚠
ggml_cpu_has_amx_int8^⚠
ggml_cpu_has_arm_fma^⚠
ggml_cpu_has_avx^⚠
ggml_cpu_has_avx2^⚠
ggml_cpu_has_avx512^⚠
ggml_cpu_has_avx512_bf16^⚠
ggml_cpu_has_avx512_vbmi^⚠
ggml_cpu_has_avx512_vnni^⚠
ggml_cpu_has_avx_vnni^⚠
ggml_cpu_has_dotprod^⚠
ggml_cpu_has_f16c^⚠
ggml_cpu_has_fma^⚠
ggml_cpu_has_fp16_va^⚠
ggml_cpu_has_llamafile^⚠
ggml_cpu_has_matmul_int8^⚠
ggml_cpu_has_neon^⚠
ggml_cpu_has_riscv_v^⚠
ggml_cpu_has_sse3^⚠
ggml_cpu_has_ssse3^⚠
ggml_cpu_has_sve^⚠
ggml_cpu_has_vsx^⚠
ggml_cpu_has_wasm_simd^⚠
ggml_cpu_init^⚠
ggml_cpy^⚠
ggml_cross_entropy_loss^⚠
ggml_cross_entropy_loss_back^⚠
ggml_cycles^⚠
ggml_cycles_per_ms^⚠
ggml_diag^⚠
ggml_diag_mask_inf^⚠
ggml_diag_mask_inf_inplace^⚠
ggml_diag_mask_zero^⚠
ggml_diag_mask_zero_inplace^⚠
ggml_div^⚠
ggml_div_inplace^⚠
ggml_dup^⚠
ggml_dup_inplace^⚠
ggml_dup_tensor^⚠
ggml_element_size^⚠
ggml_elu^⚠
ggml_elu_inplace^⚠
ggml_exp^⚠
ggml_exp_inplace^⚠
ggml_flash_attn_back^⚠
ggml_flash_attn_ext^⚠
ggml_flash_attn_ext_get_prec^⚠
ggml_flash_attn_ext_set_prec^⚠
ggml_fopen^⚠
ggml_format_name^⚠
ggml_fp16_to_fp32^⚠
ggml_fp16_to_fp32_row^⚠
ggml_fp32_to_bf16^⚠
ggml_fp32_to_bf16_row^⚠
ggml_fp32_to_bf16_row_ref^⚠
ggml_fp32_to_fp16^⚠
ggml_fp32_to_fp16_row^⚠
ggml_free^⚠
ggml_ftype_to_ggml_type^⚠
ggml_gallocr_alloc_graph^⚠
ggml_gallocr_free^⚠
ggml_gallocr_get_buffer_size^⚠
ggml_gallocr_new^⚠
ggml_gallocr_new_n^⚠
ggml_gallocr_reserve^⚠
ggml_gallocr_reserve_n^⚠
ggml_gelu^⚠
ggml_gelu_inplace^⚠
ggml_gelu_quick^⚠
ggml_gelu_quick_inplace^⚠
ggml_get_data^⚠
ggml_get_data_f32^⚠
ggml_get_f32_1d^⚠
ggml_get_f32_nd^⚠
ggml_get_first_tensor^⚠
ggml_get_i32_1d^⚠
ggml_get_i32_nd^⚠
ggml_get_max_tensor_size^⚠
ggml_get_mem_buffer^⚠
ggml_get_mem_size^⚠
ggml_get_name^⚠
ggml_get_next_tensor^⚠
ggml_get_no_alloc^⚠
ggml_get_rel_pos^⚠
ggml_get_rows^⚠
ggml_get_rows_back^⚠
ggml_get_tensor^⚠
ggml_get_type_traits^⚠
ggml_get_type_traits_cpu^⚠
ggml_get_unary_op^⚠
ggml_graph_add_node^⚠
ggml_graph_clear^⚠
ggml_graph_compute^⚠
ggml_graph_compute_with_ctx^⚠
ggml_graph_cpy^⚠
ggml_graph_dump_dot^⚠
ggml_graph_dup^⚠
ggml_graph_export^⚠
ggml_graph_get_grad^⚠
ggml_graph_get_grad_acc^⚠
ggml_graph_get_tensor^⚠
ggml_graph_import^⚠
ggml_graph_n_nodes^⚠
ggml_graph_node^⚠
ggml_graph_nodes^⚠
ggml_graph_overhead^⚠
ggml_graph_overhead_custom^⚠
ggml_graph_plan^⚠
ggml_graph_print^⚠
ggml_graph_reset^⚠
ggml_graph_size^⚠
ggml_group_norm^⚠
ggml_group_norm_inplace^⚠
ggml_guid_matches^⚠
ggml_hardsigmoid^⚠
ggml_hardswish^⚠
ggml_im2col^⚠
ggml_im2col_back^⚠
ggml_init^⚠
ggml_is_3d^⚠
ggml_is_contiguous^⚠
ggml_is_contiguous_0^⚠
ggml_is_contiguous_1^⚠
ggml_is_contiguous_2^⚠
ggml_is_empty^⚠
ggml_is_matrix^⚠
ggml_is_numa^⚠
ggml_is_permuted^⚠
ggml_is_quantized^⚠
ggml_is_scalar^⚠
ggml_is_transposed^⚠
ggml_is_vector^⚠
ggml_leaky_relu^⚠
ggml_log^⚠
ggml_log_inplace^⚠
ggml_log_set^⚠
ggml_map_binary_f32^⚠
ggml_map_binary_inplace_f32^⚠
ggml_map_custom1^⚠
ggml_map_custom2^⚠
ggml_map_custom3^⚠
ggml_map_custom1_f32^⚠
ggml_map_custom1_inplace^⚠
ggml_map_custom1_inplace_f32^⚠
ggml_map_custom2_f32^⚠
ggml_map_custom2_inplace^⚠
ggml_map_custom2_inplace_f32^⚠
ggml_map_custom3_f32^⚠
ggml_map_custom3_inplace^⚠
ggml_map_custom3_inplace_f32^⚠
ggml_map_unary_f32^⚠
ggml_map_unary_inplace_f32^⚠
ggml_mean^⚠
ggml_mul^⚠
ggml_mul_inplace^⚠
ggml_mul_mat^⚠
ggml_mul_mat_id^⚠
ggml_mul_mat_set_prec^⚠
ggml_n_dims^⚠
ggml_nbytes^⚠
ggml_nbytes_pad^⚠
ggml_neg^⚠
ggml_neg_inplace^⚠
ggml_nelements^⚠
ggml_new_buffer^⚠
ggml_new_f32^⚠
ggml_new_graph^⚠
ggml_new_graph_custom^⚠
ggml_new_i32^⚠
ggml_new_tensor^⚠
ggml_new_tensor_1d^⚠
ggml_new_tensor_2d^⚠
ggml_new_tensor_3d^⚠
ggml_new_tensor_4d^⚠
ggml_norm^⚠
ggml_norm_inplace^⚠
ggml_nrows^⚠
ggml_numa_init^⚠
ggml_op_desc^⚠
ggml_op_name^⚠
ggml_op_symbol^⚠
ggml_opt_step_adamw^⚠
ggml_out_prod^⚠
ggml_pad^⚠
ggml_pad_reflect_1d^⚠
ggml_permute^⚠
ggml_pool_1d^⚠
ggml_pool_2d^⚠
ggml_pool_2d_back^⚠
ggml_print_object^⚠
ggml_print_objects^⚠
ggml_quantize_chunk^⚠
ggml_quantize_free^⚠
ggml_quantize_init^⚠
ggml_quantize_requires_imatrix^⚠
ggml_relu^⚠
ggml_relu_inplace^⚠
ggml_repeat^⚠
ggml_repeat_back^⚠
ggml_reset^⚠
ggml_reshape^⚠
ggml_reshape_1d^⚠
ggml_reshape_2d^⚠
ggml_reshape_3d^⚠
ggml_reshape_4d^⚠
ggml_rms_norm^⚠
ggml_rms_norm_back^⚠
ggml_rms_norm_inplace^⚠
ggml_rope^⚠
ggml_rope_back^⚠
ggml_rope_custom^⚠
ggml_rope_custom_inplace^⚠
ggml_rope_ext^⚠
ggml_rope_ext_inplace^⚠
ggml_rope_inplace^⚠
ggml_rope_multi^⚠
ggml_rope_yarn_corr_dims^⚠
ggml_row_size^⚠
ggml_rwkv_wkv6^⚠
ggml_scale^⚠
ggml_scale_inplace^⚠
ggml_set^⚠
ggml_set_1d^⚠
ggml_set_1d_inplace^⚠
ggml_set_2d^⚠
ggml_set_2d_inplace^⚠
ggml_set_f32^⚠
ggml_set_f32_1d^⚠
ggml_set_f32_nd^⚠
ggml_set_i32^⚠
ggml_set_i32_1d^⚠
ggml_set_i32_nd^⚠
ggml_set_inplace^⚠
ggml_set_input^⚠
ggml_set_loss^⚠
ggml_set_name^⚠
ggml_set_no_alloc^⚠
ggml_set_output^⚠
ggml_set_param^⚠
ggml_set_zero^⚠
ggml_sgn^⚠
ggml_sgn_inplace^⚠
ggml_sigmoid^⚠
ggml_sigmoid_inplace^⚠
ggml_silu^⚠
ggml_silu_back^⚠
ggml_silu_inplace^⚠
ggml_sin^⚠
ggml_sin_inplace^⚠
ggml_soft_max^⚠
ggml_soft_max_back^⚠
ggml_soft_max_back_inplace^⚠
ggml_soft_max_ext^⚠
ggml_soft_max_inplace^⚠
ggml_sqr^⚠
ggml_sqr_inplace^⚠
ggml_sqrt^⚠
ggml_sqrt_inplace^⚠
ggml_ssm_conv^⚠
ggml_ssm_scan^⚠
ggml_status_to_string^⚠
ggml_step^⚠
ggml_step_inplace^⚠
ggml_sub^⚠
ggml_sub_inplace^⚠
ggml_sum^⚠
ggml_sum_rows^⚠
ggml_tallocr_alloc^⚠
ggml_tallocr_new^⚠
ggml_tanh^⚠
ggml_tanh_inplace^⚠
ggml_tensor_overhead^⚠
ggml_threadpool_free^⚠
ggml_threadpool_get_n_threads^⚠
ggml_threadpool_new^⚠
ggml_threadpool_params_default^⚠
ggml_threadpool_params_from_cpu_params^⚠
ggml_threadpool_params_init^⚠
ggml_threadpool_params_match^⚠
ggml_threadpool_pause^⚠
ggml_threadpool_resume^⚠
ggml_time_init^⚠
ggml_time_ms^⚠
ggml_time_us^⚠
ggml_timestep_embedding^⚠
ggml_top_k^⚠
ggml_transpose^⚠
ggml_type_name^⚠
ggml_type_size^⚠
ggml_type_sizef^⚠
ggml_unary^⚠
ggml_unary_inplace^⚠
ggml_unary_op_name^⚠
ggml_unravel_index^⚠
ggml_upscale^⚠
ggml_upscale_ext^⚠
ggml_used_mem^⚠
ggml_validate_row_data^⚠
ggml_view_1d^⚠
ggml_view_2d^⚠
ggml_view_3d^⚠
ggml_view_4d^⚠
ggml_view_tensor^⚠
ggml_win_part^⚠
ggml_win_unpart^⚠
llama_add_bos_token^⚠
llama_add_eos_token^⚠
llama_attach_threadpool^⚠
llama_backend_free^⚠
llama_backend_init^⚠
llama_batch_free^⚠
llama_batch_get_one^⚠
llama_batch_init^⚠
llama_chat_apply_template^⚠
Apply chat template. Inspired by hf apply_chat_template() on python. Both “model” and “custom_template” are optional, but at least one is required. “custom_template” has higher precedence than “model” NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. @param chat Pointer to a list of multiple llama_chat_message @param n_msg Number of llama_chat_message in this chat @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages) @param length The size of the allocated buffer @return The total number of bytes of the formatted prompt. If is it larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
llama_chat_builtin_templates^⚠
llama_context_default_params^⚠
llama_control_vector_apply^⚠
llama_copy_state_data^⚠
llama_decode^⚠
llama_detach_threadpool^⚠
llama_detokenize^⚠
@details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
llama_encode^⚠
llama_free^⚠
llama_free_model^⚠
llama_get_embeddings^⚠
llama_get_embeddings_ith^⚠
llama_get_embeddings_seq^⚠
llama_get_kv_cache_token_count^⚠
llama_get_kv_cache_used_cells^⚠
llama_get_logits^⚠
llama_get_logits_ith^⚠
llama_get_model^⚠
llama_get_state_size^⚠
llama_grammar_accept^⚠
llama_grammar_accept_impl^⚠
llama_grammar_apply_impl^⚠
llama_grammar_clone_impl^⚠
llama_grammar_free_impl^⚠
llama_grammar_get_rules^⚠
llama_grammar_get_stacks^⚠
llama_grammar_init_impl^⚠
llama_grammar_init_impl1^⚠
llama_grammar_parser_add_rule^⚠
llama_grammar_parser_c_rules^⚠
llama_grammar_parser_generate_symbol_id^⚠
llama_grammar_parser_get_symbol_id^⚠
llama_grammar_parser_parse^⚠
llama_grammar_parser_parse_alternates^⚠
llama_grammar_parser_parse_rule^⚠
llama_grammar_parser_parse_sequence^⚠
llama_grammar_parser_print^⚠
llama_grammar_reject_candidates_for_stack^⚠
llama_kv_cache_can_shift^⚠
llama_kv_cache_clear^⚠
llama_kv_cache_defrag^⚠
llama_kv_cache_seq_add^⚠
llama_kv_cache_seq_cp^⚠
llama_kv_cache_seq_div^⚠
llama_kv_cache_seq_keep^⚠
llama_kv_cache_seq_pos_max^⚠
llama_kv_cache_seq_rm^⚠
llama_kv_cache_update^⚠
llama_kv_cache_view_free^⚠
llama_kv_cache_view_init^⚠
llama_kv_cache_view_update^⚠
llama_load_model_from_file^⚠
llama_load_session_file^⚠
llama_log_set^⚠
llama_lora_adapter_clear^⚠
llama_lora_adapter_free^⚠
llama_lora_adapter_init^⚠
llama_lora_adapter_remove^⚠
llama_lora_adapter_set^⚠
llama_max_devices^⚠
llama_model_decoder_start_token^⚠
llama_model_default_params^⚠
llama_model_desc^⚠
llama_model_free^⚠
llama_model_has_decoder^⚠
llama_model_has_encoder^⚠
llama_model_is_recurrent^⚠
llama_model_load_from_file^⚠
llama_model_meta_count^⚠
llama_model_meta_key_by_index^⚠
llama_model_meta_val_str^⚠
llama_model_meta_val_str_by_index^⚠
llama_model_n_params^⚠
llama_model_quantize^⚠
llama_model_quantize_default_params^⚠
llama_model_size^⚠
llama_n_batch^⚠
llama_n_ctx^⚠
llama_n_ctx_train^⚠
llama_n_embd^⚠
llama_n_head^⚠
llama_n_layer^⚠
llama_n_seq_max^⚠
llama_n_threads^⚠
llama_n_threads_batch^⚠
llama_n_ubatch^⚠
llama_n_vocab^⚠
llama_new_context_with_model^⚠
llama_numa_init^⚠
llama_perf_context^⚠
llama_perf_context_print^⚠
llama_perf_context_reset^⚠
llama_perf_sampler^⚠
llama_perf_sampler_print^⚠
llama_perf_sampler_reset^⚠
llama_pooling_type^⚠
llama_print_system_info^⚠
llama_rope_freq_scale_train^⚠
llama_rope_type^⚠
llama_sampler_accept^⚠
llama_sampler_apply^⚠
llama_sampler_chain_add^⚠
llama_sampler_chain_default_params^⚠
llama_sampler_chain_get^⚠
llama_sampler_chain_init^⚠
llama_sampler_chain_n^⚠
llama_sampler_chain_remove^⚠
llama_sampler_clone^⚠
llama_sampler_free^⚠
llama_sampler_get_seed^⚠
llama_sampler_init_dist^⚠
llama_sampler_init_dry^⚠
@details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_dry_impl^⚠
llama_sampler_init_dry_testing^⚠
llama_sampler_init_grammar^⚠
llama_sampler_init_grammar_impl^⚠
llama_sampler_init_greedy^⚠
llama_sampler_init_infill^⚠
llama_sampler_init_infill_impl^⚠
llama_sampler_init_logit_bias^⚠
llama_sampler_init_min_p^⚠
@details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
llama_sampler_init_mirostat^⚠
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2^⚠
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_penalties^⚠
NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_softmax^⚠
llama_sampler_init_temp^⚠
#details Updates the logits l_i` = l_i/t. When t <= 0.0f, the maximum logit is kept at it’s original value, the rest are set to -inf
llama_sampler_init_temp_ext^⚠
@details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k^⚠
@details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_top_p^⚠
@details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical^⚠
@details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc^⚠
@details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name^⚠
llama_sampler_reset^⚠
llama_sampler_sample^⚠
llama_save_session_file^⚠
llama_set_abort_callback^⚠
llama_set_causal_attn^⚠
llama_set_embeddings^⚠
llama_set_n_threads^⚠
llama_set_state_data^⚠
llama_split_path^⚠
@details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix^⚠
@details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
llama_state_get_data^⚠
llama_state_get_size^⚠
llama_state_load_file^⚠
llama_state_save_file^⚠
llama_state_seq_get_data^⚠
llama_state_seq_get_size^⚠
llama_state_seq_load_file^⚠
llama_state_seq_save_file^⚠
llama_state_seq_set_data^⚠
llama_state_set_data^⚠
llama_supports_gpu_offload^⚠
llama_supports_mlock^⚠
llama_supports_mmap^⚠
llama_supports_rpc^⚠
llama_synchronize^⚠
llama_time_us^⚠
llama_token_bos^⚠
llama_token_cls^⚠
llama_token_eos^⚠
llama_token_eot^⚠
llama_token_fim_mid^⚠
llama_token_fim_pad^⚠
llama_token_fim_pre^⚠
llama_token_fim_rep^⚠
llama_token_fim_sep^⚠
llama_token_fim_suf^⚠
llama_token_get_attr^⚠
llama_token_get_score^⚠
llama_token_get_text^⚠
llama_token_is_control^⚠
llama_token_is_eog^⚠
llama_token_middle^⚠
llama_token_nl^⚠
llama_token_pad^⚠
llama_token_prefix^⚠
llama_token_sep^⚠
llama_token_suffix^⚠
llama_token_to_piece^⚠
llama_tokenize^⚠
@details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_type^⚠

Type Aliases§

Unions§

llama_model_kv_override__bindgen_ty_1

Crate llama_cpp_sys_4Copy item path

Modules§

Structs§

Constants§

Statics§

Functions§

Type Aliases§

Unions§

Crate llama_cpp_sys_4