Struct LlamaContextParams

Source
pub struct LlamaContextParams { /* private fields */ }
Expand description

A safe wrapper around llama_context_params.

Generally this should be created with Default::default() and then modified with with_* methods.

§Examples

use std::num::NonZeroU32;
use llama_cpp_2::context::params::LlamaContextParams;

let ctx_params = LlamaContextParams::default()
   .with_n_ctx(NonZeroU32::new(2048));

assert_eq!(ctx_params.n_ctx(), NonZeroU32::new(2048));

Implementations§

Source§

impl LlamaContextParams

Source

pub fn with_n_ctx(self, n_ctx: Option<NonZeroU32>) -> Self

Set the size of the context.

§Examples
use std::num::NonZeroU32;
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let params = params.with_n_ctx(NonZeroU32::new(2048));
assert_eq!(params.n_ctx(), NonZeroU32::new(2048));
Source

pub fn n_ctx(&self) -> Option<NonZeroU32>

Get the size of the context.

None if the context size is specified by the model and not the context.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.n_ctx(), std::num::NonZeroU32::new(512));
Source

pub fn with_n_batch(self, n_batch: u32) -> Self

Set the n_batch

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
    .with_n_batch(2048);
assert_eq!(params.n_batch(), 2048);
Source

pub fn n_batch(&self) -> u32

Get the n_batch

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_batch(), 2048);
Source

pub fn with_n_ubatch(self, n_ubatch: u32) -> Self

Set the n_ubatch

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
    .with_n_ubatch(512);
assert_eq!(params.n_ubatch(), 512);
Source

pub fn n_ubatch(&self) -> u32

Get the n_ubatch

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.n_ubatch(), 512);
Source

pub fn with_flash_attention(self, enabled: bool) -> Self

Set the flash_attention parameter

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
    .with_flash_attention(true);
assert_eq!(params.flash_attention(), true);
Source

pub fn flash_attention(&self) -> bool

Get the flash_attention parameter

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.flash_attention(), false);
Source

pub fn with_offload_kqv(self, enabled: bool) -> Self

Set the offload_kqv parameter to control offloading KV cache & KQV ops to GPU

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
    .with_offload_kqv(false);
assert_eq!(params.offload_kqv(), false);
Source

pub fn offload_kqv(&self) -> bool

Get the offload_kqv parameter

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
assert_eq!(params.offload_kqv(), true);
Source

pub fn with_rope_scaling_type(self, rope_scaling_type: RopeScalingType) -> Self

Set the type of rope scaling.

§Examples
use llama_cpp_2::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default()
    .with_rope_scaling_type(RopeScalingType::Linear);
assert_eq!(params.rope_scaling_type(), RopeScalingType::Linear);
Source

pub fn rope_scaling_type(&self) -> RopeScalingType

Get the type of rope scaling.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.rope_scaling_type(), llama_cpp_2::context::params::RopeScalingType::Unspecified);
Source

pub fn with_rope_freq_base(self, rope_freq_base: f32) -> Self

Set the rope frequency base.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
   .with_rope_freq_base(0.5);
assert_eq!(params.rope_freq_base(), 0.5);
Source

pub fn rope_freq_base(&self) -> f32

Get the rope frequency base.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_base(), 0.0);
Source

pub fn with_rope_freq_scale(self, rope_freq_scale: f32) -> Self

Set the rope frequency scale.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
  .with_rope_freq_scale(0.5);
assert_eq!(params.rope_freq_scale(), 0.5);
Source

pub fn rope_freq_scale(&self) -> f32

Get the rope frequency scale.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.rope_freq_scale(), 0.0);
Source

pub fn n_threads(&self) -> i32

Get the number of threads.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads(), 4);
Source

pub fn n_threads_batch(&self) -> i32

Get the number of threads allocated for batches.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.n_threads_batch(), 4);
Source

pub fn with_n_threads(self, n_threads: i32) -> Self

Set the number of threads.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
   .with_n_threads(8);
assert_eq!(params.n_threads(), 8);
Source

pub fn with_n_threads_batch(self, n_threads: i32) -> Self

Set the number of threads allocated for batches.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
   .with_n_threads_batch(8);
assert_eq!(params.n_threads_batch(), 8);
Source

pub fn embeddings(&self) -> bool

Check whether embeddings are enabled

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert!(!params.embeddings());
Source

pub fn with_embeddings(self, embedding: bool) -> Self

Enable or disable the use of embeddings.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default()
   .with_embeddings(true);
assert!(params.embeddings());
Source

pub fn with_cb_eval(self, cb_eval: ggml_backend_sched_eval_callback) -> Self

Set the evaluation callback.

§Examples
extern "C" fn cb_eval_fn(
    t: *mut llama_cpp_sys_2::ggml_tensor,
    ask: bool,
    user_data: *mut std::ffi::c_void,
) -> bool {
    false
}

use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default().with_cb_eval(Some(cb_eval_fn));
Source

pub fn with_cb_eval_user_data(self, cb_eval_user_data: *mut c_void) -> Self

Set the evaluation callback user data.

§Examples
use llama_cpp_2::context::params::LlamaContextParams;
let params = LlamaContextParams::default();
let user_data = std::ptr::null_mut();
let params = params.with_cb_eval_user_data(user_data);
Source

pub fn with_pooling_type(self, pooling_type: LlamaPoolingType) -> Self

Set the type of pooling.

§Examples
use llama_cpp_2::context::params::{LlamaContextParams, LlamaPoolingType};
let params = LlamaContextParams::default()
    .with_pooling_type(LlamaPoolingType::Last);
assert_eq!(params.pooling_type(), LlamaPoolingType::Last);
Source

pub fn pooling_type(&self) -> LlamaPoolingType

Get the type of pooling.

§Examples
let params = llama_cpp_2::context::params::LlamaContextParams::default();
assert_eq!(params.pooling_type(), llama_cpp_2::context::params::LlamaPoolingType::Unspecified);

Trait Implementations§

Source§

impl Clone for LlamaContextParams

Source§

fn clone(&self) -> LlamaContextParams

Returns a copy of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for LlamaContextParams

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for LlamaContextParams

Default parameters for LlamaContext (as defined in llama.cpp by llama_context_default_params).

use std::num::NonZeroU32;
use llama_cpp_2::context::params::{LlamaContextParams, RopeScalingType};
let params = LlamaContextParams::default();
assert_eq!(params.n_ctx(), NonZeroU32::new(512), "n_ctx should be 512");
assert_eq!(params.rope_scaling_type(), RopeScalingType::Unspecified);
Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl Send for LlamaContextParams

SAFETY: we do not currently allow setting or reading the pointers that cause this to not be automatically Send or Sync.

Source§

impl Sync for LlamaContextParams

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dst: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T> Instrument for T

Source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
Source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> WithSubscriber for T

Source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
Source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more