pub struct LlamaModel { /* private fields */ }
Expand description
A safe wrapper around llama_model
.
Implementations§
impl LlamaModel
pub fn n_ctx_train(&self) -> u32
Get the context size (in tokens) that the model was trained with.
§Panics
If the training context size does not fit into a u32. This should be impossible on most platforms, since llama.cpp returns a c_int (i32 on most platforms), which is almost certainly positive.
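For example, a minimal sketch (not from the crate's documentation) that clamps a requested context length to the training context size; the helper name is hypothetical:

use llama_cpp_2::model::LlamaModel;

// Clamp a requested context length to the size the model was trained with.
fn clamp_ctx(model: &LlamaModel, requested: u32) -> u32 {
    requested.min(model.n_ctx_train())
}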
pub fn tokens(
    &self,
    special: Special,
) -> impl Iterator<Item = (LlamaToken, Result<String, TokenToStringError>)> + '_
Get all tokens in the model.
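A minimal sketch of iterating the vocabulary; the Special::Plaintext variant and the llama_cpp_2::model::Special path are assumptions about this crate:

use llama_cpp_2::model::{LlamaModel, Special};

// Print the first ten vocabulary entries, reporting any that fail to decode.
fn dump_vocab_head(model: &LlamaModel) {
    for (token, text) in model.tokens(Special::Plaintext).take(10) {
        match text {
            Ok(s) => println!("{token:?} => {s:?}"),
            Err(e) => eprintln!("{token:?} could not be decoded: {e}"),
        }
    }
}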
pub fn token_bos(&self) -> LlamaToken
Get the beginning of stream token.
pub fn token_eos(&self) -> LlamaToken
Get the end of stream token.
pub fn token_nl(&self) -> LlamaToken
Get the newline token.
pub fn is_eog_token(&self, token: LlamaToken) -> bool
Check if a token represents the end of generation (end of turn, end of sequence, etc.)
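A small sketch tying these getters together (module path assumed as above; the helper is hypothetical):

use llama_cpp_2::model::LlamaModel;

// Report the model's special tokens and whether EOS counts as end of generation.
fn show_special_tokens(model: &LlamaModel) {
    let bos = model.token_bos();
    let eos = model.token_eos();
    let nl = model.token_nl();
    println!("bos: {bos:?}, eos: {eos:?}, nl: {nl:?}");
    println!("eos is end of generation: {}", model.is_eog_token(eos));
}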
pub fn decode_start_token(&self) -> LlamaToken
Get the decoder start token.
pub fn token_to_str(
    &self,
    token: LlamaToken,
    special: Special,
) -> Result<String, TokenToStringError>
Convert a single token to a string.
pub fn token_to_bytes(
    &self,
    token: LlamaToken,
    special: Special,
) -> Result<Vec<u8>, TokenToStringError>
Convert single token to bytes.
§Errors
See TokenToStringError for more information.
§Panics
If a TokenToStringError::InsufficientBufferSpace error returned by Self::token_to_bytes_with_size contains a positive nonzero value. This should never happen.
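A sketch that decodes a single token, falling back to raw bytes when the piece is not valid UTF-8; the llama_cpp_2::token::LlamaToken path and the Special::Tokenize variant are assumptions:

use llama_cpp_2::model::{LlamaModel, Special};
use llama_cpp_2::token::LlamaToken;

// Decode one token as text, or describe its raw bytes if decoding fails.
fn describe_token(model: &LlamaModel, token: LlamaToken) -> String {
    match model.token_to_str(token, Special::Tokenize) {
        Ok(s) => s,
        Err(_) => {
            let bytes = model.token_to_bytes(token, Special::Tokenize).unwrap_or_default();
            format!("<{} raw bytes>", bytes.len())
        }
    }
}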
pub fn tokens_to_str(
    &self,
    tokens: &[LlamaToken],
    special: Special,
) -> Result<String, TokenToStringError>
Convert a sequence of tokens to a string.
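A sketch of detokenizing a whole sequence at once (paths assumed as above):

use llama_cpp_2::model::{LlamaModel, Special};
use llama_cpp_2::token::LlamaToken;

// Convert a slice of tokens back into text in one call instead of token by token.
fn detokenize(model: &LlamaModel, tokens: &[LlamaToken]) -> String {
    model
        .tokens_to_str(tokens, Special::Tokenize)
        .unwrap_or_else(|e| format!("<detokenization failed: {e}>"))
}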
pub fn str_to_token(
    &self,
    str: &str,
    add_bos: AddBos,
) -> Result<Vec<LlamaToken>, StringToTokenError>
Convert a string to a vector of tokens.
§Errors
- if str contains a null byte.
§Panics
- if there are more than usize::MAX LlamaTokens in str.
use std::path::Path;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::model::{AddBos, LlamaModel};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let backend = LlamaBackend::init()?;
    let model = LlamaModel::load_from_file(&backend, Path::new("path/to/model"), &Default::default())?;
    // Tokenize the prompt, prepending the beginning-of-stream token.
    let tokens = model.str_to_token("Hello, World!", AddBos::Always)?;
    println!("{tokens:?}");
    Ok(())
}
pub fn token_attr(&self, LlamaToken: LlamaToken) -> LlamaTokenAttrs
Get the attributes of a token.
pub fn token_to_str_with_size(
    &self,
    token: LlamaToken,
    buffer_size: usize,
    special: Special,
) -> Result<String, TokenToStringError>
Convert a token to a string with a specified buffer size.
Generally you should use LlamaModel::token_to_str, as it is able to decode tokens of any length.
§Errors
- if the token type is unknown
- the resultant token is larger than buffer_size
- the string returned by llama-cpp is not valid UTF-8
§Panics
pub fn token_to_bytes_with_size(
    &self,
    token: LlamaToken,
    buffer_size: usize,
    special: Special,
    lstrip: Option<NonZeroU16>,
) -> Result<Vec<u8>, TokenToStringError>
Convert a token to bytes with a specified buffer size.
Generally you should use LlamaModel::token_to_bytes, as it is able to handle tokens of any length.
§Errors
- if the token type is unknown
- the resultant token is larger than buffer_size
§Panics
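A sketch using the signature above with a small fixed buffer; passing None for lstrip is assumed to leave the piece unmodified:

use llama_cpp_2::model::{LlamaModel, Special};
use llama_cpp_2::token::LlamaToken;

// Try to decode a token into at most 32 bytes; a longer piece surfaces as an error
// rather than being truncated silently.
fn token_bytes_small(model: &LlamaModel, token: LlamaToken) -> Option<Vec<u8>> {
    model.token_to_bytes_with_size(token, 32, Special::Tokenize, None).ok()
}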
pub fn n_vocab(&self) -> i32
The number of tokens in the model's vocabulary.
This returns a c_int for maximum compatibility. Most of the time it can be cast to an i32 without issue.
pub fn vocab_type(&self) -> VocabType
The type of vocab the model was trained on.
§Panics
If llama-cpp emits a vocab type that is not known to this library.
pub fn n_embd(&self) -> c_int
Get the embedding dimension of the model.
This returns a c_int for maximum compatibility. Most of the time it can be cast to an i32 without issue.
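A small sketch printing the model's basic shape using the getters above:

use llama_cpp_2::model::LlamaModel;

// Print vocabulary size, embedding dimension, and training context length.
fn print_model_shape(model: &LlamaModel) {
    println!(
        "vocab: {}, embedding dim: {}, trained context: {}",
        model.n_vocab(),
        model.n_embd(),
        model.n_ctx_train(),
    );
}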
pub fn get_chat_template(
    &self,
    buf_size: usize,
) -> Result<String, ChatTemplateError>
Get the chat template embedded in the model.
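A sketch of reading the embedded template; the 16 KiB buffer size is an arbitrary choice that merely needs to be large enough to hold the template:

use llama_cpp_2::model::LlamaModel;

// Fetch the chat template stored in the model, if any.
fn chat_template(model: &LlamaModel) -> Option<String> {
    model.get_chat_template(16 * 1024).ok()
}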
pub fn load_from_file(
    _: &LlamaBackend,
    path: impl AsRef<Path>,
    params: &LlamaModelParams,
) -> Result<Self, LlamaModelLoadError>
Load a model from a file.
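A loading sketch; the llama_cpp_2::model::params::LlamaModelParams path and its with_n_gpu_layers builder are assumptions about this crate, and the model path is a placeholder:

use std::path::Path;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::model::params::LlamaModelParams;

fn load() -> Result<LlamaModel, Box<dyn std::error::Error>> {
    let backend = LlamaBackend::init()?;
    // Offload some layers to the GPU if the crate was built with GPU support (assumed builder).
    let params = LlamaModelParams::default().with_n_gpu_layers(32);
    let model = LlamaModel::load_from_file(&backend, Path::new("path/to/model.gguf"), &params)?;
    Ok(model)
}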
pub fn lora_adapter_init(
    &self,
    path: impl AsRef<Path>,
) -> Result<LlamaLoraAdapter, LlamaLoraAdapterInitError>
Initialize a LoRA adapter from a file.
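A sketch; the adapter path is a placeholder and error handling is simplified:

use llama_cpp_2::model::LlamaModel;

// Load a LoRA adapter so it can later be attached to a context created from this model.
fn load_adapter(model: &LlamaModel) {
    match model.lora_adapter_init("path/to/adapter.gguf") {
        Ok(_adapter) => println!("adapter loaded"),
        Err(e) => eprintln!("failed to load adapter: {e}"),
    }
}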
pub fn new_context(
    &self,
    _: &LlamaBackend,
    params: LlamaContextParams,
) -> Result<LlamaContext<'_>, LlamaContextLoadError>
Create a new context from this model.
§Errors
There are many ways this can fail. See LlamaContextLoadError for more information.
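A sketch of creating a context with default parameters; the llama_cpp_2::context module paths are assumptions about this crate:

use llama_cpp_2::context::LlamaContext;
use llama_cpp_2::context::params::LlamaContextParams;
use llama_cpp_2::llama_backend::LlamaBackend;
use llama_cpp_2::model::LlamaModel;

// The context borrows the model, so it cannot outlive it.
fn make_context<'a>(
    backend: &LlamaBackend,
    model: &'a LlamaModel,
) -> Result<LlamaContext<'a>, Box<dyn std::error::Error>> {
    Ok(model.new_context(backend, LlamaContextParams::default())?)
}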
pub fn apply_chat_template(
    &self,
    tmpl: Option<String>,
    chat: Vec<LlamaChatMessage>,
    add_ass: bool,
) -> Result<String, ApplyChatTemplateError>
Apply the model's chat template to some messages. See https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
A tmpl of None means the default template provided by llama.cpp for the model is used.
§Errors
There are many ways this can fail. See ApplyChatTemplateError for more information.
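A sketch of formatting a short conversation; LlamaChatMessage::new(role, content) returning a Result is an assumption about this crate's API:

use llama_cpp_2::model::{LlamaChatMessage, LlamaModel};

// Render a system + user exchange with the model's built-in template.
fn format_prompt(model: &LlamaModel) -> Result<String, Box<dyn std::error::Error>> {
    let chat = vec![
        LlamaChatMessage::new("system".to_string(), "You are a helpful assistant.".to_string())?,
        LlamaChatMessage::new("user".to_string(), "Hello!".to_string())?,
    ];
    // None selects the template embedded in the model; add_ass = true appends the
    // assistant prefix so generation continues as the assistant.
    model.apply_chat_template(None, chat, true).map_err(Into::into)
}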