LLamaModel

Namespace: LLama.OldVersion

public class LLamaModel : IChatModel, System.IDisposable

Inheritance Object → LLamaModel
Implements IChatModel, IDisposable

Properties

Name

public string Name { get; set; }

Property Value

String

Verbose

public bool Verbose { get; set; }

Property Value

Boolean

NativeHandle

public SafeLLamaContextHandle NativeHandle { get; }

Property Value

SafeLLamaContextHandle

Constructors

LLamaModel(String, String, Boolean, Int32, Int32, Int32, Int32, Int32, Int32, Int32, Dictionary<Int32, Single>, Int32, Single, Single, Single, Single, Single, Int32, Single, Single, Int32, Single, Single, String, String, String, String, List<String>, String, String, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, Boolean, String)

Please refer to LLamaParams for the meaning of each argument. Be sure to set n_gpu_layers; otherwise, 20 layers will be loaded to the GPU by default.

public LLamaModel(string model_path, string model_name, bool verbose, int seed, int n_threads, int n_predict, int n_ctx, int n_batch, int n_keep, int n_gpu_layers, Dictionary<int, float> logit_bias, int top_k, float top_p, float tfs_z, float typical_p, float temp, float repeat_penalty, int repeat_last_n, float frequency_penalty, float presence_penalty, int mirostat, float mirostat_tau, float mirostat_eta, string prompt, string path_session, string input_prefix, string input_suffix, List<string> antiprompt, string lora_adapter, string lora_base, bool memory_f16, bool random_prompt, bool use_color, bool interactive, bool embedding, bool interactive_first, bool prompt_cache_all, bool instruct, bool penalize_nl, bool perplexity, bool use_mmap, bool use_mlock, bool mem_test, bool verbose_prompt, string encoding)

Parameters

model_path String
The model file path.

model_name String
The model name.

verbose Boolean
Whether to print details when running the model.

seed Int32

n_threads Int32

n_predict Int32

n_ctx Int32

n_batch Int32

n_keep Int32

n_gpu_layers Int32

logit_bias Dictionary<Int32, Single>

top_k Int32

top_p Single

tfs_z Single

typical_p Single

temp Single

repeat_penalty Single

repeat_last_n Int32

frequency_penalty Single

presence_penalty Single

mirostat Int32

mirostat_tau Single

mirostat_eta Single

prompt String

path_session String

input_prefix String

input_suffix String

antiprompt List<String>

lora_adapter String

lora_base String

memory_f16 Boolean

random_prompt Boolean

use_color Boolean

interactive Boolean

embedding Boolean

interactive_first Boolean

prompt_cache_all Boolean

instruct Boolean

penalize_nl Boolean

perplexity Boolean

use_mmap Boolean

use_mlock Boolean

mem_test Boolean

verbose_prompt Boolean

encoding String

LLamaModel(LLamaParams, String, Boolean, String)

Please refer to LLamaParams for the meaning of each argument. Be sure to set n_gpu_layers; otherwise, 20 layers will be loaded to the GPU by default.

public LLamaModel(LLamaParams params, string name, bool verbose, string encoding)

Parameters

params LLamaParams
The LLamaModel params

name String
Model name

verbose Boolean
Whether to output detailed information.

encoding String

Exceptions

RuntimeError

Methods

WithPrompt(String, String)

Apply a prompt to the model.

public LLamaModel WithPrompt(string prompt, string encoding)

Parameters

prompt String

encoding String

Returns

LLamaModel

Exceptions

ArgumentException

WithPromptFile(String)

Apply the prompt file to the model.

public LLamaModel WithPromptFile(string promptFileName)

Parameters

promptFileName String

Returns

LLamaModel

InitChatPrompt(String, String)

public void InitChatPrompt(string prompt, string encoding)

Parameters

prompt String

encoding String

InitChatAntiprompt(String[])

public void InitChatAntiprompt(String[] antiprompt)

Parameters

antiprompt String[]

Chat(String, String, String)

Chat with the LLaMa model in interactive mode.

public IEnumerable<string> Chat(string text, string prompt, string encoding)

Parameters

text String

prompt String

encoding String

Returns

IEnumerable<String>

Exceptions

ArgumentException

SaveState(String)

Save the state to the specified path.

public void SaveState(string filename)

Parameters

filename String

LoadState(String, Boolean)

Load the state from the specified path.

public void LoadState(string filename, bool clearPreviousEmbed)

Parameters

filename String

clearPreviousEmbed Boolean
Whether to clear previous footprints of this model.

Exceptions

RuntimeError

Tokenize(String, String)

Tokenize a string.

public List<int> Tokenize(string text, string encoding)

Parameters

text String
The UTF-8 encoded string to tokenize.

encoding String

Returns

List<Int32>
A list of tokens.

Exceptions

RuntimeError
If the tokenization failed.

DeTokenize(IEnumerable<Int32>)

Detokenize a list of tokens.

public string DeTokenize(IEnumerable<int> tokens)

Parameters

tokens IEnumerable<Int32>
The list of tokens to detokenize.

Returns

String
The detokenized string.

Call(String, String)

Call the model to run inference.

public IEnumerable<string> Call(string text, string encoding)

Parameters

text String

encoding String

Returns

IEnumerable<String>

Exceptions

RuntimeError

Dispose()

public void Dispose()