ModelParams
Namespace: LLama.Common
public class ModelParams
Inheritance Object → ModelParams
Properties
ContextSize
Model context size (n_ctx)
public int ContextSize { get; set; }
Property Value
Int32
GpuLayerCount
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
public int GpuLayerCount { get; set; }
Property Value
Int32
Seed
Seed for the random number generator (seed)
public int Seed { get; set; }
Property Value
Int32
UseFp16Memory
Use f16 instead of f32 for the KV cache (memory_f16)
public bool UseFp16Memory { get; set; }
Property Value
Boolean
UseMemorymap
Use mmap for faster loads (use_mmap)
public bool UseMemorymap { get; set; }
Property Value
Boolean
UseMemoryLock
Use mlock to keep the model in memory (use_mlock)
public bool UseMemoryLock { get; set; }
Property Value
Boolean
Perplexity
Compute perplexity over the prompt (perplexity)
public bool Perplexity { get; set; }
Property Value
Boolean
ModelPath
Model path (model)
public string ModelPath { get; set; }
Property Value
String
LoraAdapter
LoRA adapter path (lora_adapter)
public string LoraAdapter { get; set; }
Property Value
String
LoraBase
Base model path for the LoRA adapter (lora_base)
public string LoraBase { get; set; }
Property Value
String
Threads
Number of threads (-1 = autodetect) (n_threads)
public int Threads { get; set; }
Property Value
Int32
BatchSize
Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
public int BatchSize { get; set; }
Property Value
Int32
ConvertEosToNewLine
Whether to convert the EOS token to a newline during inference.
public bool ConvertEosToNewLine { get; set; }
Property Value
Boolean
EmbeddingMode
Whether to use embedding mode (embedding). Note that if this is set to true, the LLamaModel will no longer produce text responses.
public bool EmbeddingMode { get; set; }
Property Value
Boolean
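All of these properties expose public setters, so a configuration can also be adjusted after construction. A minimal sketch, using the constructor documented below (the model path is illustrative, not a documented default):

using LLama.Common;

// Build a configuration with the documented constructor,
// then adjust it through the property setters listed above.
var parameters = new ModelParams("models/llama-7b.ggml.bin", 512, 20, 1337,
    true, true, false, false, "", "", -1, 512, false, false);
parameters.ContextSize = 2048; // enlarge the context window (n_ctx)
parameters.Threads = 8;        // pin to eight threads instead of autodetect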
Constructors
ModelParams(String, Int32, Int32, Int32, Boolean, Boolean, Boolean, Boolean, String, String, Int32, Int32, Boolean, Boolean)
public ModelParams(string modelPath, int contextSize, int gpuLayerCount, int seed, bool useFp16Memory, bool useMemorymap, bool useMemoryLock, bool perplexity, string loraAdapter, string loraBase, int threads, int batchSize, bool convertEosToNewLine, bool embeddingMode)
Parameters
modelPath
String
The model path.
contextSize
Int32
Model context size (n_ctx)
gpuLayerCount
Int32
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
seed
Int32
Seed for the random number generator (seed)
useFp16Memory
Boolean
Whether to use f16 instead of f32 for the KV cache (memory_f16)
useMemorymap
Boolean
Whether to use mmap for faster loads (use_mmap)
useMemoryLock
Boolean
Whether to use mlock to keep the model in memory (use_mlock)
perplexity
Boolean
Whether to compute perplexity over the prompt (perplexity)
loraAdapter
String
LoRA adapter path (lora_adapter)
loraBase
String
Base model path for the LoRA adapter (lora_base)
threads
Int32
Number of threads (-1 = autodetect) (n_threads)
batchSize
Int32
Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
convertEosToNewLine
Boolean
Whether to convert the EOS token to a newline during inference.
embeddingMode
Boolean
Whether to use embedding mode (embedding). Note that if this is set to true, the LLamaModel will no longer produce text responses.
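As a usage sketch, the constructor can also be called with named arguments so each value is explicit. The model path is illustrative, and the values shown are plausible settings rather than documented defaults:

using LLama.Common;

var parameters = new ModelParams(
    modelPath: "models/llama-7b.ggml.bin", // illustrative path
    contextSize: 1024,
    gpuLayerCount: 20,
    seed: 1337,
    useFp16Memory: true,
    useMemorymap: true,
    useMemoryLock: false,
    perplexity: false,
    loraAdapter: "",   // empty string = no LoRA adapter (assumption)
    loraBase: "",
    threads: -1,       // autodetect
    batchSize: 512,
    convertEosToNewLine: false,
    embeddingMode: false);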