ModelParams
Namespace: LLama.Common
The parameters for initializing a LLama model.
public class ModelParams : LLama.Abstractions.IModelParams, System.IEquatable<LLama.Common.ModelParams>
Inheritance Object → ModelParams
Implements IModelParams, IEquatable<ModelParams>
Properties
ContextSize
Model context size (n_ctx)
public int ContextSize { get; set; }
Property Value
MainGpu
the GPU that is used for scratch and small tensors
public int MainGpu { get; set; }
Property Value
LowVram
if true, reduce VRAM usage at the cost of performance
public bool LowVram { get; set; }
Property Value
GpuLayerCount
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
public int GpuLayerCount { get; set; }
Property Value
Seed
Seed for the random number generator (seed)
public int Seed { get; set; }
Property Value
UseFp16Memory
Use f16 instead of f32 for memory kv (memory_f16)
public bool UseFp16Memory { get; set; }
Property Value
UseMemorymap
Use mmap for faster loads (use_mmap)
public bool UseMemorymap { get; set; }
Property Value
UseMemoryLock
Use mlock to keep model in memory (use_mlock)
public bool UseMemoryLock { get; set; }
Property Value
Perplexity
Compute perplexity over the prompt (perplexity)
public bool Perplexity { get; set; }
Property Value
ModelPath
Model path (model)
public string ModelPath { get; set; }
Property Value
ModelAlias
model alias
public string ModelAlias { get; set; }
Property Value
LoraAdapter
lora adapter path (lora_adapter)
public string LoraAdapter { get; set; }
Property Value
LoraBase
base model path for the lora adapter (lora_base)
public string LoraBase { get; set; }
Property Value
Threads
Number of threads (-1 = autodetect) (n_threads)
public int Threads { get; set; }
Property Value
BatchSize
batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
public int BatchSize { get; set; }
Property Value
ConvertEosToNewLine
Whether to convert eos to newline during the inference.
public bool ConvertEosToNewLine { get; set; }
Property Value
EmbeddingMode
Whether to use embedding mode. (embedding) Note that if this is set to true, the LLamaModel won't produce text responses anymore.
public bool EmbeddingMode { get; set; }
Property Value
TensorSplits
how split tensors should be distributed across GPUs
public Single[] TensorSplits { get; set; }
Property Value
RopeFrequencyBase
RoPE base frequency
public float RopeFrequencyBase { get; set; }
Property Value
RopeFrequencyScale
RoPE frequency scaling factor
public float RopeFrequencyScale { get; set; }
Property Value
MulMatQ
Use experimental mul_mat_q kernels
public bool MulMatQ { get; set; }
Property Value
Encoding
The encoding to use to convert text for the model
public Encoding Encoding { get; set; }
Property Value
Constructors
ModelParams(String)
public ModelParams(string modelPath)
Parameters
modelPath
String
The model path.
ModelParams(String, Int32, Int32, Int32, Boolean, Boolean, Boolean, Boolean, String, String, Int32, Int32, Boolean, Boolean, Single, Single, Boolean, String)
Caution
Use object initializer to set all optional parameters
public ModelParams(string modelPath, int contextSize, int gpuLayerCount, int seed, bool useFp16Memory, bool useMemorymap, bool useMemoryLock, bool perplexity, string loraAdapter, string loraBase, int threads, int batchSize, bool convertEosToNewLine, bool embeddingMode, float ropeFrequencyBase, float ropeFrequencyScale, bool mulMatQ, string encoding)
Parameters
modelPath
String
The model path.
contextSize
Int32
Model context size (n_ctx)
gpuLayerCount
Int32
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
seed
Int32
Seed for the random number generator (seed)
useFp16Memory
Boolean
Whether to use f16 instead of f32 for memory kv (memory_f16)
useMemorymap
Boolean
Whether to use mmap for faster loads (use_mmap)
useMemoryLock
Boolean
Whether to use mlock to keep model in memory (use_mlock)
perplexity
Boolean
Whether to compute perplexity over the prompt (perplexity)
loraAdapter
String
Lora adapter path (lora_adapter)
loraBase
String
Base model path for the lora adapter (lora_base)
threads
Int32
Number of threads (-1 = autodetect) (n_threads)
batchSize
Int32
Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
convertEosToNewLine
Boolean
Whether to convert eos to newline during the inference.
embeddingMode
Boolean
Whether to use embedding mode. (embedding) Note that if this is set to true, the LLamaModel won't produce text responses anymore.
ropeFrequencyBase
Single
RoPE base frequency.
ropeFrequencyScale
Single
RoPE frequency scaling factor
mulMatQ
Boolean
Use experimental mul_mat_q kernels
encoding
String
The encoding to use to convert text for the model
Methods
ToString()
public string ToString()
Returns
PrintMembers(StringBuilder)
protected bool PrintMembers(StringBuilder builder)
Parameters
builder
StringBuilder
Returns
GetHashCode()
public int GetHashCode()
Returns
Equals(Object)
public bool Equals(object obj)
Parameters
obj
Object
Returns
Equals(ModelParams)
public bool Equals(ModelParams other)
Parameters
other
ModelParams
Returns
<Clone>$()
public ModelParams <Clone>$()