ModelParams
Namespace: LLama.Common
The parameters for initializing a LLama model.
public class ModelParams : LLama.Abstractions.IModelParams, System.IEquatable&lt;LLama.Common.ModelParams&gt;
Inheritance Object → ModelParams
Implements IModelParams, IEquatable<ModelParams>
Properties
ContextSize
Model context size (n_ctx)
public int ContextSize { get; set; }
Property Value
MainGpu
the GPU that is used for scratch and small tensors
public int MainGpu { get; set; }
Property Value
LowVram
if true, reduce VRAM usage at the cost of performance
public bool LowVram { get; set; }
Property Value
GpuLayerCount
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
public int GpuLayerCount { get; set; }
Property Value
Seed
Seed for the random number generator (seed)
public int Seed { get; set; }
Property Value
UseFp16Memory
Use f16 instead of f32 for memory kv (memory_f16)
public bool UseFp16Memory { get; set; }
Property Value
UseMemorymap
Use mmap for faster loads (use_mmap)
public bool UseMemorymap { get; set; }
Property Value
UseMemoryLock
Use mlock to keep model in memory (use_mlock)
public bool UseMemoryLock { get; set; }
Property Value
Perplexity
Compute perplexity over the prompt (perplexity)
public bool Perplexity { get; set; }
Property Value
ModelPath
Model path (model)
public string ModelPath { get; set; }
Property Value
ModelAlias
model alias
public string ModelAlias { get; set; }
Property Value
LoraAdapter
lora adapter path (lora_adapter)
public string LoraAdapter { get; set; }
Property Value
LoraBase
base model path for the lora adapter (lora_base)
public string LoraBase { get; set; }
Property Value
Threads
Number of threads (-1 = autodetect) (n_threads)
public int Threads { get; set; }
Property Value
BatchSize
batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
public int BatchSize { get; set; }
Property Value
ConvertEosToNewLine
Whether to convert eos to newline during the inference.
public bool ConvertEosToNewLine { get; set; }
Property Value
EmbeddingMode
Whether to use embedding mode. (embedding) Note that if this is set to true, The LLamaModel won't produce text response anymore.
public bool EmbeddingMode { get; set; }
Property Value
TensorSplits
how split tensors should be distributed across GPUs
public Single[] TensorSplits { get; set; }
Property Value
RopeFrequencyBase
RoPE base frequency
public float RopeFrequencyBase { get; set; }
Property Value
RopeFrequencyScale
RoPE frequency scaling factor
public float RopeFrequencyScale { get; set; }
Property Value
MulMatQ
Use experimental mul_mat_q kernels
public bool MulMatQ { get; set; }
Property Value
Encoding
The encoding to use to convert text for the model
public Encoding Encoding { get; set; }
Property Value
Constructors
ModelParams(String)
public ModelParams(string modelPath)
Parameters
modelPath String
The model path.
ModelParams(String, Int32, Int32, Int32, Boolean, Boolean, Boolean, Boolean, String, String, Int32, Int32, Boolean, Boolean, Single, Single, Boolean, String)
Caution
Use object initializer to set all optional parameters
public ModelParams(string modelPath, int contextSize, int gpuLayerCount, int seed, bool useFp16Memory, bool useMemorymap, bool useMemoryLock, bool perplexity, string loraAdapter, string loraBase, int threads, int batchSize, bool convertEosToNewLine, bool embeddingMode, float ropeFrequencyBase, float ropeFrequencyScale, bool mulMatQ, string encoding)
Parameters
modelPath String
The model path.
contextSize Int32
Model context size (n_ctx)
gpuLayerCount Int32
Number of layers to run in VRAM / GPU memory (n_gpu_layers)
seed Int32
Seed for the random number generator (seed)
useFp16Memory Boolean
Whether to use f16 instead of f32 for memory kv (memory_f16)
useMemorymap Boolean
Whether to use mmap for faster loads (use_mmap)
useMemoryLock Boolean
Whether to use mlock to keep model in memory (use_mlock)
perplexity Boolean
Whether to compute perplexity over the prompt (perplexity)
loraAdapter String
Lora adapter path (lora_adapter)
loraBase String
Base model path for the lora adapter (lora_base)
threads Int32
Number of threads (-1 = autodetect) (n_threads)
batchSize Int32
Batch size for prompt processing (must be >=32 to use BLAS) (n_batch)
convertEosToNewLine Boolean
Whether to convert eos to newline during the inference.
embeddingMode Boolean
Whether to use embedding mode. (embedding) Note that if this is set to true, The LLamaModel won't produce text response anymore.
ropeFrequencyBase Single
RoPE base frequency.
ropeFrequencyScale Single
RoPE frequency scaling factor
mulMatQ Boolean
Use experimental mul_mat_q kernels
encoding String
The encoding to use to convert text for the model
Methods
ToString()
public string ToString()
Returns
PrintMembers(StringBuilder)
protected bool PrintMembers(StringBuilder builder)
Parameters
builder StringBuilder
Returns
GetHashCode()
public int GetHashCode()
Returns
Equals(Object)
public bool Equals(object obj)
Parameters
obj Object
Returns
Equals(ModelParams)
public bool Equals(ModelParams other)
Parameters
other ModelParams
Returns
<Clone>$()
public ModelParams <Clone>$()