IModelParams

Namespace: LLama.Abstractions

The parameters for initializing a LLama model.

public interface IModelParams
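
Before the property list, here is a minimal sketch of how these parameters are typically populated. It assumes the concrete `ModelParams` class from `LLama.Common`, which implements this interface in LLamaSharp; the model path and values are placeholders.

```csharp
using LLama.Common; // assumed location of the concrete ModelParams implementation

// A minimal sketch: populate the most common parameters with an object initializer.
var parameters = new ModelParams("path/to/model.gguf")
{
    ContextSize = 2048,   // n_ctx
    GpuLayerCount = 20,   // n_gpu_layers
    Seed = 1337,          // RNG seed
    UseMemorymap = true   // use_mmap
};
```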

Properties

ContextSize

Model context size (n_ctx)

public abstract int ContextSize { get; set; }

Property Value

Int32

MainGpu

The GPU that is used for scratch and small tensors (main_gpu)

public abstract int MainGpu { get; set; }

Property Value

Int32

LowVram

If true, reduce VRAM usage at the cost of performance (low_vram)

public abstract bool LowVram { get; set; }

Property Value

Boolean

GpuLayerCount

Number of layers to run in VRAM / GPU memory (n_gpu_layers)

public abstract int GpuLayerCount { get; set; }

Property Value

Int32
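
To illustrate how the GPU-related properties work together, the hypothetical helper below configures any IModelParams implementation for partial GPU offload; the values shown are only an example.

```csharp
using LLama.Abstractions;

// Hypothetical helper: offload part of the model to a single GPU.
static void ConfigureGpu(IModelParams p, int layers)
{
    p.GpuLayerCount = layers; // layers kept in VRAM (n_gpu_layers)
    p.MainGpu = 0;            // GPU used for scratch and small tensors
    p.LowVram = false;        // set true to trade performance for lower VRAM usage
}
```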

Seed

Seed for the random number generator (seed)

public abstract int Seed { get; set; }

Property Value

Int32

UseFp16Memory

Use f16 instead of f32 for the KV cache (memory_f16)

public abstract bool UseFp16Memory { get; set; }

Property Value

Boolean

UseMemorymap

Use mmap for faster loads (use_mmap)

public abstract bool UseMemorymap { get; set; }

Property Value

Boolean

UseMemoryLock

Use mlock to keep the model in memory (use_mlock)

public abstract bool UseMemoryLock { get; set; }

Property Value

Boolean
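
The hypothetical helper below groups the three memory-related flags (UseFp16Memory, UseMemorymap, UseMemoryLock); the chosen values are only an example.

```csharp
using LLama.Abstractions;

// Hypothetical helper: typical memory settings for a machine with enough RAM.
static void ConfigureMemory(IModelParams p)
{
    p.UseFp16Memory = true;  // f16 KV cache (memory_f16) to halve cache memory
    p.UseMemorymap = true;   // mmap the model file for faster loads (use_mmap)
    p.UseMemoryLock = false; // set true to mlock the model and prevent swapping (use_mlock)
}
```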

Perplexity

Compute perplexity over the prompt (perplexity)

public abstract bool Perplexity { get; set; }

Property Value

Boolean

ModelPath

Model path (model)

public abstract string ModelPath { get; set; }

Property Value

String

ModelAlias

Model alias

public abstract string ModelAlias { get; set; }

Property Value

String

LoraAdapter

Path to the LoRA adapter (lora_adapter)

public abstract string LoraAdapter { get; set; }

Property Value

String

LoraBase

Path to the base model used with the LoRA adapter (lora_base)

public abstract string LoraBase { get; set; }

Property Value

String
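
As a sketch of how the two LoRA properties are used together (paths are placeholders):

```csharp
using LLama.Abstractions;

// Hypothetical helper: attach a LoRA adapter to a base model.
static void UseLora(IModelParams p, string adapterPath, string basePath)
{
    p.LoraAdapter = adapterPath; // lora_adapter
    p.LoraBase = basePath;       // lora_base, typically an f16/f32 model the adapter is applied on top of
}
```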

Threads

Number of threads (-1 = autodetect) (n_threads)

public abstract int Threads { get; set; }

Property Value

Int32

BatchSize

Batch size for prompt processing (must be >= 32 to use BLAS) (n_batch)

public abstract int BatchSize { get; set; }

Property Value

Int32
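
The hypothetical helper below combines Threads and BatchSize, the two throughput-related knobs; the batch size shown is only an example.

```csharp
using LLama.Abstractions;

// Hypothetical helper: tune CPU-side throughput.
static void ConfigureThroughput(IModelParams p)
{
    p.Threads = -1;    // -1 = autodetect (n_threads)
    p.BatchSize = 512; // prompt-processing batch size; must be >= 32 for BLAS (n_batch)
}
```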

ConvertEosToNewLine

Whether to convert the EOS token to a newline during inference.

public abstract bool ConvertEosToNewLine { get; set; }

Property Value

Boolean

EmbeddingMode

Whether to use embedding mode (embedding). Note that if this is set to true, the LLamaModel won't produce a text response.

public abstract bool EmbeddingMode { get; set; }

Property Value

Boolean
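
A minimal sketch of switching into embedding mode; a model loaded this way only produces embeddings, not text.

```csharp
using LLama.Abstractions;

// Hypothetical helper: configure the parameters for embedding extraction only.
static void EnableEmbeddingMode(IModelParams p)
{
    p.EmbeddingMode = true; // embedding; the model will no longer generate text
}
```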

TensorSplits

How the tensors should be split across multiple GPUs (tensor_split)

public abstract Single[] TensorSplits { get; set; }

Property Value

Single[]
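
For example, on a two-GPU machine the split can be expressed as the fraction of the model assigned to each device; the values below are purely illustrative.

```csharp
using LLama.Abstractions;

// Hypothetical helper: split the model 60/40 across two GPUs.
static void SplitAcrossGpus(IModelParams p)
{
    p.TensorSplits = new float[] { 0.6f, 0.4f }; // one entry per GPU, in device order
    p.MainGpu = 0;                               // device used for scratch and small tensors
}
```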

RopeFrequencyBase

RoPE base frequency

public abstract float RopeFrequencyBase { get; set; }

Property Value

Single

RopeFrequencyScale

RoPE frequency scaling factor

public abstract float RopeFrequencyScale { get; set; }

Property Value

Single
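
RoPE scaling is commonly used to run a model beyond its trained context length; as a rough rule, the scale factor is the trained context divided by the desired context. The values below are illustrative only.

```csharp
using LLama.Abstractions;

// Hypothetical helper: linear RoPE scaling to roughly double the usable context.
static void ExtendContext(IModelParams p)
{
    p.ContextSize = 8192;         // desired n_ctx
    p.RopeFrequencyBase = 10000f; // default base frequency for most LLaMA models
    p.RopeFrequencyScale = 0.5f;  // trained_ctx / desired_ctx, e.g. 4096 / 8192
}
```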

MulMatQ

Use experimental mul_mat_q kernels

public abstract bool MulMatQ { get; set; }

Property Value

Boolean

Encoding

The encoding to use for models

public abstract Encoding Encoding { get; set; }

Property Value

Encoding
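
A minimal sketch of setting the encoding; UTF-8 is the usual choice for llama.cpp models.

```csharp
using System.Text;
using LLama.Abstractions;

// Hypothetical helper: use UTF-8 when converting between text and model tokens.
static void UseUtf8(IModelParams p)
{
    p.Encoding = Encoding.UTF8;
}
```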