Skip to content

Chinese LLM - with GB2312 encoding

using System.Text;
using LLama.Common;

namespace LLama.Examples.Examples;

// This example shows how to deal with Chinese input with gb2312 encoding.
public class ChatChineseGB2312
{
    private static string ConvertEncoding(string input, Encoding original, Encoding target)
    {
        byte[] bytes = original.GetBytes(input);
        var convertedBytes = Encoding.Convert(original, target, bytes);
        return target.GetString(convertedBytes);
    }

    public static async Task Run()
    {
        // Register provider for GB2312 encoding
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in windows. It's recommended" +
            " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
        Console.ForegroundColor = ConsoleColor.White;

        string modelPath = UserSettings.GetModelPath();

        var parameters = new ModelParams(modelPath)
        {
            ContextSize = 1024,
            Seed = 1337,
            GpuLayerCount = 5,
            Encoding = Encoding.UTF8
        };
        using var model = LLamaWeights.LoadFromFile(parameters);
        using var context = model.CreateContext(parameters);
        var executor = new InteractiveExecutor(context);

        ChatSession session;
        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Loading session from disk.");
            Console.ForegroundColor = ConsoleColor.White;

            session = new ChatSession(executor);
            session.LoadSession("Assets/chat-with-kunkun-chinese");
        }
        else
        {
            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();

            session = new ChatSession(executor, chatHistory);
        }

        session
            .WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));

        InferenceParams inferenceParams = new InferenceParams()
        {
            Temperature = 0.9f,
            AntiPrompts = new List<string> { "用户:" }
        };

        Console.ForegroundColor = ConsoleColor.Yellow;
        Console.WriteLine("The chat session has started.");

        // show the prompt
        Console.ForegroundColor = ConsoleColor.White;
        Console.Write("用户:");
        Console.ForegroundColor = ConsoleColor.Green;
        string userInput = Console.ReadLine() ?? "";

        while (userInput != "exit")
        {
            // Convert the encoding from gb2312 to utf8 for the language model
            // and later saving to the history json file.
            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);

            if (userInput == "save")
            {
                session.SaveSession("Assets/chat-with-kunkun-chinese");
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Session saved.");
            }
            else if (userInput == "regenerate")
            {
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine("Regenerating last response ...");

                await foreach (
                    var text
                    in session.RegenerateAssistantMessageAsync(
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;

                    // Convert the encoding from utf8 to gb2312 for the console output.
                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
                }
            }
            else
            {
                await foreach (
                    var text
                    in session.ChatAsync(
                        new ChatHistory.Message(AuthorRole.User, userInput),
                        inferenceParams))
                {
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.Write(text);
                }
            }

            Console.ForegroundColor = ConsoleColor.Green;
            userInput = Console.ReadLine() ?? "";

            Console.ForegroundColor = ConsoleColor.White;
        }
    }
}