Add --chat-template-file flag to override the default instruction template for API requests

Matches llama.cpp's flag name. Supports .jinja, .jinja2, and .yaml files.
Priority: per-request params > --chat-template-file > model's built-in template.
This commit is contained in:
oobabooga 2026-03-06 13:33:24 -03:00
parent 3531069824
commit f5acf55207
3 changed files with 26 additions and 3 deletions

View file

@ -1,9 +1,12 @@
import copy
import functools
import json
import time
from collections import deque
from pathlib import Path
import tiktoken
import yaml
from pydantic import ValidationError
from extensions.openai.errors import InvalidRequestError
@ -22,6 +25,18 @@ from modules.presets import load_preset_memoized
from modules.text_generation import decode, encode, generate_reply
@functools.cache
def load_chat_template_file(filepath):
    """Load a chat template from a file path (.jinja, .jinja2, or .yaml/.yml).

    For Jinja files the raw file text is the template. For YAML files the
    template is read from the 'instruction_template' key of the top-level
    mapping. Results are memoized per path via functools.cache, so edits to
    the file after the first load are not picked up until restart.

    Returns the template string, or '' when a YAML file has no usable
    'instruction_template' entry.
    """
    filepath = Path(filepath)
    ext = filepath.suffix.lower()
    text = filepath.read_text(encoding='utf-8')
    if ext in ('.yaml', '.yml'):
        data = yaml.safe_load(text)
        # safe_load returns None for an empty/comment-only file and a
        # scalar or list for non-mapping YAML; calling .get on those would
        # raise AttributeError. Treat anything but a mapping as "no template".
        if isinstance(data, dict):
            return data.get('instruction_template', '')
        return ''
    return text
def convert_logprobs_to_tiktoken(model, logprobs):
# more problems than it's worth.
# try:
@ -234,6 +249,8 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
instruction_template = body['instruction_template']
instruction_template = "Alpaca" if instruction_template == "None" else instruction_template
instruction_template_str = load_instruction_template_memoized(instruction_template)
elif shared.args.chat_template_file:
instruction_template_str = load_chat_template_file(shared.args.chat_template_file)
else:
instruction_template_str = shared.settings['instruction_template_str']