llama.cpp: allow ctx_size=0 for auto context via --fit

This commit is contained in:
oobabooga 2026-03-04 19:33:20 -08:00
parent fbfcd59fe0
commit 69fa4dd0b1
6 changed files with 13 additions and 9 deletions

View file

@@ -54,7 +54,8 @@ def load_model(model_name, loader=None):
     shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})
     if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt') or loader == 'llama.cpp':
-        shared.settings['truncation_length'] = shared.args.ctx_size
+        if shared.args.ctx_size > 0:
+            shared.settings['truncation_length'] = shared.args.ctx_size
     shared.is_multimodal = False
     if loader.lower() in ('exllamav3', 'llama.cpp') and hasattr(model, 'is_multimodal'):