mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-05 06:35:15 +00:00
Auto-adjust GPU layers on context size and cache type changes + many fixes
This commit is contained in:
parent
93e1850a2c
commit
4925c307cf
3 changed files with 109 additions and 38 deletions
23
server.py
23
server.py
|
|
@ -49,10 +49,9 @@ from modules.extensions import apply_extensions
|
|||
from modules.LoRA import add_lora_to_model
|
||||
from modules.models import load_model, unload_model_if_idle
|
||||
from modules.models_settings import (
|
||||
estimate_vram,
|
||||
get_fallback_settings,
|
||||
get_model_metadata,
|
||||
get_nvidia_free_vram,
|
||||
update_gpu_layers_and_vram,
|
||||
update_model_parameters
|
||||
)
|
||||
from modules.shared import do_cmd_flags_warnings
|
||||
|
|
@ -250,15 +249,19 @@ if __name__ == "__main__":
|
|||
model_settings = get_model_metadata(model_name)
|
||||
update_model_parameters(model_settings, initial=True) # hijack the command-line arguments
|
||||
|
||||
if 'gpu_layers' not in shared.provided_arguments:
|
||||
available_vram = get_nvidia_free_vram()
|
||||
if available_vram > 0:
|
||||
n_layers = model_settings['gpu_layers']
|
||||
tolerance = 906
|
||||
while n_layers > 0 and estimate_vram(model_name, n_layers, shared.args.ctx_size, shared.args.cache_type) > available_vram - tolerance:
|
||||
n_layers -= 1
|
||||
# Auto-adjust GPU layers if not provided by user and it's a llama.cpp model
|
||||
if 'gpu_layers' not in shared.provided_arguments and shared.args.loader == 'llama.cpp' and 'gpu_layers' in model_settings:
|
||||
vram_usage, adjusted_layers = update_gpu_layers_and_vram(
|
||||
shared.args.loader,
|
||||
model_name,
|
||||
model_settings['gpu_layers'],
|
||||
shared.args.ctx_size,
|
||||
shared.args.cache_type,
|
||||
auto_adjust=True,
|
||||
for_ui=False
|
||||
)
|
||||
|
||||
shared.args.gpu_layers = n_layers
|
||||
shared.args.gpu_layers = adjusted_layers
|
||||
|
||||
# Load the model
|
||||
shared.model, shared.tokenizer = load_model(model_name)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue