Delegate GPU layer allocation to llama.cpp's --fit
commit f4d787ab8d
parent 8a3d866401
5 changed files with 26 additions and 145 deletions
server.py (15 lines changed)
@@ -62,7 +62,6 @@ from modules.models import load_model, unload_model_if_idle
 from modules.models_settings import (
     get_fallback_settings,
     get_model_metadata,
-    update_gpu_layers_and_vram,
     update_model_parameters
 )
 from modules.shared import do_cmd_flags_warnings
@@ -315,20 +314,6 @@ if __name__ == "__main__":
         model_settings = get_model_metadata(shared.model_name)
         update_model_parameters(model_settings, initial=True)  # hijack the command-line arguments
 
-        # Auto-adjust GPU layers if not provided by user and it's a llama.cpp model
-        if 'gpu_layers' not in shared.provided_arguments and shared.args.loader == 'llama.cpp' and 'gpu_layers' in model_settings:
-            vram_usage, adjusted_layers = update_gpu_layers_and_vram(
-                shared.args.loader,
-                shared.model_name,
-                model_settings['gpu_layers'],
-                shared.args.ctx_size,
-                shared.args.cache_type,
-                auto_adjust=True,
-                for_ui=False
-            )
-
-            shared.args.gpu_layers = adjusted_layers
-
         # Load the model
         shared.model, shared.tokenizer = load_model(shared.model_name)
         if shared.args.lora:
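For context, the block removed above was the webui's own fitting heuristic: estimate how many transformer layers fit in available VRAM (given context size and cache type) and clamp shared.args.gpu_layers accordingly. Below is a minimal sketch of that idea; the function name, per-layer cost, and overhead figure are illustrative assumptions, not the actual update_gpu_layers_and_vram implementation, which this commit deletes in favor of llama.cpp's --fit.

# Hypothetical sketch: cap GPU-offloaded layers by an estimated VRAM budget.
def fit_gpu_layers(total_layers: int, free_vram_mib: float,
                   per_layer_mib: float, overhead_mib: float = 1024.0) -> int:
    """Return how many of total_layers fit in free_vram_mib, reserving
    overhead_mib for the KV cache and scratch buffers."""
    budget = free_vram_mib - overhead_mib
    if budget <= 0:
        return 0
    return max(0, min(total_layers, int(budget // per_layer_mib)))

# Example: a 32-layer model at ~210 MiB/layer with 8 GiB of free VRAM.
print(fit_gpu_layers(total_layers=32, free_vram_mib=8192, per_layer_mib=210))  # 32
print(fit_gpu_layers(total_layers=32, free_vram_mib=4096, per_layer_mib=210))  # 14

Delegating this decision to llama.cpp's --fit removes the duplicate estimate: the loader that actually allocates the buffers picks the layer split, so the webui no longer has to model VRAM usage itself.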