Fix CUDA error on MPS backend during API request (#6572)

---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
This commit is contained in:
Petr Korolev 2025-01-02 06:06:11 +03:00 committed by GitHub
parent 979e1f1bd6
commit 13c033c745
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 63 additions and 65 deletions

View file

@@ -1,11 +1,8 @@
from pathlib import Path
import torch
from transformers import is_torch_xpu_available
import modules.shared as shared
from modules.logging_colors import logger
from modules.models import reload_model
from modules.models import get_device, reload_model
def add_lora_to_model(lora_names):
@@ -132,14 +129,9 @@ def add_lora_transformers(lora_names):
if not shared.args.load_in_8bit and not shared.args.cpu:
shared.model.half()
if not hasattr(shared.model, "hf_device_map"):
if torch.backends.mps.is_available():
device = torch.device('mps')
device = get_device()
if device:
shared.model = shared.model.to(device)
elif is_torch_xpu_available():
device = torch.device("xpu:0")
shared.model = shared.model.to(device)
else:
shared.model = shared.model.cuda()
shared.lora_names = lora_names