mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-10 00:53:39 +00:00
Remove obsolete DeepSpeed inference code (2023 relic)
This commit is contained in:
parent
942ff8fcb4
commit
387cf9d8df
7 changed files with 2 additions and 165 deletions
|
|
@ -1,4 +1,3 @@
|
|||
import os
|
||||
import pprint
|
||||
from pathlib import Path
|
||||
|
||||
|
|
@ -6,11 +5,7 @@ import torch
|
|||
import torch.nn.functional as F
|
||||
import transformers
|
||||
from accelerate import infer_auto_device_map, init_empty_weights
|
||||
from accelerate.utils import (
|
||||
is_ccl_available,
|
||||
is_npu_available,
|
||||
is_xpu_available
|
||||
)
|
||||
from accelerate.utils import is_xpu_available
|
||||
from transformers import (
|
||||
AutoConfig,
|
||||
AutoModel,
|
||||
|
|
@ -28,31 +23,6 @@ from modules.torch_utils import get_device
|
|||
|
||||
transformers.logging.set_verbosity_error()
|
||||
|
||||
local_rank = None
|
||||
if shared.args.deepspeed:
|
||||
import deepspeed
|
||||
from transformers.integrations.deepspeed import (
|
||||
HfDeepSpeedConfig,
|
||||
is_deepspeed_zero3_enabled
|
||||
)
|
||||
|
||||
from modules.deepspeed_parameters import generate_ds_config
|
||||
|
||||
# Distributed setup
|
||||
local_rank = shared.args.local_rank if shared.args.local_rank is not None else int(os.getenv("LOCAL_RANK", "0"))
|
||||
world_size = int(os.getenv("WORLD_SIZE", "1"))
|
||||
if is_xpu_available() and is_ccl_available():
|
||||
torch.xpu.set_device(local_rank)
|
||||
deepspeed.init_distributed(backend="ccl")
|
||||
elif is_npu_available():
|
||||
torch.npu.set_device(local_rank)
|
||||
deepspeed.init_distributed(dist_backend="hccl")
|
||||
else:
|
||||
torch.cuda.set_device(local_rank)
|
||||
deepspeed.init_distributed()
|
||||
ds_config = generate_ds_config(shared.args.bf16, 1 * world_size, shared.args.nvme_offload_dir)
|
||||
dschf = HfDeepSpeedConfig(ds_config) # Keep this object alive for the Transformers integration
|
||||
|
||||
|
||||
class _StopEverythingStoppingCriteria(transformers.StoppingCriteria):
|
||||
def __init__(self):
|
||||
|
|
@ -163,7 +133,6 @@ def load_model_HF(model_name):
|
|||
shared.args.load_in_8bit,
|
||||
shared.args.load_in_4bit,
|
||||
shared.args.disk,
|
||||
shared.args.deepspeed,
|
||||
shared.args.cpu_memory is not None,
|
||||
shared.args.compress_pos_emb > 1,
|
||||
shared.args.alpha_value > 1,
|
||||
|
|
@ -183,25 +152,6 @@ def load_model_HF(model_name):
|
|||
if device:
|
||||
model = model.to(device)
|
||||
|
||||
# DeepSpeed ZeRO-3
|
||||
elif shared.args.deepspeed:
|
||||
model = LoaderClass.from_pretrained(
|
||||
path_to_model,
|
||||
torch_dtype=params['torch_dtype'],
|
||||
trust_remote_code=params.get('trust_remote_code')
|
||||
)
|
||||
|
||||
model = deepspeed.initialize(
|
||||
model=model,
|
||||
config_params=ds_config,
|
||||
model_parameters=None,
|
||||
optimizer=None,
|
||||
lr_scheduler=None
|
||||
)[0]
|
||||
|
||||
model.module.eval() # Inference
|
||||
logger.info(f'DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}')
|
||||
|
||||
# Load with quantization and/or offloading
|
||||
else:
|
||||
if not any((shared.args.cpu, torch.cuda.is_available(), is_xpu_available(), torch.backends.mps.is_available())):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue