Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-12-06 07:12:10 +01:00)
Make --trust-remote-code immutable from the UI/API
Commit: b5a6904c4a
Parent: efaf2aef3d
@@ -45,7 +45,6 @@ loaders_and_params = OrderedDict({
         'disk',
         'use_double_quant',
         'bf16',
-        'trust_remote_code',
         'no_use_fast',
     ],
     'ExLlamav3_HF': [
@@ -53,7 +52,6 @@ loaders_and_params = OrderedDict({
         'cache_type',
         'gpu_split',
         'cfg_cache',
-        'trust_remote_code',
         'no_use_fast',
         'enable_tp',
         'tp_backend',
@@ -82,7 +80,6 @@ loaders_and_params = OrderedDict({
         'no_xformers',
         'no_sdpa',
         'cfg_cache',
-        'trust_remote_code',
         'no_use_fast',
     ],
     'ExLlamav2': [
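The three hunks above all edit the same `loaders_and_params` table, which maps each loader to the option names its section of the model loader UI exposes. Removing 'trust_remote_code' from every per-loader list keeps the option from being rendered (and hence from ever being written back) in the first place. A minimal sketch of that pattern follows; `visible_params` is a hypothetical helper for illustration, not code from this repository:

from collections import OrderedDict

# Each loader lists the option names its UI section renders.
loaders_and_params = OrderedDict({
    'Transformers': ['bf16', 'no_use_fast'],        # 'trust_remote_code' no longer listed
    'ExLlamav3_HF': ['cache_type', 'no_use_fast'],
})

def visible_params(loader):
    # Only listed options get a widget; unlisted ones simply never appear.
    return loaders_and_params.get(loader, [])

print(visible_params('Transformers'))  # 'trust_remote_code' never shows up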
@@ -174,6 +174,7 @@ if cmd_flags_path.exists():

 args = parser.parse_args()
+original_args = copy.deepcopy(args)
 args_defaults = parser.parse_args([])

 # Create a mapping of all argument aliases to their canonical names

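This one-line addition is the core of the change: `args` keeps being mutated at runtime (the UI and API write user-selected settings back into it), while `original_args` is a deep copy frozen at startup that always reflects the actual command line. A self-contained sketch of the idea, using a throwaway parser rather than the project's real one:

import argparse
import copy

parser = argparse.ArgumentParser()
parser.add_argument('--trust-remote-code', action='store_true')

args = parser.parse_args([])            # launched WITHOUT the flag
original_args = copy.deepcopy(args)     # snapshot before anything can touch args

args.trust_remote_code = True           # simulated write from the UI/API
print(args.trust_remote_code)           # True  -> mutable runtime state
print(original_args.trust_remote_code)  # False -> what the user actually launched with

Any security-sensitive code path that reads `original_args` instead of `args` is immune to that kind of tampering.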
@@ -295,7 +296,13 @@ default_settings = copy.deepcopy(settings)
 def do_cmd_flags_warnings():
     # Security warnings
     if args.trust_remote_code:
-        logger.warning('trust_remote_code is enabled. This is dangerous.')
+        logger.warning(
+            "The `--trust-remote-code` flag is enabled.\n"
+            "This allows models to execute arbitrary code on your machine.\n\n"
+            "1. Only use with models from sources you fully trust.\n"
+            "2. Set an access password with `--gradio-auth`."
+        )

     if 'COLAB_GPU' not in os.environ and not args.nowebui:
         if args.share:
             logger.warning("The gradio \"share link\" feature uses a proprietary executable to create a reverse tunnel. Use it with care.")
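Assembled from the string fragments above, the new warning prints roughly as follows (modulo the logger's own timestamp/level prefix):

The `--trust-remote-code` flag is enabled.
This allows models to execute arbitrary code on your machine.

1. Only use with models from sources you fully trust.
2. Set an access password with `--gradio-auth`.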
@@ -123,7 +123,7 @@ def load_tokenizer(model_name, tokenizer_dir=None):

     tokenizer = AutoTokenizer.from_pretrained(
         path_to_model,
-        trust_remote_code=shared.args.trust_remote_code,
+        trust_remote_code=shared.original_args.trust_remote_code,
         use_fast=not shared.args.no_use_fast
     )
@@ -140,13 +140,13 @@ def load_model_HF(model_name):
         'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
     }

-    if shared.args.trust_remote_code:
+    if shared.original_args.trust_remote_code:
         params['trust_remote_code'] = True

     if shared.args.force_safetensors:
         params['force_safetensors'] = True

-    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
+    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)

     if 'chatglm' in model_name.lower():
         LoaderClass = AutoModel
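These two hunks apply the snapshot at the points that matter: every transformers call that can trigger execution of repository code (`AutoConfig.from_pretrained`, `AutoTokenizer.from_pretrained`, and the model load driven by `params`) now reads the frozen startup value, while cosmetic options such as `no_use_fast` stay on the mutable `args`. A condensed sketch of the pattern, with `shared` standing in for the project's shared-state module:

from transformers import AutoConfig, AutoTokenizer

def load_tokenizer_sketch(path_to_model, shared):
    trusted = shared.original_args.trust_remote_code  # frozen at launch
    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=trusted)
    tokenizer = AutoTokenizer.from_pretrained(
        path_to_model,
        trust_remote_code=trusted,             # security-sensitive: snapshot value
        use_fast=not shared.args.no_use_fast,  # cosmetic: runtime value is fine
    )
    return config, tokenizer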
@@ -160,7 +160,6 @@ def list_model_elements():
         'no_sdpa',
         'cfg_cache',
         'cpp_runner',
-        'trust_remote_code',
         'no_use_fast',
         'model_draft',
         'draft_max',
@@ -57,7 +57,6 @@ def create_ui():
         shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
         shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
         shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
-        shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
         shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')

         # Multimodal
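Previously the checkbox was only interactive when the flag was already set (`interactive=shared.args.trust_remote_code`); this change removes the widget outright, so no UI element exists whose value could flow back into `shared.args`. If the launch state still needed to be visible, a read-only indicator would be a safer middle ground; a hypothetical sketch using the gradio API, not part of this commit:

import gradio as gr

def trust_indicator(enabled):
    # Display-only: reflects the launch flag but can never set it.
    return gr.Checkbox(
        label="trust-remote-code",
        value=enabled,
        interactive=False,
        info="Set at launch with --trust-remote-code; cannot be changed here.",
    )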