# Parser copied from https://github.com/vladmandic/automatic
import argparse

# Command-line interface for the text generation web UI. The parser uses
# conflict_handler='resolve' so later (e.g. extension-added) flags can override
# earlier ones, and a widened HelpFormatter so long help strings stay readable.
parser = argparse.ArgumentParser(
    description="Text generation web UI",
    conflict_handler='resolve',
    add_help=True,
    formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200),
)

# Basic settings
group = parser.add_argument_group('Basic settings')
group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.')
group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
group.add_argument('--model-dir', type=str, default='models/', help='Path to directory with all the models.')
group.add_argument('--lora-dir', type=str, default='loras/', help='Path to directory with all the loras.')
group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.')

# Model loader
group = parser.add_argument_group('Model loader')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.')

# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')
group.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
group.add_argument('--disk-cache-dir', type=str, default='cache', help='Directory to save the disk cache to. Defaults to "cache".')
group.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision (using bitsandbytes).')
group.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
group.add_argument('--no-cache', action='store_true', help='Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.')
group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
group.add_argument('--no_use_fast', action='store_true', help="Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.")
group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')

# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')

# Speculative decoding
group = parser.add_argument_group('Speculative decoding')
group.add_argument('--model-draft', type=str, default=None, help='Path to the draft model for speculative decoding.')
group.add_argument('--draft-max', type=int, default=4, help='Number of tokens to draft for speculative decoding.')
group.add_argument('--gpu-layers-draft', type=int, default=0, help='Number of layers to offload to the GPU for the draft model.')
group.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model.')
group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the prompt context for the draft model. If 0, uses the same as the main model.')

# ExLlamaV2
group = parser.add_argument_group('ExLlamaV2')
group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.')
group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.')

# HQQ
group = parser.add_argument_group('HQQ')
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')

# TensorRT-LLM
group = parser.add_argument_group('TensorRT-LLM')
group.add_argument('--cpp-runner', action='store_true', help="Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.")

# RoPE
group = parser.add_argument_group('RoPE')
group.add_argument('--alpha_value', type=float, default=1, help='Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.')
group.add_argument('--rope_freq_base', type=int, default=0, help='If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63).')
group.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale.")

# Gradio
group = parser.add_argument_group('Gradio')
group.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.')
group.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
group.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
group.add_argument('--share', action='store_true', help='Create a public URL. This is useful for running the web UI on Google Colab or similar.')
group.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.')
group.add_argument('--gradio-auth', type=str, default=None, help='Set Gradio authentication password in the format "username:password". Multiple credentials can also be supplied with "u1:p1,u2:p2,u3:p3".')
group.add_argument('--gradio-auth-path', type=str, default=None, help='Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.')
group.add_argument('--ssl-keyfile', type=str, default=None, help='The path to the SSL certificate key file.')
group.add_argument('--ssl-certfile', type=str, default=None, help='The path to the SSL certificate cert file.')

# API
group = parser.add_argument_group('API')
group.add_argument('--admin-key', type=str, default='', help='API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.')
# Materialize the parsed CLI flags; the rest of the file reads them via `args`.
args = parser.parse_args()

# Warn about missing authentication only when the UI is actually reachable
# beyond localhost (--share or --listen) and no Gradio credentials were given.
# Emitting this unconditionally would contradict the message itself whenever
# auth is configured or the server is local-only.
if (args.share or args.listen) and not (args.gradio_auth or args.gradio_auth_path):
    logger.warning("\nYou are potentially exposing the web UI to the entire internet without any access password.\nYou can create one with the \"--gradio-auth\" flag like this:\n\n--gradio-auth username:password\n\nMake sure to replace username:password with your own.")

if args.multi_user:
    logger.warning('\nThe multi-user mode is highly experimental and should not be shared publicly.')