mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-06 07:03:37 +00:00
Add ik_llama.cpp support via --ik flag
This commit is contained in:
parent
e154140021
commit
4cbea02ed4
2 changed files with 38 additions and 0 deletions
|
|
@ -470,6 +470,10 @@ class LlamaServer:
|
|||
else:
|
||||
cmd.append(f"--{flag_item}")
|
||||
|
||||
# Patch flags for ik_llama.cpp compatibility
|
||||
if shared.args.ik:
|
||||
cmd = _patch_cmd_for_ik(cmd)
|
||||
|
||||
env = os.environ.copy()
|
||||
if os.name == 'posix':
|
||||
current_path = env.get('LD_LIBRARY_PATH', '')
|
||||
|
|
@ -607,3 +611,36 @@ def filter_stderr_with_progress(process_stderr):
|
|||
process_stderr.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _patch_cmd_for_ik(cmd):
|
||||
"""
|
||||
Rewrite upstream llama.cpp flags to ik_llama.cpp equivalents:
|
||||
--no-webui → --webui none
|
||||
--fit off → (removed)
|
||||
--fit on / --fit-ctx → --fit (bare flag)
|
||||
--fit-target → --fit-margin
|
||||
"""
|
||||
patched = []
|
||||
i = 0
|
||||
while i < len(cmd):
|
||||
arg = cmd[i]
|
||||
|
||||
if arg == "--no-webui":
|
||||
patched += ["--webui", "none"]
|
||||
elif arg == "--fit" and i + 1 < len(cmd) and cmd[i + 1] in ("on", "off"):
|
||||
val = cmd[i + 1]
|
||||
i += 1
|
||||
if val == "on":
|
||||
patched.append("--fit")
|
||||
# "off" → drop entirely
|
||||
elif arg == "--fit-ctx":
|
||||
i += 1 # skip the value
|
||||
elif arg == "--fit-target":
|
||||
patched.append("--fit-margin")
|
||||
else:
|
||||
patched.append(arg)
|
||||
|
||||
i += 1
|
||||
|
||||
return patched
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ group.add_argument('--numa', action='store_true', help='Activate NUMA task alloc
|
|||
group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
||||
group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
|
||||
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
|
||||
group.add_argument('--ik', action='store_true', help='Use ik_llama.cpp instead of upstream llama.cpp. To install: build ik_llama.cpp, then delete all files inside <venv>/lib/pythonX.Y/site-packages/llama_cpp_binaries/bin/ and copy or symlink the ik_llama.cpp build outputs into that folder.')
|
||||
|
||||
# Transformers/Accelerate
|
||||
group = parser.add_argument_group('Transformers/Accelerate')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue