llama.cpp: Simplify the prompt processing progress indicator

The tqdm-based progress bar was unreliable, so it is removed in favor of a simpler indicator.
oobabooga 2025-04-26 17:33:47 -07:00
parent 4ff91b6588
commit 234aba1c50


@@ -11,7 +11,6 @@ from pathlib import Path
 import llama_cpp_binaries
 import requests
-from tqdm import tqdm
 from modules import shared
 from modules.logging_colors import logger
@@ -391,31 +390,15 @@ class LlamaServer:
 def filter_stderr_with_progress(process_stderr):
-    progress_bar = None
-    progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
-    try:
-        for line in iter(process_stderr.readline, ''):
-            progress_match = progress_pattern.search(line)
-            if progress_match:
-                progress = float(progress_match.group(1))
-                # Create progress bar on first progress message
-                if progress_bar is None:
-                    progress_bar = tqdm(total=1.0, desc="Prompt Processing", leave=False)
-                progress_bar.update(progress - progress_bar.n)
-                # Clean up when complete
-                if progress >= 1.0:
-                    progress_bar.close()
-                    progress_bar = None
-            elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
-                sys.stderr.write(line)
-                sys.stderr.flush()
-    except (ValueError, IOError):
-        if progress_bar:
-            progress_bar.close()
-        pass
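
The added lines of this hunk are not reproduced above. As a rough idea of what a simplified, tqdm-free replacement can look like, here is a minimal sketch that overwrites a single terminal line with the percentage instead of maintaining a progress bar object. It is an illustration, not the commit's actual code: the function name filter_stderr_simple is hypothetical, and it assumes the same llama.cpp log format and the same noise-filtering rules as the removed implementation.

import re
import sys

# Matches llama.cpp server progress lines such as:
#   slot update_slots: id  0 | task 0 | prompt processing progress = 0.42
progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')


def filter_stderr_simple(process_stderr):  # hypothetical name
    """Forward non-noise stderr lines; render progress in place with '\\r'."""
    try:
        for line in iter(process_stderr.readline, ''):
            match = progress_pattern.search(line)
            if match:
                progress = float(match.group(1))
                # Overwrite the same terminal line instead of using tqdm
                sys.stderr.write(f'\rPrompt processing: {progress * 100:.0f}%')
                if progress >= 1.0:
                    sys.stderr.write('\n')  # terminate the line when done
                sys.stderr.flush()
            elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
                sys.stderr.write(line)
                sys.stderr.flush()
    except (ValueError, IOError):
        # Pipe closed (server exited); with no tqdm bar there is nothing to clean up
        pass

A filter like this would typically run on a background thread reading the server's stderr pipe, e.g. threading.Thread(target=filter_stderr_simple, args=(process.stderr,), daemon=True).start(), so it cannot block the main process.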