Show file sizes on "Get file list"

oobabooga 2025-06-18 21:35:07 -07:00
parent 9bd114b5d7
commit 9c6913ad61
2 changed files with 50 additions and 7 deletions


@@ -82,6 +82,7 @@ class ModelDownloader:
         links = []
         sha256 = []
+        file_sizes = []
         classifications = []
         has_pytorch = False
         has_pt = False
@@ -118,8 +119,14 @@ class ModelDownloader:
                 is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) or is_tiktoken
                 is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
                 if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_tokenizer, is_text)):
+                    file_size = 0
                     if 'lfs' in dict[i]:
                         sha256.append([fname, dict[i]['lfs']['oid']])
+                        file_size = dict[i]['lfs'].get('size', 0)
+                    elif 'size' in dict[i]:
+                        file_size = dict[i]['size']
+
+                    file_sizes.append(file_size)
 
                     if is_text:
                         links.append(f"{base}/{model}/resolve/{branch}/{fname}")
@@ -152,6 +159,7 @@ class ModelDownloader:
             for i in range(len(classifications) - 1, -1, -1):
                 if classifications[i] in ['pytorch', 'pt', 'gguf']:
                     links.pop(i)
+                    file_sizes.pop(i)
 
         # For GGUF, try to download only the Q4_K_M if no specific file is specified.
         if has_gguf and specific_file is None:
@@ -164,13 +172,15 @@ class ModelDownloader:
                 for i in range(len(classifications) - 1, -1, -1):
                     if 'q4_k_m' not in links[i].lower():
                         links.pop(i)
+                        file_sizes.pop(i)
             else:
                 for i in range(len(classifications) - 1, -1, -1):
                     if links[i].lower().endswith('.gguf'):
                         links.pop(i)
+                        file_sizes.pop(i)
 
         is_llamacpp = has_gguf and specific_file is not None
-        return links, sha256, is_lora, is_llamacpp
+        return links, sha256, is_lora, is_llamacpp, file_sizes
 
     def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir=None):
         if model_dir:
@@ -396,7 +406,7 @@ if __name__ == '__main__':
         sys.exit()
 
     # Get the download links from Hugging Face
-    links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(
+    links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(
        model, branch, text_only=args.text_only, specific_file=specific_file, exclude_pattern=exclude_pattern
    )

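For context, here is a minimal sketch of the size lookup added above, run against a made-up entry shaped like the items the download loop reads from the Hugging Face tree API (the path, oid, and size values are illustrative, not taken from the commit):

```python
# Illustrative only: a made-up entry shaped like the items this loop reads.
sample_entry = {
    "path": "model-00001-of-00002.safetensors",
    "lfs": {"oid": "abc123", "size": 9976570520},
}

# Mirrors the added logic: prefer the LFS object size, fall back to a plain
# top-level "size" field, and default to 0 when neither is present.
file_size = 0
if 'lfs' in sample_entry:
    file_size = sample_entry['lfs'].get('size', 0)
elif 'size' in sample_entry:
    file_size = sample_entry['size']

print(file_size)  # 9976570520
```

Because a size is appended for every kept link in the same loop iteration, the paired links.pop(i) / file_sizes.pop(i) calls later in the method keep the two lists index-aligned, which is what the zip-and-sort code in the UI module below relies on.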

@@ -1,4 +1,5 @@
 import importlib
+import math
 import queue
 import threading
 import traceback
@@ -244,7 +245,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
         model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
 
         yield "Getting download links from Hugging Face..."
-        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
+        links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
 
         if not links:
             yield "No files found to download for the given model/criteria."
@@ -254,17 +255,33 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
         # Check for multiple GGUF files
         gguf_files = [link for link in links if link.lower().endswith('.gguf')]
         if len(gguf_files) > 1 and not specific_file:
+            # Sort GGUF files by size (ascending - smallest first)
+            gguf_data = []
+            for i, link in enumerate(links):
+                if link.lower().endswith('.gguf'):
+                    file_size = file_sizes[i]
+                    gguf_data.append((file_size, link))
+
+            gguf_data.sort(key=lambda x: x[0])
+
             output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
-            for link in gguf_files:
-                output += f"{Path(link).name}\n"
+            for file_size, link in gguf_data:
+                size_str = format_file_size(file_size)
+                output += f"{size_str} - {Path(link).name}\n"
 
             output += "```"
             yield output
             return
 
         if return_links:
+            # Sort files by size (ascending - smallest first)
+            file_data = list(zip(file_sizes, links))
+            file_data.sort(key=lambda x: x[0])
+
             output = "```\n"
-            for link in links:
-                output += f"{Path(link).name}" + "\n"
+            for file_size, link in file_data:
+                size_str = format_file_size(file_size)
+                output += f"{size_str} - {Path(link).name}\n"
 
             output += "```"
             yield output
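A self-contained sketch of the smallest-first listing produced above, using made-up links and sizes; the actual code formats each size through the format_file_size helper added in the final hunk below:

```python
from pathlib import Path

# Made-up parallel lists, index-aligned as in download_model_wrapper.
links = [
    "https://huggingface.co/some-org/some-model/resolve/main/model.Q8_0.gguf",
    "https://huggingface.co/some-org/some-model/resolve/main/model.Q4_K_M.gguf",
]
file_sizes = [7_170_000_000, 4_370_000_000]

# Pair each size with its link and sort ascending so the smallest file is listed first.
file_data = sorted(zip(file_sizes, links), key=lambda x: x[0])
for file_size, link in file_data:
    print(f"{file_size / 1024**3:.2f} GB - {Path(link).name}")
# 4.07 GB - model.Q4_K_M.gguf
# 6.68 GB - model.Q8_0.gguf
```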
@@ -391,3 +408,19 @@ def handle_load_model_event_final(truncation_length, loader, state):
 def handle_unload_model_click():
     unload_model()
     return "Model unloaded"
+
+
+def format_file_size(size_bytes):
+    """Convert bytes to human readable format with 2 decimal places for GB and above"""
+    if size_bytes == 0:
+        return "0 B"
+
+    size_names = ["B", "KB", "MB", "GB", "TB"]
+    i = int(math.floor(math.log(size_bytes, 1024)))
+    p = math.pow(1024, i)
+    s = size_bytes / p
+
+    if i >= 3:  # GB or TB
+        return f"{s:.2f} {size_names[i]}"
+    else:
+        return f"{s:.1f} {size_names[i]}"
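Sample calls to the helper defined above (inputs are illustrative; expected output shown in the comments), assuming import math is available as added at the top of the module:

```python
print(format_file_size(0))           # 0 B
print(format_file_size(1536))        # 1.5 KB
print(format_file_size(524288))      # 512.0 KB
print(format_file_size(4368439296))  # 4.07 GB
```

Only GB and TB values get two decimal places; everything below is rounded to one, per the i >= 3 branch.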