Improve SuperboogaV2 with Date/Time Embeddings, GPU Support, and Multiple File Formats (#6748)

This commit is contained in:
Alireza Ghasemi 2025-02-18 02:38:15 +01:00 committed by GitHub
parent 12f6f7ba9f
commit 01f20d2d9f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 227 additions and 38 deletions

View file

@@ -6,6 +6,7 @@ It will only include full words.
import bisect
import re
from datetime import datetime
import extensions.superboogav2.parameters as parameters
@@ -154,6 +155,13 @@ def process_and_add_to_collector(corpus: str, collector: ChromaCollector, clear_
data_chunks_with_context = []
data_chunk_starting_indices = []
if parameters.get_add_date_time():
now = datetime.now()
date_time_chunk = f"Current time is {now.strftime('%H:%M:%S')}. Today is {now.strftime('%A')}. The current date is {now.strftime('%Y-%m-%d')}."
data_chunks.append(date_time_chunk)
data_chunks_with_context.append(date_time_chunk)
data_chunk_starting_indices.append(0)
# Handling chunk_regex
if parameters.get_chunk_regex():
if parameters.get_chunk_separator():