"""
This file is responsible for the UI and how the application interacts with the rest of the system.
"""
import os
from pathlib import Path
# Point to where nltk will find the required data.
os.environ['NLTK_DATA'] = str(Path("extensions/superboogav2/nltk_data").resolve())
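# NLTK reads this variable when it is first imported, so it has to be set before
# the extension modules below (which rely on nltk) are loaded.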
import codecs
import textwrap
import docx
import pptx
import fitz
fitz.TOOLS.mupdf_display_errors(False)
import pandas as pd
from odf.opendocument import load
from odf.draw import Page
import gradio as gr
import extensions.superboogav2.parameters as parameters
from modules import shared
from modules.logging_colors import logger
from .api import APIManager
from .benchmark import benchmark
from .chat_handler import custom_generate_chat_prompt_internal
from .chromadb import make_collector
from .data_processor import process_and_add_to_collector
from .download_urls import feed_url_into_collector
from .notebook_handler import input_modifier_internal
from .optimize import optimize
from .utils import create_metadata_source
collector = None
api_manager = None
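# Extension hook: runs once when the web UI loads the extension.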
def setup():
    global collector
    global api_manager

    collector = make_collector()
    api_manager = APIManager(collector)

    if parameters.get_api_on():
        api_manager.start_server(parameters.get_api_port())
def _feed_data_into_collector(corpus):
    yield '### Processing data...'
    process_and_add_to_collector(corpus, collector, False, create_metadata_source('direct-text'))
    yield '### Done.'
def _feed_file_into_collector(files):
    if not files:
        logger.warning("No files selected.")
        return
    def read_binary_file(file_path):
        try:
            with open(file_path, 'rb') as f:
                return f.read()
        except Exception:
            logger.error(f"Failed to read {file_path}.")
            return None
    def extract_with_utf8(text):
        try:
            return text.decode('utf-8')
        except Exception:
            return ""
    def extract_with_fitz(file_content):
        try:
            with fitz.open(stream=file_content, filetype=None) as doc:
                num_pages = doc.page_count
                text = "\n".join(block[4] for page in doc for block in page.get_text("blocks") if block[6] == 0)
                logger.info(f"Extracted text from {num_pages} pages with fitz.")
                return text
        except Exception:
            return ""
    def extract_with_docx(file_path):
        try:
            paragraphs = docx.Document(file_path).paragraphs
            text = "\n".join(para.text for para in paragraphs)
            logger.info(f"Extracted text from {len(paragraphs)} paragraphs with docx.")
            return text
        except Exception:
            return ""
    def extract_with_pptx(file_path):
        try:
            slides = pptx.Presentation(file_path).slides
            text = "\n".join(
                shape.text for slide in slides for shape in slide.shapes if hasattr(shape, "text")
            )
            logger.info(f"Extracted text from {len(slides)} slides with pptx.")
            return text
        except Exception:
            return ""
    def extract_with_odf(file_path):
        if not file_path.endswith(".odp"):
            return ""

        try:
            doc = load(file_path)
            text_content = []

            def extract_text(element):
                parts = []
                if hasattr(element, "childNodes"):
                    for node in element.childNodes:
                        if node.nodeType == node.TEXT_NODE:
                            parts.append(node.data)
                        else:
                            parts.append(extract_text(node))
                return "".join(parts)

            for slide in doc.getElementsByType(Page):
                slide_text = extract_text(slide)
                if slide_text.strip():
                    text_content.append(slide_text.strip())

            text = "\n".join(text_content)
            logger.info(f"Extracted text from {len(doc.getElementsByType(Page))} slides with odf.")
            return text
        except Exception as e:
            logger.error(f"Failed to extract text from {file_path}: {str(e)}")
            return ""
    def extract_with_pandas(file_path):
        try:
            df = pd.read_excel(file_path)
            text = "\n".join(str(cell) for col in df.columns for cell in df[col])
            logger.info(f"Extracted text from {df.shape[0]}x{df.shape[1]} cells with pandas.")
            return text
        except Exception:
            return ""
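    # Try each extractor in order and keep the first one that returns any text:
    # a plain UTF-8 decode first, then the format-specific parsers.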
    for index, file in enumerate(files, start=1):
        file_name = os.path.basename(file)
        logger.info(f"Processing {file_name}...")

        file_content = read_binary_file(file)
        if not file_content:
            continue

        text_extractors = [
            lambda: extract_with_utf8(file_content),
            lambda: extract_with_fitz(file_content),
            lambda: extract_with_docx(file),
            lambda: extract_with_pptx(file),
            lambda: extract_with_odf(file),
            lambda: extract_with_pandas(file),
        ]

        for extractor in text_extractors:
            text = extractor()
            if text:
                break

        if not text:
            logger.error(f"Failed to extract text from {file_name}, unsupported format.")
            continue

        process_and_add_to_collector(text, collector, False, create_metadata_source(f"file-{index}"))

    logger.info("Done.")
    yield "### Done."
def _feed_url_into_collector(urls):
    for i in feed_url_into_collector(urls, collector):
        yield i
    yield '### Done.'
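# Runs the bundled question set against the current collector and reports the score.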
def _begin_benchmark():
    score, max_score = benchmark(Path("extensions/superboogav2/benchmark_texts/questions.json"), collector)
    return f'**Score**: {score}/{max_score}'
def _begin_optimization(progress=gr.Progress()):
    return optimize(collector, progress), *_get_optimizable_settings()
def _clear_data():
    collector.clear()
    return "### Data Cleared!"
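# Returns the current parameter values in the same order as `optimizable_params`
# in ui(), so the optimizer's results can be written back to the widgets.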
def _get_optimizable_settings() -> list:
    preprocess_pipeline = []
    if parameters.should_to_lower():
        preprocess_pipeline.append('Lower Cases')
    if parameters.should_remove_punctuation():
        preprocess_pipeline.append('Remove Punctuation')
    if parameters.should_remove_specific_pos():
        preprocess_pipeline.append('Remove Adverbs')
    if parameters.should_remove_stopwords():
        preprocess_pipeline.append('Remove Stop Words')
    if parameters.should_lemmatize():
        preprocess_pipeline.append('Lemmatize')
    if parameters.should_merge_spaces():
        preprocess_pipeline.append('Merge Spaces')
    if parameters.should_strip():
        preprocess_pipeline.append('Strip Edges')
    return [
        parameters.get_time_power(),
        parameters.get_time_steepness(),
        parameters.get_significant_level(),
        parameters.get_min_num_sentences(),
        parameters.get_new_dist_strategy(),
        parameters.get_delta_start(),
        parameters.get_min_num_length(),
        parameters.get_num_conversion_strategy(),
        preprocess_pipeline,
        parameters.get_chunk_count(),
        parameters.get_context_len(),
        parameters.get_chunk_len()
    ]
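# Called whenever any settings widget changes: persists every UI value into the
# parameters module, then starts or stops the API server to match the checkbox.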
def _apply_settings(optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
                    preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, add_date_time, postfix, data_separator, prefix, max_token_count,
                    chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup):
    logger.debug('Applying settings.')

    try:
        parameters.set_optimization_steps(optimization_steps)
        parameters.set_significant_level(significant_level)
        parameters.set_min_num_sentences(min_sentences)
        parameters.set_new_dist_strategy(new_dist_strat)
        parameters.set_delta_start(delta_start)
        parameters.set_min_num_length(min_number_length)
        parameters.set_num_conversion_strategy(num_conversion)
        parameters.set_api_port(api_port)
        parameters.set_api_on(api_on)
        parameters.set_injection_strategy(injection_strategy)
        parameters.set_add_chat_to_data(add_chat_to_data)
        parameters.set_manual(manual)
        parameters.set_add_date_time(add_date_time)
        parameters.set_postfix(codecs.decode(postfix, 'unicode_escape'))
        parameters.set_data_separator(codecs.decode(data_separator, 'unicode_escape'))
        parameters.set_prefix(codecs.decode(prefix, 'unicode_escape'))
        parameters.set_max_token_count(max_token_count)
        parameters.set_time_power(time_power)
        parameters.set_time_steepness(time_steepness)
        parameters.set_chunk_count(chunk_count)
        parameters.set_chunk_separator(codecs.decode(chunk_sep, 'unicode_escape'))
        parameters.set_context_len(context_len)
        parameters.set_chunk_regex(chunk_regex)
        parameters.set_chunk_len(chunk_len)
        parameters.set_num_threads(threads)
        parameters.set_strong_cleanup(strong_cleanup)

        preprocess_choices = ['Lower Cases', 'Remove Punctuation', 'Remove Adverbs', 'Remove Stop Words', 'Lemmatize', 'Merge Spaces', 'Strip Edges']
        for preprocess_method in preprocess_choices:
            if preprocess_method == 'Lower Cases':
                parameters.set_to_lower(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Remove Punctuation':
                parameters.set_remove_punctuation(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Remove Adverbs':
                parameters.set_remove_specific_pos(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Remove Stop Words':
                parameters.set_remove_stopwords(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Lemmatize':
                parameters.set_lemmatize(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Merge Spaces':
                parameters.set_merge_spaces(preprocess_method in preprocess_pipeline)
            elif preprocess_method == 'Strip Edges':
                parameters.set_strip(preprocess_method in preprocess_pipeline)
        # Based on the API on/off setting, start or stop the server.
        if api_manager is not None:
            if parameters.get_api_on() and (not api_manager.is_server_running()):
                api_manager.start_server(parameters.get_api_port())
            elif (not parameters.get_api_on()) and api_manager.is_server_running():
                api_manager.stop_server()
    except Exception as e:
        logger.warning(f'Could not properly apply settings: {str(e)}')
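# Chat-mode extension hook: the web UI calls this instead of its default prompt
# builder, so the collector can inject relevant chunks into the chat prompt.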
def custom_generate_chat_prompt(user_input, state, **kwargs):
    return custom_generate_chat_prompt_internal(user_input, state, collector, **kwargs)
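# Notebook/default-mode extension hook: rewrites the raw input before generation.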
def input_modifier(string, state, is_chat=False):
    return input_modifier_internal(string, collector, is_chat)
def ui():
    with gr.Accordion("Click for more information...", open=False):
        gr.Markdown(textwrap.dedent("""

        ## About

        This extension takes a dataset as input, breaks it into chunks, and adds the result to a local/offline Chroma database.

        The database is then queried during inference time to get the excerpts that are closest to your input. The idea is to create an arbitrarily large pseudo context.

        The core methodology was developed and contributed by kaiokendev, who is working on improvements to the method in this repository: https://github.com/kaiokendev/superbig

        ## Data input

        Start by entering some data in the interface below and then clicking on "Load data".

        Each time you load some new data, the old chunks are discarded.

        ## Chat mode

        #### Instruct

        On each turn, the chunks will be compared to your current input and the most relevant matches will be appended to the input in the following format:

        ```
        Consider the excerpts below as additional context:
        ...
        ```

        The injection doesn't make it into the chat history. It is only used in the current generation.

        #### Regular chat

        The chunks from the external data sources are ignored, and the chroma database is built based on the chat history instead. The most relevant past exchanges relative to the present input are added to the context string. This way, the extension acts as a long-term memory.

        ## Notebook/default modes

        Your question must be manually specified between `<|begin-user-input|>` and `<|end-user-input|>` tags, and the injection point must be specified with `<|injection-point|>`.

        The special tokens mentioned above (`<|begin-user-input|>`, `<|end-user-input|>`, and `<|injection-point|>`) are removed in the background before the text generation begins.

        Here is an example in Vicuna 1.1 format:

        ```
        A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.

        USER:

        <|injection-point|>

        <|begin-user-input|>What datasets are mentioned in the text above?<|end-user-input|>

        ASSISTANT:
        ```
        """))
    with gr.Row():
        with gr.Column(min_width=600):
            with gr.Tab("Text input"):
                data_input = gr.Textbox(lines=20, label='Input data')
                update_data = gr.Button('Load data')

            with gr.Tab("URL input"):
                url_input = gr.Textbox(lines=10, label='Input URLs', info='Enter one or more URLs separated by newline characters.')
                strong_cleanup = gr.Checkbox(value=parameters.get_is_strong_cleanup(), label='Strong cleanup', info='Only keeps html elements that look like long-form text.')
                threads = gr.Number(value=parameters.get_num_threads(), label='Threads', info='The number of threads to use while downloading the URLs.', precision=0)
                update_urls = gr.Button('Load data')
            with gr.Tab("File input"):
                file_input = gr.File(label="Input file", type="filepath", file_count="multiple")
                update_files = gr.Button('Load data')
            with gr.Tab("Settings"):
                with gr.Accordion("Processing settings", open=True):
                    chunk_len = gr.Textbox(value=parameters.get_chunk_len(), label='Chunk length', info='In characters, not tokens. This value is used when you click on "Load data".')
                    chunk_regex = gr.Textbox(value=parameters.get_chunk_regex(), label='Chunk regex', info='Will specifically add the captured text to the embeddings.')
                    context_len = gr.Textbox(value=parameters.get_context_len(), label='Context length', info='In characters, not tokens. How much context to load around each chunk.')
                    chunk_sep = gr.Textbox(value=codecs.encode(parameters.get_chunk_separator(), 'unicode_escape').decode(), label='Chunk separator', info='Used to manually split chunks. Manually split chunks longer than chunk length are split again. This value is used when you click on "Load data".')

                with gr.Accordion("Generation settings", open=False):
                    chunk_count = gr.Number(value=parameters.get_chunk_count(), label='Chunk count', info='The number of closest-matching chunks to include in the prompt.')
                    max_token_count = gr.Number(value=parameters.get_max_token_count(), label='Max Context Tokens', info='The context length in tokens will not exceed this value.')
                    prefix = gr.Textbox(value=codecs.encode(parameters.get_prefix(), 'unicode_escape').decode(), label='Prefix', info='What to put before the injection point.')
                    data_separator = gr.Textbox(value=codecs.encode(parameters.get_data_separator(), 'unicode_escape').decode(), label='Data separator', info='When multiple pieces of distant data are added, they might be unrelated. It\'s important to separate them.')
                    postfix = gr.Textbox(value=codecs.encode(parameters.get_postfix(), 'unicode_escape').decode(), label='Postfix', info='What to put after the injection point.')
                    with gr.Row():
                        manual = gr.Checkbox(value=parameters.get_is_manual(), label="Is Manual", info="Manually specify when to use ChromaDB. Insert `!c` at the start or end of the message to trigger a query.", visible=shared.is_chat())
                        add_date_time = gr.Checkbox(value=parameters.get_add_date_time(), label="Add date and time to Data", info="Make the current date and time available to the model.", visible=shared.is_chat())
                        add_chat_to_data = gr.Checkbox(value=parameters.get_add_chat_to_data(), label="Add Chat to Data", info="Automatically feed the chat history as you chat.", visible=shared.is_chat())

                    injection_strategy = gr.Radio(choices=[parameters.PREPEND_TO_LAST, parameters.APPEND_TO_LAST, parameters.HIJACK_LAST_IN_CONTEXT], value=parameters.get_injection_strategy(), label='Injection Strategy', info='Where to inject the messages in chat or instruct mode.', visible=shared.is_chat())
                    with gr.Row():
                        api_on = gr.Checkbox(value=parameters.get_api_on(), label="Turn on API", info="Check this to turn on the API service.")
                        api_port = gr.Number(value=parameters.get_api_port(), label="API Port", info="The port on which the API service will run.")
                with gr.Accordion("Advanced settings", open=False):
                    preprocess_set_choices = []
                    if parameters.should_to_lower():
                        preprocess_set_choices.append('Lower Cases')
                    if parameters.should_remove_punctuation():
                        preprocess_set_choices.append('Remove Punctuation')
                    if parameters.should_remove_specific_pos():
                        preprocess_set_choices.append('Remove Adverbs')
                    if parameters.should_remove_stopwords():
                        preprocess_set_choices.append('Remove Stop Words')
                    if parameters.should_lemmatize():
                        preprocess_set_choices.append('Lemmatize')
                    if parameters.should_merge_spaces():
                        preprocess_set_choices.append('Merge Spaces')
                    if parameters.should_strip():
                        preprocess_set_choices.append('Strip Edges')

                    preprocess_pipeline = gr.CheckboxGroup(label='Preprocessing pipeline', choices=[
                        'Lower Cases',
                        'Remove Punctuation',
                        'Remove Adverbs',
                        'Remove Stop Words',
                        'Lemmatize',
                        'Merge Spaces',
                        'Strip Edges',
                    ], value=preprocess_set_choices, interactive=True, info='How to preprocess the text before it is turned into an embedding.')
                    with gr.Row():
                        num_conversion = gr.Dropdown(choices=[parameters.NUM_TO_WORD_METHOD, parameters.NUM_TO_CHAR_METHOD, parameters.NUM_TO_CHAR_LONG_METHOD, 'None'], value=parameters.get_num_conversion_strategy(), label="Number Conversion Method", info='How to preprocess numbers before creating the embeddings.', interactive=True)
                        min_number_length = gr.Number(value=parameters.get_min_num_length(), label='Number Length Threshold', info='In digits. Only numbers that have at least that many digits will be converted.', interactive=True)
                        delta_start = gr.Number(value=parameters.get_delta_start(), label='Delta Start Index', info='If the system encounters two identical embeddings, and they both start within the same delta, then only the first will be considered.', interactive=True)

                    new_dist_strat = gr.Dropdown(choices=[parameters.DIST_MIN_STRATEGY, parameters.DIST_HARMONIC_STRATEGY, parameters.DIST_GEOMETRIC_STRATEGY, parameters.DIST_ARITHMETIC_STRATEGY], value=parameters.get_new_dist_strategy(), label="Distance Strategy", info='When two embedding texts are merged, the distance of the new piece will be decided using one of these strategies.', interactive=True)
                    min_sentences = gr.Number(value=parameters.get_min_num_sentences(), label='Summary Threshold', info='In sentences. The minimum number of sentences to trigger text-rank summarization.', interactive=True)
                    significant_level = gr.Slider(0.8, 2, value=parameters.get_significant_level(), label='Significant Level', info='Defines the cut-off for what is considered a "significant" distance relative to the median distance among the returned samples.', interactive=True)
                    time_steepness = gr.Slider(0.01, 1.0, value=parameters.get_time_steepness(), label='Time Weighing Steepness', info='How differently two excerpts that are close in time will be weighed.')
                    time_power = gr.Slider(0.0, 1.0, value=parameters.get_time_power(), label='Time Weighing Power', info='How influential the weighing is. At 1.0, old entries won\'t be considered.')
            with gr.Tab("Benchmark"):
                benchmark_button = gr.Button('Benchmark')
                optimize_button = gr.Button('Optimize')
                optimization_steps = gr.Number(value=parameters.get_optimization_steps(), label='Optimization Steps', info='For how many steps to optimize.', interactive=True)

            clear_button = gr.Button('❌ Clear Data')

        with gr.Column():
            last_updated = gr.Markdown()
    all_params = [optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
                  preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, add_date_time, postfix, data_separator, prefix, max_token_count,
                  chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup]
    optimizable_params = [time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
                          preprocess_pipeline, chunk_count, context_len, chunk_len]
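    # These widgets receive the optimizer's best values when "Optimize" finishes;
    # their order must match the list returned by _get_optimizable_settings().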
    update_data.click(_feed_data_into_collector, [data_input], last_updated, show_progress=False)
    update_urls.click(_feed_url_into_collector, [url_input], last_updated, show_progress=False)
    update_files.click(_feed_file_into_collector, [file_input], last_updated, show_progress=False)
    benchmark_button.click(_begin_benchmark, [], last_updated, show_progress=True)
    optimize_button.click(_begin_optimization, [], [last_updated] + optimizable_params, show_progress=True)
    clear_button.click(_clear_data, [], last_updated, show_progress=False)

    optimization_steps.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    time_power.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    time_steepness.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    significant_level.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    min_sentences.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    new_dist_strat.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    delta_start.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    min_number_length.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    num_conversion.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    preprocess_pipeline.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    api_port.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    api_on.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    injection_strategy.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    add_chat_to_data.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    add_date_time.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    manual.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    postfix.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    data_separator.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    prefix.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    max_token_count.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    chunk_count.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    chunk_sep.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    context_len.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    chunk_regex.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    chunk_len.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    threads.input(fn=_apply_settings, inputs=all_params, show_progress=False)
    strong_cleanup.input(fn=_apply_settings, inputs=all_params, show_progress=False)