Add TensorRT-LLM support (#5715)

This commit is contained in:
oobabooga 2024-06-24 02:30:03 -03:00 committed by GitHub
parent 536f8d58d4
commit 577a8cd3ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 197 additions and 4 deletions

View file

@@ -81,6 +81,9 @@ def get_model_metadata(model):
# Transformers metadata
if hf_metadata is not None:
metadata = json.loads(open(path, 'r', encoding='utf-8').read())
if 'pretrained_config' in metadata:
metadata = metadata['pretrained_config']
for k in ['max_position_embeddings', 'model_max_length', 'max_seq_len']:
if k in metadata:
model_settings['truncation_length'] = metadata[k]