mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-20 22:13:43 +00:00
Replace PyPDF2 with pymupdf for PDF text extraction
pymupdf produces cleaner text (e.g. no concatenated words in headers), handles encrypted and malformed PDFs that PyPDF2 failed on, and supports non-Latin scripts.
This commit is contained in:
parent
f4d787ab8d
commit
f010aa1612
15 changed files with 18 additions and 20 deletions
|
|
@@ -17,7 +17,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ peft==0.18.*
|
|||
Pillow>=9.5.0
|
||||
psutil
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@ jinja2==3.1.6
|
|||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
-PyPDF2==3.0.1
|
||||
+pymupdf==1.27.1
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue