Added function to read text from a PDF

manzilcheck
PANDACUSHION 6 months ago
parent 585930ce57
commit e525a67376
  1. 16
      backend/utils/utils.py

@ -1,6 +1,11 @@
import string
import hashlib
import random
import os
from PyPDF2 import PdfReader
# Name of the PDF file that FILE_PATH below points to.
FILE_NAME = 'manjil.pdf'
# Path of that file inside the current working directory. It is computed once,
# at import time, so the value depends on where the process was started.
# NOTE(review): nothing visible here uses this constant - confirm its callers.
FILE_PATH = os.path.join(os.getcwd(), FILE_NAME)
def random_string_generator(string_length: int) -> str:
letters = string.ascii_letters
@ -9,3 +14,14 @@ def random_string_generator(string_length: int) -> str:
def hash_string(string_value: str) -> str:
    """Return the SHA-256 digest of a string in hexadecimal form.

    Args:
        string_value: Text to hash. It is encoded as UTF-8 before hashing.

    Returns:
        The 64-character hexadecimal SHA-256 digest of the encoded text.
    """
    encoded = string_value.encode('utf-8')
    return hashlib.sha256(encoded).hexdigest()
def read_pdf_human_readable(file_path: str) -> list[str]:
    """Extract the text of every page of a PDF file.

    The file is opened with ``PdfReader`` and ``extract_text()`` is called on
    each page in order. Pages for which no text comes back are skipped, so the
    result may be shorter than the page count and list positions do not
    necessarily correspond to page numbers.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The extracted text of each page that yielded any, with leading and
        trailing whitespace stripped, in page order.
    """
    reader: PdfReader = PdfReader(file_path)
    # Extract each page once, keeping only pages that produced some text.
    return [
        text.strip()
        for page in reader.pages
        if (text := page.extract_text())
    ]

Loading…
Cancel
Save