|
|
|
@ -1,6 +1,11 @@ |
|
|
|
|
import string |
|
|
|
|
import hashlib |
|
|
|
|
import random |
|
|
|
|
import os |
|
|
|
|
from PyPDF2 import PdfReader |
|
|
|
|
|
|
|
|
|
# File name of the PDF; combined with the working directory below.
FILE_NAME = 'manjil.pdf'
# Path to FILE_NAME inside the current working directory, resolved once
# when this module is imported.
FILE_PATH = os.path.join(os.getcwd(), FILE_NAME)
|
|
|
|
|
|
|
|
|
def random_string_generator(string_length: int) -> str: |
|
|
|
|
letters = string.ascii_letters |
|
|
|
@ -9,3 +14,14 @@ def random_string_generator(string_length: int) -> str: |
|
|
|
|
|
|
|
|
|
def hash_string(string_value: str) -> str:
    """Return the SHA-256 digest of ``string_value`` as a hex string.

    The text is encoded as UTF-8 before it is hashed.

    Args:
        string_value: Text to hash.

    Returns:
        The hexadecimal SHA-256 digest of the UTF-8 encoded text.
    """
    return hashlib.sha256(string_value.encode('utf-8')).hexdigest()
|
|
|
|
|
|
|
|
|
def read_pdf_human_readable(file_path: str) -> list[str]:
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Location of the PDF; handed to ``PdfReader`` as-is.

    Returns:
        The stripped text of each page, in page order. Pages that have no
        text left after stripping are omitted.
    """
    reader = PdfReader(file_path)
    pdf_page_text_contents: list[str] = []
    for page in reader.pages:
        # Strip before testing for emptiness so a whitespace-only page is
        # skipped instead of being stored as an empty string. The
        # ``or ""`` keeps the falsy-result handling of the original check.
        text = (page.extract_text() or "").strip()
        if text:
            pdf_page_text_contents.append(text)
    return pdf_page_text_contents
|
|
|
|
|
|
|
|
|
|
|
|
|
|