"""Helper utilities: random strings, hashing, e-mail and password validation,
and splitting a PDF into per-page text and PDF files."""

import string
import hashlib
import random
import os
from PyPDF2 import PdfReader, PdfWriter
from config import *
# Kept after the star import so a name exported by config cannot shadow it.
import re


def random_string_generator(string_length: int) -> str:
    """Return a random string of ASCII letters.

    Args:
        string_length: Number of characters to generate.

    Returns:
        A string of ``string_length`` characters drawn from
        ``string.ascii_letters``.
    """
    # NOTE(review): `random` is not cryptographically secure. If callers use
    # this for salts or tokens, `secrets.choice` would be the safer source --
    # confirm against the call sites.
    letters = string.ascii_letters
    return ''.join(random.choice(letters) for _ in range(string_length))


def hash_string(string_value: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded string."""
    return hashlib.sha256(string_value.encode('utf-8')).hexdigest()


def is_valid_email(email):
    """Return True when ``email`` matches the module's e-mail pattern.

    The whole string must match; ``re.fullmatch`` is used instead of
    ``re.match`` because ``$`` also matches just before a trailing newline,
    which would let ``"user@example.com\\n"`` through.
    """
    pattern = r'^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$'
    return re.fullmatch(pattern, email) is not None


class InsecurePasswordException(Exception):
    """Raised by ``password_check_sanity`` when a password breaks a rule."""


def password_check_sanity(passwd: str) -> bool:
    """Validate a password against the length and character-class rules.

    The checks are skipped entirely when ``DISABLE_PASSWORD_SANITY_CHECKS``
    is truthy (the name is not defined in this file; presumably it is
    provided by the star import from ``config``).

    Args:
        passwd: The candidate password.

    Returns:
        True when every rule passes or the checks are disabled.

    Raises:
        InsecurePasswordException: On the first rule that fails; the message
            names the failed rule.
    """
    if DISABLE_PASSWORD_SANITY_CHECKS:
        return True

    if len(passwd) < 6:
        raise InsecurePasswordException('Password length should be at least 6 characters.')
    if len(passwd) > 20:
        raise InsecurePasswordException('Password length should not be greater than 20 characters.')

    # Checked in order, so the first missing character class is the one
    # that gets reported.
    required_patterns = (
        (r'[0-9]', 'Password should have at least one numeral.'),
        (r'[A-Z]', 'Password should have at least one uppercase letter.'),
        (r'[a-z]', 'Password should have at least one lowercase letter.'),
        (r'[$@#%]', 'Password should have at least one of the symbols $@#%.'),
    )
    for pattern, message in required_patterns:
        if not re.search(pattern, passwd):
            raise InsecurePasswordException(message)

    return True


def split_pdf_into_pages_with_text(pdf_path: str, output_directory: str) -> int:
    """Split a PDF into one text file and one single-page PDF per page.

    For page number ``n`` (counted from 1) this writes ``<n>.txt`` with the
    text extracted from the page and ``<n>.pdf`` containing only that page,
    both inside ``output_directory``.

    Args:
        pdf_path: Path of the PDF file to split.
        output_directory: Directory that receives the output files.

    Returns:
        The number of pages in the PDF (0 when the document has no pages).
    """
    print("SPLIT PDF INTO PAGES WITH TEXT")
    with open(pdf_path, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        page_count = len(reader.pages)

        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()
            if text is None:
                text = ''

            output_txt_filename = os.path.join(output_directory, f"{page_number}.txt")
            with open(output_txt_filename, 'w', encoding='utf-8') as output_file:
                output_file.write(text)

            # Save the page on its own as a single-page PDF file.
            writer = PdfWriter()
            writer.add_page(page)
            output_pdf_filename = os.path.join(output_directory, f"{page_number}.pdf")
            with open(output_pdf_filename, 'wb') as output_pdf_file:
                writer.write(output_pdf_file)

    # Returned after the loop so a document with no pages yields 0, not None.
    return page_count