import os

from PyPDF2 import PdfReader, PdfWriter

# Directory that receives every extracted page (text and single-page PDFs).
PATH = 'pdfcontent'

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(PATH, exist_ok=True)


def getPdfFile(filepath: str) -> None:
    """Split a PDF into per-page files inside ``PATH``.

    For each page of the PDF at *filepath*:

    * if ``extract_text()`` returns a non-empty string, the text is written
      to ``<basename>_page<N>.txt`` (UTF-8);
    * the page itself is written to ``<basename>_page<N>.pdf`` as a
      single-page document.

    ``<basename>`` is the input file name without directory or extension and
    ``N`` is the 1-based page number. A progress line is printed for every
    file written.

    NOTE(review): the PDF copy is written for every page, including pages
    with no extractable text (e.g. scanned images) -- confirm this matches
    the intended behavior.

    Args:
        filepath: Path to the source PDF file.
    """
    # Derive the base name once, up front: it does not depend on the page,
    # and both the text and the PDF outputs need it even when a page has no
    # extractable text.
    filename = os.path.splitext(os.path.basename(filepath))[0]

    with open(filepath, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)

        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()

            # Save as text file, skipping pages that yield no text.
            if text:
                output_txt_filename = os.path.join(
                    PATH, f"{filename}_page{page_number}.txt"
                )
                with open(output_txt_filename, 'w', encoding='utf-8') as output_file:
                    output_file.write(text)
                print(f"Page {page_number} extracted and saved as {output_txt_filename}")

            # Save as PDF file: a fresh writer per page so each output
            # document contains exactly one page.
            writer = PdfWriter()
            writer.add_page(page)
            output_pdf_filename = os.path.join(
                PATH, f"{filename}_page{page_number}.pdf"
            )
            with open(output_pdf_filename, 'wb') as output_pdf_file:
                writer.write(output_pdf_file)
            print(f"Page {page_number} extracted and saved as {output_pdf_filename}")