You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
FreeBug/pdfreader.py

29 lines
1.2 KiB

import os
from PyPDF2 import PdfReader, PdfWriter
# Directory that receives every per-page .txt and .pdf output file.
PATH = 'pdfcontent'
# Create the output directory at import time. exist_ok=True makes the call
# idempotent and avoids the check-then-create race of testing
# os.path.exists() first (another process could create the directory between
# the check and the makedirs call).
os.makedirs(PATH, exist_ok=True)
def getPdfFile(filepath: str) -> None:
    """Split a PDF into per-page text and PDF files inside ``PATH``.

    For every page of the PDF at *filepath*:

    * if ``extract_text()`` returns non-empty text, that text is written to
      ``<stem>_page<N>.txt`` using UTF-8;
    * the page itself is written as a single-page PDF named
      ``<stem>_page<N>.pdf``.

    ``<stem>`` is the base name of *filepath* without its extension and
    ``<N>`` is the 1-based page number. One progress line is printed for
    each file written.

    Args:
        filepath: Path of the PDF file to read.
    """
    # The stem is identical for every page, so compute it once up front.
    # Defining it here (rather than only when a page has text) also keeps it
    # available for the PDF output name when a page yields no text.
    stem = os.path.splitext(os.path.basename(filepath))[0]

    # The source file stays open for the whole loop because the pages are
    # written out while the reader is still attached to it.
    with open(filepath, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        for page_number, page in enumerate(reader.pages, start=1):
            text = page.extract_text()

            # Save as text file; pages without extractable text are skipped
            # so no empty .txt files are produced.
            if text:
                output_txt_filename = os.path.join(PATH, f"{stem}_page{page_number}.txt")
                with open(output_txt_filename, 'w', encoding='utf-8') as output_file:
                    output_file.write(text)
                print(f"Page {page_number} extracted and saved as {output_txt_filename}")

            # Save as PDF file: a fresh writer per page gives one
            # single-page document per output file.
            writer = PdfWriter()
            writer.add_page(page)
            output_pdf_filename = os.path.join(PATH, f"{stem}_page{page_number}.pdf")
            with open(output_pdf_filename, 'wb') as output_pdf_file:
                writer.write(output_pdf_file)
            print(f"Page {page_number} extracted and saved as {output_pdf_filename}")