42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
import os
|
|
from PyPDF2 import PdfReader
|
|
|
|
|
|
def find_pdf(start_dir: str, filename: str) -> str:
    """Return the absolute path to the target PDF, searching downward if needed.

    The file is first looked up directly inside ``start_dir``. If it is not
    there, the directory tree below ``start_dir`` is walked top-down and the
    first directory containing a file with that name wins.

    Args:
        start_dir: Directory in which the search begins.
        filename: Name of the file to locate. The recursive search compares
            it against bare file names, so it should not contain a path
            separator.

    Returns:
        Absolute path of the first matching file.

    Raises:
        FileNotFoundError: If no file named ``filename`` exists in or below
            ``start_dir``.
    """
    candidate = os.path.join(start_dir, filename)
    # isfile rather than exists: a *directory* that happens to share the
    # target's name must not be reported as the PDF. This also keeps the fast
    # path consistent with the walk below, which only ever matches files.
    if os.path.isfile(candidate):
        return os.path.abspath(candidate)

    for root, dirs, files in os.walk(start_dir):
        # Sorting in place fixes the traversal order, so the same copy is
        # returned on every run when several files share the name.
        dirs.sort()
        if filename in files:
            # abspath honours the documented contract even when the caller
            # passed a relative start_dir.
            return os.path.abspath(os.path.join(root, filename))

    raise FileNotFoundError(f"Unable to locate {filename} under {start_dir}")
|
|
|
|
|
|
def main() -> None:
    """Dump the text of the problem-statement PDF to stdout and to a file.

    The PDF is located relative to this script's directory via ``find_pdf``.
    Each page's extracted text is stripped and placed under a
    ``=== Page N ===`` banner (pages numbered from 1); a falsy extraction
    result is treated as empty text. The combined text is printed, then
    written as UTF-8 to ``problem_text_pypdf.txt`` beside this script.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    source_pdf = find_pdf(here, "2025 APMCM Problem B.pdf")

    document = PdfReader(source_pdf)
    sections = [
        f"\n=== Page {number} ===\n{(page.extract_text() or '').strip()}"
        for number, page in enumerate(document.pages, start=1)
    ]
    combined = "".join(sections)
    print(combined)

    destination = os.path.join(here, "problem_text_pypdf.txt")
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(combined)

    print(f"\nText saved to: {destination}")
|
|
|
|
|
|
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|