整合
This commit is contained in:
41
org/other/read_pdf_pypdf.py
Normal file
41
org/other/read_pdf_pypdf.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import os
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
|
||||
def find_pdf(start_dir: str, filename: str) -> str:
    """Locate *filename* in *start_dir* or in any directory beneath it.

    A file sitting directly inside ``start_dir`` is preferred. Otherwise the
    directory tree is walked top-down and the first match is returned.

    Args:
        start_dir: Directory at which the search starts.
        filename: Base name of the file to look for.

    Returns:
        The path of the matching file, built by joining onto ``start_dir``
        (so it is absolute whenever ``start_dir`` is absolute).

    Raises:
        FileNotFoundError: If no regular file named ``filename`` exists
            under ``start_dir``.
    """
    candidate = os.path.join(start_dir, filename)
    # isfile rather than exists: a directory that happens to share the name
    # (or start_dir itself when filename is empty) is not a usable match.
    if os.path.isfile(candidate):
        return candidate

    for root, dirs, files in os.walk(start_dir):
        # Sort in place so sibling directories are visited in a stable order
        # and the same match is returned on every run.
        dirs.sort()
        if filename in files:
            return os.path.join(root, filename)

    raise FileNotFoundError(f"Unable to locate {filename} under {start_dir}")
|
||||
|
||||
|
||||
def main() -> None:
    """Dump the text of the APMCM problem PDF to stdout and to a text file.

    The PDF is looked up via ``find_pdf`` starting from the directory that
    contains this script. Each page's extracted text is stripped and
    preceded by a ``=== Page N ===`` banner (N starts at 1); a page whose
    extraction yields nothing contributes an empty body. The combined text
    is printed and written as UTF-8 to ``problem_text_pypdf.txt`` next to
    the script.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    source_pdf = find_pdf(here, "2025 APMCM Problem B.pdf")

    # One chunk per page; `or ''` guards against an empty/None extraction.
    chunks = [
        f"\n=== Page {number} ===\n{(page.extract_text() or '').strip()}"
        for number, page in enumerate(PdfReader(source_pdf).pages, start=1)
    ]
    output_text = "".join(chunks)
    print(output_text)

    destination = os.path.join(here, "problem_text_pypdf.txt")
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(output_text)

    print(f"\nText saved to: {destination}")
|
||||
|
||||
|
||||
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user