Source code for pdfscraper.document

from dataclasses import dataclass
from typing import List, Any

from pdfscraper.layout.utils import Orientation
from pdfscraper.page import Page


@dataclass
class Document:
    """A PDF document represented as a list of pages plus its backend handle.

    Instances are normally built with one of the alternate constructors,
    :meth:`from_pymupdf` or :meth:`from_pdfminer`.
    """

    # Pages produced by the matching ``Page.from_*`` constructor.
    pages: List[Page]
    # Backend handle: the object returned by ``fitz.open`` when built with
    # ``from_pymupdf``; ``None`` when built with ``from_pdfminer``.
    doc: Any
    # Orientation that was forwarded to every page on construction.
    orientation: Orientation

    @staticmethod
    def _coerce_path(path):
        """Return *path* in a form the backends accept as a file name.

        Accepts ``str`` and ``os.PathLike`` objects (e.g. ``pathlib.Path``).

        Raises:
            TypeError: if *path* is neither ``str`` nor ``os.PathLike``.
        """
        import os

        if isinstance(path, (str, os.PathLike)):
            return os.fspath(path)
        raise TypeError(
            "path must be a str or os.PathLike, "
            f"not {type(path).__name__}"
        )

    @classmethod
    def from_pymupdf(
        cls,
        path,
        orientation: Orientation = Orientation.create(
            bottom_is_zero=False, left_is_zero=True
        ),
    ) -> "Document":
        """Open *path* with PyMuPDF and build a document from its pages.

        Args:
            path: Location of the PDF file, as ``str`` or ``os.PathLike``.
            orientation: Passed unchanged to ``Page.from_pymupdf`` for every
                page and stored on the resulting document.

        Returns:
            A ``Document`` whose ``doc`` attribute is the opened PyMuPDF
            document. The handle is kept on the instance, not closed here.

        Raises:
            TypeError: if *path* is neither ``str`` nor ``os.PathLike``.
        """
        # Validate first: without this, a non-str path left the handle
        # below unassigned and the failure surfaced far from its cause.
        filename = cls._coerce_path(path)

        # Imported lazily so PyMuPDF is only needed when this backend is used.
        import fitz

        doc = fitz.open(filename)
        return cls(
            pages=[
                Page.from_pymupdf(page, orientation=orientation)
                for page in doc
            ],
            doc=doc,
            orientation=orientation,
        )

    @classmethod
    def from_pdfminer(
        cls,
        path,
        orientation: Orientation = Orientation.create(
            bottom_is_zero=False, left_is_zero=True
        ),
    ) -> "Document":
        """Extract the pages of *path* with pdfminer and build a document.

        Args:
            path: Location of the PDF file, as ``str`` or ``os.PathLike``.
            orientation: Passed unchanged to ``Page.from_pdfminer`` for every
                page and stored on the resulting document.

        Returns:
            A ``Document`` whose ``doc`` attribute is ``None``.

        Raises:
            TypeError: if *path* is neither ``str`` nor ``os.PathLike``.
        """
        # Same up-front validation as from_pymupdf, for the same reason.
        filename = cls._coerce_path(path)

        # Imported lazily so pdfminer is only needed when this backend is used.
        import pdfminer.high_level

        layouts = pdfminer.high_level.extract_pages(filename)
        return cls(
            pages=[
                Page.from_pdfminer(layout, orientation=orientation)
                for layout in layouts
            ],
            doc=None,
            orientation=orientation,
        )

    def create_sections(self):
        """Placeholder: currently does nothing and returns ``None``."""
        pass