archaeo_super_prompt.types.pdfpaths
[docs]
module
archaeo_super_prompt.types.pdfpaths
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 | import pandera.pandas as pa
import pandas as pd
from pathlib import Path
from typing import cast
from collections.abc import Iterable
from pandera.typing.pandas import DataFrame, Series
from .intervention_id import InterventionId
class PDFPathSchema(pa.DataFrameModel):
    """Pandera schema for a table pairing an identifier with a file path.

    Every row carries one integer ``id`` and one ``filepath`` string.
    """

    # Integer identifier of the row.
    id: Series[int]
    # File path kept as a plain string rather than a ``Path`` object.
    filepath: Series[str]
# Alias for a DataFrame typed against PDFPathSchema.
PDFPathDataset = DataFrame[PDFPathSchema]
def buildPdfPathDataset(
    items: Iterable[tuple[InterventionId, Path]],
) -> PDFPathDataset:
    """Build a validated dataset from ``(intervention id, path)`` pairs.

    Args:
        items: Pairs of an intervention identifier and a path. May be
            empty, in which case an empty dataset is returned instead of
            failing while unpacking.

    Returns:
        A DataFrame with an ``id`` column and a ``filepath`` column (each
        path converted with ``str``), validated by ``PDFPathSchema``.

    Raises:
        ValueError: If an item does not unpack into exactly two values.

    Any error raised by ``PDFPathSchema.validate`` propagates unchanged.
    """
    # Materialise once: ``items`` may be a one-shot iterator and is read twice.
    pairs = list(items)
    ids = [intervention_id for intervention_id, _ in pairs]
    filepaths = [str(path) for _, path in pairs]

    frame = pd.DataFrame({"id": ids, "filepath": filepaths})
    if not pairs:
        # With no rows pandas cannot infer an integer dtype for ``id``.
        # Pin it to int64, the dtype inferred for a non-empty column of
        # Python ints, so the schema's integer check can still pass.
        frame = frame.astype({"id": "int64"})
    return PDFPathSchema.validate(frame)
def get_intervention_rows(ds: PDFPathDataset):
    """Return every row of ``ds`` as an ``(InterventionId, Path)`` tuple.

    The ``id`` value of each row is passed to ``InterventionId`` and the
    ``filepath`` value to ``Path``. Rows come back as a list, in the order
    ``ds.iterrows()`` yields them.
    """
    typed_rows = (
        (InterventionId(record["id"]), Path(record["filepath"]))
        for _index, record in ds.iterrows()
    )
    return list(typed_rows)
def get_paths(ds: PDFPathDataset) -> list[Path]:
    """Return the ``filepath`` column of ``ds`` as a list of ``Path`` objects."""
    # The cast only informs the type checker that the column holds strings.
    filepath_column = cast(Series[str], ds["filepath"])
    return list(map(Path, filepath_column))
|