Skip to content

archaeo_super_prompt.types.embedding_labeled_chunks

[docs] module archaeo_super_prompt.types.embedding_labeled_chunks

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
from pandera.pandas import DataFrameModel


class SemanticallyLabeledChunkDatasetSchema(DataFrameModel):
    """Dataframe schema for a dataset of semantically labeled chunks.

    When a chunk is likely to carry information about a data field to be
    extracted, that data field's key is added as a key of the
    semanticallyIdentifiedThesaurus dictionary.
    The best chunks for a given identified data field are those whose list of
    identified thesaurus entries for that field is not empty.
    """

    # Maps each identified data field key to the list of thesaurus entries
    # identified for that field.
    # NOTE(review): the value may also be None instead of a list; presumably
    # this marks a field with no thesaurus entries to report -- confirm
    # against the code that produces this column.
    semanticallyIdentifiedThesaurus: dict[str, list[str] | None]