archaeo_super_prompt.types.ner_labeled_chunks
[docs]
module
archaeo_super_prompt.types.ner_labeled_chunks
1
2
3
4
5
6
7
8
9
10
11
12
13 | from pandera.pandas import DataFrameModel
class NerLabeledChunkDatasetSchema(DataFrameModel):
    """Dataframe schema for chunks labeled with identified thesaurus entries.

    When a chunk is likely to carry information about a data field to be
    extracted, the key of that data field is added as a key of the
    ``nerIdentifiedThesaurus`` dictionary.

    The best chunks are those for which, for a given identified data
    field, the list of identified thesaurus entries is not empty.
    """

    # Maps each identified data field key to the list of thesaurus entries
    # identified for that field.
    # NOTE(review): a value may also be None; how None differs from an
    # empty list is not documented here -- TODO confirm with the producer
    # of this column.
    nerIdentifiedThesaurus: dict[str, list[str] | None]
|