
archaeo_super_prompt.modeling.struct_extract.extractors.archiving_date

"""Mock archiving extractor with the good data.

As a model for extracting this data is not done, for now
we assume this data as known and simulate this behaviour
with loading from the dataset.
"""

import datetime
from typing import Any, cast, override

import pandas as pd
from pandera.typing.pandas import DataFrame, Series

from .....dataset.load import MagohDataset
from .....types.intervention_id import InterventionId
from .....types.pdfchunks import PDFChunkDataset
from .....types.per_intervention_feature import (
    BasePerInterventionFeatureSchema,
)
from ....types.detailed_evaluator import DetailedEvaluatorMixin


class ArchivingDateOutputSchema(BasePerInterventionFeatureSchema):
    """when indentifying the date of an intervention, we refer first to the date of protocol."""

    data_protocollo: datetime.date


class ArchivingDateProvider(DetailedEvaluatorMixin[Any, MagohDataset, Any]):
    """Give the answer of the ArchivingDate."""

    def __init__(self) -> None:
        """."""
        super().__init__()
        self._mds: MagohDataset | None = None

    @override
    def fit(self, X, y: MagohDataset, **kwargs):
        """Store the reference dataset from which the archiving dates are read."""
        X = X  # unused
        kwargs = kwargs  # unused
        self._mds = y
        return self

    def filter_ids(self, y: MagohDataset, ids: set[InterventionId]):
        """Only keeps the records with an inserted archiving date."""
        return y.filter_good_records_for_training(
            ids,
            lambda df: cast(
                Series[bool], df["building__Data_Protocollo"].notnull()
            ),
        )

    @override
    def predict(
        self,
        X: PDFChunkDataset,
    ) -> DataFrame[ArchivingDateOutputSchema]:
        """Return the ground-truth archiving date for every intervention in X."""
        if self._mds is None:
            raise NotImplementedError(
                "Cannot infer the archiving date. Please fit the model with "
                "the dataset so that the answers can be output."
            )

        def to_date(dp: str):
            # Parse a dash-separated day-month-year string into a date
            d, m, y = [int(k) for k in dp.strip().split("-")]
            return datetime.date(y, m, d)

        return ArchivingDateOutputSchema.validate(
            pd.DataFrame(
                [
                    {
                        "id": a.id,
                        "data_protocollo": to_date(
                            str(a.building__Data_Protocollo)
                        ),
                    }
                    for a in self._mds.get_answers(set(X["id"].to_list()))
                ]
            ).set_index("id")
        )

    @override
    def score(self, X, y, sample_weight=None) -> float:
        """Always return a perfect score; the answers come from the ground truth."""
        X = X  # unused
        y = y  # unused
        sample_weight = sample_weight  # unused
        return 1.0

    @override
    def score_and_transform(self, X, y):
        """Return both the score and the predicted archiving dates."""
        return self.score(X, y), self.predict(X)
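
A minimal usage sketch (not part of the module), assuming a MagohDataset instance named dataset and a PDFChunkDataset named chunks have already been built elsewhere:

# Hypothetical usage; `dataset` (MagohDataset) and `chunks` (PDFChunkDataset)
# are assumed to be loaded by the surrounding pipeline.
from archaeo_super_prompt.modeling.struct_extract.extractors.archiving_date import (
    ArchivingDateProvider,
)

provider = ArchivingDateProvider().fit(None, dataset)  # X is ignored by fit
dates = provider.predict(chunks)  # DataFrame indexed by intervention id
print(dates["data_protocollo"].head())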