
archaeo_super_prompt.modeling.legacy_predict


"""Legacy model for comparison."""

from sklearn.pipeline import Pipeline

from .pdf_to_text import VLLM_Preprocessing
from .struct_extract.language_model import get_vllm_model
from .struct_extract.legacy_extractor.main_transformer import MagohDataExtractor


def get_legacy_model():
    """Return the legacy model, with the vLLM OCR stage as a pre-processing layer."""
    llm_model = get_vllm_model(temperature=0.05)
    pipeline = Pipeline(
        [
            (
                "vllm",
                # OCR the scanned PDF chunks with a small vision-language model
                # before handing the text to the legacy extractor.
                VLLM_Preprocessing(
                    vlm_provider="vllm",
                    vlm_model_id="ibm-granite/granite-vision-3.3-2b",
                    prompt="OCR this part of an Italian document for markdown-based processing.",
                    embedding_model_hf_id="nomic-ai/nomic-embed-text-v1.5",
                    incipit_only=True,
                ),
            ),
            ("extractor", MagohDataExtractor(llm_model)),
        ],
    )
    # Configure the pipeline itself to emit pandas DataFrames from its
    # transform steps; a config_context around construction alone would
    # not persist after the function returns.
    pipeline.set_output(transform="pandas")
    return pipeline
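A minimal usage sketch, assuming the returned pipeline follows the usual scikit-learn fit/transform contract; here `chunks` is a hypothetical pandas DataFrame of PDF chunks (conforming to PDFChunkDatasetSchema) that a caller would construct elsewhere:

    model = get_legacy_model()
    # Runs OCR pre-processing, then the legacy structured extraction, in one pass.
    extracted = model.fit_transform(chunks)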