class OCRAgent(ABC):
    """Defines the interface for an Optical Character Recognition (OCR) service."""

    @classmethod
    def get_agent(cls) -> OCRAgent:
        """Get the configured OCRAgent instance.

        The OCR package used by the agent is determined by the `OCR_AGENT` environment variable.

        Returns:
            Whatever `cls.get_instance()` returns for the configured qualified class name.

        Raises:
            ValueError: If `cls.get_instance()` fails with an `ImportError` or `AttributeError`.
                The text of the original error is appended to the message and the original
                exception is chained as `__cause__`.
        """
        ocr_agent_cls_qname = cls._get_ocr_agent_cls_qname()
        try:
            return cls.get_instance(ocr_agent_cls_qname)
        except (ImportError, AttributeError) as e:
            # Include the underlying error: an ImportError raised while loading the agent may
            # come from a missing dependency rather than from a wrong OCR_AGENT value, and
            # hiding it makes the real cause impossible to diagnose from the message alone.
            raise ValueError(
                f"Environment variable OCR_AGENT must be set to an existing OCR agent module,"
                f" not {ocr_agent_cls_qname}: {str(e)}"
            ) from e
1条答案
按热度按时间exdqitrt1#
根据您提供的错误信息,该错误实际上是在以下代码中产生的:
https://github.com/Unstructured-IO/unstructured/blob/main/unstructured/partition/utils/ocr_models/ocr_interface.py#L38
您看到的错误信息与这段代码中的文本开头非常相似 :)
我认为针对这种情况的正确解决方案是同时打印原始错误信息和
"Environment variable ..."
消息。我猜测在您的案例中,真正触发错误的是类似于
ImportError: cannot import package 'pandas'
这样的异常,而在那种情况下,原始错误信息可能正是您需要的线索 :)您对这个想法有什么看法?
实现方式可以参考上方的代码。