遇到的问题
当我尝试重新执行使用之前生成的ChromaDB嵌入的代码时,遇到了ValueError。第一次运行时,代码根据用户输入从LLM生成响应。然而,当我停止执行并再次运行代码以重用这些嵌入时,我得到了以下错误:
ValueError: Query id d28e3de4-1c4f-420a-9184-97bf8556b11b not found in either retriever_dict
or query_engine_dict
.
实际行为
在重新执行时,代码会引发一个ValueError,指出查询ID既不在retriever_dict中,也不在query_engine_dict中。
预期行为
代码应该能够在不引发任何ValueError的情况下成功运行之前的嵌入。
版本
llama-index==0.10.12
重现步骤
class DenseXRetrievalPack(BaseLlamaPack):
def init(
self,
documents: List[Document],
proposition_llm: Optional[LLM] = None,
query_llm: Optional[LLM] = None,
embed_model: Optional[BaseEmbedding] = None,
text_splitter: TextSplitter = SentenceSplitter(),
vector_store: Optional[ElasticsearchStore] = None,
similarity_top_k: int = 4,
) -> None:
"""Init params."""
self._proposition_llm = llm
embed_model = embed_model
nodes = text_splitter.get_nodes_from_documents(documents)
sub_nodes = self._gen_propositions(nodes)
all_nodes = nodes + sub_nodes
all_nodes_dict = {n.node_id: n for n in all_nodes}
service_context = ServiceContext.from_defaults(
llm=query_llm ,
embed_model=embed_model,
num_output=self._proposition_llm.metadata.num_output,
)
if os.path.exists('./chroma_db'):
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
self.vector_index = VectorStoreIndex.from_vector_store(vector_store,service_context=service_context)
else:
chroma_client = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = chroma_client.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
self.vector_index = VectorStoreIndex(
all_nodes, service_context=service_context, show_progress=True,storage_context=storage_context,store_nodes_override=True
)
self.retriever = RecursiveRetriever(
"vector",
retriever_dict={
"vector": self.vector_index.as_retriever(
similarity_top_k=similarity_top_k
)
},
node_dict=all_nodes_dict,
)
self.query_engine = RetrieverQueryEngine.from_args(
self.retriever, service_context=service_context
)
async def _aget_proposition(self, node: TextNode) -> List[TextNode]:
"""Get proposition."""
inital_output = await self._proposition_llm.apredict(
PROPOSITIONS_PROMPT, node_text=node.text
)
outputs = inital_output.split("\n")
all_propositions = []
for output in outputs:
if not output.strip():
continue
if not output.strip().endswith("]"):
if not output.strip().endswith('"') and not output.strip().endswith(
","
):
output = output + '"'
output = output + " ]"
if not output.strip().startswith("["):
if not output.strip().startswith('"'):
output = '"' + output
output = "[ " + output
try:
propositions = json.loads(output)
except Exception:
# fallback to yaml
try:
propositions = yaml.safe_load(output)
except Exception:
# fallback to next output
continue
if not isinstance(propositions, list):
continue
all_propositions.extend(propositions)
assert isinstance(all_propositions, list)
nodes = [TextNode(text=prop) for prop in all_propositions if prop]
return [IndexNode.from_text_node(n, node.node_id) for n in nodes]
def _gen_propositions(self, nodes: List[TextNode]) -> List[TextNode]:
"""Get propositions."""
sub_nodes = asyncio.run(
run_jobs(
[self._aget_proposition(node) for node in nodes],
show_progress=True,
workers=8,
)
)
print(sub_nodes)
# Flatten list
return [node for sub_node in sub_nodes for node in sub_node]
def get_modules(self) -> Dict[str, Any]:
"""Get modules."""
return {
"query_engine": self.query_engine,
"retriever": self.retriever,
}
def run(self, query_str: str, **kwargs: Any) -> RESPONSE_TYPE:
"""Run the pipeline."""
return self.query_engine.query(query_str)
相关日志/回溯
- 无响应*
1条答案
按热度按时间9cbw7uwe1#
要解决在重新执行使用先前生成的ChromaDB嵌入的代码时,
ValueError
状态表示在retriever_dict
或query_engine_dict
中找不到查询ID的问题,请确保retriever
和query_engine
正确初始化并可访问。RecursiveRetriever
类初始化这些组件,因此需要确保retriever_dict
和query_engine_dict
包含必要的条目。以下是如何初始化和使用
RecursiveRetriever
类以避免ValueError
的示例:这段代码确保了
retriever
和query_engine
正确初始化和使用,这应该可以防止与缺少查询ID相关的ValueError
。请确保root_id
在retriever_dict
中存在,并且retriever_dict
和query_engine_dict
1(https://github.com/run-llama/llama_index/blob/main/llama-index-packs/llama-index-packs-chroma-autoretrieval/llama_index/packs/chroma_autoretrieval/base.py)2(https://github.com/run-llama/llama_index/blob/main/llama-index-legacy/llama_index/legacy/retrievers/recursive_retriever.py)3(https://github.com/run-llama/llama_index/blob/main/llama-index-packs/llama-index-packs-dense-x-retrieval/llama_index/packs/dense_x_retrieval/base.py)之间没有重叠的键。