llama_index: I want to build a local multimodal RAG chatbot, but I ran into the following error

fafcakar  posted 2 months ago in Other

Question Validation

  • I have searched the documentation and Discord for an answer.

Question

# Imports assumed by this script (module paths follow current llama_index packaging; adjust to your install)
import os

import whisper
from moviepy.editor import VideoFileClip

from llama_index.core import Settings, SimpleDirectoryReader, StorageContext
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.core.schema import ImageNode
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.multi_modal_llms.ollama import OllamaMultiModal
from llama_index.vector_stores.lancedb import LanceDBVectorStore


# This class transforms the video into text and images
class VideoProcessor:
    def __init__(self, video_path, output_audio_path, output_folder):
        self.video_path = video_path
        self.output_audio_path = output_audio_path
        self.image_path = output_folder
        self.text_path = output_folder

    def extract_audio(self):
        video = VideoFileClip(os.path.join(self.video_path, "input.mp4"))
        audio_part = video.audio
        audio_part.write_audiofile(os.path.join(self.output_audio_path, "output_audio.mp3"))

    def extract_text(self):
        model = whisper.load_model("base")
        result = model.transcribe(os.path.join(self.output_audio_path, "output_audio.mp3"))
        # The with-block closes the file automatically; no explicit close() needed
        with open(os.path.join(self.text_path, "audio.md"), "w") as file:
            file.write(result['text'])

    def extract_frames(self):
        clip = VideoFileClip(os.path.join(self.video_path, "input.mp4"))
        clip.write_images_sequence(os.path.join(self.image_path, "frame%04d.png"), fps=0.2)

    def process_video(self):
        self.extract_audio()
        self.extract_text()
        self.extract_frames()

# This class builds the multimodal index/retriever and runs the question-answering loop
class retriever_ask:
    def __init__(self, output_folder, similarity_top_k, image_similarity_top_k) -> None:
        self.text_store = LanceDBVectorStore(uri="lancedb", table_name="text_collection")
        self.image_store = LanceDBVectorStore(uri="lancedb", table_name="image_collection")
        self.output_folder = output_folder
        self.similarity_top_k = similarity_top_k
        self.image_similarity_top_k = image_similarity_top_k
        # self.llm = Ollama(model='llava', request_timeout=100)
        self.llm = OllamaMultiModal(model='llava', request_timeout=3600) 
        self.qa_tmpl_str = (
            "Given the provided information, including relevant images and retrieved context from the video, "
            "accurately and precisely answer the query without any additional prior knowledge.\n"
            "Please ensure honesty and responsibility, refraining from any racist or sexist remarks.\n"
            "---------------------\n"
            "Context: {context_str}\n"
            "Metadata for video: {metadata_str}\n"
            "---------------------\n"
            "Query: {query_str}\n"
            "Answer: "
        )

    def retriever_engine(self):
        documents = SimpleDirectoryReader(self.output_folder).load_data()
        storage_context = StorageContext.from_defaults(vector_store=self.text_store, image_store=self.image_store)
        index = MultiModalVectorStoreIndex.from_documents(documents, storage_context=storage_context)
        retriever_engine = index.as_retriever(similarity_top_k=self.similarity_top_k, image_similarity_top_k=self.image_similarity_top_k)
        return retriever_engine
    
    def retrieve(self, retriever_engine, query_str):
        retrieval_results = retriever_engine.retrieve(query_str)
        retrieved_image = []
        retrieved_text = []
        for res_node in retrieval_results:
            if isinstance(res_node.node, ImageNode):
                retrieved_image.append(res_node.node.metadata["file_path"])
            else:
                retrieved_text.append(res_node.text)

        return retrieved_image, retrieved_text
    
    def ask(self):
        retriever_engine = self.retriever_engine()
  
        while True:
            try:
                query_str = input('\033[94m' +"Prompt: " + '\033[0m')
                image, text = self.retrieve(retriever_engine=retriever_engine, query_str=query_str)
                context_str = "".join(text)
                metadata_str = image
                image_documents = SimpleDirectoryReader(
                    input_dir=self.output_folder, input_files=image
                ).load_data()
                response = self.llm.complete(
                    prompt=self.qa_tmpl_str.format(
                        context_str=context_str, query_str=query_str, metadata_str=metadata_str
                    ),
                    image_documents=image_documents,
                )
                print(response)
            except KeyboardInterrupt:
                break

if __name__ == '__main__':

    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    video_path = './video/'
    output_audio_path = './audio/'
    output_folder = './output/'

    # Process the video into images and text
    processor = VideoProcessor(video_path, output_audio_path, output_folder)
    processor.process_video()

    # Ask questions
    engine = retriever_ask(output_folder, 5, 5)
    engine.ask()

But I ran into the following problem:

Prompt: hello
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/httpx/_transports/default.py", line 69, in map_httpcore_exceptions
    yield
  File "/usr/local/lib/python3.8/dist-packages/httpx/_transports/default.py", line 233, in handle_request
    resp = self._pool.handle_request(req)
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/connection_pool.py", line 216, in handle_request
    raise exc from None
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/connection_pool.py", line 196, in handle_request
    response = connection.handle_request(
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/connection.py", line 101, in handle_request
    return self._connection.handle_request(request)
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/http11.py", line 143, in handle_request
    raise exc
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/http11.py", line 113, in handle_request
    ) = self._receive_response_headers(**kwargs)
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/http11.py", line 186, in _receive_response_headers
    event = self._receive_event(timeout=timeout)
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_sync/http11.py", line 224, in _receive_event
    data = self._network_stream.read(
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_backends/sync.py", line 126, in read
    return self._sock.recv(max_bytes)
  File "/usr/lib/python3.8/contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/local/lib/python3.8/dist-packages/httpcore/_exceptions.py", line 14, in map_exceptions
    raise to_exc(exc) from exc
httpcore.ReadTimeout: timed out

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "./mulitRAG.py", line 153, in <module>
    engine.ask()
  File "./mulitRAG.py", line 135, in ask
    response = self.llm.complete(prompt=self.qa_tmpl_str.format(context_str=context_str, query_str=query_str, metadata_str=metadata_str),image_documents=image_documents)
  File "/usr/local/lib/python3.8/dist-packages/llama_index/multi_modal_llms/ollama/base.py", line 160, in complete
    response = self._client.generate(
  File "/usr/local/lib/python3.8/dist-packages/ollama/_client.py", line 126, in generate
    return self._request_stream(
  File "/usr/local/lib/python3.8/dist-packages/ollama/_client.py", line 97, in _request_stream
    return self._stream(*args, **kwargs) if stream else self._request(*args, **kwargs).json()
  File "/usr/local/lib/python3.8/dist-packages/ollama/_client.py", line 68, in _request
    response = self._client.request(method, url, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/httpx/_client.py", line 827, in request
    return self.send(request, auth=auth, follow_redirects=follow_redirects)
  File "/usr/local/lib/python3.8/dist-packages/httpx/_client.py", line 914, in send
    response = self._send_handling_auth(
  File "/usr/local/lib/python3.8/dist-packages/httpx/_client.py", line 942, in _send_handling_auth
    response = self._send_handling_redirects(
  File "/usr/local/lib/python3.8/dist-packages/httpx/_client.py", line 979, in _send_handling_redirects
    response = self._send_single_request(request)
  File "/usr/local/lib/python3.8/dist-packages/httpx/_client.py", line 1015, in _send_single_request
    response = transport.handle_request(request)
  File "/usr/local/lib/python3.8/dist-packages/httpx/_transports/default.py", line 233, in handle_request
    resp = self._pool.handle_request(req)
  File "/usr/lib/python3.8/contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "/usr/local/lib/python3.8/dist-packages/httpx/_transports/default.py", line 86, in map_httpcore_exceptions
    raise mapped_exc(message) from exc
httpx.ReadTimeout: timed out

I am fairly sure my machine is running inference on my query, but I never receive a response to the question.

qqrboqgw #1

To resolve the timeout you are hitting when running your local multimodal RAG chatbot, you can set the request_timeout parameter on the OllamaMultiModal class. This parameter controls how long HTTP requests to the Ollama API server are allowed to take.
Here is how to set the request_timeout parameter:

from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW
from llama_index.multi_modal_llms.ollama import OllamaMultiModal

ollama_model = OllamaMultiModal(
    base_url="http://localhost:11434",
    model="llava",
    temperature=0.75,
    context_window=DEFAULT_CONTEXT_WINDOW,
    request_timeout=3600,  # Set the timeout to 3600 seconds (1 hour) or any other value you prefer
    additional_kwargs={}
)

In your code you have already set request_timeout to 3600 seconds, which should be enough in most cases. If you are still hitting timeouts, consider the following additional steps:

  1. Check network stability: make sure your network connection is stable and the request is not interrupted mid-flight.
  2. Optimize video processing: large video files take a long time to process. Make sure the processing steps (audio extraction, transcription, and frame extraction) are tuned for performance.
  3. Increase the timeout further: if necessary, raise the request_timeout value even more to accommodate longer processing times.
  4. Debugging and logging: enable verbose logging to pinpoint where the timeout occurs. You can enable debug logging in your application like this:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
  5. Asynchronous operation: where possible, handle long-running requests asynchronously rather than blocking the main thread; a related streaming sketch follows this list.
    By following these steps you should be able to diagnose and resolve the timeout in your multimodal RAG chatbot.
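
As a rough sketch that goes beyond the original answer: streaming the completion keeps the HTTP connection from sitting idle while LLaVA generates the whole reply, so the client is far less likely to hit a read timeout. This assumes your installed llama-index-multi-modal-llms-ollama version implements stream_complete and that the llava model is already pulled in Ollama; adapt the prompt and paths to your setup.

from llama_index.core import SimpleDirectoryReader
from llama_index.multi_modal_llms.ollama import OllamaMultiModal

# Hedged sketch: print tokens as they arrive instead of waiting for one large reply.
llm = OllamaMultiModal(model="llava", request_timeout=3600)
image_documents = SimpleDirectoryReader("./output/").load_data()  # same folder as in the question's script

for chunk in llm.stream_complete(
    prompt="Describe what happens in the video frames.",
    image_documents=image_documents,
):
    print(chunk.delta, end="", flush=True)  # each chunk carries the newly generated text
print()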
  • To continue talking to Dosu, mention @dosu.
tpgth1q7 #2

So, can anyone help? Am I using some of these features incorrectly?

tag5nh1u #3

To resolve the timeout in your local multimodal RAG chatbot, make sure the request_timeout parameter is passed through correctly when the Client is initialized. The code you are using already sets the Client's timeout from the request_timeout value.
Here is the relevant part of the code:

class OllamaMultiModal(MultiModalLLM):
    # ... other fields and methods ...

    def __init__(self, **kwargs: Any) -> None:
        """Init params."""
        super().__init__(**kwargs)
        self._client = Client(host=self.base_url, timeout=self.request_timeout)

If you are still hitting timeouts, check the following:

  1. Network issues: make sure no network problems are causing the timeout.
  2. Server performance: verify that the server hosting the model can answer requests within the configured timeout.
  3. Client configuration: double-check that request_timeout is set correctly when the OllamaMultiModal instance is initialized.
    Also make sure the ollama library is installed and imported correctly, as the OllamaMultiModal class expects:
def __init__(self, **kwargs: Any) -> None:
    """Init params."""
    # make sure that ollama is installed
    try:
        import ollama  # noqa: F401
    except ImportError:
        raise ImportError(
            "Ollama is not installed. Please install it using `pip install ollama`."
        )
    super().__init__(**kwargs)

If everything is configured correctly and the problem persists, you may need to dig into the server logs or network conditions to find the root cause of the timeout; a quick sanity check against the Ollama server, sketched below, can help narrow this down.
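
As an additional, hedged sketch (not part of the original answer): before tuning llama_index any further, it can help to talk to the Ollama REST API directly and time a minimal generation. This separates "the server itself is slow or unreachable" from "the client is misconfigured". It assumes the default endpoint http://localhost:11434 and that the llava model has been pulled.

import time

import httpx

OLLAMA_URL = "http://localhost:11434"  # default Ollama endpoint; adjust if yours differs

# 1) Is the server reachable, and which models are installed?
tags = httpx.get(f"{OLLAMA_URL}/api/tags", timeout=10).json()
print([m["name"] for m in tags.get("models", [])])

# 2) How long does a tiny, image-free generation take?
start = time.time()
resp = httpx.post(
    f"{OLLAMA_URL}/api/generate",
    json={"model": "llava", "prompt": "Say hello in one word.", "stream": False},
    timeout=600,
)
resp.raise_for_status()
print(resp.json().get("response"), f"({time.time() - start:.1f}s)")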
Additionally, make sure the httpx.Timeout object is used correctly, as in the Ollama class from the llama-index library:

with httpx.Client(timeout=Timeout(self.request_timeout)) as client:
    response = client.post(
        url=f"{self.base_url}/api/chat",
        json=payload,
    )
    response.raise_for_status()
    raw = response.json()
    message = raw["message"]
    return ChatResponse(
        message=ChatMessage(
            content=message.get("content"),
            role=MessageRole(message.get("role")),
            additional_kwargs=get_additional_kwargs(
                message, ("content", "role")
            ),
        ),
        raw=raw,
        additional_kwargs=get_additional_kwargs(raw, ("message",)),
    )
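
As a side note (a hedged sketch, not taken from the library source above): httpx.Timeout can also be given separate connect/read limits, so connection failures surface quickly while long generations are still allowed to finish. The values below are illustrative only.

import httpx

# Illustrative values: give up within 10 s if the server cannot be reached,
# but allow up to an hour for the model to produce its response.
timeout = httpx.Timeout(3600.0, connect=10.0)

with httpx.Client(timeout=timeout) as client:
    r = client.get("http://localhost:11434/api/tags")  # any cheap endpoint works as a probe
    r.raise_for_status()
    print(r.json())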

Make sure the Timeout object is instantiated correctly and passed to httpx.Client so request timeouts are handled properly. To continue talking to Dosu, mention @dosu.

9njqaruj #4

Hi, I'd like to use CLIP to embed my images and text. How do I use it? Could you give me an example?

isr3a4wc #5

To switch to CLIP for embedding your images and text in the local multimodal RAG chatbot, you can use the ClipEmbedding class from llama_index/embeddings/clip/base.py. Here is a step-by-step guide:

  1. Install the required packages:
pip install git+https://github.com/openai/CLIP.git
pip install torch pillow
  2. Initialize ClipEmbedding:
from llama_index.embeddings.clip import ClipEmbedding

clip_embedding = ClipEmbedding(
    embed_batch_size=10,  # You can adjust the batch size as needed
    model_name="ViT-B/32"  # You can choose from the available CLIP models
)
  3. Generate a text embedding:
text = "Your text here"
text_embedding = clip_embedding._get_text_embedding(text)
  4. Generate an image embedding:
img_file_path = "path/to/your/image.jpg"
image_embedding = await clip_embedding._aget_image_embedding(img_file_path)

Here is a complete example:

import asyncio
from llama_index.embeddings.clip import ClipEmbedding

# Initialize the ClipEmbedding class
clip_embedding = ClipEmbedding(
    embed_batch_size=10,  # Adjust the batch size as needed
    model_name="ViT-B/32"  # Choose from the available CLIP models
)

# Generate text embedding
text = "Your text here"
text_embedding = clip_embedding._get_text_embedding(text)
print("Text Embedding:", text_embedding)

# Generate image embedding
async def get_image_embedding():
    img_file_path = "path/to/your/image.jpg"
    image_embedding = await clip_embedding._aget_image_embedding(img_file_path)
    print("Image Embedding:", image_embedding)

# Run the async function to get image embedding
asyncio.run(get_image_embedding())

This code initializes the CLIP model, generates embeddings for the provided text and image, and prints them. References: 1 (https://github.com/run-llama/llama_index/blob/main/llama-index-legacy/llama_index/legacy/embeddings/clip.py), 2 (https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/embeddings/llama-index-embeddings-clip/llama_index/embeddings/clip/base.py).
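
Note that the underscore-prefixed methods used above are internal helpers. As a hedged alternative sketch (assuming a current llama-index-embeddings-clip release), the public BaseEmbedding/MultiModalEmbedding interface exposes get_text_embedding and get_image_embedding, which avoids calling private APIs:

from llama_index.embeddings.clip import ClipEmbedding

clip_embedding = ClipEmbedding(model_name="ViT-B/32")

# Public, synchronous entry points; no private underscore methods needed.
text_vec = clip_embedding.get_text_embedding("Your text here")
image_vec = clip_embedding.get_image_embedding("path/to/your/image.jpg")
print(len(text_vec), len(image_vec))  # ViT-B/32 should give 512-dimensional vectors for both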

  • To continue talking to Dosu, mention @dosu.
arknldoa #6


Please show an example of using CLIP with LanceDBVectorStore.

w8f9ii69 #7

To replace the current embedding model with CLIP and use it in your local multimodal RAG chatbot, follow these steps:

  1. Install the required packages:
pip install git+https://github.com/openai/CLIP.git
pip install torch pillow
  2. Define ClipEmbedding:
    Use the ClipEmbedding class provided below, which generates embeddings with a CLIP model. Here is the full class definition:
import logging
import os
from typing import Any, List

from PIL import Image

# `Embedding` (an alias for List[float]) is needed for the type hints below
from llama_index.core.base.embeddings.base import Embedding
from llama_index.core.bridge.pydantic import Field, PrivateAttr
from llama_index.core.constants import DEFAULT_EMBED_BATCH_SIZE
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
from llama_index.core.schema import ImageType

logger = logging.getLogger(__name__)

AVAILABLE_CLIP_MODELS = (
    "RN50",
    "RN101",
    "RN50x4",
    "RN50x16",
    "RN50x64",
    "ViT-B/32",
    "ViT-B/16",
    "ViT-L/14",
    "ViT-L/14@336px",
)
DEFAULT_CLIP_MODEL = "ViT-B/32"

class ClipEmbedding(MultiModalEmbedding):
    """CLIP embedding models for encoding text and image for Multi-Modal purpose.

    This class provides an interface to generate embeddings using a model
    deployed in OpenAI CLIP. At the initialization it requires a model name
    of CLIP.

    Note:
        Requires the `clip` package to be available in the PYTHONPATH. It can be installed with
        `pip install git+https://github.com/openai/CLIP.git`.
    """

    embed_batch_size: int = Field(default=DEFAULT_EMBED_BATCH_SIZE, gt=0)

    _clip: Any = PrivateAttr()
    _model: Any = PrivateAttr()
    _preprocess: Any = PrivateAttr()
    _device: Any = PrivateAttr()

    @classmethod
    def class_name(cls) -> str:
        return "ClipEmbedding"

    def __init__(
        self,
        *,
        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
        model_name: str = DEFAULT_CLIP_MODEL,
        **kwargs: Any,
    ):
        """Initializes the ClipEmbedding class.

During the initialization the `clip` package is imported.

Args:
embed_batch_size (int, optional): The batch size for embedding generation. Defaults to 10,
must be > 0 and <= 100.
model_name (str): The model name of Clip model.

Raises:
ImportError: If the `clip` package is not available in the PYTHONPATH.
ValueError: If the model cannot be fetched from Open AI. or if the embed_batch_size
is not in the range (0, 100].
"""
        if embed_batch_size <= 0:
            raise ValueError(f"Embed batch size {embed_batch_size}  must be > 0.")

        try:
            import clip
            import torch
        except ImportError:
            raise ImportError(
                "ClipEmbedding requires `pip install git+https://github.com/openai/CLIP.git` and torch."
            )

        super().__init__(
            embed_batch_size=embed_batch_size, model_name=model_name, **kwargs
        )

        try:
            self._device = "cuda" if torch.cuda.is_available() else "cpu"
            is_local_path = os.path.exists(self.model_name)
            if not is_local_path and self.model_name not in AVAILABLE_CLIP_MODELS:
                raise ValueError(
                    f"Model name {self.model_name} is not available in CLIP."
                )
            self._model, self._preprocess = clip.load(
                self.model_name, device=self._device
            )

        except Exception as e:
            logger.error("Error while loading clip model.")
            raise ValueError("Unable to fetch the requested embeddings model") from e

    # TEXT EMBEDDINGS

    async def _aget_query_embedding(self, query: str) -> Embedding:
        return self._get_query_embedding(query)

    def _get_text_embedding(self, text: str) -> Embedding:
        return self._get_text_embeddings([text])[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]:
        results = []
        for text in texts:
            try:
                import clip
            except ImportError:
                raise ImportError(
                    "ClipEmbedding requires `pip install git+https://github.com/openai/CLIP.git` and torch."
                )
            text_embedding = self._model.encode_text(
                clip.tokenize(text).to(self._device)
            )
            results.append(text_embedding.tolist()[0])

        return results

    def _get_query_embedding(self, query: str) -> Embedding:
        return self._get_text_embedding(query)

    # IMAGE EMBEDDINGS

    async def _aget_image_embedding(self, img_file_path: ImageType) -> Embedding:
        return self._get_image_embedding(img_file_path)

    def _get_image_embedding(self, img_file_path: ImageType) -> Embedding:
        import torch
        with torch.no_grad():
            image = (
                self._preprocess(Image.open(img_file_path))
                .unsqueeze(0)
                .to(self._device)
            )
            return self._model.encode_image(image).tolist()[0]
  3. Integrate with LanceDBVectorStore:
    Assuming you have a LanceDBVectorStore class, you need to integrate it with the ClipEmbedding class. Here is a simplified example of how this could be done:
from lance_db_vector_store import LanceDBVectorStore  # Assuming you have this class
from llama_index.embeddings.clip import ClipEmbedding  # Importing ClipEmbedding from llama_index

# Initialize the ClipEmbedding
clip_embedding = ClipEmbedding(model_name="ViT-B/32")

# Create an instance of LanceDBVectorStore with the ClipEmbedding
vector_store = LanceDBVectorStore(embedding_model=clip_embedding)

# Example usage
text = "A sample text to encode"
text_embedding = clip_embedding._get_text_embedding(text)
vector_store.add_embedding(text, text_embedding)

# For image embeddings
image_path = "path/to/your/image.jpg"
image_embedding = clip_embedding._get_image_embedding(image_path)
vector_store.add_embedding(image_path, image_embedding)

This code demonstrates how to initialize the ClipEmbedding class, generate text and image embeddings, and add them to a LanceDBVectorStore. Adjust the integration to match your specific LanceDBVectorStore implementation. References: 1 (https://github.com/run-llama/llama_index/blob/main/llama-index-legacy/llama_index/legacy/embeddings/clip.py), 2 (https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/embeddings/llama-index-embeddings-clip/llama_index/embeddings/clip/base.py), 3 (https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/embeddings/llama-index-embeddings-clip/llama_index/embeddings/clip/__init__.py).
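
For the pipeline in the original question specifically, here is a hedged sketch of how the pieces could be wired together. It assumes your llama_index version lets MultiModalVectorStoreIndex.from_documents accept an image_embed_model keyword (CLIP is its usual default image embedder) and that the LanceDB and CLIP integration packages are installed; adjust names and paths to your setup.

from llama_index.core import Settings, SimpleDirectoryReader, StorageContext
from llama_index.core.indices import MultiModalVectorStoreIndex
from llama_index.embeddings.clip import ClipEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.lancedb import LanceDBVectorStore

# Text embeddings for the text collection; swap in ClipEmbedding here as well if you
# want a single shared embedding space for text and images.
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

text_store = LanceDBVectorStore(uri="lancedb", table_name="text_collection")
image_store = LanceDBVectorStore(uri="lancedb", table_name="image_collection")
storage_context = StorageContext.from_defaults(vector_store=text_store, image_store=image_store)

documents = SimpleDirectoryReader("./output/").load_data()
index = MultiModalVectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    image_embed_model=ClipEmbedding(model_name="ViT-B/32"),  # assumed keyword, see note above
)
retriever = index.as_retriever(similarity_top_k=5, image_similarity_top_k=5)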

  • To continue talking to Dosu, mention @dosu.
