描述

将 BERTopic 与 OpenAI API 结合使用时，我遇到了超时问题：发起请求 600 秒后出现 Read Timeout 错误。

错误详情

requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
  at .send (/opt/conda/lib/python3.10/site-packages/requests/adapters.py:532)
  at .send (/opt/conda/lib/python3.10/site-packages/requests/sessions.py:703)
  at .request (/opt/conda/lib/python3.10/site-packages/requests/sessions.py:589)

重现步骤

1. 设置 BERTopic 并确保其配置正确。
2. 向 OpenAI API 发起请求。
3. 等待请求完成或失败（约 600 秒后出现上述 Read Timeout 错误）。

from fastapi import FastAPI, HTTPException, Request
import numpy as np
import openai
from bertopic import BERTopic
from bertopic.representation import OpenAI
import time
from hdbscan import HDBSCAN
import os

app = FastAPI()

# Read the OpenAI API key from the environment so it is never hard-coded.
openai.api_key = os.getenv("OPENAI_API_KEY")

# Fail fast at import time instead of on the first request.
if not openai.api_key:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")

# Prompt used to ask the chat model for a short topic label.
# [DOCUMENTS] and [KEYWORDS] are the placeholder tags described in the
# BERTopic representation-model documentation.
summarization_prompt = """
I have a topic that contains the following documents: 
[DOCUMENTS]
The topic is described by the following keywords: [KEYWORDS]

Based on the information above, extract a short topic label in the following format:
topic: <topic label>
"""

# Upper bound (seconds) for a single OpenAI HTTP call. Without it a stalled
# connection is only abandoned after the 600 s read timeout seen in the
# reported traceback, which holds the whole clustering request hostage.
OPENAI_REQUEST_TIMEOUT_SECONDS = 60

# NOTE(review): assumes the pre-1.0 `openai` client (the one that talks through
# `requests`, as in the traceback), where `request_timeout` is an accepted
# keyword of the completion call and BERTopic forwards `generator_kwargs`
# to it unchanged -- confirm against the installed versions.
representation_model = OpenAI(
    model="gpt-3.5-turbo",
    prompt=summarization_prompt,
    nr_docs=3,
    chat=True,
    generator_kwargs={"request_timeout": OPENAI_REQUEST_TIMEOUT_SECONDS},
)

def _build_topic_response(topic_messages: list, embeddings: list) -> dict:
    """Fit a fresh BERTopic model and collect its results as plain records.

    This is synchronous, blocking work (clustering plus the representation
    model's OpenAI calls) and is meant to be run off the event loop.

    Args:
        topic_messages: Documents to cluster.
        embeddings: One precomputed embedding per document, in the same order.

    Returns:
        A dict with "document" and "topic" record lists and a "tree" entry
        holding the hierarchical-topic records, or None when the topic table
        has two rows or fewer.
    """
    embedding_matrix = np.asarray(embeddings)
    hdbscan_model = HDBSCAN(
        min_cluster_size=3,
        min_samples=1,
        metric='euclidean',
        cluster_selection_method='eom',
        prediction_data=True,
    )
    topic_model = BERTopic(representation_model=representation_model, hdbscan_model=hdbscan_model)
    topic_model.fit(topic_messages, embedding_matrix)

    document_df = topic_model.get_document_info(topic_messages)
    topic_df = topic_model.get_topic_info()

    response = {
        "document": document_df.to_dict(orient="records"),
        "topic": topic_df.to_dict(orient="records"),
    }

    # The hierarchy is only requested when the topic table has more than two rows.
    if len(topic_df) > 2:
        hierarchical_topics = topic_model.hierarchical_topics(topic_messages)
        response["tree"] = hierarchical_topics.to_dict(orient="records")
    else:
        response["tree"] = None

    return response


@app.post("/topic_clustering")
async def topic_clustering_post(request: Request):
    """Cluster the posted documents into topics and return the results.

    The JSON body must be an object with two equal-length lists:
    'topic_messages' (the documents) and 'embeddings' (one per document).

    Returns:
        A dict with "document", "topic", "tree" and "execution_time"
        (wall-clock seconds spent handling the request).

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not an object,
            lacks a required field, or the two fields are not equal-length
            lists; 500 when model fitting or result extraction fails.
    """
    # Local import so the block does not depend on a file-level import.
    import asyncio

    start_time = time.time()

    # A malformed body is a client error, not a server error.
    try:
        content = await request.json()
    except ValueError as e:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON.") from e

    # The isinstance check keeps non-object bodies (e.g. a bare number) from
    # raising TypeError on the membership tests.
    if not isinstance(content, dict) or 'topic_messages' not in content or 'embeddings' not in content:
        raise HTTPException(status_code=400, detail="JSON must include 'topic_messages' and 'embeddings' fields.")

    topic_messages = content['topic_messages']
    embeddings = content['embeddings']

    if not isinstance(topic_messages, list) or not isinstance(embeddings, list):
        raise HTTPException(status_code=400, detail="Both 'topic_messages' and 'embeddings' must be lists.")

    if len(topic_messages) != len(embeddings):
        raise HTTPException(status_code=400, detail="'topic_messages' and 'embeddings' must have the same length.")

    # Fitting blocks for as long as the OpenAI calls take; running it in a
    # worker thread keeps the event loop free to serve other requests.
    try:
        response = await asyncio.to_thread(_build_topic_response, topic_messages, embeddings)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    response['execution_time'] = time.time() - start_time
    return response