我正在使用 Express 的 response.write() 方法，以流式方式传输由递归脚本收集的链接。它在本地以及未启用 SSL 的 nginx 服务器上都工作正常。然而，当我为 nginx 启用 SSL 之后，响应不再以流式传输，而是在脚本执行完毕后一次性返回。下面是控制器：
/**
 * Express handler that crawls the site given in `req.query.siteUrl` and
 * streams the same-host links it discovers back to the client while the
 * crawl is still running.
 *
 * Links are written in batches; each batch is one JSON array followed by a
 * newline, so a client can split the stream on "\n" even when several
 * batches arrive in a single network chunk.
 *
 * @param {object} req - Express request; reads `req.query.siteUrl`.
 * @param {object} response - Express response the batches are written to.
 * @returns {Promise<*>} Resolves once the response has been ended (or with
 *   the result of `response.json()` when `siteUrl` is missing).
 */
const getLinks = async (req, response) => {
  const MAX_PAGES = 30; // upper bound on pages fetched per request
  const BATCH_SIZE = 5; // links per streamed chunk
  const REQUEST_TIMEOUT_MS = 10000;

  const siteUrl = req.query.siteUrl;
  if (!siteUrl) {
    // The original referenced an undefined `res` here, which threw a
    // ReferenceError instead of answering the client.
    return response.status(400).json({ error: "Site url not provided" });
  }

  // 200, not 206: this is a complete streamed body, not a byte-range reply.
  response.writeHead(200, {
    "Content-Type": "text/plain; charset=utf-8",
    "Transfer-Encoding": "chunked",
    "X-Content-Type-Options": "nosniff",
    // Asks an nginx reverse proxy not to buffer this response, so chunks
    // reach the client as they are written.
    "X-Accel-Buffering": "no",
  });

  const domainName = urlparse.parse(siteUrl).hostname;
  const seenUrls = new Set(); // sanitized URLs already reported
  let totalUrlsVisited = 0;
  let urlsBatch = [];

  // Writes the pending batch (if any) to the client and starts a new one.
  const flushBatch = () => {
    if (urlsBatch.length === 0) return;
    response.write(`${JSON.stringify(urlsBatch)}\n`);
    // NOTE(review): some middleware (e.g. compression) adds response.flush()
    // to push buffered output; call it when present instead of overriding it.
    if (typeof response.flush === "function") response.flush();
    console.log(urlsBatch);
    urlsBatch = [];
  };

  // A URL is reportable when it has a protocol and hostname and does not
  // look like a mail address.
  const isValid = (candidate) => {
    if (candidate.includes("@") || candidate.includes("mailto")) return false;
    const { hostname, protocol } = urlparse.parse(candidate);
    return Boolean(hostname) && Boolean(protocol);
  };

  // Fetches one page and returns the new same-host links found on it,
  // queueing each of them for streaming as a side effect.
  const getAllWebsiteLinks = async (pageUrl) => {
    let page;
    try {
      page = await axios.get(pageUrl, { timeout: REQUEST_TIMEOUT_MS });
    } catch (error) {
      console.log(error.message, pageUrl);
      return [];
    }

    const $ = cheerio.load(page.data);
    const found = [];
    for (const element of $("a").toArray()) {
      const href = $(element).attr("href");
      if (!href) continue;

      let absoluteUrl;
      try {
        absoluteUrl = new URL(href, pageUrl).toString();
      } catch (error) {
        // Malformed href: skip this anchor rather than abort the crawl.
        continue;
      }

      const parsedUrl = urlparse.parse(absoluteUrl);
      const sanitizedUrl = `${parsedUrl.protocol}//${parsedUrl.hostname}${
        parsedUrl.pathname ? parsedUrl.pathname : ""
      }`;
      if (seenUrls.has(sanitizedUrl)) continue;
      if (parsedUrl.hostname !== domainName) continue;
      if (!isValid(sanitizedUrl)) continue;

      seenUrls.add(sanitizedUrl);
      found.push(sanitizedUrl);
      urlsBatch.push(sanitizedUrl);
      if (urlsBatch.length === BATCH_SIZE) flushBatch();
    }
    return found;
  };

  // Depth-first crawl, bounded by MAX_PAGES fetched pages.
  const crawl = async (pageUrl) => {
    totalUrlsVisited++;
    const links = await getAllWebsiteLinks(pageUrl);
    for (const link of links) {
      if (totalUrlsVisited < MAX_PAGES) await crawl(link);
    }
  };

  try {
    await crawl(siteUrl);
    // Send the final partial batch; the original dropped up to four links.
    flushBatch();
  } catch (error) {
    console.error(error);
  } finally {
    console.log(seenUrls.size, totalUrlsVisited);
    // Always terminate the stream so the client's reader sees `done`.
    response.end();
  }
};
字符串
在客户端，我按如下方式读取响应：
// Requests the link stream from the API and logs each chunk as it arrives.
try {
  setFetchingMore(true);
  const { auth } = await chrome.storage.local.get("auth");
  const response = await fetch(
    `http://localhost:8000/api/chat/get_links/?siteUrl=${window.location.origin}`,
    {
      signal: controller.signal,
      headers: {
        Accept: "text/event-stream",
        Authorization: auth.id_token,
      },
    }
  );
  if (!response.ok) {
    throw Error(response.statusText);
  }

  const reader = response.body.getReader();
  // One decoder for the whole stream: with `stream: true` it keeps the bytes
  // of a multi-byte character that is split across two chunks instead of
  // emitting replacement characters.
  const decoder = new TextDecoder();
  while (true) {
    const { value, done } = await reader.read();
    if (done) {
      // Emit anything the decoder was still holding back.
      const tail = decoder.decode();
      if (tail) console.log(tail);
      break;
    }
    const chunk = decoder.decode(value, { stream: true });
    console.log(chunk);
  }
} catch (error) {
  console.log(error);
} finally {
  // Reset the loading flag on failure or abort too, not only on success.
  setFetchingMore(false);
}
型
下面是我的nginx配置
# Plain-HTTP (port 80) server block for api.domain.com.
server {
# Permanently redirect (301) requests whose Host is api.domain.com to the
# same URI over HTTPS.
if ($host = api.domain.com) {
return 301 https://$host$request_uri;
} # managed by Certbot
listen 80;
server_name api.domain.com;
# Requests not caught by the redirect above are answered with 404.
return 404; # managed by Certbot
}
# NOTE(review): this closing brace has no matching opener in the visible
# snippet - presumably part of the config was trimmed; confirm.
}
型
1条答案
按热度按时间a8jjtwal1#
在nginx配置中关闭代理缓冲可以解决这个问题
proxy_buffering off;