我有一个小的puppeter&expressjsnode应用程序,它给出了一个起始url,按照一系列url的顺序跟踪每个url,将一个页面重定向到另一个页面,例如,第四个页面,我开始https://examples.com/ 5秒钟后,页面重定向到新页面,我的Puppeter应用程序跟随这段旅程,并使用一个名为 currentUrl
.
在将链接到页面的数据放入名为 journey
.
我正试图找出如何通过另一个注入函数来实现这一点,也不确定 puppet 演员如何知道页面何时重定向?
现在我得到了 loaded_at
每个url的时间,并可能在最后循环,得到每个对象之间的差异并添加一个新的键?
const dayjs = require('dayjs');
const AdvancedFormat = require('dayjs/plugin/advancedFormat');
dayjs.extend(AdvancedFormat);
const puppeteer = require('puppeteer');
const { config } = require('./config');
const logs = require('./logs');
const runEmulation = async (body) => {
// vars
const journey = [];
let evalInterval;
// launcher arg options
const argOptions = [];
// sandbox config
if ((config.puppeteer.run_in_sandbox === 'true')) {
argOptions.push('--no-sandbox');
}
// initiate a Puppeteer instance with options and launch
const browser = await puppeteer.launch({
args: argOptions,
headless: (config.puppeteer.run_in_headless === 'true') ? true : false
});
// launch a new page
const page = await browser.newPage();
// go to a URL
await page.goto(body.url);
await page.setUserAgent(body.userAgent)
// create a function to inject into the page to scrape data
const currentUrl = () => {
return window.location.href;
}
// log the current page every 250ms
evalInterval = setInterval(async () => {
try {
const url = await page.evaluate(currentUrl);
journey.push({
loaded_at: new Date(),
// TODO: calculate duration on page
duration: 0,
url: url,
});
} catch (err) { }
}, parseInt(config.emulation.check_frequency))
// expose a function to close the browser
async function closeBrowser () {
await browser.close();
}
// dedupe functioon
function uniqueObjects (data, key) {
return [
...new Map(
data.map(x => [key(x), x])
).values()
]
}
// return our destinations after a defined time
return new Promise((resolve, reject) => {
setTimeout(async () => {
const deduped = uniqueObjects(journey, it => it.url)
closeBrowser();
clearInterval(evalInterval);
let hops = deduped.length - 1
if (hops < 1) hops = 0
const data = {
hops: hops,
destination: deduped.length > 1 ? deduped[deduped.length - 1] : deduped[0]
}
resolve(data);
}, parseInt(5 * 1000))
});
}
exports.runEmulation = runEmulation
我需要这样做的原因是,如果 puppet 剧在一页上的时间超过x秒,我可以关闭它。
暂无答案!
目前还没有任何答案,快来回答吧!