NodeJS 使axios等待重定向

bejyjqdl  于 2022-11-22  发布在  Node.js
关注(0)|答案(3)|浏览(183)

我正在尝试通过 Google 地图的 /search/ 路径从 Google 地图上抓取数据。
当我自己手动搜索时,我输入的是这个网址:'https://www.google.com/maps/search/new york'
然后我被重定向到这个网址:' https://www.google.com/maps/place/New+York,+%C3%89tat+de+New+York,+%C3%89tats-Unis/@40.6974881,-73.979681,10z/data=!3m1!4b1!4m5!3m4!1s0x89c24fa5d33f083b:0xc80b8f06e177fe62!8m2!3d40.7127753!4d-74.0059728 '
我无法使用axios来重现这个行为。我想也许可以用async / await来做一些事情,但是到目前为止我还没有找到任何解决方案。
下面是我的代码:

const axios = require('axios');
const cheerio = require('cheerio');

// Base address that the search term is appended to.
// NOTE(review): the string has no scheme ("https://"), so axios is not handed
// an absolute URL here — confirm whether that is intended.
var map = 'www.google.com/maps/search/';

// Request the search page for "New York" and try to pull addresses out of the
// returned HTML. NOTE(review): the search term contains an unencoded space.
axios.get(map + 'New York')
    .then(response => {
        // Loads `html` into cheerio, collects one entry per matched element and
        // logs the collected list together with its length.
        let getData = html => {
            // NOTE(review): `coor` and `v` are assigned without a declaration,
            // and `v` is never read anywhere in this snippet.
            coor = [];
            v= -1;
            const $ = cheerio.load(html);
            // NOTE(review): as written, this selector targets <scrollable-y>
            // elements nested inside `.widget-pane-content`; an element carrying
            // both classes would be `.widget-pane-content.scrollable-y` — confirm intent.
            $('.widget-pane-content scrollable-y').each((i, elem) => {
                coor.push({
                    adress : $(elem).find('span.widget-pane-link').text(),
                });
            });
            console.log(coor);
            console.log(coor.length);
        }
        // NOTE(review): axios exposes the response body as `response.data`;
        // `response.coor` is presumably undefined — confirm.
        getData(response.coor);
    })
    // Any failure of the request or of the handler above is only logged.
    .catch(error => {console.log(error);})

当我执行该文件时,我收到以下错误:

'Error: Request failed with status code 400'

如果你有任何线索来解决我的问题,谢谢分享它!

n53p2ov0

n53p2ov01#

看看像Selenium或Cypress.js这样的工具(Selenium的封装器)
(搜索“端到端测试”或“自动浏览器”)
不幸的是,Axios这样的工具无法做到这一点。Google Maps不会返回redirect响应,而是使用JavaScript重新加载页面。
Cypress示例:

// Open the Google Maps search URL in the Cypress-controlled browser.
cy.visit("https://www.google.com/maps/search/new york");
cy.wait(2000);  // sit for 2 seconds
// Query the page for the element of interest; '#thing-im-looking-for' is
// presumably a placeholder selector to be replaced with a real one.
cy.get('#thing-im-looking-for')
ncgqoxb0

ncgqoxb02#

尝试将cookieJar与axios一起使用
https://github.com/axios/axios/issues/943#issuecomment-599174929
或者使用request包将jar设置为true

// Issue the request with the `jar` option set to true, as the answer suggests
// for cookie handling; `url` is expected to be defined by the caller.
request(url, { jar: true })

https://stackoverflow.com/a/48912841/11686526

zz2j4svz

zz2j4svz3#

你不能使用axios从Google Maps获取地点信息,因为结果是通过JavaScript构建在页面上的,所以你需要使用一些浏览器自动化,例如Puppeteer。在下面的代码中,我向你展示了如何实现这一点(也可以在在线IDE上查看):

const puppeteer = require("puppeteer-extra");
const StealthPlugin = require("puppeteer-extra-plugin-stealth");

// Register the stealth plugin with puppeteer-extra before any browser is launched.
puppeteer.use(StealthPlugin());

// Search settings that are combined into the Google Maps search URL.
const requestParams = {
  baseURL: `http://google.com`, // origin that the /maps/search/ path is appended to
  query: "starbucks", // search term
  coordinates: "@47.6040174,-122.1854488,11z", // GPS coordinates of the location the query is applied to (the trailing "11z" is presumably the zoom level)
  hl: "en", // language to use for the Google Maps search
};

/**
 * Scrolls an element to its bottom over and over until its scroll height stops
 * growing, so that lazily loaded list items get a chance to be appended.
 *
 * The selector is passed to the page as an argument instead of being spliced
 * into a string of JavaScript, so selectors containing quotes work. The pause
 * uses a plain timer rather than `page.waitForTimeout`, which is not available
 * in recent Puppeteer releases.
 *
 * @param {object} page - Puppeteer page to operate on.
 * @param {string} scrollContainer - CSS selector of the scrollable element.
 * @param {number} [scrollDelay=5000] - Milliseconds to wait after each scroll
 *   before measuring the height again.
 * @returns {Promise<void>} Resolves once two consecutive measurements match.
 * @throws {Error} If no element matches `scrollContainer` at the start.
 */
async function scrollPage(page, scrollContainer, scrollDelay = 5000) {
  // Both callbacks run inside the page, so they must not close over local variables.
  const readHeight = () =>
    page.evaluate((selector) => document.querySelector(selector)?.scrollHeight ?? null, scrollContainer);
  const scrollToBottom = () =>
    page.evaluate((selector) => {
      const container = document.querySelector(selector);
      container?.scrollTo(0, container.scrollHeight);
    }, scrollContainer);

  let lastHeight = await readHeight();
  if (lastHeight === null) {
    throw new Error(`scrollPage: no element matches selector "${scrollContainer}"`);
  }

  while (true) {
    await scrollToBottom();
    // Give the page time to load and render the next batch of results.
    await new Promise((resolve) => setTimeout(resolve, scrollDelay));
    const newHeight = await readHeight();
    if (newHeight === lastHeight) {
      break;
    }
    lastHeight = newHeight;
  }
}

/**
 * Collects one record per result card (`.bfdHYd`) currently present in the page.
 *
 * The place id and GPS coordinates are parsed out of the card's link URL. A card
 * without a link, or whose link does not match the expected pattern, yields
 * `undefined` for `dataId`, `latitude` and `longitude` instead of aborting the
 * whole scrape with a TypeError.
 *
 * @param {object} page - Puppeteer page showing the search results.
 * @returns {Promise<object[]>} One plain object per card; fields whose element
 *   is missing are `undefined`.
 */
async function fillDataFromPage(page) {
  // The callback runs inside the page, so every helper it needs is defined within it.
  return page.evaluate(() => {
    // Non-global pattern: only the first match is needed. https://regex101.com/r/KFE09c/1
    const urlPattern = /!1s(?<id>[^!]+).+!3d(?<latitude>[^!]+)!4d(?<longitude>[^!]+)/;
    const withoutDots = (text) => text?.replaceAll("·", "").trim();

    return Array.from(document.querySelectorAll(".bfdHYd")).map((el) => {
      const textOf = (selector) => el.querySelector(selector)?.textContent;
      const detailRow = ".W4Efsd:last-child > .W4Efsd";

      const placeUrl = el.parentElement?.querySelector(".hfpxzc")?.getAttribute("href");
      // Parse the URL once; fall back to an empty object when there is nothing to parse.
      const urlMatch = placeUrl ? urlPattern.exec(placeUrl) : null;
      const { id: dataId, latitude, longitude } = urlMatch?.groups ?? {};

      return {
        title: textOf(".qBF1Pd")?.trim(),
        rating: textOf(".MW4etd")?.trim(),
        reviews: textOf(".UY7F9")?.replace("(", "").replace(")", "").trim(),
        type: withoutDots(textOf(`${detailRow}:nth-of-type(1) > span:first-child`)),
        address: withoutDots(textOf(`${detailRow}:nth-of-type(1) > span:last-child`)),
        openState: withoutDots(textOf(`${detailRow}:nth-of-type(3) > span:first-child`)),
        phone: withoutDots(textOf(`${detailRow}:nth-of-type(3) > span:last-child`)),
        website: el.querySelector("a[data-value]")?.getAttribute("href"),
        // Only the first separator dot is removed here, as in the original output format.
        description: textOf(`${detailRow}:nth-of-type(2)`)?.replace("·", "").trim(),
        serviceOptions: textOf(".qty3Ue")?.replaceAll("·", "").replaceAll("  ", " ").trim(),
        gpsCoordinates: {
          latitude,
          longitude,
        },
        placeUrl,
        dataId,
      };
    });
  });
}

/**
 * Launches a browser, opens the Google Maps search described by `requestParams`,
 * scrolls the result list to its end and returns the scraped places.
 *
 * The browser is closed in a `finally` block, so it is released even when
 * navigation or scraping throws.
 *
 * @returns {Promise<object[]>} The records produced by `fillDataFromPage`.
 */
async function getLocalPlacesInfo() {
  const browser = await puppeteer.launch({
    headless: false,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });

  try {
    const page = await browser.newPage();

    // Encode the free-text parts so characters such as spaces, "/" or "?" cannot
    // change the URL structure. The coordinates segment is left as written.
    const query = encodeURIComponent(requestParams.query);
    const hl = encodeURIComponent(requestParams.hl);
    const searchUrl = `${requestParams.baseURL}/maps/search/${query}/${requestParams.coordinates}?hl=${hl}`;

    page.setDefaultNavigationTimeout(60000);
    await page.goto(searchUrl);

    // NOTE(review): this waits for a further navigation after the initial load,
    // presumably the URL change Google Maps performs itself — confirm it always occurs.
    await page.waitForNavigation();

    const scrollContainer = ".m6QErb[aria-label]";

    // Short pause before scrolling; a plain timer is used because
    // `page.waitForTimeout` is not available in recent Puppeteer releases.
    await new Promise((resolve) => setTimeout(resolve, 2000));
    await scrollPage(page, scrollContainer);

    // `return await` keeps the browser open until the data has been read.
    return await fillDataFromPage(page);
  } finally {
    await browser.close();
  }
}

// Run the scraper and print every place without depth truncation. A failure is
// reported on stderr and reflected in the exit code instead of being left as an
// unhandled rejection.
getLocalPlacesInfo()
  .then((result) => console.dir(result, { depth: null }))
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });

输出

[
   {
      "title":"Starbucks",
      "rating":"4.4",
      "reviews":"210",
      "type":"Coffee shop",
      "address":"3300 W McGraw St",
      "openState":"Closed ⋅ Opens 6AM",
      "phone":"(206) 298-3390",
      "description":"Iconic Seattle-based coffeehouse chain",
      "serviceOptions":"Dine-in   Takeout   Delivery",
      "gpsCoordinates":{
         "latitude":"47.639704",
         "longitude":"-122.399869"
      },
      "placeUrl":"https://www.google.com/maps/place/Starbucks/data=!4m7!3m6!1s0x54901580f2d8ba8b:0xcc4a61a86f6d87ec!8m2!3d47.639704!4d-122.399869!16s%2Fg%2F1td6mc1x!19sChIJi7rY8oAVkFQR7Idtb6hhSsw?authuser=0&hl=en&rclk=1",
      "dataId":"0x54901580f2d8ba8b:0xcc4a61a86f6d87ec"
   },
   {
      "title":"Starbucks",
      "rating":"4.3",
      "reviews":"201",
      "type":"Coffee shop",
      "address":"701 5th Ave",
      "openState":"Closed ⋅ Opens 5:30AM Mon",
      "phone":"(206) 447-9934",
      "description":"Iconic Seattle-based coffeehouse chain",
      "serviceOptions":"Dine-in   Takeout   No delivery",
      "gpsCoordinates":{
         "latitude":"47.604155",
         "longitude":"-122.330827"
      },
      "placeUrl":"https://www.google.com/maps/place/Starbucks/data=!4m7!3m6!1s0x54906ab0bab91e09:0xd1284ac9106e9c7e!8m2!3d47.604155!4d-122.330827!16s%2Fg%2F1tdmk5c9!19sChIJCR65urBqkFQRfpxuEMlKKNE?authuser=0&hl=en&rclk=1",
      "dataId":"0x54906ab0bab91e09:0xd1284ac9106e9c7e"
   },
    ... and other places
]

你可以从我的博客文章《Web Scraping Google Maps Places with Nodejs》中阅读更多关于抓取 Google 地图的内容。

相关问题