如何使用node和cheerio遍历表

c6ubokkw  于 2022-11-22  发布在  Node.js
关注(0)|答案(1)|浏览(133)

一整天我都在做这个网页抓取器,它可以从www.example.com上提取日期和日落时间sunrise-sunset.org
我设法让它刮擦和显示表中的第一天和时间,但我找不到如何迭代通过表,而使用节点,axios,和cheerio,因为我相当新的所有三个。
我的JSON只有一个对象,但我正在尝试找出如何遍历表,以便将表中的每一行都作为自己的对象。

[
  {
   "day": "Tue, Nov 1",
   "time": "5:59:57 pm"
  }
  {
   "day": "Tue, Nov 2",
   "time": "5:55:37 pm"
  }
  {
   "day": "Tue, Nov 2",
   "time": "5:42:47 pm"
  }
]

这是我的代码到目前为止,已导致拉的第一天和时间从网站。

app.get('/results', function (req, res) {
        axios(url)
            .then(response => {
                const html = response.data
                const $ = cheerio.load(html)
                const days = []
    
                $('#month', html).each(function () {
                    const day = $('.number', '.day').prop('innerText')
                    const time = $('.sunset', '.day').prop('innerText')
                    days.push({
                        day,
                        time
                    })
                })
                res.json(days)
            })
            .catch(err => console.log(err))
    })

然后用下面的代码在空div中显示结果

const feedDisplay = document.querySelector('#feed')

fetch('http://localhost:8000/results')
    .then(response => { return response.json() })
    // .then(data => console.log(data))
    .then(data => {
        data.forEach(daysaray => {
            const dayItem = `<div><h3>` + daysaray.day +`</h3><p>` + daysaray.time +`</div>`
            feedDisplay.insertAdjacentHTML("beforeend", dayItem)
        })
    })
    .catch(err => console.log(err))

编辑

这是我试图抓取的表的HTML

<table id="month">
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<col>
<tbody><tr class="headers">
<th rowspan="2">Day</th>
<th rowspan="2">Twilight start</th>
<th rowspan="2">Sunrise</th>
<th rowspan="2">Sunset</th>
<th rowspan="2">Twilight end</th>
<th rowspan="2">Day length</th>
<th rowspan="2">Solar noon</th>
<th colspan="2">Nautical twilight</th>
<th colspan="2">Astronomical twilight</th>
</tr>
<tr class="headers">
<th>Start</th>
<th>End</th>
<th>Start</th>
<th>End</th>
</tr>
<tr class="day" rel="2022-11-01">
<th><span class="number">Tue, Nov 1</span></th>
<td>7:19:52 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-01 in Portland, Multnomah County, Oregon, USA">7:49:10 am</span></td>
<td><span title="Sunset time 2022-11-01 in Portland, Multnomah County, Oregon, USA" class="sunset">5:59:22 pm</span></td>
<td>6:28:40 pm</td>
<td>10:10:12</td>
<td>12:54:16 pm</td>
<td>6:44 am</td>
<td>7:03 pm</td>
<td>6:10 am</td>
<td>7:38 pm</td>
</tr>
<tr class="day" rel="2022-11-02">
<th><span class="number">Wed, Nov 2</span></th>
<td>7:21:11 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-02 in Portland, Multnomah County, Oregon, USA">7:50:33 am</span></td>
<td><span title="Sunset time 2022-11-02 in Portland, Multnomah County, Oregon, USA" class="sunset">5:57:57 pm</span></td>
<td>6:27:19 pm</td>
<td>10:07:24</td>
<td>12:54:15 pm</td>
<td>6:46 am</td>
<td>7:02 pm</td>
<td>6:11 am</td>
<td>7:36 pm</td>
</tr>
<tr class="day" rel="2022-11-03">
<th><span class="number">Thu, Nov 3</span></th>
<td>7:22:30 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-03 in Portland, Multnomah County, Oregon, USA">7:51:57 am</span></td>
<td><span title="Sunset time 2022-11-03 in Portland, Multnomah County, Oregon, USA" class="sunset">5:56:33 pm</span></td>
<td>6:26:00 pm</td>
<td>10:04:36</td>
<td>12:54:15 pm</td>
<td>6:47 am</td>
<td>7:01 pm</td>
<td>6:12 am</td>
<td>7:35 pm</td>
</tr>
<tr class="day" rel="2022-11-04">
<th><span class="number">Fri, Nov 4</span></th>
<td>7:23:49 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-04 in Portland, Multnomah County, Oregon, USA">7:53:21 am</span></td>
<td><span title="Sunset time 2022-11-04 in Portland, Multnomah County, Oregon, USA" class="sunset">5:55:11 pm</span></td>
<td>6:24:42 pm</td>
<td>10:01:50</td>
<td>12:54:16 pm</td>
<td>6:48 am</td>
<td>6:59 pm</td>
<td>6:14 am</td>
<td>7:34 pm</td>
</tr>
<tr class="day" rel="2022-11-05">
<th><span class="number">Sat, Nov 5</span></th>
<td>7:25:08 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-05 in Portland, Multnomah County, Oregon, USA">7:54:45 am</span></td>
<td><span title="Sunset time 2022-11-05 in Portland, Multnomah County, Oregon, USA" class="sunset">5:53:50 pm</span></td>
<td>6:23:27 pm</td>
<td>09:59:05</td>
<td>12:54:17 pm</td>
<td>6:49 am</td>
<td>6:58 pm</td>
<td>6:15 am</td>
<td>7:33 pm</td>
</tr>
<tr class="day" rel="2022-11-06">
<th><span class="number">Sun, Nov 6</span></th>
<td>6:26:27 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-06 in Portland, Multnomah County, Oregon, USA">6:56:09 am</span></td>
<td><span title="Sunset time 2022-11-06 in Portland, Multnomah County, Oregon, USA" class="sunset">4:52:31 pm</span></td>
<td>5:22:12 pm</td>
<td>09:56:22</td>
<td>11:54:20 am</td>
<td>5:51 am</td>
<td>5:57 pm</td>
<td>5:16 am</td>
<td>6:32 pm</td>
</tr>
<tr class="day" rel="2022-11-07">
<th><span class="number">Mon, Nov 7</span></th>
<td>6:27:46 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-07 in Portland, Multnomah County, Oregon, USA">6:57:32 am</span></td>
<td><span title="Sunset time 2022-11-07 in Portland, Multnomah County, Oregon, USA" class="sunset">4:51:14 pm</span></td>
<td>5:21:00 pm</td>
<td>09:53:42</td>
<td>11:54:23 am</td>
<td>5:52 am</td>
<td>5:56 pm</td>
<td>5:17 am</td>
<td>6:31 pm</td>
</tr>
<tr class="day" rel="2022-11-08">
<th><span class="number">Tue, Nov 8</span></th>
<td>6:29:05 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-08 in Portland, Multnomah County, Oregon, USA">6:58:56 am</span></td>
<td><span title="Sunset time 2022-11-08 in Portland, Multnomah County, Oregon, USA" class="sunset">4:49:59 pm</span></td>
<td>5:19:49 pm</td>
<td>09:51:03</td>
<td>11:54:27 am</td>
<td>5:53 am</td>
<td>5:55 pm</td>
<td>5:18 am</td>
<td>6:29 pm</td>
</tr>
<tr class="day" rel="2022-11-09">
<th><span class="number">Wed, Nov 9</span></th>
<td>6:30:24 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-09 in Portland, Multnomah County, Oregon, USA">7:00:20 am</span></td>
<td><span title="Sunset time 2022-11-09 in Portland, Multnomah County, Oregon, USA" class="sunset">4:48:45 pm</span></td>
<td>5:18:41 pm</td>
<td>09:48:25</td>
<td>11:54:32 am</td>
<td>5:54 am</td>
<td>5:54 pm</td>
<td>5:20 am</td>
<td>6:28 pm</td>
</tr>
<tr class="day" rel="2022-11-10">
<th><span class="number">Thu, Nov 10</span></th>
<td>6:31:42 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-10 in Portland, Multnomah County, Oregon, USA">7:01:43 am</span></td>
<td><span title="Sunset time 2022-11-10 in Portland, Multnomah County, Oregon, USA" class="sunset">4:47:33 pm</span></td>
<td>5:17:34 pm</td>
<td>09:45:50</td>
<td>11:54:38 am</td>
<td>5:56 am</td>
<td>5:53 pm</td>
<td>5:21 am</td>
<td>6:27 pm</td>
</tr>
<tr class="day" rel="2022-11-11">
<th><span class="number">Fri, Nov 11</span></th>
<td>6:33:01 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-11 in Portland, Multnomah County, Oregon, USA">7:03:07 am</span></td>
<td><span title="Sunset time 2022-11-11 in Portland, Multnomah County, Oregon, USA" class="sunset">4:46:23 pm</span></td>
<td>5:16:29 pm</td>
<td>09:43:16</td>
<td>11:54:45 am</td>
<td>5:57 am</td>
<td>5:52 pm</td>
<td>5:22 am</td>
<td>6:26 pm</td>
</tr>
<tr class="day" rel="2022-11-12">
<th><span class="number">Sat, Nov 12</span></th>
<td>6:34:19 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-12 in Portland, Multnomah County, Oregon, USA">7:04:30 am</span></td>
<td><span title="Sunset time 2022-11-12 in Portland, Multnomah County, Oregon, USA" class="sunset">4:45:15 pm</span></td>
<td>5:15:26 pm</td>
<td>09:40:45</td>
<td>11:54:52 am</td>
<td>5:58 am</td>
<td>5:51 pm</td>
<td>5:23 am</td>
<td>6:25 pm</td>
</tr>
<tr class="day" rel="2022-11-13">
<th><span class="number">Sun, Nov 13</span></th>
<td>6:35:37 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-13 in Portland, Multnomah County, Oregon, USA">7:05:53 am</span></td>
<td><span title="Sunset time 2022-11-13 in Portland, Multnomah County, Oregon, USA" class="sunset">4:44:09 pm</span></td>
<td>5:14:25 pm</td>
<td>09:38:16</td>
<td>11:55:01 am</td>
<td>5:59 am</td>
<td>5:50 pm</td>
<td>5:24 am</td>
<td>6:25 pm</td>
</tr>
<tr class="day" rel="2022-11-14">
<th><span class="number">Mon, Nov 14</span></th>
<td>6:36:54 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-14 in Portland, Multnomah County, Oregon, USA">7:07:15 am</span></td>
<td><span title="Sunset time 2022-11-14 in Portland, Multnomah County, Oregon, USA" class="sunset">4:43:05 pm</span></td>
<td>5:13:26 pm</td>
<td>09:35:50</td>
<td>11:55:10 am</td>
<td>6:01 am</td>
<td>5:49 pm</td>
<td>5:26 am</td>
<td>6:24 pm</td>
</tr>
<tr class="day" rel="2022-11-15">
<th><span class="number">Tue, Nov 15</span></th>
<td>6:38:12 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-15 in Portland, Multnomah County, Oregon, USA">7:08:37 am</span></td>
<td><span title="Sunset time 2022-11-15 in Portland, Multnomah County, Oregon, USA" class="sunset">4:42:03 pm</span></td>
<td>5:12:28 pm</td>
<td>09:33:26</td>
<td>11:55:20 am</td>
<td>6:02 am</td>
<td>5:48 pm</td>
<td>5:27 am</td>
<td>6:23 pm</td>
</tr>
<tr class="day" rel="2022-11-16">
<th><span class="number">Wed, Nov 16</span></th>
<td>6:39:28 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-16 in Portland, Multnomah County, Oregon, USA">7:09:59 am</span></td>
<td><span title="Sunset time 2022-11-16 in Portland, Multnomah County, Oregon, USA" class="sunset">4:41:03 pm</span></td>
<td>5:11:34 pm</td>
<td>09:31:04</td>
<td>11:55:31 am</td>
<td>6:03 am</td>
<td>5:47 pm</td>
<td>5:28 am</td>
<td>6:22 pm</td>
</tr>
<tr class="day" rel="2022-11-17">
<th><span class="number">Thu, Nov 17</span></th>
<td>6:40:45 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-17 in Portland, Multnomah County, Oregon, USA">7:11:21 am</span></td>
<td><span title="Sunset time 2022-11-17 in Portland, Multnomah County, Oregon, USA" class="sunset">4:40:05 pm</span></td>
<td>5:10:41 pm</td>
<td>09:28:44</td>
<td>11:55:43 am</td>
<td>6:04 am</td>
<td>5:46 pm</td>
<td>5:29 am</td>
<td>6:21 pm</td>
</tr>
<tr class="day today" rel="2022-11-18">
<th><span class="number">Fri, Nov 18</span></th>
<td>6:42:01 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-18 in Portland, Multnomah County, Oregon, USA">7:12:42 am</span></td>
<td><span title="Sunset time 2022-11-18 in Portland, Multnomah County, Oregon, USA" class="sunset">4:39:09 pm</span></td>
<td>5:09:50 pm</td>
<td>09:26:27</td>
<td>11:55:55 am</td>
<td>6:05 am</td>
<td>5:45 pm</td>
<td>5:30 am</td>
<td>6:21 pm</td>
</tr>
<tr class="day" rel="2022-11-19">
<th><span class="number">Sat, Nov 19</span></th>
<td>6:43:16 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-19 in Portland, Multnomah County, Oregon, USA">7:14:02 am</span></td>
<td><span title="Sunset time 2022-11-19 in Portland, Multnomah County, Oregon, USA" class="sunset">4:38:15 pm</span></td>
<td>5:09:01 pm</td>
<td>09:24:13</td>
<td>11:56:09 am</td>
<td>6:07 am</td>
<td>5:45 pm</td>
<td>5:31 am</td>
<td>6:20 pm</td>
</tr>
<tr class="day" rel="2022-11-20">
<th><span class="number">Sun, Nov 20</span></th>
<td>6:44:31 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-20 in Portland, Multnomah County, Oregon, USA">7:15:22 am</span></td>
<td><span title="Sunset time 2022-11-20 in Portland, Multnomah County, Oregon, USA" class="sunset">4:37:24 pm</span></td>
<td>5:08:15 pm</td>
<td>09:22:02</td>
<td>11:56:23 am</td>
<td>6:08 am</td>
<td>5:44 pm</td>
<td>5:33 am</td>
<td>6:19 pm</td>
</tr>
<tr class="day" rel="2022-11-21">
<th><span class="number">Mon, Nov 21</span></th>
<td>6:45:45 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-21 in Portland, Multnomah County, Oregon, USA">7:16:41 am</span></td>
<td><span title="Sunset time 2022-11-21 in Portland, Multnomah County, Oregon, USA" class="sunset">4:36:35 pm</span></td>
<td>5:07:31 pm</td>
<td>09:19:54</td>
<td>11:56:38 am</td>
<td>6:09 am</td>
<td>5:43 pm</td>
<td>5:34 am</td>
<td>6:19 pm</td>
</tr>
<tr class="day" rel="2022-11-22">
<th><span class="number">Tue, Nov 22</span></th>
<td>6:46:58 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-22 in Portland, Multnomah County, Oregon, USA">7:17:59 am</span></td>
<td><span title="Sunset time 2022-11-22 in Portland, Multnomah County, Oregon, USA" class="sunset">4:35:48 pm</span></td>
<td>5:06:49 pm</td>
<td>09:17:49</td>
<td>11:56:54 am</td>
<td>6:10 am</td>
<td>5:43 pm</td>
<td>5:35 am</td>
<td>6:18 pm</td>
</tr>
<tr class="day" rel="2022-11-23">
<th><span class="number">Wed, Nov 23</span></th>
<td>6:48:11 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-23 in Portland, Multnomah County, Oregon, USA">7:19:17 am</span></td>
<td><span title="Sunset time 2022-11-23 in Portland, Multnomah County, Oregon, USA" class="sunset">4:35:04 pm</span></td>
<td>5:06:10 pm</td>
<td>09:15:47</td>
<td>11:57:10 am</td>
<td>6:11 am</td>
<td>5:42 pm</td>
<td>5:36 am</td>
<td>6:18 pm</td>
</tr>
<tr class="day" rel="2022-11-24">
<th><span class="number">Thu, Nov 24</span></th>
<td>6:49:23 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-24 in Portland, Multnomah County, Oregon, USA">7:20:34 am</span></td>
<td><span title="Sunset time 2022-11-24 in Portland, Multnomah County, Oregon, USA" class="sunset">4:34:22 pm</span></td>
<td>5:05:32 pm</td>
<td>09:13:48</td>
<td>11:57:28 am</td>
<td>6:12 am</td>
<td>5:42 pm</td>
<td>5:37 am</td>
<td>6:17 pm</td>
</tr>
<tr class="day" rel="2022-11-25">
<th><span class="number">Fri, Nov 25</span></th>
<td>6:50:34 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-25 in Portland, Multnomah County, Oregon, USA">7:21:50 am</span></td>
<td><span title="Sunset time 2022-11-25 in Portland, Multnomah County, Oregon, USA" class="sunset">4:33:42 pm</span></td>
<td>5:04:57 pm</td>
<td>09:11:52</td>
<td>11:57:46 am</td>
<td>6:13 am</td>
<td>5:41 pm</td>
<td>5:38 am</td>
<td>6:17 pm</td>
</tr>
<tr class="day" rel="2022-11-26">
<th><span class="number">Sat, Nov 26</span></th>
<td>6:51:45 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-26 in Portland, Multnomah County, Oregon, USA">7:23:05 am</span></td>
<td><span title="Sunset time 2022-11-26 in Portland, Multnomah County, Oregon, USA" class="sunset">4:33:05 pm</span></td>
<td>5:04:25 pm</td>
<td>09:10:00</td>
<td>11:58:05 am</td>
<td>6:15 am</td>
<td>5:41 pm</td>
<td>5:39 am</td>
<td>6:16 pm</td>
</tr>
<tr class="day" rel="2022-11-27">
<th><span class="number">Sun, Nov 27</span></th>
<td>6:52:54 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-27 in Portland, Multnomah County, Oregon, USA">7:24:19 am</span></td>
<td><span title="Sunset time 2022-11-27 in Portland, Multnomah County, Oregon, USA" class="sunset">4:32:30 pm</span></td>
<td>5:03:55 pm</td>
<td>09:08:11</td>
<td>11:58:24 am</td>
<td>6:16 am</td>
<td>5:40 pm</td>
<td>5:40 am</td>
<td>6:16 pm</td>
</tr>
<tr class="day" rel="2022-11-28">
<th><span class="number">Mon, Nov 28</span></th>
<td>6:54:03 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-28 in Portland, Multnomah County, Oregon, USA">7:25:31 am</span></td>
<td><span title="Sunset time 2022-11-28 in Portland, Multnomah County, Oregon, USA" class="sunset">4:31:58 pm</span></td>
<td>5:03:27 pm</td>
<td>09:06:27</td>
<td>11:58:45 am</td>
<td>6:17 am</td>
<td>5:40 pm</td>
<td>5:41 am</td>
<td>6:15 pm</td>
</tr>
<tr class="day" rel="2022-11-29">
<th><span class="number">Tue, Nov 29</span></th>
<td>6:55:10 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-29 in Portland, Multnomah County, Oregon, USA">7:26:43 am</span></td>
<td><span title="Sunset time 2022-11-29 in Portland, Multnomah County, Oregon, USA" class="sunset">4:31:28 pm</span></td>
<td>5:03:02 pm</td>
<td>09:04:45</td>
<td>11:59:06 am</td>
<td>6:18 am</td>
<td>5:39 pm</td>
<td>5:42 am</td>
<td>6:15 pm</td>
</tr>
<tr class="day" rel="2022-11-30">
<th><span class="number">Wed, Nov 30</span></th>
<td>6:56:16 am</td>
<td><span class="sunrise" title="Sunrise time 2022-11-30 in Portland, Multnomah County, Oregon, USA">7:27:54 am</span></td>
<td><span title="Sunset time 2022-11-30 in Portland, Multnomah County, Oregon, USA" class="sunset">4:31:01 pm</span></td>
<td>5:02:39 pm</td>
<td>09:03:07</td>
<td>11:59:27 am</td>
<td>6:19 am</td>
<td>5:39 pm</td>
<td>5:43 am</td>
<td>6:15 pm</td>
</tr>
</tbody></table>
5vf7fwbs

5vf7fwbs1#

模式$("some id").each没有多大意义。标识符在一个有效的HTML文档中应该是唯一的,所以它说“选择保证最多是一个元素的东西,然后迭代这个元素”。当然,一些页面滥用标识符作为类,所以这在罕见的无效HTML情况下是很有用的,但这并不适用于这里。
相反,请尝试选择<tr>行,如使用cheerio从表中抓取所有行和使用cheerio在节点中解析HTML中的表中所述,然后循环(或Map)这些行:

const axios = require("axios");
const cheerio = require("cheerio"); // 1.0.0-rc.12

const url = "https://sunrise-sunset.org/search?location=portland%20oregon&year=2022&month=11#calendar";

axios.get(url).then(({data: html}) => {
  const $ = cheerio.load(html);
  const rows = [...$("#month tr.day")].map(e => ({
    day: $(e).find(".number").text().trim(),
    time: $(e).find(".sunset").text().trim(),
  }));
  console.log(rows);
});

相关问题