RSelenium在单击下一步按钮后继续加载网页

1rhkuytd  于 2023-04-18  发布在  其他
关注(0)|答案(1)|浏览(133)

我是网页抓取的新手，想从 https://www.forwardpathway.com/us-college-database 抓取数据。我用下面的代码提取表格中的数据，但在点击“下一页”（Next）按钮之后，页面一直停留在加载状态。有人能指出问题出在哪里吗？

library(RSelenium)
library(tidyverse)
library(netstat)
library(xml2)
library(data.table)
library(rvest)

## List the chromedriver builds installed locally; `chromever` below has to
## match one of them.
binman::list_versions("chromedriver")

## Start a Selenium server together with a Chrome client on an unused port.
rs_driver_object <- rsDriver(
  browser = "chrome",
  chromever = "107.0.5304.62",
  # Spelled out as FALSE: `F` is an ordinary variable and can be reassigned.
  verbose = FALSE,
  port = free_port()
)

## Take the client (a remoteDriver object) out of the returned list.
remDr <- rs_driver_object$client

## NOTE: per the RSelenium documentation, rsDriver() already opens a browser
## session for the client it returns, so remDr$open() is not called again
## here -- doing so would start a second browser window.
remDr$navigate("https://www.forwardpathway.com/us-college-database")

## Get a handle on the element that holds the data (id "table_1").
data_table <- remDr$findElement(using = "id", value = "table_1")

# Three different ways of pressing the "next" button were tried; the problem
# persisted with each of them.

## Attempt 1: locate the button by id and issue a regular WebDriver click.
next_button <- remDr$findElement(using = "id", value = "table_1_next")
next_button$clickElement()

## Attempt 2: trigger the click from JavaScript running inside the page.
remDr$executeScript("document.getElementById('table_1_next').click()")

## Attempt 3: locate the button again and send it the Enter key.
next_button <- remDr$findElement(using = "id", value = "table_1_next")
next_button$sendKeysToElement(list(key = "enter"))

## Walk through every page of the table and collect the rows.
##
## Each page is parsed into its own data frame and stored in `pages`; the
## pieces are bound together once at the end instead of re-binding the whole
## result on every iteration.
pages <- list()

repeat {
  # Parse the DOM as currently rendered and keep only the table with id
  # "table_1", rather than whichever <table> happens to come first.
  page_source <- remDr$getPageSource()[[1]]
  pages[[length(pages) + 1L]] <- read_html(page_source) %>%
    html_element("#table_1") %>%
    html_table()

  # A missing button means there is nothing left to page through.
  next_button <- tryCatch(
    remDr$findElement(using = "id", value = "table_1_next"),
    error = function(e) NULL
  )
  if (is.null(next_button)) {
    print("script complete")
    break
  }

  # NOTE(review): the "<table id>_next" id looks like a DataTables pager,
  # which keeps the button in the DOM on the last page and only adds a
  # "disabled" class to it -- confirm against the live page. Without this
  # check findElement() keeps succeeding and the loop never terminates.
  button_class <- unlist(next_button$getElementAttribute("class"))
  if (any(grepl("disabled", button_class, fixed = TRUE))) {
    print("script complete")
    break
  }

  next_button$sendKeysToElement(list(key = "enter"))

  # Wait AFTER the click so the next page has time to render before it is
  # scraped at the top of the loop.
  Sys.sleep(5)
}

all_data <- rbindlist(pages)
gab6jxml

gab6jxml1#

我用下面的代码成功点击了“下一页”（Next）按钮并完成了翻页。

library(RSelenium)

# Start the selenium/standalone-firefox Docker image, published on local
# port 4446.
shell("docker run -d -p 4446:4444 selenium/standalone-firefox")

# Connect a Firefox remote driver to that server and load the target page.
remDr <- remoteDriver(
  remoteServerAddr = "localhost",
  port = 4446L,
  browserName = "firefox"
)
remDr$open()
remDr$navigate("https://www.forwardpathway.com/us-college-database")

# Scroll the page down step by step.
#
# driver  : an open RSelenium remoteDriver.
# steps   : number of scroll calls to issue.
# step_px : vertical distance in pixels added per step; step i scrolls to
#           the absolute offset i * step_px.
#
# Prints the step counter as it goes and returns NULL invisibly.
scroll_down <- function(driver, steps = 200L, step_px = 20L) {
  for (i in seq_len(steps)) {
    print(i)
    driver$executeScript(paste0("scroll(0,", i * step_px, ")"))
  }
  invisible(NULL)
}

# Scroll through the first page (presumably so the pager at the bottom of the
# table is rendered and in view -- confirm), then capture it.
scroll_down(remDr)
remDr$screenshot(display = TRUE)

# Click the "next" button of the table pager and capture the result.
next_button <- remDr$findElement("xpath", '//*[@id="table_1_next"]')
next_button$clickElement()
remDr$screenshot(display = TRUE)

# Scroll through the second page the same way and capture it.
scroll_down(remDr)
remDr$screenshot(display = TRUE)

相关问题