Java:从 Web 元素列表中获取文本的速度很慢

mgdq6dx1  于 2023-02-14  发布在  Java
关注(0)|答案(1)|浏览(98)

我正在从一个 Web 表格中抓取数据。我的代码会读取列表中每个 Web 元素(行)的文本,把这些文本添加到另一个列表(列)中,再传给一个方法写入 Excel。读取 Web 元素(大约 200 行)并将数据写入新列表的过程非常缓慢。是否有更快的方法?还是说这种速度在意料之中?
下面是我的代码:

package mypackage;

import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

import com.seleniumpractice.utilities.XLUtils;

import io.github.bonigarcia.wdm.WebDriverManager;

/**
 * Scrapes the {@code main_table_countries_today} table from worldometers.info and hands
 * the collected cell text to {@link XLUtils#setCellDataFromList} for writing to a workbook.
 *
 * <p>Every {@code WebElement} call ({@code findElement}, {@code findElements},
 * {@code getText}) is a round trip to the browser driver, so the scraping loop is written
 * to issue as few of them as possible: one text read per row for the emptiness check, one
 * lookup for all of a row's cells, and one text read per cell that is actually kept.
 */
public class CovidWebTable {
    static WebDriver driver;
    static XLUtils xl;
    static List<WebElement> header;
    static List<WebElement> rows;
    static List<ArrayList<String>> rowsXL;

    /**
     * Opens the page, reads the header and data rows of the table, and writes them out.
     *
     * @param args unused
     * @throws IOException declared for the {@code XLUtils} calls
     */
    public static void main(String[] args) throws IOException {
        WebDriverManager.chromedriver().setup();
        driver = new ChromeDriver();
        try {
            driver.get("https://www.worldometers.info/coronavirus");
            driver.manage().window().maximize();
            driver.manage().timeouts().implicitlyWait(Duration.ofSeconds(10));

            WebElement table = driver.findElement(By.xpath("//table[@id='main_table_countries_today']"));
            rows = table.findElements(By.xpath(".//tr[@role='row']"));
            System.out.println("Total rows: "+rows.size());

            xl = new XLUtils(".\\datafiles\\covid.xls");

            rowsXL = new ArrayList<ArrayList<String>>();

            header = table.findElements(By.xpath(".//thead//th"));
            System.out.println("Header cols: "+ header.size());
            rowsXL.add(readHeader(header));

            int columnCount = header.size();
            int skippedRows = 0;

            // Index 0 is skipped, as in the original loop (presumably the header row).
            for (int r = 1; r < rows.size(); r++) {
                WebElement row = rows.get(r);

                // Read the row text once; rows with no text are skipped.
                if (row.getText().isEmpty()) {
                    skippedRows++;
                    continue;
                }
                System.out.println("Reading row "+r);
                rowsXL.add(readRow(row, columnCount));
            }

            // NOTE(review): the sheet name is "Orders" although the data is the covid
            // table -- confirm this is the intended sheet.
            xl.setCellDataFromList(rowsXL, "Orders");
            System.out.println("Scraped Rows: "+ rowsXL.size());
            System.out.println("Skipped Rows: "+skippedRows);
            System.out.println("Complete.");
        } finally {
            // quit() ends the whole session (close() only closes the current window),
            // and the finally block guarantees it runs even when scraping fails.
            driver.quit();
        }
    }

    /**
     * Collects the header captions, leaving out the first and the last header cell.
     *
     * <p>NOTE(review): this keeps one column fewer than {@link #readRow} does (the data
     * rows include the last column) -- confirm the mismatch is intended.
     */
    private static ArrayList<String> readHeader(List<WebElement> headerCells) {
        ArrayList<String> captions = new ArrayList<String>();
        for (int col = 1; col < headerCells.size() - 1; col++) {
            captions.add(headerCells.get(col).getText());
        }
        return captions;
    }

    /**
     * Reads the text of cells 2..{@code columnCount} (1-based) of one table row.
     *
     * <p>All {@code td} children are fetched with a single lookup instead of one XPath
     * query per cell. A row with fewer cells than expected is padded with empty strings
     * so the columns stay aligned, rather than waiting out the implicit timeout and
     * failing on a missing cell.
     */
    private static ArrayList<String> readRow(WebElement row, int columnCount) {
        List<WebElement> cells = row.findElements(By.xpath("./td"));
        ArrayList<String> values = new ArrayList<String>();
        for (int c = 1; c < columnCount; c++) {
            values.add(c < cells.size() ? cells.get(c).getText() : "");
        }
        return values;
    }
}

相关问题