hdfs文件行计数

a7qyws3x  于 2021-06-02  发布在  Hadoop
关注(0)|答案(1)|浏览(296)

有没有一种方法可以像我们在命令提示符下执行命令那样计算java中hdfs目录的行数?

hadoop fs -cat  /abc/def/* | wc -l

尤其是使用 Hadoop API,而不是编写 MapReduce 或 Spark 代码。

5cg8jx4n

5cg8jx4n1#

这样的方法应该有用:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Counts the total number of lines across all regular files directly under
 * an HDFS directory — the HDFS-API equivalent of
 * {@code hadoop fs -cat /abc/def/* | wc -l}.
 */
public class LineCounter {

    /**
     * Entry point: loads the cluster configuration, counts the lines under
     * the target directory, and prints the total to stdout.
     *
     * @param args unused
     * @throws IOException if the config files or HDFS cannot be read
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Config file paths are resolved against the working directory.
        conf.addResource(new FileInputStream("hdfs-site.xml"));
        conf.addResource(new FileInputStream("core-site.xml"));

        // Pin the FileSystem implementations explicitly so a shaded/fat jar
        // whose META-INF/services entries were merged incorrectly still works.
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        long count;
        // try-with-resources: release the FileSystem handle deterministically.
        try (FileSystem fs = FileSystem.get(conf)) {
            count = countLines(fs, new Path("/some/path"));
        }
        // Bug fix: the original computed the count but never reported it,
        // so the program produced no output at all.
        System.out.println(count);
    }

    /**
     * Sums the line counts of every regular file directly under {@code dir}.
     * Subdirectories are skipped, matching the original's non-recursive scan.
     *
     * @param fs  open filesystem handle
     * @param dir directory whose immediate files are counted
     * @return total number of lines ({@code long} to avoid int overflow on
     *         large datasets)
     * @throws IOException if listing or reading a file fails
     */
    private static long countLines(FileSystem fs, Path dir) throws IOException {
        long count = 0;
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile()) {
                // try-with-resources fixes the original's reader leak on the
                // exception path; closing the reader also closes the
                // underlying FSDataInputStream. Explicit UTF-8 avoids
                // platform-default charset surprises.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fs.open(status.getPath()), StandardCharsets.UTF_8))) {
                    while (reader.readLine() != null) {
                        count++;
                    }
                }
            }
        }
        return count;
    }
}

相关问题