如何在java程序中连接到hadoop。下面是一些细节:我以html形式从用户那里获取输入,使用jsp处理表单数据。我想连接到hadoop以获取一些基于表单输入的数据。在这种情况下,如何使用java连接到hadoop?
azpvetkf1#
此代码使用cloudera quickstart docker映像。它将文件从本地文件系统推送到hdfs。它需要作为jar文件导出并在命令行上运行。示例:`java -jar connect_hdfs.jar /local_file.txt push /hdfs_dir_location/`
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.io.IOException; import java.net.URISyntaxException; public class Main { private static final String NAME_NODE = "hdfs://quickstart.cloudera:8020"; public static void main(String[] args) throws URISyntaxException, IOException { if (args.length != 3){ throw new IllegalArgumentException ("Must include inputs: source file location, action " + "(push or pull), and target file location"); } String sourceLocation = args[0]; String action = args[1]; String targetLocation = args[2]; Configuration configuration = new Configuration(); configuration.set("fs.defaultFS", NAME_NODE); configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName() ); configuration.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName() ); FileSystem hdfsFileSystem = FileSystem.get(configuration); if (action.equals("push")) { hdfsFileSystem.copyFromLocalFile(new Path(sourceLocation), new Path(targetLocation)); } else if (action.equals("pull")) { hdfsFileSystem.copyToLocalFile(false, new Path(sourceLocation), new Path(targetLocation), true); } } }
pom.xml文件
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>connect_hdfs</groupId>
  <artifactId>connect_hdfs</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.0</version>
    </dependency>
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.6</version>
    </dependency>
    <!-- tools.jar workaround for JDK 7/8 builds. Resolve it relative to the
         running JDK instead of a hard-coded Windows install path, so the
         build works on any machine. Not needed at all on JDK 9+. -->
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>jdk1.7.0_67</version>
      <scope>system</scope>
      <systemPath>${java.home}/../lib/tools.jar</systemPath>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src</sourceDirectory>
    <plugins>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.5.1</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
bmp9r5qi2#
取决于你对hadoop的理解。hadoop可以以多种方式存储数据:它可以只是HDFS(hadoop分布式文件系统)中的一个文件,也可以是Hive或Hbase中的数据。从HDFS读取文件有一段最简单的代码:
hdfs
Hive
Hbase
import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; public class HdfsFileReader { private static final String NAME_NODE = "hdfs://nameNomeHost:8020";//nameNomeHost = localhost if you use hadoop in local mode public static void main(String[] args) throws URISyntaxException, IOException { String fileInHdfs = args[0]; FileSystem fs = FileSystem.get(new URI(NAME_NODE), new Configuration()); String fileContent = IOUtils.toString(fs.open(new Path(fileInHdfs)), "UTF-8"); System.out.println("File content - " + fileContent); } }
您需要的maven依赖项:
<!-- Reading a file from HDFS needs hadoop-common (FileSystem API)
     plus commons-io (IOUtils.toString). -->
<dependency>
  <groupId>commons-io</groupId>
  <artifactId>commons-io</artifactId>
  <version>2.4</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.6.0</version>
</dependency>
2条答案
按热度按时间azpvetkf1#
此代码使用cloudera quickstart docker映像。它将文件从本地文件系统推送到hdfs。它需要作为jar文件导出并在命令行上运行。
示例:`java -jar connect_hdfs.jar /local_file.txt push /hdfs_dir_location/`
pom.xml文件
bmp9r5qi2#
取决于你对hadoop的理解。hadoop可以以多种方式存储数据,它可以只是一个文件
hdfs
(hadoop分布式文件系统)或者它可以是Hive
或者Hbase
. 从hdfs读取文件有一个最简单的代码:您需要的maven依赖项: