我正在尝试从 HBase 获取数据。目前为了获得 HBase 的数据，我必须经过 Kafka。有没有可能直接将 Spark Streaming 和 HBase 集成，而不把 Kafka 包括在链路中？
ufj5ltwl1#
Spark Streaming 和 HBase 之间是否可以直接集成而不包括 Kafka？可以，这是可能的，因为我们没有使用 Kafka 也做到了同样的事情。请参见下面的示例 JavaHBaseStreamingBulkPutExample：
package org.apache.hadoop.hbase.spark.example.hbasecontext; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.spark.JavaHBaseContext; import org.apache.hadoop.hbase.util.Bytes; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.streaming.Duration; import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; /** * This is a simple example of BulkPut with Spark Streaming */ final public class JavaHBaseStreamingBulkPutExample { private JavaHBaseStreamingBulkPutExample() {} public static void main(String[] args) { if (args.length < 4) { System.out.println("JavaHBaseBulkPutExample " + "{host} {port} {tableName}"); return; } String host = args[0]; String port = args[1]; String tableName = args[2]; SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseStreamingBulkPutExample " + tableName + ":" + port + ":" + tableName); JavaSparkContext jsc = new JavaSparkContext(sparkConf); try { JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000)); JavaReceiverInputDStream<String> javaDstream = jssc.socketTextStream(host, Integer.parseInt(port)); Configuration conf = HBaseConfiguration.create(); JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); hbaseContext.streamBulkPut(javaDstream, TableName.valueOf(tableName), new PutFunction()); } finally { jsc.stop(); } } public static class PutFunction implements Function<String, Put> { private static final long serialVersionUID = 1L; public Put call(String v) throws Exception { String[] part = v.split(","); Put put = new Put(Bytes.toBytes(part[0])); put.addColumn(Bytes.toBytes(part[1]), Bytes.toBytes(part[2]), Bytes.toBytes(part[3])); return put; } } }
1条答案
按热度按时间ufj5ltwl1#
Spark Streaming 和 HBase 之间是否可以直接集成而不包括 Kafka？
可以，这是可能的，因为我们没有使用 Kafka 也做到了同样的事情。请参见下面的示例 JavaHBaseStreamingBulkPutExample。