我有一个简单的 Scala Seq,我把它转换成 DataFrame 来进行测试。有没有类似的方法可以把 Scala Seq 转换成 DStream?
我需要一个从 Scala Seq 生成的简单流,用来进行简单的测试。
// Local Spark session used only for this small join experiment.
val spark = SparkSession
  .builder()
  .master("local")
  .getOrCreate()
// Brings the `toDF` conversion for Scala collections into scope.
import spark.implicits._

// Left side of the join: two visits to restaurant "1".
val df1 = Seq(
  ("1", "sri", "2020-01-01"),
  ("1", "hari", "2020-01-02")
).toDF("restaurant_id", "customer_name", "visit_date")

// Right side of the join: only the first of those visits.
val df2 = Seq(
  ("1", "sri", "2020-01-01")
).toDF("restaurant_id", "customer_name", "visit_date")

// `registerTempTable` has been deprecated since Spark 2.0;
// `createOrReplaceTempView` is its direct replacement.
df1.createOrReplaceTempView("tabl1")
df2.createOrReplaceTempView("tabl2")

// Shared left-join text; both queries below start with exactly this string.
val leftJoinSql =
  "select a.*,b.* from tabl1 as a left join tabl2 as b " +
    "on a.restaurant_id=b.restaurant_id and a.visit_date=b.visit_date"

// Every row of tabl1, with the tabl2 columns null where nothing matched.
spark
  .sql(leftJoinSql)
  .show()

// Only the tabl1 rows that have no matching row in tabl2.
spark
  .sql(leftJoinSql + " where b.visit_date is null")
  .show()
DataFrame output:
+-------------+-------------+----------+-------------+-------------+----------+
|restaurant_id|customer_name|visit_date|restaurant_id|customer_name|visit_date|
+-------------+-------------+----------+-------------+-------------+----------+
| 1| sri|2020-01-01| 1| sri|2020-01-01|
| 1| hari|2020-01-02| null| null| null|
+-------------+-------------+----------+-------------+-------------+----------+
+-------------+-------------+----------+-------------+-------------+----------+
|restaurant_id|customer_name|visit_date|restaurant_id|customer_name|visit_date|
+-------------+-------------+----------+-------------+-------------+----------+
| 1| hari|2020-01-02| null| null| null|
+-------------+-------------+----------+-------------+-------------+----------+
谢谢,斯里
暂无答案!
目前还没有任何答案,快来回答吧!