>>> from pyspark.sql.functions import col, concat, lit
>>> df.show()
+-----+-------+---+----+
|fname|  lname|age|dept|
+-----+-------+---+----+
| Jack| Felice| 25|  IT|
| Mike|Gilbert| 30|  CS|
| John|   Shen| 45|  DR|
+-----+-------+---+----+
>>> df1 = df.withColumn("sentence", concat(col("fname"), lit(" "), col("lname"), lit(" is "), col("age"), lit(" years old and he works in a "), col("dept"), lit(" department."))).select("sentence")
>>> df1.show(10, False)
+-------------------------------------------------------------+
|sentence                                                     |
+-------------------------------------------------------------+
|Jack Felice is 25 years old and he works in a IT department. |
|Mike Gilbert is 30 years old and he works in a CS department.|
|John Shen is 45 years old and he works in a DR department.   |
+-------------------------------------------------------------+
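As an aside (not part of the original answer): concat_ws inserts the given separator between every argument, which avoids having to embed leading and trailing spaces in each lit(). A rough equivalent of the sentence column above (df2 is just an illustrative name):
>>> from pyspark.sql.functions import concat_ws
>>> df2 = df.select(concat_ws(" ", col("fname"), col("lname"), lit("is"), col("age"),
...     lit("years old and he works in a"), col("dept"), lit("department.")).alias("sentence"))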
>>> df1.write.format("csv").option("header", "true").save("/out/")
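Note that save("/out/") creates a directory /out/ containing one or more part-*.csv files (plus a _SUCCESS marker) rather than a single CSV file, and it fails if the path already exists. If you want one output file and repeatable runs, a sketch (my addition, reusing the same /out/ path):
>>> (df1.coalesce(1)                  # collapse to a single partition so only one part file is written
...     .write.mode("overwrite")      # replace /out/ if it already exists
...     .option("header", "true")
...     .csv("/out/"))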
You can add a new column named "sentence" using the concat function, as shown above. I also write the resulting DataFrame out to a file in case you want it as a CSV.
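For completeness, a DataFrame matching the one at the top of the transcript could be built like this (a sketch, assuming an active SparkSession bound to the name spark):
>>> df = spark.createDataFrame(
...     [("Jack", "Felice", 25, "IT"),
...      ("Mike", "Gilbert", 30, "CS"),
...      ("John", "Shen", 45, "DR")],
...     ["fname", "lname", "age", "dept"])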