I am running a Spark program on AWS. It simply reads a CSV file and prints it with dataframe.show(). I have been waiting for this step to finish for the past 15-20 minutes with no progress at all. The CSV file in the S3 bucket is tiny, only 10 rows and 2 columns. Here is my program:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.log4j.LogManager
import org.apache.log4j.Level
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql._
import org.apache.spark.sql.types._

object TriangleCountMain {

  // Edge object
  case class Edge(from: Int, to: Int)

  def main(args: Array[String]): Unit = {
    val logger: org.apache.log4j.Logger = LogManager.getRootLogger

    if (args.length != 2) {
      logger.error("Usage:\nTwitterDataSet_Spark.TriangleCountMain <input dir> <output dir>")
      System.exit(1)
    }

    // Spark session
    val spark = SparkSession
      .builder()
      .appName("Spark SQL basic example")
      .getOrCreate()
    import spark.implicits._

    // Dataframe structure
    val dfSchema = StructType(Array(
      StructField("from", IntegerType, true),
      StructField("to", IntegerType, true)))

    // Data set of edges
    val nonFilteredEdge: Dataset[Edge] = spark.read
      .option("header", "false")
      .option("inferSchema", "true")
      .schema(dfSchema)
      .csv(args(0))
      .as[Edge]

    val edge = nonFilteredEdge

    edge.show()

    spark.stop()
  }
}
This program runs successfully on my local machine (a rough sketch of the local setup is below). Thank you in advance.
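For context, the successful local run uses essentially the same code; the minimal sketch below shows what I mean by "local", where the explicit local[*] master, the object name, and the sample input path are placeholders added for illustration rather than my exact values.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._

object LocalSmokeTest {

  case class Edge(from: Int, to: Int)

  def main(args: Array[String]): Unit = {
    // Explicit local master for the local run; on AWS the master is set by the cluster instead
    val spark = SparkSession
      .builder()
      .appName("Spark SQL basic example - local")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Same two-column integer schema as in the main program
    val dfSchema = StructType(Array(
      StructField("from", IntegerType, true),
      StructField("to", IntegerType, true)))

    // Placeholder local path standing in for args(0), which is an S3 path on AWS
    val edges = spark.read
      .option("header", "false")
      .schema(dfSchema)
      .csv("data/edges.csv")
      .as[Edge]

    edges.show()
    spark.stop()
  }
}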