from pyspark import SparkContext
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-xml_2.10:0.4.1 pyspark-shell'
conf = SparkConf().setAppName('Stackoverflow')
sc = SparkContext(master="local", appName="test")
sc.setLogLevel("Error")
spark = SparkSession.builder.getOrCreate()
df=spark.read.format("com.databricks.spark.xml").option("rowTag","Transaction").load("C:/Users/Rajaraman/Desktop/task/data/transactions.xml")
无法回溯(最近一次调用last):conf=sparkconf()中的第6行文件“c:/users/rajaraman/test.py”。setappname('stackoverflow')name错误:未定义名称“sparkconf”
敬请检阅
1条答案
按热度按时间vqlkdk9b1#
您需要导入代码中引用的库
添加此行以导入引用的包