我有一个spark作业,它将数据保存到hdfs,然后将相同的数据保存到hive表。当我在jupyter上运行它时,它成功运行了。但是当我通过oozie运行它时,当它到达将数据写入hive的步骤时,会引发以下异常。下面是我的代码,后跟异常和workflow.xml:
# coding: utf-8
# In[10]:
import os

# HDFS directory that holds the extra jars shipped with the job.
JARS_HOME = "hdfs:///dataengineering/jars"

# Assemble the comma-separated --jars list once, then publish it (together
# with HADOOP_CONF_DIR) through the process environment so it is in place
# before PySpark is imported further down.
_extra_jars = ",".join(
    "{}/{}".format(JARS_HOME, jar_name)
    for jar_name in ("ojdbc6.jar", "anonymize_udfs.jar")
)
os.environ.update({
    "PYSPARK_SUBMIT_ARGS": "--jars {} pyspark-shell".format(_extra_jars),
    "HADOOP_CONF_DIR": "/etc/hadoop/conf",
})
# In[11]:
# Import PySpark directly when it is already importable; otherwise fall back
# to findspark so the CDH parcel's Spark installation is put on sys.path
# before retrying the same imports.
try:
    from pyspark import SparkContext, SQLContext
    from pyspark.sql import SparkSession
except ImportError:
    # Only a missing module should trigger the fallback. A bare "except:"
    # would also hide unrelated failures (and KeyboardInterrupt) behind a
    # confusing second import attempt.
    import findspark
    findspark.init('/opt/cloudera/parcels/CDH-6.1.1-1.cdh6.1.1.p0.875250/lib/spark')
    from pyspark import SparkContext, SQLContext
    from pyspark.sql import SparkSession
import sys
import pyspark.sql.functions as functions
from datetime import date
from dateutil.relativedelta import relativedelta
from datetime import datetime
from datetime import timedelta
from pyspark.sql.types import StringType
from pyspark.sql.functions import *
from pyspark.sql import functions as sf
from pyspark.sql.types import StringType
# Build (or reuse) the YARN-backed session used by the rest of the script.
# enableHiveSupport() is requested explicitly because the script later calls
# saveAsTable() on a Hive table; without it the session silently depends on
# whatever catalog implementation the cluster configures by default.
# NOTE(review): this only selects the Hive catalog. The Hive classes must
# still be on the driver classpath (e.g. via the Oozie sharelib) for the
# catalog to be instantiated.
spark = (
    SparkSession.builder
    .master("yarn")
    .appName("oozie_sample_spark")
    .config('spark.executor.cores', '3')
    .config('spark.executor.memory', '15g')
    .config('spark.driver.memory', '5g')
    .config('spark.driver.maxResultSize', '12g')
    .config("spark.dynamicAllocation.enabled", "true")
    .config("spark.shuffle.service.enabled", "true")
    .config("spark.executor.instances", "4")
    .config("spark.yarn.queue", "root.STREAMING")
    .config("spark.dynamicAllocation.cachedExecutorIdleTimeout", "300s")
    .config("hive.metastore.uris", "thrift://dchqmaster01.internal.eg.vodafone.com:9083")
    .enableHiveSupport()
    .getOrCreate()
)
# In[13]:
# Append one row holding the current timestamp to the CSV output path, then
# append a row to the bde.oozie_test table. The two spark.sql() calls are
# separate queries, exactly as before.
try:
    spark.sql("select current_timestamp() column_a").write.csv("/user/akhamis11/oozie-samples/spark-sample/current_column.csv", mode='append')
    spark.sql("select current_timestamp() column_a").write.saveAsTable("bde.oozie_test", mode='append')
finally:
    # Always stop the session, even when one of the writes raises, so the
    # application does not keep its YARN resources while the error unwinds.
    spark.stop()
2020-04-13 07:27:21,077 [dispatcher-event-loop-1] INFO org.apache.spark.deploy.yarn.YarnAllocator - Driver requested a total number of 0 executor(s).
2020-04-13 07:27:21,081 [Thread-10] INFO org.apache.spark.sql.execution.datasources.FileFormatWriter - Write Job 316245be-4c54-42d9-bd43-6246d77672b0 committed.
2020-04-13 07:27:21,108 [Thread-10] INFO org.apache.spark.sql.execution.datasources.FileFormatWriter - Finished processing stats for write job 316245be-4c54-42d9-bd43-6246d77672b0.
2020-04-13 07:27:21,191 [Thread-10] INFO com.cloudera.spark.lineage.NavigatorQueryListener - Failed to generate lineage for successful query execution.
java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog':
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:192)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:103)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.org$apache$spark$sql$hive$HiveSessionStateBuilder$$externalCatalog(HiveSessionStateBuilder.scala:39)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog$lzycompute(SessionCatalog.scala:90)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog(SessionCatalog.scala:90)
at org.apache.spark.sql.query.analysis.QueryAnalysis$.hiveCatalog(QueryAnalysis.scala:63)
at org.apache.spark.sql.query.analysis.QueryAnalysis$.getLineageInfo(QueryAnalysis.scala:88)
at com.cloudera.spark.lineage.NavigatorQueryListener.onSuccess(ClouderaNavigatorListener.scala:60)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1$$anonfun$apply$mcV$sp$1.apply(QueryExecutionListener.scala:124)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1$$anonfun$apply$mcV$sp$1.apply(QueryExecutionListener.scala:123)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling$1.apply(QueryExecutionListener.scala:145)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling$1.apply(QueryExecutionListener.scala:143)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.collection.mutable.ListBuffer.foreach(ListBuffer.scala:45)
at org.apache.spark.sql.util.ExecutionListenerManager.org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling(QueryExecutionListener.scala:143)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply$mcV$sp(QueryExecutionListener.scala:123)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:123)
at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:123)
at org.apache.spark.sql.util.ExecutionListenerManager.readLock(QueryExecutionListener.scala:156)
at org.apache.spark.sql.util.ExecutionListenerManager.onSuccess(QueryExecutionListener.scala:122)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:670)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:228)
at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:656)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:189)
... 39 more
Caused by: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/ql/metadata/HiveException
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:73)
... 44 more
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.ql.metadata.HiveException
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:349)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 45 more
Traceback (most recent call last):
File "/disk10/yarn/nm/usercache/akhamis11/appcache/application_1586733850175_0103/container_1586733850175_0103_02_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
File "/disk10/yarn/nm/usercache/akhamis11/appcache/application_1586733850175_0103/container_1586733850175_0103_02_000001/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o148.saveAsTable.
: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog':
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:192)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:103)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.org$apache$spark$sql$hive$HiveSessionStateBuilder$$externalCatalog(HiveSessionStateBuilder.scala:39)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anonfun$1.apply(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog$lzycompute(SessionCatalog.scala:90)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.externalCatalog(SessionCatalog.scala:90)
at org.apache.spark.sql.catalyst.catalog.SessionCatalog.tableExists(SessionCatalog.scala:415)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:405)
at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:400)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:189)
... 21 more
Caused by: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/ql/metadata/HiveException
at org.apache.spark.sql.hive.HiveExternalCatalog.<init>(HiveExternalCatalog.scala:73)
... 26 more
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "oozie_sample_spark.py", line 53, in <module>
spark.sql("select current_timestamp() column_a").write.saveAsTable("bde.oozie_test", mode='append')
File "/disk10/yarn/nm/usercache/akhamis11/appcache/application_1586733850175_0103/container_1586733850175_0103_02_000001/pyspark.zip/pyspark/sql/readwriter.py", line 775, in saveAsTable
File "/disk10/yarn/nm/usercache/akhamis11/appcache/application_1586733850175_0103/container_1586733850175_0103_02_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
File "/disk10/yarn/nm/usercache/akhamis11/appcache/application_1586733850175_0103/container_1586733850175_0103_02_000001/pyspark.zip/pyspark/sql/utils.py", line 79, in deco
pyspark.sql.utils.IllegalArgumentException: "Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog':"
<!-- Oozie action that submits the PySpark script as a Spark job. -->
<action name='spark-node'>
<spark xmlns="uri:oozie:spark-action:1.0">
<resource-manager>${resourceManager}</resource-manager>
<name-node>${nameNode}</name-node>
<configuration>
<!-- Selects which sharelib directory is added to this Spark action's classpath. -->
<property>
<name>oozie.action.sharelib.for.spark</name>
<value>spark2</value>
</property>
<!-- NOTE(review): this looks like an Oozie server-level service property; confirm it has any effect when set inside an action configuration. -->
<property>
<name>oozie.service.HCatAccessorService.hcat.configuration</name>
<value>/opt/cloudera/parcels/CDH/etc/hive/conf.dist/hive-site.xml</value>
</property>
</configuration>
<master>${master}</master>
<name>oozies_sample</name>
<!-- For a PySpark job the "jar" element points at the .py script to run. -->
<jar>${nameNode}/user/${wf:user()}/oozie-samples/spark-sample/lib/oozie_sample_spark.py</jar>
</spark>
<!-- Success goes to the end node; any failure goes to the kill node named "fail". -->
<ok to="end" />
<error to="fail" />
</action>
<!-- Kill node: terminates the workflow and reports the error message of the last failed node. -->
<kill name="fail">
<message>Workflow failed, error
message[${wf:errorMessage(wf:lastErrorNode())}]
</message>
</kill>
<!-- End node: reached when the workflow completes successfully. -->
<end name='end' />
[~]$ hdfs dfs -ls /user/oozie/share/lib/lib_<ts>/spark2
/user/oozie/share/lib/lib_<ts>/spark2/HikariCP-java7-2.4.12.jar
/user/oozie/share/lib/lib_<ts>/spark2/RoaringBitmap-0.5.11.jar
/user/oozie/share/lib/lib_<ts>/spark2/accessors-smart-1.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/activation-1.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/annotations-2.0.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/antlr4-runtime-4.7.jar
/user/oozie/share/lib/lib_<ts>/spark2/aopalliance-1.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/aopalliance-repackaged-2.4.0-b34.jar
/user/oozie/share/lib/lib_<ts>/spark2/arpack_combined_all-0.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/arrow-format-0.10.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/arrow-memory-0.10.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/arrow-vector-0.10.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/audience-annotations-0.5.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/avro-ipc.jar
/user/oozie/share/lib/lib_<ts>/spark2/avro-mapred-hadoop2.jar
/user/oozie/share/lib/lib_<ts>/spark2/avro.jar
/user/oozie/share/lib/lib_<ts>/spark2/aws-java-sdk-bundle-1.11.271.jar
/user/oozie/share/lib/lib_<ts>/spark2/azure-keyvault-core-0.8.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/azure-storage-5.4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/breeze-macros_2.11-0.13.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/breeze_2.11-0.13.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/chill-java-0.9.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/chill_2.11-0.9.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-beanutils-1.9.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-cli-1.4.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-codec-1.10.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-collections-3.2.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-compiler-3.0.9.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-compress-1.4.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-configuration2-2.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-crypto-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-io-2.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-lang-2.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-lang3-3.7.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-logging-1.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-math3-3.4.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-net-3.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/commons-pool-1.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/compress-lzf-1.0.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/core-1.1.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/curator-client-2.7.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/curator-framework-2.7.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/curator-recipes-2.7.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/datanucleus-core-4.1.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/derby-10.14.1.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/ehcache-3.3.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/flatbuffers-1.2.0-3f79e055.jar
/user/oozie/share/lib/lib_<ts>/spark2/flume-ng-config-filter-api-1.8.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/flume-ng-configuration-1.8.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/flume-ng-core-1.8.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/flume-ng-sdk-1.8.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/geronimo-jcache_1.0_spec-1.0-alpha-1.jar
/user/oozie/share/lib/lib_<ts>/spark2/gson-2.2.4.jar
/user/oozie/share/lib/lib_<ts>/spark2/guava-11.0.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/guice-4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/guice-servlet-4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-annotations.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-auth.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-aws.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-azure-3.0.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-client-3.0.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-hdfs-client.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-mapreduce-client-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-mapreduce-client-core.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-mapreduce-client-jobclient.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-openstack-3.0.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-yarn-api.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-yarn-client.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-yarn-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-yarn-server-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/hadoop-yarn-server-web-proxy.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-exec.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-hcatalog-core.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-hcatalog-pig-adapter.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-metastore.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-serde.jar
/user/oozie/share/lib/lib_<ts>/spark2/hive-site.xml
/user/oozie/share/lib/lib_<ts>/spark2/hive-webhcat-java-client.jar
/user/oozie/share/lib/lib_<ts>/spark2/hk2-api-2.4.0-b34.jar
/user/oozie/share/lib/lib_<ts>/spark2/hk2-locator-2.4.0-b34.jar
/user/oozie/share/lib/lib_<ts>/spark2/hk2-utils-2.4.0-b34.jar
/user/oozie/share/lib/lib_<ts>/spark2/hppc-0.7.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/htrace-core4-4.1.0-incubating.jar
/user/oozie/share/lib/lib_<ts>/spark2/httpclient-4.5.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/httpcore-4.4.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/ivy-2.4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-annotations-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-core-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-core-asl-1.9.13.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-databind-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-dataformat-cbor-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-jaxrs-base-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-jaxrs-json-provider-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-mapper-asl-1.9.13-cloudera.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-module-jaxb-annotations-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-module-paranamer-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/jackson-module-scala_2.11-2.9.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/janino-3.0.9.jar
/user/oozie/share/lib/lib_<ts>/spark2/javassist-3.18.1-GA.jar
/user/oozie/share/lib/lib_<ts>/spark2/javax.annotation-api-1.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/javax.inject-1.jar
/user/oozie/share/lib/lib_<ts>/spark2/javax.inject-2.4.0-b34.jar
/user/oozie/share/lib/lib_<ts>/spark2/javax.servlet-api-3.1.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/javax.ws.rs-api-2.0.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/jaxb-api-2.2.11.jar
/user/oozie/share/lib/lib_<ts>/spark2/jcip-annotations-1.0-1.jar
/user/oozie/share/lib/lib_<ts>/spark2/jcl-over-slf4j-1.7.25.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-client-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-common-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-container-servlet-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-container-servlet-core-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-guava-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-media-jaxb-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jersey-server-2.22.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jetty-jmx-9.3.20.v20170531.jar
/user/oozie/share/lib/lib_<ts>/spark2/jetty-util-9.3.20.v20170531.jar
/user/oozie/share/lib/lib_<ts>/spark2/jetty-util-ajax-9.3.20.v20170531.jar
/user/oozie/share/lib/lib_<ts>/spark2/jetty-webapp-9.3.20.v20170531.jar
/user/oozie/share/lib/lib_<ts>/spark2/jetty-xml-9.3.20.v20170531.jar
/user/oozie/share/lib/lib_<ts>/spark2/joda-time-2.9.9.jar
/user/oozie/share/lib/lib_<ts>/spark2/jodd-core-3.5.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/jsch-0.1.54.jar
/user/oozie/share/lib/lib_<ts>/spark2/json-smart-2.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/json4s-ast_2.11-3.5.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/json4s-core_2.11-3.5.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/json4s-jackson_2.11-3.5.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/json4s-scalap_2.11-3.5.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/jsp-api-2.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/jsr305-1.3.9.jar
/user/oozie/share/lib/lib_<ts>/spark2/jtransforms-2.4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/jul-to-slf4j-1.7.25.jar
/user/oozie/share/lib/lib_<ts>/spark2/kafka-clients-2.0.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-admin-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-client-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-common-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-core-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-crypto-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-identity-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-server-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-simplekdc-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerb-util-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerby-asn1-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerby-config-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerby-pkix-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerby-util-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kerby-xdr-1.0.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/kryo-shaded-4.0.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/leveldbjni-all-1.8.jar
/user/oozie/share/lib/lib_<ts>/spark2/libfb303-0.9.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/libthrift-0.9.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/log4j-1.2.17.jar
/user/oozie/share/lib/lib_<ts>/spark2/lz4-java-1.4.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/machinist_2.11-0.6.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/macro-compat_2.11-1.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/metrics-core-3.1.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/metrics-graphite-3.1.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/metrics-json-3.1.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/metrics-jvm-3.1.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/mina-core-2.0.4.jar
/user/oozie/share/lib/lib_<ts>/spark2/minlog-1.3.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/mssql-jdbc-6.2.1.jre7.jar
/user/oozie/share/lib/lib_<ts>/spark2/netty-3.10.6.Final.jar
/user/oozie/share/lib/lib_<ts>/spark2/netty-all-4.1.17.Final.jar
/user/oozie/share/lib/lib_<ts>/spark2/nimbus-jose-jwt-4.41.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/objenesis-2.5.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/okhttp-2.7.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/okio-1.6.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/oozie-sharelib-spark-5.0.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/oozie-sharelib-spark.jar
/user/oozie/share/lib/lib_<ts>/spark2/opencsv-2.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/oro-2.0.8.jar
/user/oozie/share/lib/lib_<ts>/spark2/osgi-resource-locator-1.0.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/paranamer-2.8.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-column.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-common.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-encoding.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-format.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-hadoop.jar
/user/oozie/share/lib/lib_<ts>/spark2/parquet-jackson.jar
/user/oozie/share/lib/lib_<ts>/spark2/protobuf-java-2.5.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/py4j-0.10.7-src.zip
/user/oozie/share/lib/lib_<ts>/spark2/py4j-0.10.7.jar
/user/oozie/share/lib/lib_<ts>/spark2/pyrolite-4.13.jar
/user/oozie/share/lib/lib_<ts>/spark2/pyspark.zip
/user/oozie/share/lib/lib_<ts>/spark2/re2j-1.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/scala-compiler-2.11.12.jar
/user/oozie/share/lib/lib_<ts>/spark2/scala-library-2.11.12.jar
/user/oozie/share/lib/lib_<ts>/spark2/scala-parser-combinators_2.11-1.1.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/scala-reflect-2.11.12.jar
/user/oozie/share/lib/lib_<ts>/spark2/scala-xml_2.11-1.0.5.jar
/user/oozie/share/lib/lib_<ts>/spark2/shapeless_2.11-2.3.2.jar
/user/oozie/share/lib/lib_<ts>/spark2/slf4j-api-1.7.25.jar
/user/oozie/share/lib/lib_<ts>/spark2/slf4j-log4j12-1.7.25.jar
/user/oozie/share/lib/lib_<ts>/spark2/snappy-java-1.1.4.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-avro_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-catalyst_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-core_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-graphx_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-hadoop-cloud_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-hive_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-kvstore_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-launcher_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-lineage_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-mllib-local_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-mllib_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-network-common_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-network-shuffle_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-repl_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-sketch_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-sql-kafka-0-10_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-sql_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-streaming-flume-sink_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-streaming-flume_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-streaming-kafka-0-10_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-streaming_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-tags_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-unsafe_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spark-yarn_2.11-2.4.0-cdh6.1.1.jar
/user/oozie/share/lib/lib_<ts>/spark2/spire-macros_2.11-0.13.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/spire_2.11-0.13.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/stax2-api-3.1.4.jar
/user/oozie/share/lib/lib_<ts>/spark2/stream-2.7.0.jar
/user/oozie/share/lib/lib_<ts>/spark2/univocity-parsers-2.7.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/validation-api-1.1.0.Final.jar
/user/oozie/share/lib/lib_<ts>/spark2/wildfly-openssl-1.0.4.Final.jar
/user/oozie/share/lib/lib_<ts>/spark2/woodstox-core-5.0.3.jar
/user/oozie/share/lib/lib_<ts>/spark2/xbean-asm6-shaded-4.8.jar
/user/oozie/share/lib/lib_<ts>/spark2/xz-1.6.jar
/user/oozie/share/lib/lib_<ts>/spark2/zookeeper.jar
/user/oozie/share/lib/lib_<ts>/spark2/zstd-jni-1.3.2-2.jar
1条答案
按热度按时间k2arahey1#
Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog'
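这意味着它尝试查找的 Catalog 相关 jar 不在 Oozie sharelib 的 spark 目录中。请在 job.properties 文件中添加以下属性(原文此处的属性行已丢失;通常是类似 `oozie.action.sharelib.for.spark=hive,spark,hcatalog` 的设置,请按集群中实际的 sharelib 目录名调整)。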
这样 Spark action 就可以使用 Hive 的 jar。对于外部 Hive catalog,Oozie sharelib 的 spark 目录并不包含所需的全部 jar。
与 org/apache/commons/dbcp/ConnectionFactory 相关的 jar 位于 Hive 的 lib 目录下。因此,请检查该 jar 是否同时存在于本地文件系统和 HDFS 中:
find <location> -name "*.jar" | xargs grep ConnectionFactory
另外,请在创建 SparkSession 时加上 `.enableHiveSupport()`,以启用 Spark SQL 的 Hive 支持。