I have installed Spark on a Windows machine (standalone) and am trying to connect from a Spark application to the Hive metastore running in an HDP 2.6 VM.
The HDP 2.6 VM uses NAT as its network adapter. When I try to connect to the Hive metastore (on the HDP 2.6 VM) from the Spark application (running in local mode on the Windows machine), I get the error message below.
17/08/12 17:00:16 INFO metastore: Waiting 1 seconds before next connection attempt.
17/08/12 17:00:17 INFO metastore: Trying to connect to metastore with URI thrift://172.0.0.1:9083
17/08/12 17:00:38 WARN metastore: Failed to connect to the MetaStore Server...
17/08/12 17:00:38 INFO metastore: Waiting 1 seconds before next connection attempt.
17/08/12 17:00:39 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
at org.apache.hadoop.hive.ql.metadata.Hive.<clinit>(Hive.java:166)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:623)
at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:691)
at com.psl.spark.RemoteHiveConnSpark1_6$.main(RemoteHiveConnSpark1_6.scala:29)
at com.psl.spark.RemoteHiveConnSpark1_6.main(RemoteHiveConnSpark1_6.scala)
Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
Spark application:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * @author prasanta_sahoo
 */
object RemoteHiveConnSpark1_6 {
  def main(arg: Array[String]): Unit = {
    // Create the conf object for local mode
    val conf = new SparkConf()
      .setAppName("RemoteHiveConnSpark1_6")
      .setMaster("local") // local mode
      .set("spark.storage.memoryFraction", "1")

    // Point the Hive client at the remote metastore
    System.setProperty("hive.metastore.uris", "thrift://172.0.0.1:9083")

    // Create the SparkContext and a HiveContext on top of it
    val sc = new SparkContext(conf)
    val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
    //hiveContext.setConf("hive.metastore.uris", "thrift://172.0.0.1:9083")

    // Disable case sensitivity of SQL
    //hiveContext.sql("set spark.sql.caseSensitive=false")

    hiveContext.sql("FROM default.sample_07 SELECT code, description, total_emp, salary").collect().foreach(println)
  }
}
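(For reference: the stack trace above comes from a Spark 2.x build, where the SparkSession API supersedes HiveContext. A minimal sketch of the same query written against SparkSession, assuming the same metastore URI as above, would look like this; the object name is illustrative only.)

import org.apache.spark.sql.SparkSession

// Sketch: SparkSession-based equivalent of the HiveContext code above.
object RemoteHiveConnSparkSession {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("RemoteHiveConnSparkSession")
      .master("local")
      // Same metastore URI as in the question; must be set before the session is created.
      .config("hive.metastore.uris", "thrift://172.0.0.1:9083")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("SELECT code, description, total_emp, salary FROM default.sample_07")
      .collect()
      .foreach(println)
  }
}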
Can anyone help me resolve this issue?
1 Answer
webghufk1#
To connect to the Hive metastore, the following configuration properties are needed:

spark.yarn.dist.files  /apps/spark/hive-site.xml,/apps/spark/datanucleus-rdbms-4.1.7.jar,/apps/spark/datanucleus-core-4.1.6.jar,/apps/spark/datanucleus-api-jdo-4.2.1.jar
spark.sql.hive.metastore.version

Please verify whether these configurations are present.
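As a rough illustration of how these properties could be supplied from code rather than spark-defaults.conf, here is a sketch: the jar paths are the example paths from this answer, and the 1.2.1 metastore version is an assumption based on the Hive version that ships with HDP 2.6.

import org.apache.spark.sql.SparkSession

// Sketch: wiring the suggested properties into the session programmatically.
object MetastoreConfigSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("MetastoreConfigSketch")
      .master("local")
      // Remote metastore to talk to (the URI from the question).
      .config("hive.metastore.uris", "thrift://172.0.0.1:9083")
      // Hive version of the remote metastore; 1.2.1 is an assumption for HDP 2.6.
      .config("spark.sql.hive.metastore.version", "1.2.1")
      // Files/jars to distribute when running on YARN (example paths from the answer).
      .config("spark.yarn.dist.files",
        "/apps/spark/hive-site.xml," +
        "/apps/spark/datanucleus-rdbms-4.1.7.jar," +
        "/apps/spark/datanucleus-core-4.1.6.jar," +
        "/apps/spark/datanucleus-api-jdo-4.2.1.jar")
      .enableHiveSupport()
      .getOrCreate()

    // Simple smoke test against the remote metastore.
    spark.sql("SHOW DATABASES").show()
  }
}

Note that spark.sql.hive.metastore.version is typically paired with spark.sql.hive.metastore.jars so Spark knows where to load the matching Hive client classes from.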