jupyter笔记本连接到远程配置单元

of1yzvn4  于 2021-05-29  发布在  Hadoop
关注(0)|答案(1)|浏览(392)

我想从我们公司的远程服务器上获取数据。我使用anaconda3(windows64位),我的hadoop在ambari上工作。
我试过这样做。。。

  1. import findspark
  2. findspark.init()
  3. from pyspark import SparkContext, SparkConf
  4. from pyspark.sql import HiveContext, SparkSession
  5. sparkSession = (SparkSession.builder.appName('example-pyspark-read-from-hive').config("hive.metastore.uris","http://serv_ip:serv_port").enableHiveSupport().getOrCreate())
  6. sparkSession.sql('show databases').show()

也许我的配置有问题?也许我应该在 hive 里做些配置。错误是。。。

  1. <details>
  2. <summary>Error </summary>
  3. Py4JJavaError Traceback (most recent call last) D:\Alanuccio\Progs\spark-2.3.0-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a,**kw) 62 try: ---> 63 return f(*a,**kw) 64 except py4j.protocol.Py4JJavaError as e: D:\Alanuccio\Progs\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\protocol.py
  4. in get_return_value(answer, gateway_client, target_id, name) 319 "An error occurred while calling {0}{1}{2}.\n". --> 320 format(target_id, ".", name), value) 321 else: Py4JJavaError: An error occurred while calling o27.sql. : org.apache.spark.sql.AnalysisException:
  5. java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient; at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106) at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:194)
  6. at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114) at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102) at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
  7. at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54) at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52) at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1.
  8. <init>(HiveSessionStateBuilder.scala:69) at org.apache.spark.sql.hive.HiveSessionStateBuilder.analyzer(HiveSessionStateBuilder.scala:69) at org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293) at
  9. org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293) at org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:79) at org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:79)
  10. at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)
  11. at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:638) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  12. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
  13. at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:214) at java.lang.Thread.run(Thread.java:748)
  14. Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522) at org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:180)
  15. at org.apache.spark.sql.hive.client.HiveClientImpl.
  16. <init>(HiveClientImpl.scala:114) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
  17. at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264) at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:385)
  18. at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:287) at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66) at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
  19. at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:195) at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195) at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195)
  20. at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97) ... 28 more Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
  21. at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.
  22. <init>(RetryingMetaStoreClient.java:86) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104) at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
  23. at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024) at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503) ... 43 more Caused by: java.lang.reflect.InvocationTargetException at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native
  24. Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
  25. at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521) ... 49 more Caused by: java.lang.OutOfMemoryError: Java heap space During handling of the above exception, another exception occurred: AnalysisException Traceback
  26. (most recent call last)
  27. <ipython-input-12-9da3198f4ab3> in
  28. <module>() 4 print( help(sparkSession.sql) )''' 5 ----> 6 sparkSession.sql('show databases').show() D:\Alanuccio\Progs\spark-2.3.0-bin-hadoop2.7\python\pyspark\sql\session.py in sql(self, sqlQuery) 706 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'),
  29. Row(f1=3, f2=u'row3')] 707 """ --> 708 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) 709 710 @since(2.0) D:\Alanuccio\Progs\spark-2.3.0-bin-hadoop2.7\python\lib\py4j-0.10.6-src.zip\py4j\java_gateway.py in __call__(self,
  30. *args) 1158 answer = self.gateway_client.send_command(command) 1159 return_value = get_return_value( -> 1160 answer, self.gateway_client, self.target_id, self.name) 1161 1162 for temp_arg in temp_args: D:\Alanuccio\Progs\spark-2.3.0-bin-hadoop2.7\python\pyspark\sql\utils.py
  31. in deco(*a,**kw) 67 e.java_exception.getStackTrace())) 68 if s.startswith('org.apache.spark.sql.AnalysisException: '): ---> 69 raise AnalysisException(s.split(': ', 1)[1], stackTrace) 70 if s.startswith('org.apache.spark.sql.catalyst.analysis'):
  32. 71 raise AnalysisException(s.split(': ', 1)[1], stackTrace) AnalysisException: 'java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;'
  33. </details>
wlp8pajw

wlp8pajw1#

试试这个,

  1. config("hive.metastore.uris","thrift://serv_ip:serv_port")

默认端口为 9083

相关问题