使用 HBase 运行 Apache PredictionIO 时出错

wkyowqbh  于 2021-06-07  发布在  Hbase
关注(0)|答案(0)|浏览(373)

我正在尝试开发一个基于 Apache PredictionIO 的项目,并按照此指南将 PredictionIO 容器化(Docker 化)。但我遇到了一个问题:我想将 HBase 与 Hadoop 一起用于存储 PIO 的事件数据。因此,在 docker 目录(位于上面链接的仓库中)下,我创建了一个 hbase 目录,并在其中使用下面这些 docker-compose 文件。
docker-compose.base.yml文件

# HDFS + YARN + ZooKeeper + HBase stack used as the PredictionIO event store.
# This file is layered onto the PredictionIO compose files with extra `-f`
# options; the `pio` entry at the bottom only contributes overrides.
version: "3"
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    container_name: namenode
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      CLUSTER_NAME: test
    # With several `-f` files, relative paths resolve against the directory of
    # the first compose file, hence the `hbase/` prefix.
    env_file:
      - hbase/hadoop.env
    ports:
      - "50070:50070"

  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    container_name: datanode
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      # Presumably a list of host:port pairs the image entrypoint waits for
      # before starting the daemon - confirm against the image documentation.
      SERVICE_PRECONDITION: "namenode:50070"
    env_file:
      - hbase/hadoop.env
    ports:
      - "50075:50075"

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
    env_file:
      - hbase/hadoop.env
    ports:
      - "8088:8088"

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - "8042:8042"

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
    container_name: historyserver
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - "8188:8188"

  zoo:
    image: zookeeper:3.4.10
    container_name: zoo
    hostname: zoo
    restart: always
    environment:
      # Quoted so the value is passed as a string, not a YAML integer.
      ZOO_MY_ID: "1"
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888
    ports:
      - "2181:2181"

  hbase-master:
    image: bde2020/hbase-master:1.0.0-hbase1.2.6
    container_name: hbase-master
    hostname: hbase-master
    depends_on:
      - zoo
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181"
    ports:
      - "16010:16010"

  hbase-region:
    image: bde2020/hbase-regionserver:1.0.0-hbase1.2.6
    container_name: hbase-regionserver
    hostname: hbase-regionserver
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      # Uses the compose service name, which differs from `hostname` above.
      HBASE_CONF_hbase_regionserver_hostname: hbase-region
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181 hbase-master:16010"
    ports:
      - "16030:16030"

  # Overrides merged into the `pio` service defined in the main compose file.
  pio:
    depends_on:
      # The HBase client contacts ZooKeeper first, so start it before pio.
      - zoo
      - hbase-master
    environment:
      PIO_STORAGE_SOURCES_HBASE_TYPE: hbase
      # NOTE(review): an HBase client falls back to
      # hbase.zookeeper.quorum=localhost unless an hbase-site.xml on its
      # classpath says otherwise. If pio reports "quorum=localhost:2181",
      # HOSTS alone is probably not being used for the quorum - confirm that
      # the pio container has an hbase-site.xml that names `zoo`.
      PIO_STORAGE_SOURCES_HBASE_HOSTS: hbase-master

volumes:
  hadoop_namenode:
  hadoop_datanode:
  hadoop_historyserver:

以及 docker-compose.event.yml:

# Override that points PredictionIO's event-data repository at the storage
# source named HBASE.
version: "3"
services:
  pio:
    environment:
      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event
      # Must match the <NAME> part of the PIO_STORAGE_SOURCES_<NAME>_* settings.
      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: HBASE

我还有如下的环境文件 hbase-distributed-local.env:

# HBase settings loaded through `env_file` by the hbase-master and
# hbase-region services.
# NOTE(review): the HBASE_CONF_* names look like hbase-site.xml properties with
# "." written as "_" (e.g. hbase.zookeeper.quorum); presumably the image
# entrypoint performs that translation - confirm against the image docs.

# Storage location and cluster mode.
HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase
HBASE_CONF_hbase_cluster_distributed=true

# External ZooKeeper (the `zoo` service).
HBASE_CONF_hbase_zookeeper_quorum=zoo
HBASE_CONF_hbase_zookeeper_property_clientPort=2181

# Master and region server endpoints.
HBASE_CONF_hbase_master=hbase-master:16000
HBASE_CONF_hbase_master_hostname=hbase-master
HBASE_CONF_hbase_master_port=16000
HBASE_CONF_hbase_master_info_port=16010
HBASE_CONF_hbase_regionserver_port=16020
HBASE_CONF_hbase_regionserver_info_port=16030

# ZooKeeper runs in its own container, so HBase must not start one itself.
HBASE_MANAGES_ZK=false

以及 hadoop.env:

# Hadoop settings loaded through `env_file` by the namenode, datanode,
# resourcemanager, nodemanager and historyserver services.
# NOTE(review): the *_CONF_ names appear to encode Hadoop property names with
# "." written as "_" and "-" written as "___" (e.g. yarn.log-aggregation-enable);
# presumably the image entrypoint performs that translation - confirm against
# the image documentation.

# Core settings (CORE_CONF_ prefix).
CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec

# HDFS settings (HDFS_CONF_ prefix).
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false

# YARN settings (YARN_CONF_ prefix).
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
# The Hadoop property is yarn.resourcemanager.resource-tracker.address, so the
# hyphen is written as "___" like the other hyphenated names in this file.
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle

# MapReduce settings (MAPRED_CONF_ prefix).
MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m

因此,我再次按照文档,尝试用下面的命令来启动:

# Start PredictionIO by layering the compose files in order: base service,
# Spark, Elasticsearch (metadata), HBase (event data), local FS (model data).
docker-compose \
  --file docker-compose.yml \
  --file docker-compose.spark.yml \
  --file elasticsearch/docker-compose.base.yml \
  --file elasticsearch/docker-compose.meta.yml \
  --file hbase/docker-compose.base.yml \
  --file hbase/docker-compose.event.yml \
  --file localfs/docker-compose.model.yml \
  up

以下是完整的错误输出:

pio_1              | [INFO] [Management$] Inspecting PredictionIO...
pio_1              | [INFO] [Management$] PredictionIO 0.13.0 is installed at /usr/share/predictionio
pio_1              | [INFO] [Management$] Inspecting Apache Spark...
pio_1              | [INFO] [Management$] Apache Spark is installed at /usr/share/spark-2.2.2-bin-hadoop2.7
pio_1              | [INFO] [Management$] Apache Spark 2.2.2 detected (meets minimum requirement of 1.6.3)
pio_1              | [INFO] [Management$] Inspecting storage backend connections...
pio_1              | [INFO] [Storage$] Verifying Meta Data Backend (Source: ELASTICSEARCH)...
pio_1              | [INFO] [Storage$] Verifying Model Data Backend (Source: LOCALFS)...
pio_1              | [INFO] [Storage$] Verifying Event Data Backend (Source: HBASE)...
pio_1              | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1              | [ERROR] [ZooKeeperWatcher] hconnection-0x78de58ea, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1              | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1              | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1              | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1              | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1              |    at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1              |    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1              |    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.verifyAllDataObjects(Storage.scala:384)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.status(Management.scala:156)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.status(Pio.scala:155)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:721)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1              | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1              |    ... 22 more
pio_1              | 
pio_1              | 
pio_1              | 
pio_1              | [ERROR] [Management$] Unable to connect to all storage backends successfully.
pio_1              | The following shows the error message from the storage backend.
pio_1              | 
pio_1              | Data source HBASE was not properly initialized. (org.apache.predictionio.data.storage.StorageClientException)
pio_1              | 
pio_1              | Dumping configuration of initialized storage backend sources.
pio_1              | Please make sure they are correct.
pio_1              | 
pio_1              | Source Name: ELASTICSEARCH; Type: elasticsearch; Configuration: HOSTS -> elasticsearch, TYPE -> elasticsearch, SCHEMES -> http, PORTS -> 9200
pio_1              | Source Name: LOCALFS; Type: localfs; Configuration: PATH -> /work/pio_store/models, TYPE -> localfs
pio_1              | Source Name: HBASE; Type: (error); Configuration: (error)
pio_1              | [INFO] [Management$] Creating Event Server at 0.0.0.0:7070
pio_1              | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1              | [ERROR] [ZooKeeperWatcher] hconnection-0x159a48a6, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1              | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1              | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1              | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1              | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1              |    at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1              |    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1              |    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1              |    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1              |    at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1              | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1              |    at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1              |    at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1              |    at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1              |    ... 22 more
pio_1              | 
pio_1              | 
pio_1              | 
pio_1              | Exception in thread "main" org.apache.predictionio.data.storage.StorageClientException: Data source HBASE was not properly initialized.
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1              |    at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1              |    at scala.Option.getOrElse(Option.scala:121)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1              |    at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1              |    at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1              |    at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1              |    at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1              |    at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1              |    at scala.Option.map(Option.scala:146)
pio_1              |    at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1              |    at org.apache.predictionio.tools.console.Console.main(Console.scala)

我进入了所有容器,并用 /dev/tcp 检查了各个 host:port,所有端口似乎都已打开,但这个问题我仍然无法解决。
pio版本:0.13.0
es版本:5.5.2
hbase:1.2.6版
hadoop:2.7.7版本
有什么想法吗?
编辑:在 pio 容器中出现这个错误之后,我执行了 jps -l 命令,下面是输出,也许会有帮助。

root@67662213df1e:/usr/share/predictionio# jps -l
1120 sun.tools.jps.Jps
926 org.apache.predictionio.tools.console.Console

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题