I am trying to develop a project based on Apache PredictionIO, following this guide to dockerize PredictionIO. There is one problem: I want to use HBase on top of Hadoop as PIO's event data store. So, inside the docker directory (of the repo linked above) I am creating an hbase directory and putting the following Docker Compose files in it.
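For context, the layout I am assuming under the repo's docker directory looks roughly like this (the elasticsearch and localfs directories already exist in the repo; the hbase directory and its files are the ones I added):

docker/
├── docker-compose.yml
├── docker-compose.spark.yml
├── elasticsearch/
│   ├── docker-compose.base.yml
│   └── docker-compose.meta.yml
├── localfs/
│   └── docker-compose.model.yml
└── hbase/
    ├── docker-compose.base.yml
    ├── docker-compose.event.yml
    ├── hadoop.env
    └── hbase-distributed-local.env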
The docker-compose.base.yml file:
version: "3"

services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    container_name: namenode
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - hbase/hadoop.env
    ports:
      - 50070:50070
  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    container_name: datanode
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      SERVICE_PRECONDITION: "namenode:50070"
    env_file:
      - hbase/hadoop.env
    ports:
      - 50075:50075
  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8088:8088
  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8042:8042
  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
    container_name: historyserver
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
    env_file:
      - hbase/hadoop.env
    ports:
      - 8188:8188
  zoo:
    image: zookeeper:3.4.10
    container_name: zoo
    hostname: zoo
    restart: always
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888
    ports:
      - 2181:2181
  hbase-master:
    image: bde2020/hbase-master:1.0.0-hbase1.2.6
    container_name: hbase-master
    hostname: hbase-master
    depends_on:
      - zoo
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181"
    ports:
      - 16010:16010
  hbase-region:
    image: bde2020/hbase-regionserver:1.0.0-hbase1.2.6
    container_name: hbase-regionserver
    hostname: hbase-regionserver
    env_file:
      - hbase/hbase-distributed-local.env
    environment:
      HBASE_CONF_hbase_regionserver_hostname: hbase-region
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 zoo:2181 hbase-master:16010"
    ports:
      - 16030:16030
  pio:
    depends_on:
      - hbase-master
    environment:
      PIO_STORAGE_SOURCES_HBASE_TYPE: hbase
      PIO_STORAGE_SOURCES_HBASE_HOSTS: hbase-master

volumes:
  hadoop_namenode:
  hadoop_datanode:
  hadoop_historyserver:
and docker-compose.event.yml:
version: "3"

services:
  pio:
    environment:
      PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME: pio_event
      PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE: HBASE
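Merged, these two override files should give the pio container roughly the following HBase-related settings (this is just my reading of the compose files above, written out as flat environment variables):

PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME=pio_event
PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE=HBASE
PIO_STORAGE_SOURCES_HBASE_TYPE=hbase
PIO_STORAGE_SOURCES_HBASE_HOSTS=hbase-master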
I also have the environment file hbase-distributed-local.env:
HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_zookeeper_quorum=zoo
HBASE_CONF_hbase_zookeeper_property_clientPort=2181
HBASE_CONF_hbase_cluster_distributed=true
HBASE_CONF_hbase_master=hbase-master:16000
HBASE_CONF_hbase_master_hostname=hbase-master
HBASE_CONF_hbase_master_port=16000
HBASE_CONF_hbase_master_info_port=16010
HBASE_CONF_hbase_regionserver_port=16020
HBASE_CONF_hbase_regionserver_info_port=16030
HBASE_MANAGES_ZK=false
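As far as I understand the bde2020 images, the container entrypoint translates each HBASE_CONF_* variable into an hbase-site.xml property (single underscores become dots, triple underscores become dashes), so for example:

HBASE_CONF_hbase_rootdir=hdfs://namenode:9000/hbase   ->  hbase.rootdir = hdfs://namenode:9000/hbase
HBASE_CONF_hbase_zookeeper_quorum=zoo                 ->  hbase.zookeeper.quorum = zoo
HBASE_CONF_hbase_zookeeper_property_clientPort=2181   ->  hbase.zookeeper.property.clientPort = 2181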
And here is hadoop.env:
CORE_CONF_fs_defaultFS=hdfs://namenode:9000
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192
YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_mapreduce_map_output_compress=true
YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle
MAPRED_CONF_mapreduce_framework_name=yarn
MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
So, again following the guide, I try to bring everything up with this command:
docker-compose -f docker-compose.yml \
               -f docker-compose.spark.yml \
               -f elasticsearch/docker-compose.base.yml \
               -f elasticsearch/docker-compose.meta.yml \
               -f hbase/docker-compose.base.yml \
               -f hbase/docker-compose.event.yml \
               -f localfs/docker-compose.model.yml \
               up
Here is the full error output:
pio_1 | [INFO] [Management$] Inspecting PredictionIO...
pio_1 | [INFO] [Management$] PredictionIO 0.13.0 is installed at /usr/share/predictionio
pio_1 | [INFO] [Management$] Inspecting Apache Spark...
pio_1 | [INFO] [Management$] Apache Spark is installed at /usr/share/spark-2.2.2-bin-hadoop2.7
pio_1 | [INFO] [Management$] Apache Spark 2.2.2 detected (meets minimum requirement of 1.6.3)
pio_1 | [INFO] [Management$] Inspecting storage backend connections...
pio_1 | [INFO] [Storage$] Verifying Meta Data Backend (Source: ELASTICSEARCH)...
pio_1 | [INFO] [Storage$] Verifying Model Data Backend (Source: LOCALFS)...
pio_1 | [INFO] [Storage$] Verifying Event Data Backend (Source: HBASE)...
pio_1 | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1 | [ERROR] [ZooKeeperWatcher] hconnection-0x78de58ea, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1 | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1 | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1 | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1 | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1 | at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1 | at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1 | at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1 | at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1 | at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.storage.Storage$.verifyAllDataObjects(Storage.scala:384)
pio_1 | at org.apache.predictionio.tools.commands.Management$.status(Management.scala:156)
pio_1 | at org.apache.predictionio.tools.console.Pio$.status(Pio.scala:155)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:721)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1 | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1 | ... 22 more
pio_1 |
pio_1 |
pio_1 |
pio_1 | [ERROR] [Management$] Unable to connect to all storage backends successfully.
pio_1 | The following shows the error message from the storage backend.
pio_1 |
pio_1 | Data source HBASE was not properly initialized. (org.apache.predictionio.data.storage.StorageClientException)
pio_1 |
pio_1 | Dumping configuration of initialized storage backend sources.
pio_1 | Please make sure they are correct.
pio_1 |
pio_1 | Source Name: ELASTICSEARCH; Type: elasticsearch; Configuration: HOSTS -> elasticsearch, TYPE -> elasticsearch, SCHEMES -> http, PORTS -> 9200
pio_1 | Source Name: LOCALFS; Type: localfs; Configuration: PATH -> /work/pio_store/models, TYPE -> localfs
pio_1 | Source Name: HBASE; Type: (error); Configuration: (error)
pio_1 | [INFO] [Management$] Creating Event Server at 0.0.0.0:7070
pio_1 | [ERROR] [RecoverableZooKeeper] ZooKeeper exists failed after 1 attempts
pio_1 | [ERROR] [ZooKeeperWatcher] hconnection-0x159a48a6, quorum=localhost:2181, baseZNode=/hbase Received unexpected KeeperException, re-throwing exception
pio_1 | [WARN] [ZooKeeperRegistry] Can't retrieve clusterId from Zookeeper
pio_1 | [ERROR] [StorageClient] Cannot connect to ZooKeeper (ZooKeeper ensemble: localhost). Please make sure that the configuration is pointing at the correct ZooKeeper ensemble. By default, HBase manages its own ZooKeeper, so if you have not configured HBase to use an external ZooKeeper, that means your HBase is not started or configured properly.
pio_1 | [ERROR] [Storage$] Error initializing storage client for source HBASE.
pio_1 | org.apache.hadoop.hbase.ZooKeeperConnectionException: Can't connect to ZooKeeper
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2358)
pio_1 | at org.apache.predictionio.data.storage.hbase.StorageClient.<init>(StorageClient.scala:53)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
pio_1 | at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
pio_1 | at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
pio_1 | at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getClient(Storage.scala:252)
pio_1 | at org.apache.predictionio.data.storage.Storage$.org$apache$predictionio$data$storage$Storage$$updateS2CM(Storage.scala:283)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$sourcesToClientMeta$1.apply(Storage.scala:244)
pio_1 | at scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:79)
pio_1 | at org.apache.predictionio.data.storage.Storage$.sourcesToClientMeta(Storage.scala:244)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1 | at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1 | at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
pio_1 | Caused by: org.apache.zookeeper.KeeperException$ConnectionLossException: KeeperErrorCode = ConnectionLoss for /hbase
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:99)
pio_1 | at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1045)
pio_1 | at org.apache.zookeeper.ZooKeeper.exists(ZooKeeper.java:1073)
pio_1 | at org.apache.hadoop.hbase.client.HBaseAdmin.checkHBaseAvailable(HBaseAdmin.java:2349)
pio_1 | ... 22 more
pio_1 |
pio_1 |
pio_1 |
pio_1 | Exception in thread "main" org.apache.predictionio.data.storage.StorageClientException: Data source HBASE was not properly initialized.
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1 | at org.apache.predictionio.data.storage.Storage$$anonfun$10.apply(Storage.scala:316)
pio_1 | at scala.Option.getOrElse(Option.scala:121)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObject(Storage.scala:315)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getDataObjectFromRepo(Storage.scala:300)
pio_1 | at org.apache.predictionio.data.storage.Storage$.getLEvents(Storage.scala:448)
pio_1 | at org.apache.predictionio.data.api.EventServer$.createEventServer(EventServer.scala:636)
pio_1 | at org.apache.predictionio.tools.commands.Management$.eventserver(Management.scala:77)
pio_1 | at org.apache.predictionio.tools.console.Pio$.eventserver(Pio.scala:124)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:708)
pio_1 | at org.apache.predictionio.tools.console.Console$$anonfun$main$1.apply(Console.scala:656)
pio_1 | at scala.Option.map(Option.scala:146)
pio_1 | at org.apache.predictionio.tools.console.Console$.main(Console.scala:656)
pio_1 | at org.apache.predictionio.tools.console.Console.main(Console.scala)
I exec'd into all of the containers and checked host:port reachability with /dev/tcp; every port seems to be open, but there is one problem I cannot get past.
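The check I used was roughly the following, run inside each container (the pio container shown here; zoo:2181 and hbase-master:16010 are just examples of the host:port pairs I probed):

# inside the pio container
(echo > /dev/tcp/zoo/2181) && echo "zoo:2181 is open"
(echo > /dev/tcp/hbase-master/16010) && echo "hbase-master:16010 is open"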
pio version: 0.13.0
es version: 5.5.2
hbase version: 1.2.6
hadoop version: 2.7.7
Any ideas?
Edit: after this error appeared, I ran jps -l inside the pio container; here is the output, in case it helps.
root@67662213df1e:/usr/share/predictionio# jps -l
1120 sun.tools.jps.Jps
926 org.apache.predictionio.tools.console.Console