当前体系结构x1c 0d1x
说明
1.我正在使用JanusGraph 0.6.2进行图形处理。
- GCP BigTable作为JanusGraph后端/数据库。
- Spark 3.0.0与Hadoop 2.7用于数据处理,本地设置(计划在POC后在GCP中设置env)。
- Gremlin客户端和Java 11作为客户端运行Spark Job,通过 SparkGraphComputer 执行遍历、查找节点等查询
问题
我能够触发一个查询作业,使用Gremlin客户端在Spark上进行节点计数,但我面临着使用Java API触发查询作业的问题。
期望
使用Java API触发查询作业。
Apache Spark安装程序已完成
Gremlin客户端的配置正在运行
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Hadoop Graph Configuration
#
gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph
gremlin.hadoop.graphReader=org.janusgraph.hadoop.formats.hbase.HBaseInputFormat
gremlin.hadoop.graphWriter=org.apache.hadoop.mapreduce.lib.output.NullOutputFormat
gremlin.hadoop.jarsInDistributedCache=true
gremlin.hadoop.inputLocation=none
gremlin.hadoop.outputLocation=output
gremlin.spark.persistContext=true
#
# JanusGraph HBase InputFormat configuration
#
#janusgraphmr.ioformat.conf.storage.backend=hbase
#janusgraphmr.ioformat.conf.storage.hostname=localhost
#janusgraphmr.ioformat.conf.storage.port=8586
#janusgraphmr.ioformat.conf.storage.hbase.table=janusgraph
janusgraphmr.ioformat.conf.storage.backend=hbase
janusgraphmr.ioformat.conf.storage.hbase.ext.hbase.client.connection.impl=com.google.cloud.bigtable.hbase2_x.BigtableConnection
janusgraphmr.ioformat.conf.storage.hbase.ext.google.bigtable.project.id= *****
janusgraphmr.ioformat.conf.storage.hbase.ext.google.bigtable.instance.id= *****
janusgraphmr.ioformat.conf.storage.hbase.table= ******
janusgraphmr.ioformat.conf.storage.hbase.ext.hbase.regionsizecalculator.enable=false
# This defines the indexing backend configuration used while writing data to JanusGraph.
janusgraphmr.ioformat.conf.index.search.backend=elasticsearch
janusgraphmr.ioformat.conf.index.search.hostname=localhost
#
# SparkGraphComputer Configuration
#
spark.master=spark://RINMAC1714:7077
spark.executor.memory=1g
spark.executor.extraClassPath=/Users/rohit.pahan/portables/janusgraph-0.6.2/lib/*
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.kryo.registrator=org.janusgraph.hadoop.serialize.JanusGraphKryoRegistrator
以上配置工作正常,我得到了结果。请按照屏幕截图
x 1c 3d 1x
Java API配置对我不起作用
GraphTraversalProvider.java
import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
public class GraphTraversalProvider {
public static Configuration makeLocal() {
return make(true);
}
public static Configuration makeRemote() {
return make(false);
}
private static Configuration make(boolean local) {
final Configuration hadoopConfig = new BaseConfiguration();
hadoopConfig.setProperty("gremlin.graph", "org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph");
hadoopConfig.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, "org.janusgraph.hadoop.formats.hbase.HBaseInputFormat");
hadoopConfig.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, "org.apache.hadoop.mapreduce.lib.output.NullOutputFormat");
hadoopConfig.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true);
hadoopConfig.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, "none");
hadoopConfig.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, "output");
hadoopConfig.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.backend", "hbase");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.hbase.ext.hbase.client.connection.impl", "com.google.cloud.bigtable.hbase2_x.BigtableConnectio");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.hbase.ext.google.bigtable.project.id", "******");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.hbase.ext.google.bigtable.instance.id", "*******");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.hbase.table", "******");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.storage.hbase.ext.hbase.regionsizecalculator.enable", false);
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.index.search.backend", "elasticsearch");
hadoopConfig.setProperty("janusgraphmr.ioformat.conf.index.search.hostname", "localhost");
if (local) {
hadoopConfig.setProperty("spark.master", "local[*]"); // Run Spark locally with as many worker threads as logical cores on your machine.
} else {
hadoopConfig.setProperty("spark.master", "spark://MAC1714:7077");
}
hadoopConfig.setProperty("spark.executor.memory", "1g");
hadoopConfig.setProperty(Constants.SPARK_SERIALIZER, "org.apache.spark.serializer.KryoSerializer");
hadoopConfig.setProperty("spark.kryo.registrator", "org.janusgraph.hadoop.serialize.JanusGraphKryoRegistrator");
hadoopConfig.setProperty("spark.kryo.registrationRequired","false");
return hadoopConfig;
}
}
主类
public static void main(String[] args) throws Exception {
runSpark();
}
private static void runSpark() throws Exception {
Configuration config = GraphTraversalProvider.makeRemote();
Graph hadoopGraph = GraphFactory.open(config);
Long totalVertices = hadoopGraph.traversal().withComputer(SparkGraphComputer.class).V().count().next();
System.out.println("IT WORKED: " + totalVertices);
hadoopGraph.close();
}
}
pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.6.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.janus</groupId>
<artifactId>janus-spark</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>janus-spark</name>
<description>Demo project for Spring Boot</description>
<properties>
<janus.version>0.6.2</janus.version>
<spark.version>3.0.0</spark.version>
<gremlin.version>3.4.6</gremlin.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.janusgraph/janusgraph-bigtable -->
<dependency>
<groupId>org.janusgraph</groupId>
<artifactId>janusgraph-bigtable</artifactId>
<version>${janus.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.janusgraph/janusgraph-hadoop -->
<dependency>
<groupId>org.janusgraph</groupId>
<artifactId>janusgraph-hadoop</artifactId>
<version>${janus.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.janusgraph/janusgraph-hbase -->
<dependency>
<groupId>org.janusgraph</groupId>
<artifactId>janusgraph-hbase</artifactId>
<version>${janus.version}</version>
</dependency>
<dependency>
<groupId>org.janusgraph</groupId>
<artifactId>janusgraph-solr</artifactId>
<version>${janus.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.esotericsoftware.kryo/kryo -->
<dependency>
<groupId>com.esotericsoftware.kryo</groupId>
<artifactId>kryo</artifactId>
<version>2.16</version>
</dependency>
<!--
<dependency>
<groupId>com.twitter</groupId>
<artifactId>chill_2.13</artifactId>
<version>0.10.0</version>
</dependency>-->
<!-- GREMLIN -->
<dependency>
<groupId>org.apache.tinkerpop</groupId>
<artifactId>spark-gremlin</artifactId>
<version>${gremlin.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.tinkerpop</groupId>
<artifactId>hadoop-gremlin</artifactId>
<version>${gremlin.version}</version>
</dependency>
<!-- SPARK -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>27.0-jre</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
错误日志
SLF4J: Found binding in [jar:file:/Users/rohit.pahan/portables/janusgraph-0.6.2/lib/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/rohit.pahan/portables/janusgraph-0.6.2/lib/logback-classic-1.1.3.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/rohit.pahan/.m2/repository/ch/qos/logback/logback-classic/1.2.3/logback-classic-1.2.3.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/Users/rohit.pahan/.m2/repository/org/slf4j/slf4j-log4j12/1.7.30/slf4j-log4j12-1.7.30.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
0 [main] WARN org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer - class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat does not implement PersistResultGraphAware and thus, persistence options are unknown -- assuming all options are possible
Exception in thread "main" java.lang.IllegalStateException: java.util.ServiceConfigurationError: org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimService: Provider org.janusgraph.hadoop.serialize.JanusGraphKryoShimService could not be instantiated
at org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.VertexProgramStep.processNextStart(VertexProgramStep.java:88)
at org.apache.tinkerpop.gremlin.process.traversal.step.util.AbstractStep.hasNext(AbstractStep.java:150)
at org.apache.tinkerpop.gremlin.process.traversal.step.util.ExpandableStepIterator.next(ExpandableStepIterator.java:55)
at org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.ComputerResultStep.processNextStart(ComputerResultStep.java:68)
at org.apache.tinkerpop.gremlin.process.traversal.step.util.AbstractStep.next(AbstractStep.java:135)
at org.apache.tinkerpop.gremlin.process.traversal.step.util.AbstractStep.next(AbstractStep.java:40)
at org.apache.tinkerpop.gremlin.process.traversal.util.DefaultTraversal.next(DefaultTraversal.java:240)
at com.janus.app.services.RunSparkJob.runSpark(RunSparkJob.java:20)
at com.janus.app.services.RunSparkJob.main(RunSparkJob.java:14)
Caused by: java.util.concurrent.ExecutionException: java.util.ServiceConfigurationError: org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimService: Provider org.janusgraph.hadoop.serialize.JanusGraphKryoShimService could not be instantiated
at java.base/java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.base/java.util.concurrent.FutureTask.get(FutureTask.java:191)
at org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.VertexProgramStep.processNextStart(VertexProgramStep.java:68)
... 8 more
Caused by: java.util.ServiceConfigurationError: org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimService: Provider org.janusgraph.hadoop.serialize.JanusGraphKryoShimService could not be instantiated
at java.base/java.util.ServiceLoader.fail(ServiceLoader.java:582)
at java.base/java.util.ServiceLoader$ProviderImpl.newInstance(ServiceLoader.java:804)
at java.base/java.util.ServiceLoader$ProviderImpl.get(ServiceLoader.java:722)
at java.base/java.util.ServiceLoader$3.next(ServiceLoader.java:1393)
at org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader.load(KryoShimServiceLoader.java:97)
at org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader.applyConfiguration(KryoShimServiceLoader.java:58)
at org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer.lambda$submitWithExecutor$1(SparkGraphComputer.java:248)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
at java.base/java.lang.Thread.run(Thread.java:831)
Caused by: java.lang.IllegalArgumentException: Unable to create serializer "org.apache.tinkerpop.shaded.kryo.serializers.FieldSerializer" for class: java.util.concurrent.atomic.AtomicLong
at org.apache.tinkerpop.shaded.kryo.factories.ReflectionSerializerFactory.makeSerializer(ReflectionSerializerFactory.java:67)
at org.apache.tinkerpop.shaded.kryo.factories.ReflectionSerializerFactory.makeSerializer(ReflectionSerializerFactory.java:45)
at org.apache.tinkerpop.shaded.kryo.Kryo.newDefaultSerializer(Kryo.java:380)
at org.apache.tinkerpop.shaded.kryo.Kryo.getDefaultSerializer(Kryo.java:364)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoTypeReg.registerWith(GryoTypeReg.java:122)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoMapper.createMapper(GryoMapper.java:101)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoMapper.createMapper(GryoMapper.java:75)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoReader.<init>(GryoReader.java:71)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoReader.<init>(GryoReader.java:64)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoReader$Builder.create(GryoReader.java:302)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool.createPool(GryoPool.java:126)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool.access$100(GryoPool.java:40)
at org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool$Builder.create(GryoPool.java:227)
at org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPools.initialize(HadoopPools.java:51)
at org.janusgraph.hadoop.serialize.JanusGraphKryoShimService.<init>(JanusGraphKryoShimService.java:30)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:78)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at java.base/java.util.ServiceLoader$ProviderImpl.newInstance(ServiceLoader.java:780)
... 9 more
Caused by: java.lang.reflect.InvocationTargetException
at jdk.internal.reflect.GeneratedConstructorAccessor3.newInstance(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at org.apache.tinkerpop.shaded.kryo.factories.ReflectionSerializerFactory.makeSerializer(ReflectionSerializerFactory.java:54)
... 29 more
Caused by: java.lang.reflect.InaccessibleObjectException: Unable to make field private volatile long java.util.concurrent.atomic.AtomicLong.value accessible: module java.base does not "opens java.util.concurrent.atomic" to unnamed module @1d9b7cce
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:357)
at java.base/java.lang.reflect.AccessibleObject.checkCanSetAccessible(AccessibleObject.java:297)
at java.base/java.lang.reflect.Field.checkCanSetAccessible(Field.java:177)
at java.base/java.lang.reflect.Field.setAccessible(Field.java:171)
at org.apache.tinkerpop.shaded.kryo.serializers.FieldSerializer.buildValidFields(FieldSerializer.java:306)
at org.apache.tinkerpop.shaded.kryo.serializers.FieldSerializer.rebuildCachedFields(FieldSerializer.java:239)
at org.apache.tinkerpop.shaded.kryo.serializers.FieldSerializer.rebuildCachedFields(FieldSerializer.java:182)
at org.apache.tinkerpop.shaded.kryo.serializers.FieldSerializer.<init>(FieldSerializer.java:155)
... 34 more
Process finished with exit code 1
我仍在探索Janusgraph及其与Spark的处理能力。我已经在这里给出了所有细节,如果需要更多细节,请告诉我。这对我来说是一个非常新的技术堆栈。我将感谢任何帮助。
1条答案
按热度按时间1aaf6o9v1#
JanusGraph-0.6.2依赖于TinkerPop-3.5.3。与其他TinkerPop版本混合很容易导致此类问题。