本文整理了Java中org.apache.tez.dag.api.Vertex
类的一些代码示例,展示了Vertex
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Vertex
类的具体详情如下:
包路径:org.apache.tez.dag.api.Vertex
类名称:Vertex
[英]Defines a vertex in the DAG. It represents the application logic that processes and transforms the input data to create the output data. The vertex represents the template from which tasks are created to execute the application in parallel across a distributed execution environment.
[中]定义DAG中的顶点。它表示处理和转换输入数据以创建输出数据的应用程序逻辑。顶点表示从中创建任务以跨分布式执行环境并行执行应用程序的模板。
代码示例来源:origin: apache/hive
/**
 * Builds the Tez vertex that executes the given reduce-side work.
 *
 * <p>Wires the operator plan into the job conf, pre-creates the temp
 * directories FileSinkOperators write to, and configures the vertex with
 * the container resources, environment, and launch options derived from
 * the configuration.
 *
 * @param conf            job configuration the reducer payload is built from
 * @param reduceWork      the Hive reduce work to run in this vertex
 * @param fs              filesystem handle (unused here; kept for interface parity)
 * @param mrScratchDir    scratch directory for the serialized plan
 * @param ctx             compilation context (unused here; kept for interface parity)
 * @param localResources  local resources each task needs
 * @return the fully configured reducer vertex
 * @throws Exception if plan serialization or payload creation fails
 */
private Vertex createVertex(JobConf conf, ReduceWork reduceWork, FileSystem fs,
    Path mrScratchDir, Context ctx, Map<String, LocalResource> localResources)
    throws Exception {
  // Wire the reduce-side operator plan into the configuration.
  conf.set(Utilities.INPUT_NAME, reduceWork.getName());
  Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false);

  // Pre-create the scratch directories that FileSinkOperators will need.
  Utilities.createTmpDirs(conf, reduceWork);

  VertexExecutionContext executionContext = createVertexExecutionContext(reduceWork);

  // With auto-parallelism the vertex starts at the max task count and may be
  // shrunk at runtime; otherwise the configured reducer count is used as-is.
  int parallelism = reduceWork.isAutoReduceParallelism()
      ? reduceWork.getMaxReduceTasks()
      : reduceWork.getNumReduceTasks();

  ProcessorDescriptor processor =
      ProcessorDescriptor.create(ReduceTezProcessor.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(conf));

  Vertex reducer =
      Vertex.create(reduceWork.getName(), processor, parallelism, getContainerResource(conf));
  reducer.setTaskEnvironment(getContainerEnvironment(conf, false));
  reducer.setExecutionContext(executionContext);
  reducer.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
  reducer.addTaskLocalFiles(localResources);
  return reducer;
}
代码示例来源:origin: apache/hive
v.setVertexManagerPlugin(
VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
.setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))));
v.addDataSink("out_"+work.getName(), new DataSinkDescriptor(
OutputDescriptor.create(MROutput.class.getName())
.setUserPayload(TezUtils.createUserPayloadFromConf(conf)), null, null));
代码示例来源:origin: apache/hive
conf.set(Utilities.INPUT_NAME, mapWork.getName());
LOG.info("Going through each work and adding MultiMRInput");
mergeVx.addDataSource(mapWork.getName(),
MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
byte[] userPayload = dob.getData();
desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
mergeVx.setVertexManagerPlugin(desc);
return mergeVx;
} else {
代码示例来源:origin: apache/hive
/**
 * Sums the record counts flowing into the named vertex from each of its
 * upstream (input) vertices.
 *
 * <p>For every upstream vertex the Tez counters are consulted first; a
 * negative value means the count was unavailable there, in which case the
 * Hive counters are used as a fallback.
 *
 * @param vertexName name of the vertex whose inputs are being totaled
 * @return total number of input records across all upstream vertices
 */
private long hiveInputRecordsFromOtherVertices(String vertexName) {
  long total = 0;
  for (Vertex upstream : dag.getVertex(vertexName).getInputVertices()) {
    long records = hiveInputRecordsFromTezCounters(vertexName, upstream.getName());
    if (records < 0) {
      // Tez counters did not have this edge; fall back to Hive counters.
      records = hiveInputRecordsFromHiveCounters(upstream.getName());
    }
    total += records;
  }
  return total;
}
代码示例来源:origin: apache/hive
/**
 * Builds a {@link TaskSpec} for one task of the named vertex.
 *
 * <p>The vertex is required to have exactly one data source and one data
 * sink. Synthetic Tez ids are generated assuming a single DAG (id 0) and a
 * single vertex (id 0), with attempt number 0 for the given task index.
 *
 * @param dag        the DAG containing the vertex
 * @param vertexName vertex whose processor/inputs/outputs define the spec
 * @param numSplits  number of splits recorded in the spec
 * @param appId      application id used to derive the synthetic Tez ids
 * @param index      task index within the vertex
 * @return a task spec describing one attempt of the indexed task
 */
public TaskSpec constructTaskSpec(DAG dag, String vertexName, int numSplits, ApplicationId appId, int index) {
  Vertex vertex = dag.getVertex(vertexName);
  ProcessorDescriptor processor = vertex.getProcessorDescriptor();

  List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> inputs = vertex.getInputs();
  List<RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>> outputs = vertex.getOutputs();
  // This construction path only supports a single source and a single sink.
  Preconditions.checkState(inputs.size() == 1);
  Preconditions.checkState(outputs.size() == 1);

  List<InputSpec> inputSpecs = new ArrayList<>(inputs.size());
  for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> in : inputs) {
    inputSpecs.add(new InputSpec(in.getName(), in.getIODescriptor(), 1));
  }
  List<OutputSpec> outputSpecs = new ArrayList<>(outputs.size());
  for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> out : outputs) {
    outputSpecs.add(new OutputSpec(out.getName(), out.getIODescriptor(), 1));
  }

  // Synthesize the id chain: DAG 0 -> vertex 0 -> task `index` -> attempt 0.
  TezDAGID dagId = TezDAGID.getInstance(appId, 0);
  TezVertexID vertexId = TezVertexID.getInstance(dagId, 0);
  TezTaskAttemptID attemptId =
      TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, index), 0);

  return new TaskSpec(attemptId, dag.getName(), vertexName, numSplits, processor, inputSpecs, outputSpecs, null);
}
代码示例来源:origin: org.apache.tez/tez-mapreduce
Vertex vertex = Vertex.create(vertexName,
ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload),
numTasks, taskResource);
if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf));
vertex.addDataSource("MRInput",
configureMRInputWithLegacySplitsGenerated(stageConf, true));
od.setHistoryText(TezUtils.convertToHistoryText(stageConf));
vertex.addDataSink("MROutput", DataSinkDescriptor.create(od,
OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));
: MRHelpers.getJavaOptsForMRReducer(stageConf);
vertex.setTaskEnvironment(taskEnv)
.addTaskLocalFiles(taskLocalResources)
.setLocationHint(VertexLocationHint.create(locations))
.setTaskLaunchCmdOpts(taskJavaOpts);
vertex.setVertexManagerPlugin((ShuffleVertexManager.createConfigBuilder(stageConf).build()));
+ vertex.getName() + ", processor="
+ vertex.getProcessorDescriptor().getClassName() + ", parallelism="
+ vertex.getParallelism() + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
+ ", resources=" + vertex.getTaskResource()
代码示例来源:origin: org.apache.pig/pig
Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, parallel, resource);
vertex.setTaskLaunchCmdOpts(mapTaskLaunchCmdOpts);
vertex.setTaskEnvironment(mapTaskEnv);
} else {
vertex.setTaskLaunchCmdOpts(reduceTaskLaunchCmdOpts);
vertex.setTaskEnvironment(reduceTaskEnv);
+ ", memory=" + vertex.getTaskResource().getMemory()
+ ", java opts=" + vertex.getTaskLaunchCmdOpts()
);
log.info("Processing aliases: " + alias);
+ " for vertex " + vertex.getName()
+ " as the serialized size in memory is "
+ splitsSerializedSize + ". Configured "
fs, inputSplitInfo,
additionalLocalResources);
inputSplitInDiskVertices.add(vertex.getName());
} else {
vertex.setLocationHint(VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()));
vertex.addDataSource(ld.getOperatorKey().toString(),
DataSourceDescriptor.create(InputDescriptor.create(MRInput.class.getName())
.setUserPayload(UserPayload.create(userPayLoadBuilder.build().toByteString().asReadOnlyByteBuffer())),
vertex.addDataSink(outputKey.toString(),
DataSinkDescriptor.create(storeOutDescriptor,
代码示例来源:origin: org.apache.tez/tez-tests
DataSourceDescriptor.create(inputDescriptor, inputInitializerDescriptor, null);
Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v1.addDataSource(INPUT, dataSourceDescriptor);
Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v2.addDataSource(INPUT, dataSourceDescriptor);
UserPayload userPayload = cartesianProductConfig.toUserPayload(tezConf);
Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
v3.addDataSink(OUTPUT, dataSinkDescriptor);
v3.setVertexManagerPlugin(
VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
.setUserPayload(userPayload));
代码示例来源:origin: org.apache.tez/tez-tests
DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();
Vertex mapVertex1 = Vertex.create("map1", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex2 = Vertex.create("map2", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex3 = Vertex.create("map3", ProcessorDescriptor.create(
TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex checkerVertex = Vertex.create("checker", ProcessorDescriptor.create(
UnionProcessor.class.getName()), 1);
outputConf.set(FileOutputFormat.OUTDIR, outputPath);
DataSinkDescriptor od = MROutput.createConfigBuilder(outputConf, null).build();
checkerVertex.addDataSink("union", od);
DataSinkDescriptor od2 = MROutput.createConfigBuilder(allPartsConf,
TextOutputFormat.class, outputPath + "-all-parts").build();
checkerVertex.addDataSink("all-parts", od2);
代码示例来源:origin: org.apache.tez/tez-tests
Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(
FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
.addTaskLocalFiles(commonLocalResources);
.groupSplits(false).build();
stage1Vertex.addDataSource("MRInput", dsd);
Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(
FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils
.createUserPayloadFromConf(stage2Conf)), dsd.getNumberOfShards());
stage2Vertex.addTaskLocalFiles(commonLocalResources);
stage2Vertex.addDataSink(
"MROutput",
DataSinkDescriptor.create(OutputDescriptor.create(MROutput.class.getName())
代码示例来源:origin: org.apache.tez/tez-api
groupBuilder.setGroupName(groupInfo.getGroupName());
for (Vertex v : groupInfo.getMembers()) {
groupBuilder.addGroupMembers(v.getName());
Resource vertexTaskResource = vertex.getTaskResource();
if (vertexTaskResource == null) {
vertexTaskResource = Resource.newInstance(tezConf.getInt(
vertexLRs.putAll(vertex.getTaskLocalFiles());
List<DataSourceDescriptor> dataSources = vertex.getDataSources();
for (DataSourceDescriptor dataSource : dataSources) {
if (dataSource.getCredentials() != null) {
TezCommonUtils
.addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs,
"Vertex " + vertex.getName());
.addAdditionalLocalResources(tezJarResources, vertexLRs, "Vertex " + vertex.getName());
int vertexParallelism = vertex.getParallelism();
VertexLocationHint vertexLocationHint = vertex.getLocationHint();
if (dataSources.size() == 1) {
DataSourceDescriptor dataSource = dataSources.get(0);
Preconditions.checkState(vertexLocationHint == null,
"Cannot specify vertex location hint without specifying vertex parallelism. Vertex: "
+ vertex.getName());
} else if (vertexLocationHint != null) {
Preconditions.checkState(vertexParallelism == vertexLocationHint.getTaskLocationHints().size(),
代码示例来源:origin: cwensel/cascading
vertex.addTaskLocalFiles( taskLocalResources );
vertex.addDataSource( FlowElements.id( flowElement ), dataSourceDescriptor );
vertex.addDataSink( FlowElements.id( flowElement ), dataSinkDescriptor );
addRemoteProfiling( flowNode, vertex );
if( vertex.getTaskLaunchCmdOpts() != null )
flowNode.addProcessAnnotation( TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS, vertex.getTaskLaunchCmdOpts() );
代码示例来源:origin: cwensel/cascading
/**
 * Conditionally appends JDWP remote-debugging options to the vertex's task
 * launch command.
 *
 * <p>Activated via {@code -Dtest.debug.node=<value>}; the node must match the
 * value either by source element name or by ordinal. The debugger address is
 * taken from {@code -Dtest.debug.address} (default {@code localhost:5005}).
 * The agent uses {@code server=n,suspend=y}, so the task JVM connects out to
 * a listening debugger and waits for it before running.
 */
private void addRemoteDebug( FlowNode flowNode, Vertex vertex )
  {
  String target = System.getProperty( "test.debug.node", null );

  if( Util.isEmpty( target ) )
    return;

  // Attach only to the node matching by source element name or by ordinal.
  if( !flowNode.getSourceElementNames().contains( target ) && asInt( target ) != flowNode.getOrdinal() )
    return;

  LOG.warn( "remote debugging enabled with property: {}, on node: {}, with node id: {}", "test.debug.node", target, flowNode.getID() );

  String launchOpts = vertex.getTaskLaunchCmdOpts();

  if( launchOpts == null )
    launchOpts = "";

  String address = System.getProperty( "test.debug.address", "localhost:5005" ).trim();

  vertex.setTaskLaunchCmdOpts( launchOpts + " -agentlib:jdwp=transport=dt_socket,server=n,address=" + address + ",suspend=y" );
  }
代码示例来源:origin: apache/drill
LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());
ByteBuffer userPayload = ByteBuffer.wrap(userPayloadBytes);
desc.setUserPayload(UserPayload.create(userPayload));
w.setVertexManagerPlugin(desc);
break;
代码示例来源:origin: apache/hive
Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, fs, ctx, false, work,
work.getVertexType(mapWork), DagUtils.createTezLrMap(appJarLr, null));
String vertexName = wx.getName();
dag.addVertex(wx);
utils.addCredentials(mapWork, dag);
代码示例来源:origin: org.apache.tez/tez-examples
/**
 * Assembles a single-vertex DAG whose "datagen" vertex produces three outputs:
 * the large stream-side file, the small hash-side file, and the expected join
 * result used for validation.
 *
 * @param tezConf            base configuration cloned for each output
 * @param largeOutPath       destination of the large (stream) data set
 * @param smallOutPath       destination of the small (hash) data set
 * @param expectedOutputPath destination of the precomputed expected output
 * @param numTasks           parallelism of the generator vertex
 * @param largeOutSize       total bytes of large output, split across tasks
 * @param smallOutSize       total bytes of small output, split across tasks
 * @return the configured, single-vertex DAG named "JoinDataGen"
 * @throws IOException if an output configuration cannot be built
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {
  // Spread the requested totals evenly across the generator tasks.
  long perTaskLarge = largeOutSize / numTasks;
  long perTaskSmall = smallOutSize / numTasks;

  // The per-task sizes travel to the processor via its serialized payload.
  UserPayload payload = UserPayload.create(
      ByteBuffer.wrap(GenDataProcessor.createConfiguration(perTaskLarge, perTaskSmall)));
  Vertex datagen = Vertex.create("datagen",
      ProcessorDescriptor.create(GenDataProcessor.class.getName()).setUserPayload(payload),
      numTasks);

  datagen.addDataSink(STREAM_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, largeOutPath.toUri().toString()).build());
  datagen.addDataSink(HASH_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, smallOutPath.toUri().toString()).build());
  datagen.addDataSink(EXPECTED_OUTPUT_NAME,
      MROutput.createConfigBuilder(new Configuration(tezConf),
          TextOutputFormat.class, expectedOutputPath.toUri().toString()).build());

  DAG dag = DAG.create("JoinDataGen");
  dag.addVertex(datagen);
  return dag;
}
代码示例来源:origin: cascading/cascading-hadoop2-tez
/**
 * Creates the Tez vertex that will run the given flow node, packing the node
 * and its configuration into the processor's user payload.
 *
 * @param flowNode    the Cascading flow node the vertex executes
 * @param conf        configuration the node is serialized into
 * @param parallelism number of tasks for the vertex
 * @return the new vertex, with the task environment applied when configured
 */
private Vertex newVertex( FlowNode flowNode, Configuration conf, int parallelism )
  {
  // todo: pack into payload directly
  conf.set( FlowNode.CASCADING_FLOW_NODE, pack( flowNode, conf ) );

  ProcessorDescriptor descriptor = ProcessorDescriptor.create( FlowProcessor.class.getName() );

  descriptor.setUserPayload( getPayload( conf ) );

  Vertex vertex = Vertex.create( flowNode.getID(), descriptor, parallelism );

  // Apply an explicit task environment only when one was configured.
  if( environment != null )
    vertex.setTaskEnvironment( environment );

  return vertex;
  }
代码示例来源:origin: org.apache.tez/tez-api
if (vertex.getParallelism() > -1) {
newKnownTasksVertices.add(vertex);
newKnownTasksVertices.clear();
for (Vertex v : knownTasksVertices) {
for (Edge e : v.getOutputEdges()) {
if (e.getEdgeProperty().getDataMovementType() == DataMovementType.ONE_TO_ONE) {
Vertex outVertex = e.getOutputVertex();
if (outVertex.getParallelism() == -1) {
LOG.info("Inferring parallelism for vertex: "
+ outVertex.getName() + " to be " + v.getParallelism()
+ " from 1-1 connection with vertex " + v.getName());
outVertex.setParallelism(v.getParallelism());
newKnownTasksVertices.add(outVertex);
if (inputVertex.getParallelism() != outputVertex.getParallelism()) {
if (outputVertex.getParallelism() != -1) {
throw new TezUncheckedException(
"1-1 Edge. Destination vertex parallelism must match source vertex. "
+ "Vertex: " + inputVertex.getName() + " does not match vertex: "
+ outputVertex.getName());
if (vertex.getParallelism() == -1) {
boolean hasInputInitializer = false;
if (vertex.getDataSources() != null && !vertex.getDataSources().isEmpty()) {
for (DataSourceDescriptor ds : vertex.getDataSources()) {
if (ds.getInputInitializerDescriptor() != null) {
hasInputInitializer = true;
代码示例来源:origin: org.apache.tez/tez-examples
.build();
Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
ForwardingProcessor.class.getName())).addDataSource("lhs",
MRInput
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
setVertexExecutionContext(lhsVertex, getLhsExecutionContext());
Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
ForwardingProcessor.class.getName())).addDataSource("rhs",
MRInput
.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
setVertexExecutionContext(rhsVertex, getRhsExecutionContext());
Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
JoinValidateProcessor.class.getName()), numPartitions);
setVertexExecutionContext(joinValidateVertex, getValidateExecutionContext());
代码示例来源:origin: apache/hive
/**
 * Enables Tez auto-reduce parallelism on the vertex when the edge requests it.
 *
 * <p>Configures a {@link ShuffleVertexManager} with the edge's minimum reducer
 * count and desired per-reducer input size, serializes that configuration into
 * the plugin's user payload, and installs the plugin on the vertex. A no-op
 * when the edge does not use auto-reduce.
 *
 * @param edgeProp edge properties carrying the auto-reduce settings
 * @param v        the reducer vertex to attach the vertex manager to
 * @throws IOException if the plugin configuration cannot be serialized
 */
private void setupAutoReducerParallelism(TezEdgeProperty edgeProp, Vertex v)
    throws IOException {
  if (!edgeProp.isAutoReduce()) {
    return;
  }

  // Plugin settings travel as a serialized Configuration in the user payload.
  Configuration pluginConf = new Configuration(false);
  pluginConf.setBoolean(
      ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
  pluginConf.setInt(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
      edgeProp.getMinReducer());
  pluginConf.setLong(
      ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
      edgeProp.getInputSizePerReducer());

  VertexManagerPluginDescriptor plugin =
      VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName());
  plugin.setUserPayload(TezUtils.createUserPayloadFromConf(pluginConf));
  v.setVertexManagerPlugin(plugin);
}
内容来源于网络,如有侵权,请联系作者删除!