org.apache.tez.dag.api.Vertex类的使用及代码示例

x33g5p2x  于2022-02-01 转载在 其他  
字(16.3k)|赞(0)|评价(0)|浏览(171)

本文整理了Java中org.apache.tez.dag.api.Vertex类的一些代码示例,展示了Vertex类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Vertex类的具体详情如下:
包路径:org.apache.tez.dag.api.Vertex
类名称:Vertex

Vertex介绍

[英]Defines a vertex in the DAG. It represents the application logic that processes and transforms the input data to create the output data. The vertex represents the template from which tasks are created to execute the application in parallel across a distributed execution environment.
[中]定义DAG中的顶点。它表示处理和转换输入数据以创建输出数据的应用程序逻辑。顶点表示从中创建任务以跨分布式执行环境并行执行应用程序的模板。

代码示例

代码示例来源:origin: apache/hive

/**
 * Builds the Tez vertex that executes a reduce-side unit of work.
 *
 * <p>The reduce plan is first stored into {@code conf} and the temporary directories for the
 * work are created. The vertex is then created with {@code ReduceTezProcessor} as its
 * processor and the serialized {@code conf} as the processor payload. Its task count is
 * {@code getMaxReduceTasks()} when auto reduce parallelism is enabled on the work, and
 * {@code getNumReduceTasks()} otherwise.
 *
 * @param conf job configuration; modified by this method (input name and reduce plan are set)
 * @param reduceWork the reduce work the vertex is built for
 * @param fs not referenced by this method
 * @param mrScratchDir scratch directory handed to {@code Utilities.setReduceWork}
 * @param ctx not referenced by this method
 * @param localResources local files added to every task of the vertex
 * @return the configured reducer vertex
 * @throws Exception if any of the helper calls fails
 */
private Vertex createVertex(JobConf conf, ReduceWork reduceWork, FileSystem fs,
  Path mrScratchDir, Context ctx, Map<String, LocalResource> localResources)
    throws Exception {
 // Publish the operator plan through the job configuration.
 conf.set(Utilities.INPUT_NAME, reduceWork.getName());
 Utilities.setReduceWork(conf, reduceWork, mrScratchDir, false);

 // FileSinkOperators need their directories to exist up front.
 Utilities.createTmpDirs(conf, reduceWork);

 VertexExecutionContext executionContext = createVertexExecutionContext(reduceWork);

 // Assemble the pieces of the vertex in the order they are consumed.
 String vertexName = reduceWork.getName();
 ProcessorDescriptor processor =
   ProcessorDescriptor.create(ReduceTezProcessor.class.getName())
     .setUserPayload(TezUtils.createUserPayloadFromConf(conf));
 int taskCount;
 if (reduceWork.isAutoReduceParallelism()) {
  taskCount = reduceWork.getMaxReduceTasks();
 } else {
  taskCount = reduceWork.getNumReduceTasks();
 }

 Vertex reduceVertex =
   Vertex.create(vertexName, processor, taskCount, getContainerResource(conf));

 reduceVertex.setTaskEnvironment(getContainerEnvironment(conf, false));
 reduceVertex.setExecutionContext(executionContext);
 reduceVertex.setTaskLaunchCmdOpts(getContainerJavaOpts(conf));
 reduceVertex.addTaskLocalFiles(localResources);
 return reduceVertex;
}

代码示例来源:origin: apache/hive

// Use CartesianProductVertexManager as the vertex manager plugin for v; its payload is
// produced by cpConfig from a TezConfiguration wrapping conf.
v.setVertexManagerPlugin(
  VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
   .setUserPayload(cpConfig.toUserPayload(new TezConfiguration(conf))));
// Attach a data sink named "out_" + work name that uses MROutput with conf serialized as
// its payload; the two remaining DataSinkDescriptor arguments are passed as null.
v.addDataSink("out_"+work.getName(), new DataSinkDescriptor(
  OutputDescriptor.create(MROutput.class.getName())
  .setUserPayload(TezUtils.createUserPayloadFromConf(conf)), null, null));

代码示例来源:origin: apache/hive

conf.set(Utilities.INPUT_NAME, mapWork.getName());
  LOG.info("Going through each work and adding MultiMRInput");
  mergeVx.addDataSource(mapWork.getName(),
    MultiMRInput.createConfigBuilder(conf, HiveInputFormat.class).build());
 byte[] userPayload = dob.getData();
 desc.setUserPayload(UserPayload.create(ByteBuffer.wrap(userPayload)));
 mergeVx.setVertexManagerPlugin(desc);
 return mergeVx;
} else {

代码示例来源:origin: apache/hive

/**
 * Sums the input record counts contributed by every input vertex of the named vertex.
 *
 * <p>For each input vertex the Tez-counter lookup is tried first; when it returns a negative
 * value, the Hive-counter lookup for that input vertex is used instead.
 *
 * @param vertexName name of the vertex whose input vertices are inspected
 * @return the total over all input vertices, or 0 when there are none
 */
private long hiveInputRecordsFromOtherVertices(String vertexName) {
 long total = 0;
 for (Vertex upstream : dag.getVertex(vertexName).getInputVertices()) {
  String upstreamName = upstream.getName();
  long fromTez = hiveInputRecordsFromTezCounters(vertexName, upstreamName);
  total += fromTez < 0 ? hiveInputRecordsFromHiveCounters(upstreamName) : fromTez;
 }
 return total;
}

代码示例来源:origin: apache/hive

/**
 * Creates the {@code TaskSpec} for one task of the named vertex.
 *
 * <p>The vertex must have exactly one input and exactly one output; otherwise
 * {@code Preconditions.checkState} fails. Each input and output is turned into a spec with a
 * count of 1. The attempt id is built from {@code appId} with DAG id 0, vertex id 0, task
 * id {@code index} and attempt number 0.
 *
 * @param dag the DAG containing the vertex
 * @param vertexName name of the vertex to build the spec for
 * @param numSplits passed through to the {@code TaskSpec} constructor
 * @param appId application id the generated ids are derived from
 * @param index task index within the vertex
 * @return the task specification
 */
public TaskSpec constructTaskSpec(DAG dag, String vertexName, int numSplits, ApplicationId appId, int index) {
 Vertex target = dag.getVertex(vertexName);
 ProcessorDescriptor processor = target.getProcessorDescriptor();
 List<RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor>> rootInputs =
   target.getInputs();
 List<RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor>> leafOutputs =
   target.getOutputs();

 // Only single-input, single-output vertices are supported here.
 Preconditions.checkState(rootInputs.size() == 1);
 Preconditions.checkState(leafOutputs.size() == 1);

 List<InputSpec> inSpecs = new ArrayList<>();
 for (RootInputLeafOutput<InputDescriptor, InputInitializerDescriptor> in : rootInputs) {
  inSpecs.add(new InputSpec(in.getName(), in.getIODescriptor(), 1));
 }

 List<OutputSpec> outSpecs = new ArrayList<>();
 for (RootInputLeafOutput<OutputDescriptor, OutputCommitterDescriptor> out : leafOutputs) {
  outSpecs.add(new OutputSpec(out.getName(), out.getIODescriptor(), 1));
 }

 // DAG 0 / vertex 0 / task <index> / attempt 0.
 TezTaskAttemptID attemptId = TezTaskAttemptID.getInstance(
   TezTaskID.getInstance(
     TezVertexID.getInstance(TezDAGID.getInstance(appId, 0), 0),
     index),
   0);

 return new TaskSpec(attemptId, dag.getName(), vertexName, numSplits, processor, inSpecs, outSpecs, null);
}

代码示例来源:origin: org.apache.tez/tez-mapreduce

Vertex vertex = Vertex.create(vertexName,
  ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload),
  numTasks, taskResource);
if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
  TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
 vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf));
 vertex.addDataSource("MRInput",
   configureMRInputWithLegacySplitsGenerated(stageConf, true));
  od.setHistoryText(TezUtils.convertToHistoryText(stageConf));
 vertex.addDataSink("MROutput", DataSinkDescriptor.create(od,
   OutputCommitterDescriptor.create(MROutputCommitter.class.getName()), null));
  : MRHelpers.getJavaOptsForMRReducer(stageConf);
vertex.setTaskEnvironment(taskEnv)
  .addTaskLocalFiles(taskLocalResources)
  .setLocationHint(VertexLocationHint.create(locations))
  .setTaskLaunchCmdOpts(taskJavaOpts);
 vertex.setVertexManagerPlugin((ShuffleVertexManager.createConfigBuilder(stageConf).build()));
   + vertex.getName() + ", processor="
   + vertex.getProcessorDescriptor().getClassName() + ", parallelism="
   + vertex.getParallelism() + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
   + ", resources=" + vertex.getTaskResource()

代码示例来源:origin: org.apache.pig/pig

Vertex vertex = Vertex.create(tezOp.getOperatorKey().toString(), procDesc, parallel, resource);
  vertex.setTaskLaunchCmdOpts(mapTaskLaunchCmdOpts);
  vertex.setTaskEnvironment(mapTaskEnv);
} else {
  vertex.setTaskLaunchCmdOpts(reduceTaskLaunchCmdOpts);
  vertex.setTaskEnvironment(reduceTaskEnv);
    + ", memory=" + vertex.getTaskResource().getMemory()
    + ", java opts=" + vertex.getTaskLaunchCmdOpts()
    );
log.info("Processing aliases: " + alias);
          + " for vertex " + vertex.getName()
          + " as the serialized size in memory is "
          + splitsSerializedSize + ". Configured "
          fs, inputSplitInfo,
          additionalLocalResources);
      inputSplitInDiskVertices.add(vertex.getName());
    } else {
  vertex.setLocationHint(VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()));
  vertex.addDataSource(ld.getOperatorKey().toString(),
      DataSourceDescriptor.create(InputDescriptor.create(MRInput.class.getName())
          .setUserPayload(UserPayload.create(userPayLoadBuilder.build().toByteString().asReadOnlyByteBuffer())),
    vertex.addDataSink(outputKey.toString(),
        DataSinkDescriptor.create(storeOutDescriptor,

代码示例来源:origin: org.apache.tez/tez-tests

DataSourceDescriptor.create(inputDescriptor, inputInitializerDescriptor, null);
// Two source vertices, both running TokenProcessor and both reading from the same
// data source descriptor under the name INPUT.
Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v1.addDataSource(INPUT, dataSourceDescriptor);
Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
v2.addDataSource(INPUT, dataSourceDescriptor);
// Serialize the cartesian product configuration for use as the vertex manager payload.
UserPayload userPayload = cartesianProductConfig.toUserPayload(tezConf);
// Third vertex runs JoinProcessor, writes to the OUTPUT sink, and is managed by
// CartesianProductVertexManager configured with the payload above.
Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
v3.addDataSink(OUTPUT, dataSinkDescriptor);
v3.setVertexManagerPlugin(
 VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
                .setUserPayload(userPayload));

代码示例来源:origin: org.apache.tez/tez-tests

// Build the data source shared by all map vertices; generateSplitsInAM is set to false.
DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();
// Three map vertices, each running TokenProcessor with numMaps tasks and reading the
// shared data source under the name "MRInput".
Vertex mapVertex1 = Vertex.create("map1", ProcessorDescriptor.create(
  TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex2 = Vertex.create("map2", ProcessorDescriptor.create(
  TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
Vertex mapVertex3 = Vertex.create("map3", ProcessorDescriptor.create(
  TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);
// A single-task vertex running UnionProcessor.
Vertex checkerVertex = Vertex.create("checker", ProcessorDescriptor.create(
  UnionProcessor.class.getName()), 1);
// First sink "union": MROutput built from outputConf, whose output directory is outputPath.
outputConf.set(FileOutputFormat.OUTDIR, outputPath);
DataSinkDescriptor od = MROutput.createConfigBuilder(outputConf, null).build();
checkerVertex.addDataSink("union", od);
// Second sink "all-parts": TextOutputFormat output at outputPath + "-all-parts".
DataSinkDescriptor od2 = MROutput.createConfigBuilder(allPartsConf,
  TextOutputFormat.class, outputPath + "-all-parts").build();
checkerVertex.addDataSink("all-parts", od2);

代码示例来源:origin: org.apache.tez/tez-tests

Vertex stage1Vertex = Vertex.create("stage1", ProcessorDescriptor.create(
  FilterByWordInputProcessor.class.getName()).setUserPayload(stage1Payload))
  .addTaskLocalFiles(commonLocalResources);
   .groupSplits(false).build();
stage1Vertex.addDataSource("MRInput", dsd);
Vertex stage2Vertex = Vertex.create("stage2", ProcessorDescriptor.create(
  FilterByWordOutputProcessor.class.getName()).setUserPayload(TezUtils
  .createUserPayloadFromConf(stage2Conf)), dsd.getNumberOfShards());
stage2Vertex.addTaskLocalFiles(commonLocalResources);
stage2Vertex.addDataSink(
  "MROutput",
  DataSinkDescriptor.create(OutputDescriptor.create(MROutput.class.getName())

代码示例来源:origin: org.apache.tez/tez-api

groupBuilder.setGroupName(groupInfo.getGroupName());
 for (Vertex v : groupInfo.getMembers()) {
  groupBuilder.addGroupMembers(v.getName());
Resource vertexTaskResource = vertex.getTaskResource();
if (vertexTaskResource == null) {
 vertexTaskResource = Resource.newInstance(tezConf.getInt(
vertexLRs.putAll(vertex.getTaskLocalFiles());
List<DataSourceDescriptor> dataSources = vertex.getDataSources();
for (DataSourceDescriptor dataSource : dataSources) {
 if (dataSource.getCredentials() != null) {
  TezCommonUtils
    .addAdditionalLocalResources(dataSource.getAdditionalLocalFiles(), vertexLRs,
      "Vertex " + vertex.getName());
   .addAdditionalLocalResources(tezJarResources, vertexLRs, "Vertex " + vertex.getName());
int vertexParallelism = vertex.getParallelism();
VertexLocationHint vertexLocationHint = vertex.getLocationHint();
if (dataSources.size() == 1) {
 DataSourceDescriptor dataSource = dataSources.get(0);
 Preconditions.checkState(vertexLocationHint == null,
   "Cannot specify vertex location hint without specifying vertex parallelism. Vertex: "
     + vertex.getName());
} else if (vertexLocationHint != null) {
 Preconditions.checkState(vertexParallelism == vertexLocationHint.getTaskLocationHints().size(),

代码示例来源:origin: cwensel/cascading

vertex.addTaskLocalFiles( taskLocalResources );
 vertex.addDataSource( FlowElements.id( flowElement ), dataSourceDescriptor );
 vertex.addDataSink( FlowElements.id( flowElement ), dataSinkDescriptor );
addRemoteProfiling( flowNode, vertex );
if( vertex.getTaskLaunchCmdOpts() != null )
 flowNode.addProcessAnnotation( TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS, vertex.getTaskLaunchCmdOpts() );

代码示例来源:origin: cwensel/cascading

/**
 * Appends a JDWP agent option to the task launch options of the given vertex when the
 * {@code test.debug.node} system property selects the given flow node.
 *
 * <p>Nothing happens when the property is empty, or when its value is neither one of the
 * node's source element names nor (parsed via {@code asInt}) equal to the node's ordinal.
 * The debugger address is read from {@code test.debug.address}, defaulting to
 * {@code localhost:5005}.
 *
 * @param flowNode the flow node the vertex was created for
 * @param vertex the vertex whose launch options are extended
 */
private void addRemoteDebug( FlowNode flowNode, Vertex vertex )
 {
 String target = System.getProperty( "test.debug.node", null );

 if( Util.isEmpty( target ) )
  return;

 // match by source element name first, fall back to comparing the node ordinal
 boolean matchesByName = flowNode.getSourceElementNames().contains( target );

 if( !matchesByName && asInt( target ) != flowNode.getOrdinal() )
  return;

 LOG.warn( "remote debugging enabled with property: {}, on node: {}, with node id: {}", "test.debug.node", target, flowNode.getID() );

 String existing = vertex.getTaskLaunchCmdOpts();
 String address = System.getProperty( "test.debug.address", "localhost:5005" ).trim();
 String agent = " -agentlib:jdwp=transport=dt_socket,server=n,address=" + address + ",suspend=y";

 // a vertex without launch options starts from an empty string
 vertex.setTaskLaunchCmdOpts( ( existing == null ? "" : existing ) + agent );
 }

代码示例来源:origin: apache/drill

LOG.info("Creating Edge between " + group.getGroupName() + " and " + w.getName());
 ByteBuffer userPayload = ByteBuffer.wrap(userPayloadBytes);
 desc.setUserPayload(UserPayload.create(userPayload));
 w.setVertexManagerPlugin(desc);
 break;

代码示例来源:origin: apache/hive

// Create the vertex for mapWork through utils.createVertex, handing it a local-resource
// map built from appJarLr by DagUtils.createTezLrMap.
Vertex wx = utils.createVertex(wxConf, mapWork, scratchDir, fs, ctx, false, work,
  work.getVertexType(mapWork), DagUtils.createTezLrMap(appJarLr, null));
String vertexName = wx.getName();
// Register the vertex with the DAG, then add the credentials for mapWork to the DAG.
dag.addVertex(wx);
utils.addCredentials(mapWork, dag);

代码示例来源:origin: org.apache.tez/tez-examples

/**
 * Builds the single-vertex "JoinDataGen" DAG.
 *
 * <p>The "datagen" vertex runs {@code GenDataProcessor} with {@code numTasks} tasks. Its
 * payload is created from the per-task output sizes ({@code largeOutSize / numTasks} and
 * {@code smallOutSize / numTasks}, using integer division). Three text-format
 * {@code MROutput} sinks are attached, in this order: stream output at
 * {@code largeOutPath}, hash output at {@code smallOutPath}, and expected output at
 * {@code expectedOutputPath}.
 *
 * @param tezConf configuration copied for each sink
 * @param largeOutPath destination of the stream output
 * @param smallOutPath destination of the hash output
 * @param expectedOutputPath destination of the expected output
 * @param numTasks task count of the vertex and divisor for the per-task sizes
 * @param largeOutSize total size used for the large output
 * @param smallOutSize total size used for the small output
 * @return the DAG containing the data generation vertex
 * @throws IOException if creating the processor configuration fails
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
  Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
  throws IOException {
 long largePerTask = largeOutSize / numTasks;
 long smallPerTask = smallOutSize / numTasks;

 DAG dataGenDag = DAG.create("JoinDataGen");

 ProcessorDescriptor processor = ProcessorDescriptor.create(GenDataProcessor.class.getName());
 byte[] processorConf = GenDataProcessor.createConfiguration(largePerTask, smallPerTask);
 processor.setUserPayload(UserPayload.create(ByteBuffer.wrap(processorConf)));
 Vertex dataGen = Vertex.create("datagen", processor, numTasks);

 // Sink names and their destinations, kept in registration order.
 String[] sinkNames = {STREAM_OUTPUT_NAME, HASH_OUTPUT_NAME, EXPECTED_OUTPUT_NAME};
 Path[] sinkPaths = {largeOutPath, smallOutPath, expectedOutputPath};
 for (int i = 0; i < sinkNames.length; i++) {
  // Every sink gets its own copy of the configuration.
  dataGen.addDataSink(sinkNames[i],
    MROutput.createConfigBuilder(new Configuration(tezConf),
      TextOutputFormat.class, sinkPaths[i].toUri().toString()).build());
 }

 dataGenDag.addVertex(dataGen);
 return dataGenDag;
}

代码示例来源:origin: cascading/cascading-hadoop2-tez

/**
 * Creates the vertex for a flow node.
 *
 * <p>The packed flow node is stored in {@code conf} under
 * {@code FlowNode.CASCADING_FLOW_NODE}, and the payload obtained from {@code conf} becomes
 * the user payload of a {@code FlowProcessor} descriptor. The task environment is applied
 * only when the {@code environment} field is non-null.
 *
 * @param flowNode the node the vertex represents; its id becomes the vertex name
 * @param conf configuration that receives the packed node and supplies the payload
 * @param parallelism parallelism passed to {@code Vertex.create}
 * @return the new vertex
 */
private Vertex newVertex( FlowNode flowNode, Configuration conf, int parallelism )
 {
 conf.set( FlowNode.CASCADING_FLOW_NODE, pack( flowNode, conf ) ); // todo: pack into payload directly

 ProcessorDescriptor processor = ProcessorDescriptor.create( FlowProcessor.class.getName() )
  .setUserPayload( getPayload( conf ) );

 Vertex result = Vertex.create( flowNode.getID(), processor, parallelism );

 if( environment == null )
  return result;

 result.setTaskEnvironment( environment );

 return result;
 }

代码示例来源:origin: org.apache.tez/tez-api

if (vertex.getParallelism() > -1) {
 newKnownTasksVertices.add(vertex);
newKnownTasksVertices.clear();
for (Vertex v : knownTasksVertices) {
 for (Edge e : v.getOutputEdges()) {
  if (e.getEdgeProperty().getDataMovementType() == DataMovementType.ONE_TO_ONE) {
   Vertex outVertex = e.getOutputVertex();
   if (outVertex.getParallelism() == -1) {
    LOG.info("Inferring parallelism for vertex: "
      + outVertex.getName() + " to be " + v.getParallelism()
      + " from 1-1 connection with vertex " + v.getName());
    outVertex.setParallelism(v.getParallelism());
    newKnownTasksVertices.add(outVertex);
 if (inputVertex.getParallelism() != outputVertex.getParallelism()) {
  if (outputVertex.getParallelism() != -1) {
   throw new TezUncheckedException(
     "1-1 Edge. Destination vertex parallelism must match source vertex. "
     + "Vertex: " + inputVertex.getName() + " does not match vertex: " 
     + outputVertex.getName());
if (vertex.getParallelism() == -1) {
 boolean hasInputInitializer = false;
 if (vertex.getDataSources() != null && !vertex.getDataSources().isEmpty()) {
  for (DataSourceDescriptor ds : vertex.getDataSources()) {
   if (ds.getInputInitializerDescriptor() != null) {
    hasInputInitializer = true;

代码示例来源:origin: org.apache.tez/tez-examples

.build();
Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
  ForwardingProcessor.class.getName())).addDataSource("lhs",
  MRInput
    .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
setVertexExecutionContext(lhsVertex, getLhsExecutionContext());
Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
  ForwardingProcessor.class.getName())).addDataSource("rhs",
  MRInput
    .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
setVertexExecutionContext(rhsVertex, getRhsExecutionContext());
Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
  JoinValidateProcessor.class.getName()), numPartitions);
setVertexExecutionContext(joinValidateVertex, getValidateExecutionContext());

代码示例来源:origin: apache/hive

/**
 * Installs a {@code ShuffleVertexManager} plugin on the vertex when the edge requests auto
 * reduce.
 *
 * <p>The plugin payload is a fresh configuration (created without defaults) holding three
 * settings: auto parallelism enabled, minimum task parallelism taken from
 * {@code edgeProp.getMinReducer()}, and desired task input size taken from
 * {@code edgeProp.getInputSizePerReducer()}. When {@code edgeProp.isAutoReduce()} is false
 * the vertex is left untouched.
 *
 * @param edgeProp edge property supplying the auto-reduce flag and tuning values
 * @param v vertex that receives the vertex manager plugin
 * @throws IOException if the plugin configuration cannot be turned into a payload
 */
private void setupAutoReducerParallelism(TezEdgeProperty edgeProp, Vertex v)
 throws IOException {
 if (!edgeProp.isAutoReduce()) {
  return;
 }

 // Start from an empty configuration so only the settings below end up in the payload.
 Configuration managerConf = new Configuration(false);
 managerConf.setBoolean(
   ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL, true);
 managerConf.setInt(
   ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_MIN_TASK_PARALLELISM,
   edgeProp.getMinReducer());
 managerConf.setLong(
   ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_DESIRED_TASK_INPUT_SIZE,
   edgeProp.getInputSizePerReducer());

 UserPayload managerPayload = TezUtils.createUserPayloadFromConf(managerConf);
 v.setVertexManagerPlugin(
   VertexManagerPluginDescriptor.create(ShuffleVertexManager.class.getName())
     .setUserPayload(managerPayload));
}

相关文章