org.apache.hadoop.mapreduce.OutputFormat类的使用及代码示例

x33g5p2x  于2022-01-25 转载在 其他  
字(13.8k)|赞(0)|评价(0)|浏览(89)

本文整理了Java中org.apache.hadoop.mapreduce.OutputFormat类的一些代码示例,展示了OutputFormat类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。OutputFormat类的具体详情如下:
包路径:org.apache.hadoop.mapreduce.OutputFormat
类名称:OutputFormat

OutputFormat介绍

[英]OutputFormat describes the output-specification for a Map-Reduce job.

The Map-Reduce framework relies on the OutputFormat of the job to:

  1. Validate the output-specification of the job. For e.g. check that the output directory doesn't already exist.
  2. Provide the RecordWriter implementation to be used to write out the output files of the job. Output files are stored in a FileSystem.
    [中]OutputFormat描述Map-Reduce作业的输出规范。
    Map-Reduce框架依赖于作业的OutputFormat来:
    1.验证作业的输出规范。例如,检查输出目录是否尚不存在。
    2.提供用于写出作业输出文件的RecordWriter实现。输出文件存储在FileSystem中。

代码示例

代码示例来源:origin: apache/hive

/**
 * Returns the output committer of the output format resolved for the given
 * task context. The committer is what ensures the output is committed
 * correctly.
 * @param context the task context
 * @return the committer supplied by the resolved output format
 * @throws IOException if the output format or its committer reports an I/O failure
 * @throws InterruptedException if the lookup is interrupted
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
  throws IOException, InterruptedException {
 // Pure delegation: resolve the format for this context, then ask it for its committer.
 return getOutputFormat(context)
   .getOutputCommitter(context);
}

代码示例来源:origin: apache/avro

/**
 * Returns the record writer registered for {@code baseFileName}, creating and
 * caching one on first request.
 *
 * @param taskContext task context whose configuration and output format class are used
 * @param baseFileName cache key; also stored under {@code avro.mo.config.namedOutput}
 * @return the cached or newly created writer
 * @throws IOException if the writer cannot be created, including when the
 *         output format class cannot be found
 * @throws InterruptedException if writer creation is interrupted
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(
  TaskAttemptContext taskContext, String baseFileName)
  throws IOException, InterruptedException {
 // Cache hit: hand back the writer created earlier for this base name.
 RecordWriter cached = recordWriters.get(baseFileName);
 if (cached != null) {
  return cached;
 }

 // Cache miss: publish the named output in the configuration before the
 // output format is instantiated from that same configuration.
 taskContext.getConfiguration().set("avro.mo.config.namedOutput",baseFileName);

 RecordWriter created;
 try {
  OutputFormat format = (OutputFormat) ReflectionUtils.newInstance(
    taskContext.getOutputFormatClass(), taskContext.getConfiguration());
  created = format.getRecordWriter(taskContext);
 } catch (ClassNotFoundException e) {
  throw new IOException(e);
 }

 // With counters enabled, wrap the writer so writes also increment counters.
 if (countersEnabled) {
  created = new RecordWriterWithCounter(created, baseFileName, context);
 }

 // Remember the writer so later calls for the same name reuse it.
 recordWriters.put(baseFileName, created);
 return created;
}

代码示例来源:origin: apache/hive

/**
 * Validates the job's output specification by delegating to the output
 * format resolved for the given job context.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 * @throws InterruptedException if the check is interrupted
 */
@Override
public void checkOutputSpecs(JobContext context)
  throws IOException, InterruptedException {
 // Pure delegation: the resolved output format performs the actual check.
 getOutputFormat(context)
   .checkOutputSpecs(context);
}

代码示例来源:origin: apache/flink

this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
  this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));
} catch (Exception e) {
this.context.getCredentials().addAll(this.credentials);
Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
if (currentUserCreds != null) {
  this.context.getCredentials().addAll(currentUserCreds);
  this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
} catch (InterruptedException e) {
  throw new IOException("Could not create RecordWriter.", e);

代码示例来源:origin: apache/ignite

/**
 * Puts a record writer into the Hadoop context and returns the associated
 * output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws IgniteCheckedException If an I/O error occurs or a class cannot be found.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx)
  throws IgniteCheckedException, InterruptedException {
  try {
    OutputFormat fmt = getOutputFormat(jobCtx);

    assert fmt != null;

    // The committer may be absent; set up the task only when one is provided.
    OutputCommitter committer = fmt.getOutputCommitter(hadoopCtx);

    if (committer != null) {
      committer.setupTask(hadoopCtx);
    }

    // Create the writer and register it with the Hadoop context.
    hadoopCtx.writer(fmt.getRecordWriter(hadoopCtx));

    return fmt;
  }
  catch (IOException | ClassNotFoundException e) {
    throw new IgniteCheckedException(e);
  }
}

代码示例来源:origin: apache/ignite

/** {@inheritDoc} */
  @Override protected void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    try {
      JobContextImpl jobContext = taskCtx.jobContext();

      OutputFormat fmt = getOutputFormat(jobContext);

      // Validate the output specification before any job setup happens.
      fmt.checkOutputSpecs(jobContext);

      // The committer may be absent; set up the job only when one is provided.
      OutputCommitter jobCommitter = fmt.getOutputCommitter(hadoopContext());

      if (jobCommitter != null) {
        jobCommitter.setupJob(jobContext);
      }
    }
    catch (ClassNotFoundException | IOException e) {
      throw new IgniteCheckedException(e);
    }
    catch (InterruptedException e) {
      // Restore the interrupt status before translating the exception.
      Thread.currentThread().interrupt();

      throw new IgniteInterruptedCheckedException(e);
    }
  }
}

代码示例来源:origin: org.apache.hadoop/hadoop-mapreduce-client-common

/**
 * Creates the output committer for a job.
 * <p>
 * With {@code newApiCommitter} set, the committer is obtained from the job's
 * output format using a task attempt context built for map task 0, attempt 0.
 * Otherwise the class configured under {@code mapred.output.committer.class}
 * is instantiated, falling back to {@code FileOutputCommitter}.
 *
 * @param newApiCommitter whether to take the committer from the job's output format
 * @param jobId id of the job the committer is created for
 * @param conf job configuration
 * @return the created committer
 * @throws Exception if the committer cannot be created
 */
private org.apache.hadoop.mapreduce.OutputCommitter 
createOutputCommitter(boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
 LOG.info("OutputCommitter set in config "
   + conf.get("mapred.output.committer.class"));

 final org.apache.hadoop.mapreduce.OutputCommitter result;
 if (!newApiCommitter) {
  // Old API: instantiate the configured committer class directly.
  result = ReflectionUtils.newInstance(conf.getClass(
    "mapred.output.committer.class", FileOutputCommitter.class,
    org.apache.hadoop.mapred.OutputCommitter.class), conf);
 } else {
  // New API: the output format supplies the committer, and it needs a
  // task attempt context to do so.
  org.apache.hadoop.mapreduce.TaskAttemptID attemptId =
    new org.apache.hadoop.mapreduce.TaskAttemptID(
      new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0), 0);
  org.apache.hadoop.mapreduce.TaskAttemptContext attemptContext =
    new TaskAttemptContextImpl(conf, attemptId);
  OutputFormat format =
    ReflectionUtils.newInstance(attemptContext.getOutputFormatClass(), conf);
  result = format.getOutputCommitter(attemptContext);
 }

 LOG.info("OutputCommitter is " + result.getClass().getName());
 return result;
}

代码示例来源:origin: cdapio/cdap

/**
 * Checks the output specification of every named output of the job, each
 * against its own named job context.
 *
 * @param context the job context listing the named outputs
 * @throws IOException if a named output format rejects its specification
 * @throws InterruptedException if a check is interrupted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
 for (String outputName : MultipleOutputs.getNamedOutputsList(context)) {
  Class<? extends OutputFormat> formatClass =
    MultipleOutputs.getNamedOutputFormatClass(context, outputName);
  JobContext perOutputContext = MultipleOutputs.getNamedJobContext(context, outputName);

  // Instantiate the format with the configuration of its own named context.
  OutputFormat<K, V> format =
    ReflectionUtils.newInstance(formatClass, perOutputContext.getConfiguration());
  format.checkOutputSpecs(perOutputContext);
 }
}

代码示例来源:origin: apache/hive

/**
 * Creates one base record writer container per output format alias found
 * for the given context, keyed by alias in insertion order.
 *
 * @param context the task attempt context the per-alias contexts are derived from
 * @throws IOException if an output directory or record writer cannot be created
 * @throws InterruptedException if record writer creation is interrupted
 */
public MultiRecordWriter(TaskAttemptContext context) throws IOException,
  InterruptedException {
 baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
 for (String alias : getOutputFormatAliases(context)) {
  LOGGER.info("Creating record writer for alias: " + alias);
  TaskAttemptContext perAliasContext = getTaskAttemptContext(alias, context);
  Configuration perAliasConf = perAliasContext.getConfiguration();

  // Make sure the configured output directory exists before opening a writer.
  String configuredDir = perAliasConf.get("mapred.output.dir");
  if (configuredDir != null) {
   Path dirPath = new Path(configuredDir);
   FileSystem fileSystem = dirPath.getFileSystem(perAliasConf);
   if (!fileSystem.exists(dirPath)) {
    fileSystem.mkdirs(dirPath);
   }
  }

  OutputFormat<?, ?> format = getOutputFormatInstance(perAliasContext);
  BaseRecordWriterContainer container =
    new BaseRecordWriterContainer(format.getRecordWriter(perAliasContext), perAliasContext);
  baseRecordWriters.put(alias, container);
 }
}

代码示例来源:origin: org.apache.crunch/crunch-core

/**
 * Builds a composite committer holding one output committer per named
 * output found in the configuration of the given task attempt context.
 *
 * @param tac the task attempt context
 * @return a {@code CompositeOutputCommitter} over all named outputs
 * @throws IOException if a committer cannot be obtained
 * @throws InterruptedException if obtaining a committer is interrupted
 */
public static OutputCommitter getOutputCommitter(TaskAttemptContext tac) throws IOException, InterruptedException {
 final Map<String, OutputConfig> namedOutputs = getNamedOutputs(tac.getConfiguration());
 final Map<String, OutputCommitter> committersByName = Maps.newHashMap();

 for (Map.Entry<String, OutputConfig> entry : namedOutputs.entrySet()) {
  final String name = entry.getKey();
  // Each named output gets its own Job, from which format and task context are derived.
  final Job namedJob = getJob(tac.getJobID(), name, tac.getConfiguration());
  final OutputFormat format = getOutputFormat(name, namedJob, entry.getValue());
  committersByName.put(name, format.getOutputCommitter(getTaskContext(tac, namedJob)));
 }

 return new CompositeOutputCommitter(namedOutputs, committersByName);
}

代码示例来源:origin: apache/hive

/**
 * Returns the record writer for the job by delegating to the output format
 * resolved for the given task context.
 * @param context the information about the current task
 * @return a RecordWriter to write the output for the job
 * @throws IOException if the output format or its writer reports an I/O failure
 * @throws InterruptedException if writer creation is interrupted
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(
  TaskAttemptContext context) throws IOException, InterruptedException {
 // Pure delegation: the resolved output format creates the writer.
 return getOutputFormat(context)
   .getRecordWriter(context);
}

代码示例来源:origin: seznam/euphoria

/**
 * Commits the job output through the output committer obtained for a
 * cleanup task context.
 *
 * @throws IOException wrapping any failure raised while committing
 */
@Override
public void commit() throws IOException {
 try {
  // A dedicated cleanup context is used both to look up the committer and as the commit argument.
  final TaskAttemptContext ctx =
    HadoopUtils.createCleanupTaskContext(conf.get(), jobID.get());
  getOutputFormat(ctx.getTaskAttemptID())
    .getOutputCommitter(ctx)
    .commitJob(ctx);
 } catch (Exception failure) {
  throw new IOException("Unable to commit output", failure);
 }
}

代码示例来源:origin: seznam/euphoria

/**
 * Opens a writer for the given partition, backed by the record writer and
 * output committer obtained for a task context created for that partition.
 *
 * @param partitionId id of the partition to write
 * @return a new {@code HadoopWriter} for the partition
 * @throws RuntimeException wrapping any failure raised while opening the writer
 */
@Override
public HadoopWriter<K, V> openWriter(int partitionId) {
 try {
  final TaskAttemptContext ctx =
    HadoopUtils.createTaskContext(conf.get(), jobID.get(), partitionId);
  final OutputFormat<K, V> format = getOutputFormat(ctx.getTaskAttemptID());

  // The writer bundles the record writer, its committer and the context they share.
  return new HadoopWriter<>(
    format.getRecordWriter(ctx),
    format.getOutputCommitter(ctx),
    ctx);
 } catch (Exception failure) {
  throw new RuntimeException(failure);
 }
}

代码示例来源:origin: apache/attic-mrunit

/**
 * Writes one key/value pair through the output format's record writer,
 * creating the writer on first use.
 * <p>
 * Before the writer is created, each of the output key class, output value
 * class and task attempt id that the task attempt context reports as
 * {@code null} is stubbed via {@code when(...).thenReturn(...)}: the classes
 * with the runtime classes of the supplied key and value, the id with
 * {@code TASK_ID}.
 *
 * @param key the key to write
 * @param value the value to write
 * @throws IOException if the record writer reports an I/O failure
 * @throws IllegalStateException if the thread is interrupted while creating
 *         the writer or writing; the interrupt status is restored first
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void collect(K key, V value) throws IOException {
 try {
  if (recordWriter == null) {
   if(taskAttemptContext.getOutputKeyClass() == null) {
    when(taskAttemptContext.getOutputKeyClass()).thenReturn((Class)key.getClass());
   }
   if(taskAttemptContext.getOutputValueClass() == null) {
    when(taskAttemptContext.getOutputValueClass()).thenReturn((Class)value.getClass());
   }
   if(taskAttemptContext.getTaskAttemptID() == null) {
    when(taskAttemptContext.getTaskAttemptID()).thenReturn(TASK_ID);
   }
   recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
  }
  recordWriter.write(key, value);
 } catch (InterruptedException e) {
  // Catching InterruptedException clears the interrupt status; restore it so
  // code further up the stack can still observe the interruption.
  Thread.currentThread().interrupt();
  throw new IllegalStateException(e);
 }
}

代码示例来源:origin: seznam/euphoria

/**
 * Validates the job's output specification and then sets up the job output,
 * both using a setup task context. The work runs inside
 * {@code ExceptionUtils.unchecked}.
 */
@Override
public void initialize() {
 ExceptionUtils.unchecked(() -> {
  final TaskAttemptContext ctx =
    HadoopUtils.createSetupTaskContext(conf.get(), jobID.get());
  final OutputFormat<K, V> format = getOutputFormat(ctx.getTaskAttemptID());

  // First make sure the output specification is valid for this job ...
  format.checkOutputSpecs(ctx);

  // ... then let the committer prepare the job output.
  format.getOutputCommitter(ctx).setupJob(ctx);
 });
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-io-hadoop-format

/**
 * Creates a record writer from the given output format for the given task
 * attempt, logging the task id and the job tracker identifier first.
 *
 * @param outputFormatObj output format that creates the writer
 * @param taskAttemptContext context of the task attempt the writer belongs to
 * @return the newly created record writer
 * @throws IllegalStateException if the writer cannot be created because of an
 *         I/O failure or an interruption; on interruption the thread's
 *         interrupt status is restored before throwing
 */
private RecordWriter<KeyT, ValueT> initRecordWriter(
  OutputFormat<KeyT, ValueT> outputFormatObj, TaskAttemptContext taskAttemptContext)
  throws IllegalStateException {
 try {
  LOGGER.info(
    "Creating new RecordWriter for task {} of Job with id {}.",
    taskAttemptContext.getTaskAttemptID().getTaskID().getId(),
    taskAttemptContext.getJobID().getJtIdentifier());
  return outputFormatObj.getRecordWriter(taskAttemptContext);
 } catch (InterruptedException e) {
  // Catching InterruptedException clears the interrupt status; restore it so
  // callers can still observe the interruption after the unchecked rethrow.
  Thread.currentThread().interrupt();
  throw new IllegalStateException("Unable to create RecordWriter object: ", e);
 } catch (IOException e) {
  throw new IllegalStateException("Unable to create RecordWriter object: ", e);
 }
}

代码示例来源:origin: org.apache.beam/beam-sdks-java-io-hadoop-format

/**
 * Obtains the output committer from the given output format and, when one is
 * returned, sets up the job on it using a job context built from
 * {@code conf} and the task attempt's job id.
 *
 * @param outputFormatObj output format that supplies the committer
 * @param conf configuration used to build the job context for setup
 * @param taskAttemptContext context used to obtain the committer and the job id
 * @return the committer returned by the output format; may be {@code null}
 * @throws IllegalStateException if obtaining the committer or setting up the
 *         job fails; on interruption the thread's interrupt status is restored
 *         before throwing
 */
private static OutputCommitter initOutputCommitter(
  OutputFormat<?, ?> outputFormatObj,
  Configuration conf,
  TaskAttemptContext taskAttemptContext)
  throws IllegalStateException {
 OutputCommitter outputCommitter;
 try {
  outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
  // A format may return no committer; skip job setup in that case.
  if (outputCommitter != null) {
   outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
  }
 } catch (Exception e) {
  // The broad catch also covers InterruptedException, which clears the
  // interrupt status when caught; restore it before the unchecked rethrow.
  if (e instanceof InterruptedException) {
   Thread.currentThread().interrupt();
  }
  throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
 }
 return outputCommitter;
}

代码示例来源:origin: com.github.jiayuhan-it/hadoop-mapreduce-client-common

/**
 * Creates the output committer for a job.
 * <p>
 * With {@code newApiCommitter} set, the committer is obtained from the job's
 * output format using a task attempt context built for map task 0, attempt 0.
 * Otherwise the class configured under {@code mapred.output.committer.class}
 * is instantiated, falling back to {@code FileOutputCommitter}.
 *
 * @param newApiCommitter whether to take the committer from the job's output format
 * @param jobId id of the job the committer is created for
 * @param conf job configuration
 * @return the created committer
 * @throws Exception if the committer cannot be created
 */
private org.apache.hadoop.mapreduce.OutputCommitter 
createOutputCommitter(boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
 LOG.info("OutputCommitter set in config "
   + conf.get("mapred.output.committer.class"));

 final org.apache.hadoop.mapreduce.OutputCommitter result;
 if (!newApiCommitter) {
  // Old API: instantiate the configured committer class directly.
  result = ReflectionUtils.newInstance(conf.getClass(
    "mapred.output.committer.class", FileOutputCommitter.class,
    org.apache.hadoop.mapred.OutputCommitter.class), conf);
 } else {
  // New API: the output format supplies the committer, and it needs a
  // task attempt context to do so.
  org.apache.hadoop.mapreduce.TaskAttemptID attemptId =
    new org.apache.hadoop.mapreduce.TaskAttemptID(
      new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0), 0);
  org.apache.hadoop.mapreduce.TaskAttemptContext attemptContext =
    new TaskAttemptContextImpl(conf, attemptId);
  OutputFormat format =
    ReflectionUtils.newInstance(attemptContext.getOutputFormatClass(), conf);
  result = format.getOutputCommitter(attemptContext);
 }

 LOG.info("OutputCommitter is " + result.getClass().getName());
 return result;
}

代码示例来源:origin: co.cask.cdap/cdap-app-fabric

/**
 * Checks the output specification of every named output of the job, each
 * against its own named job context.
 *
 * @param context the job context listing the named outputs
 * @throws IOException if a named output format rejects its specification
 * @throws InterruptedException if a check is interrupted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
 for (String outputName : MultipleOutputs.getNamedOutputsList(context)) {
  Class<? extends OutputFormat> formatClass =
    MultipleOutputs.getNamedOutputFormatClass(context, outputName);
  JobContext perOutputContext = MultipleOutputs.getNamedJobContext(context, outputName);

  // Instantiate the format with the configuration of its own named context.
  OutputFormat<K, V> format =
    ReflectionUtils.newInstance(formatClass, perOutputContext.getConfiguration());
  format.checkOutputSpecs(perOutputContext);
 }
}

代码示例来源:origin: org.spark-project.hive.hcatalog/hive-hcatalog-core

/**
 * Creates one base record writer container per output format alias found
 * for the given context, keyed by alias in insertion order.
 *
 * @param context the task attempt context the per-alias contexts are derived from
 * @throws IOException if an output directory or record writer cannot be created
 * @throws InterruptedException if record writer creation is interrupted
 */
public MultiRecordWriter(TaskAttemptContext context) throws IOException,
  InterruptedException {
 baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
 for (String alias : getOutputFormatAliases(context)) {
  LOGGER.info("Creating record writer for alias: " + alias);
  TaskAttemptContext perAliasContext = getTaskAttemptContext(alias, context);
  Configuration perAliasConf = perAliasContext.getConfiguration();

  // Make sure the configured output directory exists before opening a writer.
  String configuredDir = perAliasConf.get("mapred.output.dir");
  if (configuredDir != null) {
   Path dirPath = new Path(configuredDir);
   FileSystem fileSystem = dirPath.getFileSystem(perAliasConf);
   if (!fileSystem.exists(dirPath)) {
    fileSystem.mkdirs(dirPath);
   }
  }

  OutputFormat<?, ?> format = getOutputFormatInstance(perAliasContext);
  BaseRecordWriterContainer container =
    new BaseRecordWriterContainer(format.getRecordWriter(perAliasContext), perAliasContext);
  baseRecordWriters.put(alias, container);
 }
}

相关文章