This article collects a number of Java code examples for the org.apache.hadoop.mapreduce.RecordReader class and shows how the RecordReader class is used in practice. The examples are drawn mainly from GitHub, Stack Overflow, Maven and similar platforms, extracted from selected projects, and should be of practical reference value. Details of the RecordReader class:
Package path: org.apache.hadoop.mapreduce.RecordReader
Class name: RecordReader
Description: The record reader breaks the data into key/value pairs for input to the Mapper.
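All of the examples below follow the same basic life cycle: an InputFormat creates the RecordReader, initialize() binds it to an InputSplit, nextKeyValue() advances to the next record, getCurrentKey()/getCurrentValue() return the current pair, and close() releases resources. As a quick orientation before the project snippets, here is a minimal, self-contained sketch of that pattern using the standard TextInputFormat; it is not taken from any of the projects below, and the class name RecordReaderUsageSketch and the input path input.txt are placeholders chosen for illustration.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderUsageSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // hypothetical local input file; adjust the path to something that exists
        FileInputFormat.setInputPaths(job, new Path("input.txt"));

        TextInputFormat format = new TextInputFormat();
        List<InputSplit> splits = format.getSplits(job);
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        for (InputSplit split : splits) {
            // create -> initialize -> iterate -> close
            RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
            reader.initialize(split, context);
            try {
                while (reader.nextKeyValue()) {
                    LongWritable key = reader.getCurrentKey();   // byte offset of the line
                    Text value = reader.getCurrentValue();       // the line itself
                    System.out.println(key + "\t" + value);
                }
            } finally {
                reader.close();
            }
        }
    }
}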
Code example source: thinkaurelius/titan

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    while (reader.nextKeyValue()) {
        // TODO titan05 integration -- the duplicate() call may be unnecessary
        final TinkerVertex maybeNullTinkerVertex =
                deser.readHadoopVertex(reader.getCurrentKey(), reader.getCurrentValue());
        if (null != maybeNullTinkerVertex) {
            vertex = new VertexWritable(maybeNullTinkerVertex);
            //vertexQuery.filterRelationsOf(vertex); // TODO reimplement vertexquery filtering
            return true;
        }
    }
    return false;
}
Code example source: apache/flink

@Override
public void open(HadoopInputSplit split) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        TaskAttemptContext context = new TaskAttemptContextImpl(configuration, new TaskAttemptID());
        try {
            this.recordReader = this.mapreduceInputFormat
                    .createRecordReader(split.getHadoopInputSplit(), context);
            this.recordReader.initialize(split.getHadoopInputSplit(), context);
        } catch (InterruptedException e) {
            throw new IOException("Could not create RecordReader.", e);
        } finally {
            this.fetched = false;
        }
    }
}
Code example source: apache/hive

@Override
public boolean hasNext() {
    try {
        boolean retVal = curRecReader.nextKeyValue();
        if (retVal) {
            return true;
        }
        // if it's false, we need to close the recordReader.
        curRecReader.close();
        return false;
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
Code example source: apache/flink

@Override
public Tuple2<K, V> nextRecord(Tuple2<K, V> record) throws IOException {
    if (!this.fetched) {
        fetchNext();
    }
    if (!this.hasNext) {
        return null;
    }
    try {
        record.f0 = recordReader.getCurrentKey();
        record.f1 = recordReader.getCurrentValue();
    } catch (InterruptedException e) {
        throw new IOException("Could not get KeyValue pair.", e);
    }
    this.fetched = false;
    return record;
}
Code example source: ZuInnoTe/hadoopcryptoledger

@Test
public void readBitcoinRawBlockInputFormatBlockVersion1() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version1.blk";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    assertEquals(1, splits.size(), "Only one split generated for block version 1");
    RecordReader<BytesWritable, BytesWritable> reader = format.createRecordReader(splits.get(0), context);
    assertNotNull(reader, "Format returned null RecordReader");
    reader.initialize(splits.get(0), context);
    BytesWritable key = new BytesWritable();
    BytesWritable block = new BytesWritable();
    assertTrue(reader.nextKeyValue(), "Input Split for block version contains at least one block");
    block = reader.getCurrentValue();
    assertEquals(482, block.getLength(), "Random block version 1 must have size of 482 bytes");
    assertFalse(reader.nextKeyValue(), "No further blocks in block version 1");
    reader.close();
}
Code example source: ZuInnoTe/hadoopoffice

@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegative() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "excel2013encrypt.xlsx";
    String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
    Path file = new Path(fileNameSpreadSheet);
    // set locale to the one of the test data
    conf.set("hadoopoffice.read.locale.bcp47", "de");
    // for decryption simply set the password
    conf.set("hadoopoffice.read.security.crypt.password", "test2");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, file);
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    ExcelFileInputFormat format = new ExcelFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size(), "Only one split generated for Excel file");
    RecordReader<Text, ArrayWritable> reader = format.createRecordReader(splits.get(0), context);
    InterruptedException ex = assertThrows(InterruptedException.class,
            () -> reader.initialize(splits.get(0), context), "Exception is thrown in case of wrong password");
}
Code example source: apache/hive

private void writeThenReadByRecordReader(int intervalRecordCount,
        int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec)
        throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".")
            + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(),
            HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    // ... (the snippet is truncated in the source at this point: the write path and the
    //      statements that create splits, inputFormat and the TaskAttemptContext tac are cut off)
            new TaskAttemptID());
    RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
    rr.initialize(splits.get(i), tac);
    while (rr.nextKeyValue()) {
        readCount++;
Code example source: apache/avro

// (fragment of a test; the snippet in the source starts and ends mid-statement,
//  so some assertEquals/assertTrue calls appear only partially below)
Configuration conf = new Configuration();
expect(context.getConfiguration()).andReturn(conf).anyTimes();

recordReader.initialize(inputSplit, context);
        0.0f, recordReader.getProgress(), 0.0f);
assertTrue("Expected at least one record", recordReader.nextKeyValue());
key = recordReader.getCurrentKey();
value = recordReader.getCurrentValue();
        key == recordReader.getCurrentKey());
assertTrue("getCurrentValue() returned different values for the same record",
        value == recordReader.getCurrentValue());
assertTrue("Expected to read a second record", recordReader.nextKeyValue());
key = recordReader.getCurrentKey();
value = recordReader.getCurrentValue();
        1.0f, recordReader.getProgress(), 0.0f);
assertFalse("Expected only 2 records", recordReader.nextKeyValue());
recordReader.close();
Code example source: apache/tinkerpop

@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    final Configuration configuration = taskAttemptContext.getConfiguration();
    final InputFormat<NullWritable, VertexWritable> inputFormat = ReflectionUtils.newInstance(configuration.getClass(Constants.GREMLIN_HADOOP_GRAPH_READER, InputFormat.class, InputFormat.class), configuration);
    if (!(inputFormat instanceof GraphFilterAware) && configuration.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null)
        this.graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(configuration), Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    this.recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
    this.recordReader.initialize(inputSplit, taskAttemptContext);
}
Code example source: apache/jena

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException,
        InterruptedException {
    LOG.debug("initialize({}, {})", genericSplit, context);
    // Assuming file split
    if (!(genericSplit instanceof FileSplit))
        throw new IOException("This record reader only supports FileSplit inputs");
    // Find RDF language
    FileSplit split = (FileSplit) genericSplit;
    Path path = split.getPath();
    Lang lang = RDFLanguages.filenameToLang(path.getName());
    if (lang == null)
        throw new IOException("There is no registered RDF language for the input file " + path.toString());
    // Select the record reader and initialize
    this.reader = this.selectRecordReader(lang);
    this.reader.initialize(split, context);
}
Code example source: org.janusgraph/janusgraph-hadoop

@Override
public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    reader.initialize(inputSplit, taskAttemptContext);
    final Configuration conf = taskAttemptContext.getConfiguration();
    if (conf.get(Constants.GREMLIN_HADOOP_GRAPH_FILTER, null) != null) {
        graphFilter = VertexProgramHelper.deserialize(ConfUtil.makeApacheConfiguration(conf),
                Constants.GREMLIN_HADOOP_GRAPH_FILTER);
    }
}
Code example source: apache/incubator-gobblin

@Test
public void testRecordReader()
        throws Exception {
    List<String> paths = Lists.newArrayList("/path1", "/path2");
    GobblinWorkUnitsInputFormat.GobblinSplit split = new GobblinWorkUnitsInputFormat.GobblinSplit(paths);
    GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
    RecordReader<LongWritable, Text> recordReader =
            inputFormat.createRecordReader(split, new TaskAttemptContextImpl(new Configuration(), new TaskAttemptID("a", 1,
                    TaskType.MAP, 1, 1)));
    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 0);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path1");
    recordReader.nextKeyValue();
    Assert.assertEquals(recordReader.getCurrentKey().get(), 1);
    Assert.assertEquals(recordReader.getCurrentValue().toString(), "/path2");
    Assert.assertFalse(recordReader.nextKeyValue());
}
Code example source: apache/hbase

// (fragment of a test; the snippet starts mid-statement in the source)
        job.getConfiguration().getBoolean(SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY,
                SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT);
when(taskAttemptContext.getConfiguration()).thenReturn(job.getConfiguration());
RecordReader<ImmutableBytesWritable, Result> rr =
        tsif.createRecordReader(split, taskAttemptContext);
rr.initialize(split, taskAttemptContext);
while (rr.nextKeyValue()) {
    byte[] row = rr.getCurrentKey().get();
    verifyRowFromMap(rr.getCurrentKey(), rr.getCurrentValue());
    rowTracker.addRow(row);
}
rr.close();
Code example source: apache/incubator-gobblin

@Override
public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
    if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
        throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
    }
    Configuration configuration = new Configuration();
    FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);
    String fileSplitBytesStr = workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY);
    FileSplit fileSplit = (FileSplit) HadoopUtils.deserializeFromString(FileSplit.class, fileSplitBytesStr);
    TaskAttemptContext taskAttemptContext =
            getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());
    try {
        RecordReader<K, V> recordReader = fileInputFormat.createRecordReader(fileSplit, taskAttemptContext);
        recordReader.initialize(fileSplit, taskAttemptContext);
        boolean readKeys = workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, DEFAULT_FILE_INPUT_READ_KEYS);
        return getExtractor(workUnitState, recordReader, fileSplit, readKeys);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }
}
Code example source: org.apache.hadoop/hadoop-mapred-test

private static List<Text> readSplit(KeyValueTextInputFormat format,
        InputSplit split, Job job) throws IOException, InterruptedException {
    List<Text> result = new ArrayList<Text>();
    Configuration conf = job.getConfiguration();
    TaskAttemptContext context = MapReduceTestUtil.
            createDummyMapTaskAttemptContext(conf);
    RecordReader<Text, Text> reader = format.createRecordReader(split,
            MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
    MapContext<Text, Text, Text, Text> mcontext =
            new MapContextImpl<Text, Text, Text, Text>(conf,
                    context.getTaskAttemptID(), reader, null, null,
                    MapReduceTestUtil.createDummyReporter(),
                    split);
    reader.initialize(split, mcontext);
    while (reader.nextKeyValue()) {
        result.add(new Text(reader.getCurrentValue()));
    }
    return result;
}
Code example source: org.neolumin.vertexium/vertexium-accumulo

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext ctx) throws IOException, InterruptedException {
    reader.initialize(inputSplit, ctx);
    Map configurationMap = VertexiumMRUtils.toMap(ctx.getConfiguration());
    this.graph = (AccumuloGraph) new GraphFactory().createGraph(MapUtils.getAllWithPrefix(configurationMap, "graph"));
    this.authorizations = new AccumuloAuthorizations(ctx.getConfiguration().getStrings(VertexiumMRUtils.CONFIG_AUTHORIZATIONS));
}
Code example source: apache/avro

/**
 * Verifies that a non-null record reader can be created, and the key/value types are
 * as expected.
 */
@Test
public void testCreateRecordReader() throws IOException, InterruptedException {
    // Set up the job configuration.
    Job job = new Job();
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.STRING));
    Configuration conf = job.getConfiguration();

    FileSplit inputSplit = createMock(FileSplit.class);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    replay(inputSplit);
    replay(context);

    AvroKeyInputFormat inputFormat = new AvroKeyInputFormat();
    @SuppressWarnings("unchecked")
    RecordReader<AvroKey<Object>, NullWritable> recordReader = inputFormat.createRecordReader(
            inputSplit, context);
    assertNotNull(inputFormat);
    recordReader.close();

    verify(inputSplit);
    verify(context);
}
Code example source: larsgeorge/hbase-book

// (fragment; parts of the sampling loop are cut off in the source, including the for-loop
//  over the sampled splits and the statement that creates the sampling TaskAttemptContext)
public Object[] getSample(InputFormat inf, Job job) throws IOException, InterruptedException {
    long counter = 0;
    List<InputSplit> splits = inf.getSplits(job);
    ArrayList<K> samples = new ArrayList<K>(numSamples);
    int splitsToSample = Math.min(maxSplitsSampled, splits.size());
            job.getConfiguration(), new TaskAttemptID());
    RecordReader<K, V> reader = inf.createRecordReader(splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    while (reader.nextKeyValue()) {
        if (r.nextDouble() <= freq) {
            if (samples.size() < numSamples) {
                LOG.info(String.format("Fill: Collected %d samples from %d splits", counter, i));
                counter++;
                samples.add(ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
            } else {
                samples.set(ind, ReflectionUtils.copy(job.getConfiguration(),
                        reader.getCurrentKey(), null));
                if (counter % 1000 == 0)
                    LOG.info(String.format("Replace Random: Collected %d samples from %d splits", counter, i));
    reader.close();
Code example source: apache/hive

// (fragment; the snippet starts and ends mid-method in the source)
Job job = new Job(jobConf);
TaskAttemptContext tac = ShimLoader.getHadoopShims().newTaskAttemptContext(job.getConfiguration(), reporter);
recordReader = createRecordReader(tableSplit, tac);
try {
    recordReader.initialize(tableSplit, tac);
} catch (InterruptedException e) {
Code example source: apache/hive

@Override public boolean next(ImmutableBytesWritable rowKey, ResultWritable value) throws IOException {
    boolean next = false;
    try {
        next = recordReader.nextKeyValue();
        if (next) {
            rowKey.set(recordReader.getCurrentValue().getRow());
            value.setResult(recordReader.getCurrentValue());
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    }
    return next;
}
};