本文整理了Java中org.apache.parquet.column.Encoding.initDictionary()
方法的一些代码示例,展示了Encoding.initDictionary()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Encoding.initDictionary()
方法的具体详情如下:
包路径:org.apache.parquet.column.Encoding
类名称:Encoding
方法名:initDictionary
[英]initializes a dictionary from a page
[中]从页面初始化字典
代码示例来源:origin: apache/hive
public BaseVectorizedColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    boolean skipTimestampConversion,
    Type parquetType, TypeInfo hiveType) throws IOException {
  this.descriptor = descriptor;
  this.type = parquetType;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();
  this.skipTimestampConversion = skipTimestampConversion;
  this.hiveType = hiveType;

  // The dictionary page is optional; when absent, this column's pages are
  // not dictionary-encoded and no dictionary reader is needed.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
    return;
  }
  try {
    // Decode the dictionary and wrap it in a type-aware column reader.
    this.dictionary = ParquetDataColumnReaderFactory
        .getDataColumnReaderByTypeOnDictionary(parquetType.asPrimitiveType(), hiveType,
            dictPage.getEncoding().initDictionary(descriptor, dictPage),
            skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
    throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
}
代码示例来源:origin: org.apache.spark/spark-sql_2.10
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
    throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // A dictionary page may or may not be present for this column chunk.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary: pages are expected to be plain-encoded.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount == 0) {
    // A chunk with zero values cannot be read; fail fast.
    throw new IOException("totalValueCount == 0");
  }
}
代码示例来源:origin: org.apache.spark/spark-sql
public VectorizedColumnReader(
    ColumnDescriptor descriptor,
    OriginalType originalType,
    PageReader pageReader,
    TimeZone convertTz) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.convertTz = convertTz;
  this.originalType = originalType;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // A dictionary page may or may not be present for this column chunk.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary: pages are expected to be plain-encoded.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount == 0) {
    // A chunk with zero values cannot be read; fail fast.
    throw new IOException("totalValueCount == 0");
  }
}
代码示例来源:origin: org.apache.spark/spark-sql_2.11
public VectorizedColumnReader(
    ColumnDescriptor descriptor,
    OriginalType originalType,
    PageReader pageReader,
    TimeZone convertTz) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.convertTz = convertTz;
  this.originalType = originalType;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // A dictionary page may or may not be present for this column chunk.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary: pages are expected to be plain-encoded.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount == 0) {
    // A chunk with zero values cannot be read; fail fast.
    throw new IOException("totalValueCount == 0");
  }
}
代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column
// Delegates dictionary decoding to the PLAIN encoding's initializer.
// NOTE(review): presumably the dictionary page itself is stored plain-encoded
// for this encoding — confirm against the Parquet format spec.
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  return PLAIN.initDictionary(descriptor, dictionaryPage);
}
代码示例来源:origin: org.apache.parquet/parquet-column
// Delegates dictionary decoding to the PLAIN encoding's initializer.
// NOTE(review): presumably the dictionary page itself is stored plain-encoded
// for this encoding — confirm against the Parquet format spec.
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  return PLAIN.initDictionary(descriptor, dictionaryPage);
}
代码示例来源:origin: com.alibaba.blink/flink-table
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // The dictionary page is optional; absence means the column's pages
  // are not dictionary-encoded.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
    return;
  }
  try {
    this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
    this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
    throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
}
代码示例来源:origin: Netflix/iceberg
// Reads and decodes the dictionary page for a column, or returns null when
// the chunk has no dictionary page. Decode failures are rethrown unchecked.
private static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  final DictionaryPage page = pageSource.readDictionaryPage();
  if (page == null) {
    return null;
  }
  try {
    return page.getEncoding().initDictionary(desc, page);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
}
代码示例来源:origin: io.snappydata/snappy-spark-sql
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
    throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // A dictionary page may or may not be present for this column chunk.
  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary: pages are expected to be plain-encoded.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount == 0) {
    // A chunk with zero values cannot be read; fail fast.
    throw new IOException("totalValueCount == 0");
  }
}
代码示例来源:origin: Netflix/iceberg
dict = page.getEncoding().initDictionary(col, page);
} catch (IOException e) {
throw new RuntimeIOException("Failed to create reader for dictionary page");
代码示例来源:origin: org.apache.parquet/parquet-hadoop
/**
 * Expands a column chunk's dictionary page into the set of its distinct values.
 *
 * @param meta column chunk metadata identifying the column and its physical type
 * @return the decoded dictionary values, or {@code null} when the chunk has no
 *         dictionary page (it may not be dictionary-encoded)
 * @throws IOException if the dictionary page cannot be read or decoded
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
  ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
  DictionaryPage page = dictionaries.readDictionaryPage(col);
  // the chunk may not be dictionary-encoded
  if (page == null) {
    return null;
  }
  Dictionary dict = page.getEncoding().initDictionary(col, page);
  // The element type depends on the physical type, so collect into Set<Object>
  // rather than a raw Set; the unchecked cast is confined to the return.
  Set<Object> dictSet = new HashSet<>();
  for (int i = 0; i <= dict.getMaxId(); i++) {
    switch (meta.getType()) {
      case BINARY:
        dictSet.add(dict.decodeToBinary(i));
        break;
      case INT32:
        dictSet.add(dict.decodeToInt(i));
        break;
      case INT64:
        dictSet.add(dict.decodeToLong(i));
        break;
      case FLOAT:
        dictSet.add(dict.decodeToFloat(i));
        break;
      case DOUBLE:
        dictSet.add(dict.decodeToDouble(i));
        break;
      default:
        LOG.warn("Unknown dictionary type{}", meta.getType());
    }
  }
  return (Set<T>) (Set<?>) dictSet;
}
代码示例来源:origin: org.apache.drill.exec/drill-java-exec
// Reads the dictionary page described by the header, tracks its buffer for
// later release, and decodes it into this reader's dictionary.
private void readDictionaryPage(final PageHeader pageHeader,
    final ColumnReader<?> parentStatus) throws IOException {
  final int compressedSize = pageHeader.getCompressed_page_size();
  final int uncompressedSize = pageHeader.getUncompressed_page_size();
  final DrillBuf pageBytes = readPage(pageHeader, compressedSize, uncompressedSize);
  // Track the buffer so it can be freed when the reader is closed.
  allocatedDictionaryBuffers.add(pageBytes);
  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(pageBytes, 0, uncompressedSize),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));
  this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}
代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-hadoop
Dictionary dict = page.getEncoding().initDictionary(col, page);
代码示例来源:origin: dremio/dremio-oss
/**
 * Reads and decodes a column chunk's dictionary page.
 *
 * @param in           seekable input; this method seeks to the page's file offset
 * @param column       descriptor of the column the dictionary belongs to
 * @param pageHeader   dictionary page header together with its absolute offset
 * @param decompressor used to inflate the compressed page bytes
 * @return the decoded dictionary
 * @throws IOException if the page bytes cannot be fully read or decoded
 */
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException {
  in.seek(pageHeader.getOffset());
  final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];
  // InputStream.read(byte[]) may return fewer bytes than requested even when
  // more data is available, so loop until the buffer is full or EOF; the
  // previous single read() could fail spuriously on a legitimate short read.
  int read = 0;
  while (read < data.length) {
    final int n = in.read(data, read, data.length - read);
    if (n < 0) {
      break; // premature EOF; reported below
    }
    read += n;
  }
  if (read != data.length) {
    throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length));
  }
  final DictionaryPage dictionaryPage = new DictionaryPage(
      decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()),
      pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
      CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
  return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}
代码示例来源:origin: org.apache.drill.exec/drill-java-exec
// Decodes an already-read (and decompressed) dictionary page into this
// reader's dictionary, recording decode time in the reader stats. Any
// failure is funneled through handleAndThrowException.
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
    throws UserException {
  try {
    pageHeader = readStatus.getPageHeader();
    final int uncompressedSize = pageHeader.getUncompressed_page_size();
    final DrillBuf pageBytes = getDecompressedPageData(readStatus);
    final Stopwatch decodeTimer = Stopwatch.createStarted();
    // Track the buffer so it can be freed when the reader is closed.
    allocatedDictionaryBuffers.add(pageBytes);
    final DictionaryPage dictPage = new DictionaryPage(asBytesInput(pageBytes, 0, uncompressedSize),
        pageHeader.uncompressed_page_size, pageHeader.dictionary_page_header.num_values,
        valueOf(pageHeader.dictionary_page_header.encoding.name()));
    this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
    stats.timeDictPageDecode.addAndGet(decodeTimer.elapsed(TimeUnit.NANOSECONDS));
  } catch (Exception e) {
    handleAndThrowException(e, "Error decoding dictionary page.");
  }
}
代码示例来源:origin: dremio/dremio-oss
// Reads the dictionary page described by the header into a freshly allocated
// buffer and decodes it into this reader's dictionary.
private void readDictionaryPage(final PageHeader pageHeader,
    final ColumnReader<?> parentStatus) throws IOException {
  final int compressedSize = pageHeader.getCompressed_page_size();
  final int uncompressedSize = pageHeader.getUncompressed_page_size();
  final ArrowBuf pageBytes = allocateDictionaryBuffer(uncompressedSize);
  readPage(pageHeader, compressedSize, uncompressedSize, pageBytes);
  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(pageBytes, 0, uncompressedSize),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));
  this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}
代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column
if (dictionaryPage != null) {
try {
this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
if (converter.hasDictionarySupport()) {
converter.setDictionary(dictionary);
代码示例来源:origin: org.apache.parquet/parquet-column
if (dictionaryPage != null) {
try {
this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
if (converter.hasDictionarySupport()) {
converter.setDictionary(dictionary);
内容来源于网络,如有侵权,请联系作者删除!