org.apache.parquet.column.Encoding.initDictionary()方法的使用及代码示例

x33g5p2x  于2022-01-19 转载在 其他  
字(11.0k)|赞(0)|评价(0)|浏览(160)

本文整理了Java中org.apache.parquet.column.Encoding.initDictionary()方法的一些代码示例,展示了Encoding.initDictionary()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Encoding.initDictionary()方法的具体详情如下:
包路径:org.apache.parquet.column.Encoding
类名称:Encoding
方法名:initDictionary

Encoding.initDictionary介绍

[英]initializes a dictionary from a page
[中]从页面初始化字典

代码示例

代码示例来源:origin: apache/hive

public BaseVectorizedColumnReader(
  ColumnDescriptor descriptor,
  PageReader pageReader,
  boolean skipTimestampConversion,
  Type parquetType, TypeInfo hiveType) throws IOException {
 this.descriptor = descriptor;
 this.type = parquetType;
 this.pageReader = pageReader;
 this.maxDefLevel = descriptor.getMaxDefinitionLevel();
 this.skipTimestampConversion = skipTimestampConversion;
 this.hiveType = hiveType;

 // A dictionary page, when present, is decoded once up front and then
 // reused by every dictionary-encoded data page in this column chunk.
 DictionaryPage dictPage = pageReader.readDictionaryPage();
 if (dictPage == null) {
  // No dictionary page: the chunk is not dictionary-encoded.
  this.dictionary = null;
  this.isCurrentPageDictionaryEncoded = false;
  return;
 }
 try {
  // Wrap the decoded Parquet dictionary in a Hive-typed reader so values
  // can be converted to the target Hive type on access.
  this.dictionary = ParquetDataColumnReaderFactory
    .getDataColumnReaderByTypeOnDictionary(parquetType.asPrimitiveType(), hiveType,
      dictPage.getEncoding().initDictionary(descriptor, dictPage),
      skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = true;
 } catch (IOException e) {
  // Re-throw with column context, preserving the cause.
  throw new IOException("could not decode the dictionary for " + descriptor, e);
 }
}

代码示例来源:origin: org.apache.spark/spark-sql_2.10

public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
  throws IOException {
 this.descriptor = descriptor;
 this.pageReader = pageReader;
 this.maxDefLevel = descriptor.getMaxDefinitionLevel();

 // Decode the (optional) dictionary page once; dictionary-encoded data
 // pages later resolve their ids against it.
 DictionaryPage dictPage = pageReader.readDictionaryPage();
 if (dictPage == null) {
  this.dictionary = null;
  this.isCurrentPageDictionaryEncoded = false;
 } else {
  try {
   this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
   this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
   // Add column context while preserving the original cause.
   throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
 }

 this.totalValueCount = pageReader.getTotalValueCount();
 // An empty column chunk is malformed input for this reader.
 if (totalValueCount == 0) {
  throw new IOException("totalValueCount == 0");
 }
}

代码示例来源:origin: org.apache.spark/spark-sql

public VectorizedColumnReader(
  ColumnDescriptor descriptor,
  OriginalType originalType,
  PageReader pageReader,
  TimeZone convertTz) throws IOException {
 this.descriptor = descriptor;
 this.pageReader = pageReader;
 this.convertTz = convertTz;
 this.originalType = originalType;
 this.maxDefLevel = descriptor.getMaxDefinitionLevel();

 // The dictionary page, if any, is decoded eagerly so that
 // dictionary-encoded data pages can be read against it.
 DictionaryPage dictPage = pageReader.readDictionaryPage();
 if (dictPage == null) {
  this.dictionary = null;
  this.isCurrentPageDictionaryEncoded = false;
 } else {
  try {
   this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
   this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
   // Attach column context; keep the underlying cause.
   throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
 }

 this.totalValueCount = pageReader.getTotalValueCount();
 // Reject a chunk advertising zero values — nothing to read.
 if (totalValueCount == 0) {
  throw new IOException("totalValueCount == 0");
 }
}

代码示例来源:origin: org.apache.spark/spark-sql_2.11

// Builds a vectorized reader for one Parquet column chunk. Decodes the
// optional dictionary page eagerly and validates the chunk's value count.
public VectorizedColumnReader(
  ColumnDescriptor descriptor,
  OriginalType originalType,
  PageReader pageReader,
  TimeZone convertTz) throws IOException {
 this.descriptor = descriptor;
 this.pageReader = pageReader;
 this.convertTz = convertTz;
 this.originalType = originalType;
 this.maxDefLevel = descriptor.getMaxDefinitionLevel();
 // A non-null dictionary page means the chunk may contain
 // dictionary-encoded data pages; decode it once here.
 DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
 if (dictionaryPage != null) {
  try {
   this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage);
   this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
   // Wrap with column context, preserving the cause.
   throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
 } else {
  this.dictionary = null;
  this.isCurrentPageDictionaryEncoded = false;
 }
 this.totalValueCount = pageReader.getTotalValueCount();
 // A zero-value chunk is treated as corrupt input.
 if (totalValueCount == 0) {
  throw new IOException("totalValueCount == 0");
 }
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column

@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
 return PLAIN.initDictionary(descriptor, dictionaryPage);
}

代码示例来源:origin: org.apache.parquet/parquet-column

@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
 // The dictionary page itself is stored in PLAIN layout regardless of the
 // data-page encoding, so delegate decoding to PLAIN.
 return PLAIN.initDictionary(descriptor, dictionaryPage);
}

代码示例来源:origin: com.alibaba.blink/flink-table

public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  // If the chunk carries a dictionary page, decode it eagerly so data
  // pages can later be read in dictionary-encoded form.
  DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
    return;
  }
  try {
    this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
    this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
    // Re-throw with column context, keeping the cause.
    throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
}

代码示例来源:origin: Netflix/iceberg

/**
 * Reads and decodes the dictionary page for a column chunk.
 *
 * @param desc the column whose dictionary is read
 * @param pageSource page reader positioned on the column chunk
 * @return the decoded dictionary, or {@code null} if the chunk has no
 *         dictionary page (i.e. it is not dictionary-encoded)
 * @throws ParquetDecodingException if the dictionary page cannot be decoded
 */
private static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  DictionaryPage dictionaryPage = pageSource.readDictionaryPage();
  // The chunk may not be dictionary-encoded at all.
  if (dictionaryPage == null) {
   return null;
  }
  try {
   return dictionaryPage.getEncoding().initDictionary(desc, dictionaryPage);
  } catch (IOException e) {
   // Wrap in an unchecked decoding exception with column context.
   throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
 }
}

代码示例来源:origin: io.snappydata/snappy-spark-sql

// Builds a vectorized reader over one Parquet column chunk; decodes the
// optional dictionary page and validates the advertised value count.
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
  throws IOException {
 this.descriptor = descriptor;
 this.pageReader = pageReader;
 this.maxDefLevel = descriptor.getMaxDefinitionLevel();
 // Present only when the chunk may contain dictionary-encoded data pages.
 DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
 if (dictionaryPage != null) {
  try {
   this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage);
   this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
   // Wrap with column context, preserving the cause.
   throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
 } else {
  this.dictionary = null;
  this.isCurrentPageDictionaryEncoded = false;
 }
 this.totalValueCount = pageReader.getTotalValueCount();
 // Zero values means a malformed chunk — fail fast.
 if (totalValueCount == 0) {
  throw new IOException("totalValueCount == 0");
 }
}

代码示例来源:origin: Netflix/iceberg

dict = page.getEncoding().initDictionary(col, page);
} catch (IOException e) {
 throw new RuntimeIOException("Failed to create reader for dictionary page");

代码示例来源:origin: org.apache.parquet/parquet-hadoop

/**
 * Expands a column chunk's dictionary page into a set of its decoded values.
 *
 * @param meta column chunk metadata identifying the column and its type
 * @return the set of decoded dictionary values, or {@code null} if the chunk
 *         has no dictionary page (not dictionary-encoded)
 * @throws IOException if the dictionary page cannot be read or decoded
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
 ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
 DictionaryPage page = dictionaries.readDictionaryPage(col);
 // the chunk may not be dictionary-encoded
 if (page == null) {
  return null;
 }
 Dictionary dict = page.getEncoding().initDictionary(col, page);
 // Collect into Set<Object> (not a raw Set) and cast once at the end; each
 // switch branch decodes to the value type the caller's T represents.
 Set<Object> dictSet = new HashSet<>();
 for (int i = 0; i <= dict.getMaxId(); i++) {
  switch (meta.getType()) {
   case BINARY: dictSet.add(dict.decodeToBinary(i));
    break;
   case INT32: dictSet.add(dict.decodeToInt(i));
    break;
   case INT64: dictSet.add(dict.decodeToLong(i));
    break;
   case FLOAT: dictSet.add(dict.decodeToFloat(i));
    break;
   case DOUBLE: dictSet.add(dict.decodeToDouble(i));
    break;
   default:
    // Unknown physical type: skip expansion for these values.
    LOG.warn("Unknown dictionary type {}", meta.getType());
  }
 }
 return (Set<T>) dictSet;
}

代码示例来源:origin: org.apache.drill.exec/drill-java-exec

// Reads and decompresses a dictionary page, then decodes it into this
// reader's dictionary for the current column.
private void readDictionaryPage(final PageHeader pageHeader,
                final ColumnReader<?> parentStatus) throws IOException {
 // Sizes come from the Thrift page header; the on-disk page may be compressed.
 int compressedSize = pageHeader.getCompressed_page_size();
 int uncompressedSize = pageHeader.getUncompressed_page_size();
 final DrillBuf dictionaryData = readPage(pageHeader, compressedSize, uncompressedSize);
 // Track the buffer so it can be released when the reader is closed.
 allocatedDictionaryBuffers.add(dictionaryData);
 DictionaryPage page = new DictionaryPage(
   asBytesInput(dictionaryData, 0, uncompressedSize),
   pageHeader.uncompressed_page_size,
   pageHeader.dictionary_page_header.num_values,
   valueOf(pageHeader.dictionary_page_header.encoding.name()));
 // Let the page's declared encoding build the in-memory dictionary.
 this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-hadoop

Dictionary dict = page.getEncoding().initDictionary(col, page);

代码示例来源:origin: dremio/dremio-oss

/**
 * Reads, decompresses and decodes a dictionary page from the given stream.
 *
 * @param in input stream over the Parquet file
 * @param column descriptor of the column the dictionary belongs to
 * @param pageHeader dictionary page header plus its file offset
 * @param decompressor codec used to decompress the page bytes
 * @return the decoded dictionary
 * @throws IOException if the page cannot be fully read or decoded
 */
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException {
 in.seek(pageHeader.getOffset());
 final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];
 // InputStream.read(byte[]) may return fewer bytes than requested; loop
 // until the whole page is buffered or the stream ends prematurely.
 int read = 0;
 while (read < data.length) {
  int n = in.read(data, read, data.length - read);
  if (n < 0) {
   break;
  }
  read += n;
 }
 if (read != data.length) {
  throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length));
 }
 final DictionaryPage dictionaryPage = new DictionaryPage(
  decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()),
  pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
  CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
 return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}

代码示例来源:origin: org.apache.drill.exec/drill-java-exec

// Decodes an already-read (and decompressed) dictionary page into this
// reader's dictionary, recording decode time in the reader stats.
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
  throws UserException {
 try {
  pageHeader = readStatus.getPageHeader();
  int uncompressedSize = pageHeader.getUncompressed_page_size();
  // Page bytes were decompressed by an earlier stage; fetch them here.
  final DrillBuf dictionaryData = getDecompressedPageData(readStatus);
  Stopwatch timer = Stopwatch.createStarted();
  // Track the buffer for release when the reader closes.
  allocatedDictionaryBuffers.add(dictionaryData);
  DictionaryPage page = new DictionaryPage(asBytesInput(dictionaryData, 0, uncompressedSize),
    pageHeader.uncompressed_page_size, pageHeader.dictionary_page_header.num_values,
    valueOf(pageHeader.dictionary_page_header.encoding.name()));
  this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
  // Only the decode itself is timed (the timer starts after decompression).
  long timeToDecode = timer.elapsed(TimeUnit.NANOSECONDS);
  stats.timeDictPageDecode.addAndGet(timeToDecode);
 } catch (Exception e) {
  // Converts any failure into a user-facing error with context.
  handleAndThrowException(e, "Error decoding dictionary page.");
 }
}

代码示例来源:origin: dremio/dremio-oss

// Reads a dictionary page into a freshly allocated buffer and decodes it
// into this reader's dictionary for the current column.
private void readDictionaryPage(final PageHeader pageHeader,
                final ColumnReader<?> parentStatus) throws IOException {
 // Sizes come from the Thrift page header; the on-disk bytes may be compressed.
 int compressedSize = pageHeader.getCompressed_page_size();
 int uncompressedSize = pageHeader.getUncompressed_page_size();
 // Buffer sized for the uncompressed page; readPage decompresses into it.
 final ArrowBuf dictionaryData = allocateDictionaryBuffer(uncompressedSize);
 readPage(pageHeader, compressedSize, uncompressedSize, dictionaryData);
 DictionaryPage page = new DictionaryPage(
   asBytesInput(dictionaryData, 0, uncompressedSize),
   pageHeader.uncompressed_page_size,
   pageHeader.dictionary_page_header.num_values,
   valueOf(pageHeader.dictionary_page_header.encoding.name()));
 // Let the page's declared encoding build the in-memory dictionary.
 this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column

if (dictionaryPage != null) {
 try {
  this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
  if (converter.hasDictionarySupport()) {
   converter.setDictionary(dictionary);

代码示例来源:origin: org.apache.parquet/parquet-column

if (dictionaryPage != null) {
 try {
  this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
  if (converter.hasDictionarySupport()) {
   converter.setDictionary(dictionary);

相关文章