org.apache.parquet.column.Encoding类的使用及代码示例

x33g5p2x  于2022-01-19 转载在 其他  
字(14.9k)|赞(0)|评价(0)|浏览(252)

本文整理了Java中org.apache.parquet.column.Encoding类的一些代码示例,展示了Encoding类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Encoding类的具体详情如下:
包路径:org.apache.parquet.column.Encoding
类名称:Encoding

Encoding介绍

[英]encoding of the data
[中]数据的编码

代码示例

代码示例来源:origin: apache/hive

/**
 * Creates a reader for one column and eagerly decodes its dictionary page, if there is one.
 *
 * @param descriptor descriptor of the column being read
 * @param pageReader reader the dictionary page is fetched from
 * @param skipTimestampConversion flag stored on the reader and forwarded to the dictionary reader factory
 * @param parquetType Parquet type of the column; converted with {@code asPrimitiveType()} when a dictionary page exists
 * @param hiveType Hive type information forwarded to the data column reader factory
 * @throws IOException if the dictionary page cannot be decoded
 */
public BaseVectorizedColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    boolean skipTimestampConversion,
    Type parquetType, TypeInfo hiveType) throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.type = parquetType;
  this.hiveType = hiveType;
  this.skipTimestampConversion = skipTimestampConversion;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary page for this column: pages are read without a dictionary.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = ParquetDataColumnReaderFactory
          .getDataColumnReaderByTypeOnDictionary(
              parquetType.asPrimitiveType(),
              hiveType,
              dictPage.getEncoding().initDictionary(descriptor, dictPage),
              skipTimestampConversion);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      // Re-throw with the column attached so the failing column can be identified.
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }
}

代码示例来源:origin: apache/hive

/**
 * Prepares {@code dataColumn} for reading the values of a freshly loaded page.
 *
 * @param dataEncoding encoding of the page's values
 * @param in stream handed to the values reader's {@code initFromPage}
 * @param valueCount number of values in the page
 * @throws IOException if the page needs a dictionary that is missing, or the reader cannot be initialised
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
    throws IOException {
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = valuesRead + pageValueCount;

  final boolean dictionaryEncoded = dataEncoding.usesDictionary();
  if (!dictionaryEncoded) {
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
        type.asPrimitiveType(),
        hiveType,
        dataEncoding.getValuesReader(descriptor, VALUES),
        skipTimestampConversion);
  } else {
    // Drop the previous reader before validating that a dictionary is available.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
        type.asPrimitiveType(),
        hiveType,
        dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary.getDictionary()),
        skipTimestampConversion);
  }
  this.isCurrentPageDictionaryEncoded = dictionaryEncoded;

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}

代码示例来源:origin: apache/hive

/**
 * Reads a V1 data page: initialises the repetition-level and definition-level readers from the
 * page bytes and then hands the remaining stream to {@code initDataReader} for the values.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if any of the readers cannot be initialised from the page
 */
private void readPageV1(DataPageV1 page) {
  // Use the count carried by the page itself. The pageValueCount field is only refreshed inside
  // initDataReader, which runs after the level readers are initialised, so reading the field
  // here would pass the previous page's count to the level readers and to the debug log.
  final int valueCount = page.getValueCount();

  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size " + bytes.size() + " bytes and " + valueCount + " records");
    ByteBufferInputStream in = bytes.toInputStream();
    LOG.debug("reading repetition levels at " + in.position());
    rlReader.initFromPage(valueCount, in);
    LOG.debug("reading definition levels at " + in.position());
    dlReader.initFromPage(valueCount, in);
    LOG.debug("reading data at " + in.position());
    initDataReader(page.getValueEncoding(), in, valueCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}

代码示例来源:origin: org.apache.drill.exec/drill-java-exec

/**
 * Reads the dictionary page described by {@code pageHeader} and decodes it into
 * {@code this.dictionary}.
 *
 * @param pageHeader header of the dictionary page
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise the dictionary
 * @throws IOException if reading the page or decoding the dictionary fails
 */
private void readDictionaryPage(final PageHeader pageHeader,
                final ColumnReader<?> parentStatus) throws IOException {
  final int uncompressedLength = pageHeader.getUncompressed_page_size();
  final int compressedLength = pageHeader.getCompressed_page_size();

  final DrillBuf dictBuffer = readPage(pageHeader, compressedLength, uncompressedLength);
  // Register the buffer right away, before anything below can throw.
  allocatedDictionaryBuffers.add(dictBuffer);

  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(dictBuffer, 0, uncompressedLength),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}

代码示例来源:origin: org.apache.spark/spark-sql

/**
 * Selects and initialises the vectorized values reader for the current page.
 *
 * @param dataEncoding encoding of the page's values
 * @param in stream the chosen reader is initialised from
 * @throws IOException if a dictionary-encoded page has no dictionary, or the reader cannot be initialised
 * @throws UnsupportedOperationException if the encoding is not PLAIN, PLAIN_DICTIONARY or RLE_DICTIONARY
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException {
  this.endOfPageValueCount = valuesRead + pageValueCount;

  if (!dataEncoding.usesDictionary()) {
    if (dataEncoding != Encoding.PLAIN) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedPlainValuesReader();
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    // Clear the previous reader before validating the dictionary.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    @SuppressWarnings("deprecation")
    Encoding legacyDictEncoding = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression
    boolean supported =
        dataEncoding == legacyDictEncoding || dataEncoding == Encoding.RLE_DICTIONARY;
    if (!supported) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedRleValuesReader();
    this.isCurrentPageDictionaryEncoded = true;
  }

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}

代码示例来源:origin: org.apache.drill.exec/drill-java-exec

// NOTE(review): this excerpt is truncated. Braces are unbalanced and the value-reader setup
// appears twice, so lines from the original method are presumably missing. Confirm against
// the Drill source.
// Create the repetition-level reader for this column and initialise it from the page stream.
repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
 repetitionLevels.initFromPage(currentPageCount, in);
 // Same for the definition levels, initialised from the same stream afterwards.
 definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
 definitionLevels.initFromPage(currentPageCount, in);
 // Record the stream position reached after both level readers were initialised.
 readPosInBytes = in.position();
 // Non-dictionary encodings obtain a plain values reader directly from the encoding.
 if (!valueEncoding.usesDictionary()) {
  valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
  valueReader.initFromPage(currentPageCount, in);
 valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
 valueReader.initFromPage(currentPageCount, in);
// The dictionary-encoded branch starts here; its body is not part of this excerpt.
if (valueEncoding.usesDictionary()) {

代码示例来源:origin: io.prestosql/presto-parquet

/**
 * Converts a list of encodings into an unmodifiable set of
 * {@code org.apache.parquet.column.Encoding} constants, matching each entry by name.
 *
 * @param encodings encodings to convert
 * @return unmodifiable set with the column encoding of the same name for every input entry
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
  Set<org.apache.parquet.column.Encoding> converted = new HashSet<>();
  // The two enums are bridged through their constant names.
  encodings.forEach(source ->
      converted.add(org.apache.parquet.column.Encoding.valueOf(source.name())));
  return Collections.unmodifiableSet(converted);
}

代码示例来源:origin: org.apache.parquet/parquet-column

/**
 * Returns a reader for values that can be decoded without a dictionary.
 *
 * <p>This implementation always throws.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @return the proper values reader for the given column
 * @throws UnsupportedOperationException if the encoding is dictionary based
 */
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  final String message =
      "Error decoding " + descriptor + ". " + this.name() + " is dictionary based";
  throw new UnsupportedOperationException(message);
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column

/**
 * Delegates to {@code RLE_DICTIONARY} to obtain the dictionary-based values reader.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @param dictionary the dictionary
 * @return the reader produced by {@code RLE_DICTIONARY} for the same arguments
 */
@Override
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final ValuesReader delegateReader =
      RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);
  return delegateReader;
}

代码示例来源:origin: org.apache.spark/spark-sql_2.11

/**
 * Reads a V1 data page: sets up the repetition-level and definition-level decoders, then
 * initialises the data reader from the rest of the page bytes.
 *
 * @param page the V1 data page to read
 * @throws IOException if any decoder cannot be initialised from the page
 * @throws UnsupportedOperationException if definition levels are present and not RLE encoded
 */
private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

  // Initialize the decoders.
  // Definition levels must be RLE encoded unless the column's max definition level is 0.
  if (!(page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0)) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  this.defColumn = new VectorizedRleValuesReader(
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));
  ValuesReader dlReader = this.defColumn;

  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    ByteBufferInputStream in = page.getBytes().toInputStream();
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Reads the dictionary page described by {@code pageHeader} into a newly allocated buffer and
 * decodes it into {@code this.dictionary}.
 *
 * @param pageHeader header of the dictionary page
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise the dictionary
 * @throws IOException if reading the page or decoding the dictionary fails
 */
private void readDictionaryPage(final PageHeader pageHeader,
                final ColumnReader<?> parentStatus) throws IOException {
  final int uncompressedLength = pageHeader.getUncompressed_page_size();
  final int compressedLength = pageHeader.getCompressed_page_size();

  // The buffer is sized for the uncompressed page and filled by readPage.
  final ArrowBuf dictBuffer = allocateDictionaryBuffer(uncompressedLength);
  readPage(pageHeader, compressedLength, uncompressedLength, dictBuffer);

  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(dictBuffer, 0, uncompressedLength),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}

代码示例来源:origin: org.apache.spark/spark-sql_2.11

/**
 * Selects and initialises the vectorized values reader for the current page.
 *
 * @param dataEncoding encoding of the page's values
 * @param in stream the chosen reader is initialised from
 * @throws IOException if a dictionary-encoded page has no dictionary, or the reader cannot be initialised
 * @throws UnsupportedOperationException if the encoding is not PLAIN, PLAIN_DICTIONARY or RLE_DICTIONARY
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException {
  this.endOfPageValueCount = valuesRead + pageValueCount;

  if (!dataEncoding.usesDictionary()) {
    if (dataEncoding != Encoding.PLAIN) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedPlainValuesReader();
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    // Clear the previous reader before validating the dictionary.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    @SuppressWarnings("deprecation")
    Encoding legacyDictEncoding = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression
    boolean supported =
        dataEncoding == legacyDictEncoding || dataEncoding == Encoding.RLE_DICTIONARY;
    if (!supported) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedRleValuesReader();
    this.isCurrentPageDictionaryEncoded = true;
  }

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}

代码示例来源:origin: dremio/dremio-oss

// NOTE(review): this excerpt is truncated. Braces are unbalanced and the value-reader setup
// appears twice, so lines from the original method are presumably missing. Confirm against
// the Dremio source.
// Create the repetition-level reader and initialise it from the page buffer at readPosInBytes.
repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
 repetitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
 // NOTE(review): the definition levels are initialised at the same readPosInBytes offset as the
 // repetition levels. An offset update between the two may have been elided from this excerpt. TODO confirm.
 definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
 definitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
 // Advance the read position to the offset reported by the definition-level reader.
 readPosInBytes = definitionLevels.getNextOffset();
 // Non-dictionary encodings obtain a plain values reader directly from the encoding.
 if (!valueEncoding.usesDictionary()) {
  valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
  valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
 valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
 valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
// The dictionary-encoded branch starts here; its body is not part of this excerpt.
if (valueEncoding.usesDictionary()) {

代码示例来源:origin: prestosql/presto

/**
 * Converts a list of encodings into an unmodifiable set of
 * {@code org.apache.parquet.column.Encoding} constants, matching each entry by name.
 *
 * @param encodings encodings to convert
 * @return unmodifiable set with the column encoding of the same name for every input entry
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
  Set<org.apache.parquet.column.Encoding> converted = new HashSet<>();
  // The two enums are bridged through their constant names.
  encodings.forEach(source ->
      converted.add(org.apache.parquet.column.Encoding.valueOf(source.name())));
  return Collections.unmodifiableSet(converted);
}

代码示例来源:origin: org.apache.parquet/parquet-column

/**
 * Returns a reader for values that need a dictionary to be decoded.
 *
 * <p>This implementation always throws.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @param dictionary the dictionary
 * @return the proper values reader for the given column
 * @throws UnsupportedOperationException if the encoding is not dictionary based
 */
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final String message = this.name() + " is not dictionary based";
  throw new UnsupportedOperationException(message);
}

代码示例来源:origin: org.apache.parquet/parquet-column

/**
 * Delegates to {@code RLE_DICTIONARY} to obtain the dictionary-based values reader.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @param dictionary the dictionary
 * @return the reader produced by {@code RLE_DICTIONARY} for the same arguments
 */
@Override
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final ValuesReader delegateReader =
      RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);
  return delegateReader;
}

代码示例来源:origin: Netflix/iceberg

// NOTE(review): this excerpt is truncated. The closing braces of the null check and of the
// else branch are missing here. Confirm against the Iceberg source.
// Dictionary-based encodings need the dictionary to build their values reader.
if (dataEncoding.usesDictionary()) {
 if (dict == null) {
  throw new ParquetDecodingException(
    "could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
 this.values = dataEncoding.getDictionaryBasedValuesReader(desc, VALUES, dict);
} else {
 // Any other encoding supplies its values reader directly.
 this.values = dataEncoding.getValuesReader(desc, VALUES);

代码示例来源:origin: org.apache.spark/spark-sql_2.10

/**
 * Creates a reader for one column, decoding its dictionary page if there is one and recording
 * the total number of values reported by the page reader.
 *
 * @param descriptor descriptor of the column being read
 * @param pageReader reader the dictionary page and total value count are taken from
 * @throws IOException if the dictionary cannot be decoded or the total value count is 0
 */
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
    throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary page for this column.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      // Re-throw with the column attached so the failing column can be identified.
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (totalValueCount == 0) {
    throw new IOException("totalValueCount == 0");
  }
}

代码示例来源:origin: org.apache.spark/spark-sql

/**
 * Reads a V1 data page: sets up the repetition-level and definition-level decoders, then
 * initialises the data reader from the rest of the page bytes.
 *
 * @param page the V1 data page to read
 * @throws IOException if any decoder cannot be initialised from the page
 * @throws UnsupportedOperationException if definition levels are present and not RLE encoded
 */
private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

  // Initialize the decoders.
  // Definition levels must be RLE encoded unless the column's max definition level is 0.
  if (!(page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0)) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  this.defColumn = new VectorizedRleValuesReader(
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));
  ValuesReader dlReader = this.defColumn;

  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    ByteBufferInputStream in = page.getBytes().toInputStream();
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

代码示例来源:origin: org.apache.drill.exec/drill-java-exec

/**
 * Decodes the dictionary page carried by {@code readStatus} into {@code this.dictionary} and
 * adds the elapsed decode time to {@code stats.timeDictPageDecode}.
 *
 * @param readStatus read result providing the page header and the page data
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise the dictionary
 * @throws UserException declared by the signature; any exception raised while decoding is passed
 *     to {@code handleAndThrowException}
 */
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
    throws UserException {
  try {
    pageHeader = readStatus.getPageHeader();
    final int uncompressedLength = pageHeader.getUncompressed_page_size();
    final DrillBuf dictBuffer = getDecompressedPageData(readStatus);

    // The timer starts only after the page data has been obtained.
    final Stopwatch decodeTimer = Stopwatch.createStarted();
    allocatedDictionaryBuffers.add(dictBuffer);
    final DictionaryPage dictPage = new DictionaryPage(
        asBytesInput(dictBuffer, 0, uncompressedLength),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        valueOf(pageHeader.dictionary_page_header.encoding.name()));
    this.dictionary = dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);

    stats.timeDictPageDecode.addAndGet(decodeTimer.elapsed(TimeUnit.NANOSECONDS));
  } catch (Exception e) {
    handleAndThrowException(e, "Error decoding dictionary page.");
  }
}

相关文章