Usage of the org.apache.spark.sql.Row.schema() method, with code examples

This article collects a number of Java code examples for the org.apache.spark.sql.Row.schema method and shows how Row.schema is used in practice. The examples come mainly from platforms such as GitHub, Stack Overflow, and Maven, extracted from selected projects, and should serve as a useful reference. Details of the Row.schema method are as follows:
Package path: org.apache.spark.sql.Row
Class name: Row
Method name: schema

About Row.schema

Row.schema() returns the StructType that describes the row's fields; for rows constructed without an associated schema it returns null.
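
Before the collected examples, here is a minimal usage sketch. The SparkSession setup, the schema, and the sample data below are illustrative assumptions for this sketch only; they are not taken from the projects referenced later:

import java.util.Collections;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class RowSchemaExample {
  public static void main(String[] args) {
    // Local SparkSession purely for illustration
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("RowSchemaExample")
        .getOrCreate();

    StructType schema = new StructType()
        .add("name", DataTypes.StringType)
        .add("age", DataTypes.IntegerType);

    Dataset<Row> df = spark.createDataFrame(
        Collections.singletonList(RowFactory.create("alice", 30)), schema);

    Row first = df.first();
    // Rows collected from a DataFrame carry their schema; rows built directly
    // with RowFactory.create(...) alone return null from schema()
    for (String fieldName : first.schema().fieldNames()) {
      System.out.println(fieldName + ": " + first.schema().apply(fieldName).dataType());
    }

    spark.stop();
  }
}

Most of the snippets that follow use the same pattern: they call row.schema().fieldNames() or row.schema().fields() to drive generic, schema-aware processing of a Row.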

Code examples

Code example source: origin: cloudera-labs/envelope

/**
 * True if the provided row contains a mutation type field.
 */
public static boolean hasMutationTypeField(Row row) {
 for (String fieldName : row.schema().fieldNames()) {
  if (fieldName.equals(MutationType.MUTATION_TYPE_FIELD_NAME)) {
   return true;
  }
 }
 
 return false;
}

Code example source: origin: cloudera-labs/envelope

// Collect the column families referenced by the row's fields, skipping the special "rowkey" mapping
private Set<String> getColumnFamilies(Row row) {
 Set<String> families = Sets.newHashSet();
 
 for (String fieldName : row.schema().fieldNames()) {
  ColumnDef def = columns.get(fieldName);
  if (!def.cf.equals("rowkey")) {
   families.add(def.cf);
  }
 }
 
 return families;
}

Code example source: origin: cloudera-labs/envelope

// True if the row's fields are exactly a leading prefix of the key columns
private boolean filtersRowKeyPrefix(Row row) {
 Set<String> rowColumnNames = Sets.newHashSet(row.schema().fieldNames());
 Set<String> prefixColumnNames = Sets.newHashSet(keyColumns.subList(0, rowColumnNames.size()));
 
 return rowColumnNames.equals(prefixColumnNames);
}

Code example source: origin: cloudera-labs/envelope

// True if the row provides a value for every key column
private boolean filtersEntireRowKey(Row row) {
 for (String keyColumn : keyColumns) {
  if (!Arrays.asList(row.schema().fieldNames()).contains(keyColumn)) {
   return false;
  }
 }
 
 return true;
}

Code example source: origin: cloudera-labs/envelope

// Serialize the row as UTF-8 bytes of "/fieldName=value" pairs
private byte[] serializeRow(Row row) throws IOException {
 StringBuilder sb = new StringBuilder();
 
 for (StructField field : row.schema().fields()) {
  sb.append("/");
  sb.append(field.name());
  sb.append("=");
  sb.append(RowUtils.get(row, field.name()));
 }
 byte[] serialized = sb.toString().getBytes(Charsets.UTF_8);
 
 return serialized;
}

Code example source: origin: cloudera-labs/envelope

// Return a copy of the row with the given field removed from both the schema and the values
public static Row remove(Row row, String fieldName) {
 List<StructField> removedFields = Lists.newArrayList(row.schema().fields());
 removedFields.remove(row.fieldIndex(fieldName));
 StructType removedSchema = new StructType(removedFields.toArray(new StructField[removedFields.size()]));
 
 List<Object> removedValues = Lists.newArrayList(RowUtils.valuesFor(row));
 removedValues.remove(row.fieldIndex(fieldName));
 
 return new RowWithSchema(removedSchema, removedValues.toArray());
}

Code example source: origin: uk.gov.gchq.gaffer/spark-library

// Field names that are not reserved property names and are not null in this row
private Stream<String> getPropertyNames(final Row row) {
    return Arrays.stream(row.schema().fieldNames())
        .filter(f -> !ReservedPropertyNames.contains(f))
        .filter(n -> !row.isNullAt(row.fieldIndex(n)));
  }
}

Code example source: origin: cloudera-labs/envelope

// Extract the key fields from the arrived row, recording the time taken in an accumulator
@Override
 public Row call(Row arrived) throws Exception {
  long startTime = System.nanoTime();
  if (schema == null) {
   schema = RowUtils.subsetSchema(arrived.schema(), keyFieldNames);
  }
  Row key = RowUtils.subsetRow(arrived, schema);
  
  long endTime = System.nanoTime();
  accumulators.getDoubleAccumulators().get(ACCUMULATOR_SECONDS_EXTRACTING_KEYS).add(
    (endTime - startTime) / 1000.0 / 1000.0 / 1000.0);
  return key;
 }
}

Code example source: origin: cloudera-labs/envelope

// Build an HBase filter list requiring equality on each non-rowkey column present in the filter row
private FilterList getColumnValueFilters(Row row) {
 FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
 Set<String> filterColumnNames = Sets.newHashSet(row.schema().fieldNames());
 
 for (Map.Entry<String, ColumnDef> column : columns.entrySet()) {
  if (!column.getValue().cf.equals("rowkey")) {
   if (filterColumnNames.contains(column.getKey())) {
    byte[] value = getColumnValueAsBytes(column.getValue().name, column.getValue().type, row);
    if (value != null) {
     SingleColumnValueFilter columnValueFilter = new SingleColumnValueFilter(
       Bytes.toBytes(column.getValue().cf),
       Bytes.toBytes(column.getValue().name),
       CompareFilter.CompareOp.EQUAL,
       value
     );
     filterList.addFilter(columnValueFilter);
    }
   }
  }
 }
 
 return filterList;
}

Code example source: origin: cloudera-labs/envelope

// Return a copy of the row with the given field's value replaced
public static Row set(Row row, String fieldName, Object replacement) {
 Object[] values = new Object[row.length()];
 for (int i = 0; i < row.schema().fields().length; i++) {
  if (i == row.fieldIndex(fieldName)) {
   values[i] = replacement;
  } else {
   values[i] = row.get(i);
  }
 }
 return new RowWithSchema(row.schema(), values);
}

Code example source: origin: cloudera-labs/envelope

// True if the row matches the filter on every field the filter defines
private boolean matchesValueFilter(Row row, Row filter) {
 for (String filterFieldName : filter.schema().fieldNames()) {
  Object rowValue = row.get(row.fieldIndex(filterFieldName));
  Object filterValue = RowUtils.get(filter, filterFieldName);
  
  if (!rowValue.equals(filterValue)) {
   return false;
  }
 }
 
 return true;
}

Code example source: origin: cloudera-labs/envelope

// When carry-forward is enabled in the config, fill null fields in 'into' with the corresponding values from 'from'
private Row carryForwardWhenNull(Row into, Row from) {
 if (!config.hasPath(CARRY_FORWARD_CONFIG_NAME) || !config.getBoolean(CARRY_FORWARD_CONFIG_NAME)) {
  return into;
 }
 for (StructField field : into.schema().fields()) {
  String fieldName = field.name();
  if (RowUtils.get(into, fieldName) == null && RowUtils.get(from, fieldName) != null) {
   into = RowUtils.set(into, fieldName, RowUtils.get(from, fieldName));
  }
 }
 return into;
}

Code example source: origin: cloudera-labs/envelope

// Return a new row with the given field and value appended
public static Row append(Row row, String fieldName, DataType fieldType, Object value) {
 StructType appendedSchema = row.schema().add(fieldName, fieldType);
 Object[] appendedValues = ObjectArrays.concat(valuesFor(row), value);
 Row appendedRow = new RowWithSchema(appendedSchema, appendedValues);
 return appendedRow;
}

Code example source: origin: cloudera-labs/envelope

// Build a Kudu scanner with one IN-list predicate per filter field, collecting values across all filter rows
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
 List<Row> filtersList = Lists.newArrayList(filters);
 if (filtersList.size() == 0) {
  throw new RuntimeException("Kudu existing filter was not provided.");
 }
 
 if (filtersList.get(0).schema() == null) {
  throw new RuntimeException("Kudu existing filter did not contain a schema.");
 }
 
 if (hasAccumulators()) {
  accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
  accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filtersList.size());
 }
 
 KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);
 for (String fieldName : filtersList.get(0).schema().fieldNames()) {
  ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);
  List<Object> columnValues = Lists.newArrayList();
  for (Row filter : filtersList) {
   Object columnValue = RowUtils.get(filter, fieldName);
   columnValues.add(columnValue);
  }
  KuduPredicate predicate = KuduPredicate.newInListPredicate(columnSchema, columnValues);
  builder = builder.addPredicate(predicate);
 }
 KuduScanner scanner = builder.build();
 return scanner;
}

Code example source: origin: cloudera-labs/envelope

@Override
 public Iterator<Row> call(Row row) throws Exception {
  // Retrieve the Command pipeline via ThreadLocal
  Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId);
  if (null == pipeline) {
   pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true);
  }
  // Convert each Row into a Record
  StructType inputSchema = row.schema();
  if (null == inputSchema) {
   throw new RuntimeException("Row does not have an associated StructType schema");
  }
  Record inputRecord = new Record();
  String[] fieldNames = inputSchema.fieldNames();
  // TODO : Confirm nested object conversion
  for (int i = 0; i < fieldNames.length; i++) {
   inputRecord.put(fieldNames[i], row.get(i));
  }
  // Process each Record via the Command pipeline
  List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty);
  // Convert each Record into a new Row
  List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size());
  for (Record record : outputRecords) {
   outputRows.add(MorphlineUtils.convertToRow(outputSchema, record));
  }
  return outputRows.iterator();
 }
};

Code example source: origin: uk.gov.gchq.gaffer/spark-library

// Convert a Row into an Edge (when a source column is present) or an Entity, then copy its non-null properties
@Override
public Element _apply(final Row row) {
  final Element element;
  final String group = row.getAs(SchemaToStructTypeConverter.GROUP);
  final Object source = ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.SRC_COL_NAME) ? row.getAs(SchemaToStructTypeConverter.SRC_COL_NAME) : null;
  if (null != source) {
    final Object destination = row.getAs(SchemaToStructTypeConverter.DST_COL_NAME);
    final boolean directed = row.getAs(SchemaToStructTypeConverter.DIRECTED_COL_NAME);
    final MatchedVertex matchedVertex;
    if (ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME)) {
      final String matchedVertexStr = row.getAs(SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME);
      matchedVertex = null != matchedVertexStr ? MatchedVertex.valueOf(matchedVertexStr) : null;
    } else {
      matchedVertex = null;
    }
    element = new Edge(group, source, destination, directed, matchedVertex, null);
  } else {
    final Object vertex = ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.VERTEX_COL_NAME) ? row.getAs(SchemaToStructTypeConverter.VERTEX_COL_NAME) : row.getAs(SchemaToStructTypeConverter.ID);
    element = new Entity(group, vertex);
  }
  getPropertyNames(row).forEach(n -> {
    element.putProperty(n, row.getAs(n));
  });
  return element;
}

Code example source: origin: cloudera-labs/envelope

// Fill null fields in 'into' from 'from' when carry-forward is enabled
private Row carryForwardWhenNull(Row into, Row from) {
 if (!doesCarryForward()) {
  return into;
 }
 for (StructField field : into.schema().fields()) {
  String fieldName = field.name();
  if (RowUtils.get(into, fieldName) == null && RowUtils.get(from, fieldName) != null) {
   into = RowUtils.set(into, fieldName, RowUtils.get(from, fieldName));
  }
 }
 return into;
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testAppendMutationTypeField() {
 Row rowWithout = new RowWithSchema(schemaWithoutMT, "hello");
 Row rowWith = PlannerUtils.appendMutationTypeField(rowWithout);
 
 assertEquals(rowWith.schema(), schemaWithMT);
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testRemoveMutationTypeField() {
 Row rowWith = new RowWithSchema(schemaWithMT, "hello", MutationType.DELETE.toString());
 Row rowWithout = PlannerUtils.removeMutationTypeField(rowWith);
 
 assertEquals(rowWithout.schema(), schemaWithoutMT);
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testToRowValueMapRowNested(
  final @Mocked Row inputRow,
  final @Mocked StructType innerSchema,
  final @Mocked StructType outerSchema
) {
 DataType field = DataTypes.createMapType(DataTypes.StringType,
   DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
 );
 Map<Object, Object> expectedInnerMap = Maps.newHashMap();
 expectedInnerMap.put("field1", 1);
 expectedInnerMap.put("field2", 2);
 Map<Object, Object> expectedOuterMap = Maps.newHashMap();
 expectedOuterMap.put("outer", expectedInnerMap);
 new Expectations() {{
  inputRow.schema(); returns(outerSchema, innerSchema);
  outerSchema.fieldNames(); result = new String[] {"outer"};
  innerSchema.fieldNames(); result = new String[] {"field1", "field2"};
  inputRow.get(0); returns(inputRow, 1);
  inputRow.get(1); result = 2;
 }};
 assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
