Usage of the org.apache.spark.sql.Row.schema() method, with code examples

This article collects a number of Java code examples for the org.apache.spark.sql.Row.schema method and shows how Row.schema is used in practice. The examples come mainly from platforms such as GitHub, Stack Overflow, and Maven, extracted from selected projects, and should serve as a useful reference. Details of the Row.schema method are as follows:
Package path: org.apache.spark.sql.Row
Class name: Row
Method name: schema

About Row.schema

Row.schema() returns the StructType that describes the row's fields; for rows constructed without an associated schema it returns null.
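
Before the collected examples, here is a minimal usage sketch. The SparkSession setup, the schema, and the sample data below are illustrative assumptions for this sketch only; they are not taken from the projects referenced later:

import java.util.Collections;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class RowSchemaExample {
  public static void main(String[] args) {
    // Local SparkSession purely for illustration
    SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("RowSchemaExample")
        .getOrCreate();

    StructType schema = new StructType()
        .add("name", DataTypes.StringType)
        .add("age", DataTypes.IntegerType);

    Dataset<Row> df = spark.createDataFrame(
        Collections.singletonList(RowFactory.create("alice", 30)), schema);

    Row first = df.first();
    // Rows collected from a DataFrame carry their schema; rows built directly
    // with RowFactory.create(...) alone return null from schema()
    for (String fieldName : first.schema().fieldNames()) {
      System.out.println(fieldName + ": " + first.schema().apply(fieldName).dataType());
    }

    spark.stop();
  }
}

Most of the snippets that follow use the same pattern: they call row.schema().fieldNames() or row.schema().fields() to drive generic, schema-aware processing of a Row.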

Code examples

Code example source: origin: cloudera-labs/envelope

/**
 * True if the provided row contains a mutation type field.
 */
public static boolean hasMutationTypeField(Row row) {
 for (String fieldName : row.schema().fieldNames()) {
  if (fieldName.equals(MutationType.MUTATION_TYPE_FIELD_NAME)) {
   return true;
  }
 }
 
 return false;
}

Code example source: origin: cloudera-labs/envelope

// Collect the column families referenced by the row's fields, skipping the special "rowkey" mapping
private Set<String> getColumnFamilies(Row row) {
 Set<String> families = Sets.newHashSet();
 
 for (String fieldName : row.schema().fieldNames()) {
  ColumnDef def = columns.get(fieldName);
  if (!def.cf.equals("rowkey")) {
   families.add(def.cf);
  }
 }
 
 return families;
}

Code example source: origin: cloudera-labs/envelope

// True if the row's fields are exactly a leading prefix of the key columns
private boolean filtersRowKeyPrefix(Row row) {
 Set<String> rowColumnNames = Sets.newHashSet(row.schema().fieldNames());
 Set<String> prefixColumnNames = Sets.newHashSet(keyColumns.subList(0, rowColumnNames.size()));
 
 return rowColumnNames.equals(prefixColumnNames);
}

Code example source: origin: cloudera-labs/envelope

// True if the row provides a value for every key column
private boolean filtersEntireRowKey(Row row) {
 for (String keyColumn : keyColumns) {
  if (!Arrays.asList(row.schema().fieldNames()).contains(keyColumn)) {
   return false;
  }
 }
 
 return true;
}

Code example source: origin: cloudera-labs/envelope

// Serialize the row as UTF-8 bytes of "/fieldName=value" pairs
private byte[] serializeRow(Row row) throws IOException {
 StringBuilder sb = new StringBuilder();
 
 for (StructField field : row.schema().fields()) {
  sb.append("/");
  sb.append(field.name());
  sb.append("=");
  sb.append(RowUtils.get(row, field.name()));
 }
 byte[] serialized = sb.toString().getBytes(Charsets.UTF_8);
 
 return serialized;
}

Code example source: origin: cloudera-labs/envelope

// Return a copy of the row with the given field removed from both the schema and the values
public static Row remove(Row row, String fieldName) {
 List<StructField> removedFields = Lists.newArrayList(row.schema().fields());
 removedFields.remove(row.fieldIndex(fieldName));
 StructType removedSchema = new StructType(removedFields.toArray(new StructField[removedFields.size()]));
 
 List<Object> removedValues = Lists.newArrayList(RowUtils.valuesFor(row));
 removedValues.remove(row.fieldIndex(fieldName));
 
 return new RowWithSchema(removedSchema, removedValues.toArray());
}

Code example source: origin: uk.gov.gchq.gaffer/spark-library

// Field names that are not reserved property names and are not null in this row
private Stream<String> getPropertyNames(final Row row) {
    return Arrays.stream(row.schema().fieldNames())
        .filter(f -> !ReservedPropertyNames.contains(f))
        .filter(n -> !row.isNullAt(row.fieldIndex(n)));
  }
}

Code example source: origin: cloudera-labs/envelope

// Extract the key fields from the arrived row, recording the time taken in an accumulator
@Override
 public Row call(Row arrived) throws Exception {
  long startTime = System.nanoTime();
  if (schema == null) {
   schema = RowUtils.subsetSchema(arrived.schema(), keyFieldNames);
  }
  Row key = RowUtils.subsetRow(arrived, schema);
  
  long endTime = System.nanoTime();
  accumulators.getDoubleAccumulators().get(ACCUMULATOR_SECONDS_EXTRACTING_KEYS).add(
    (endTime - startTime) / 1000.0 / 1000.0 / 1000.0);
  return key;
 }
}

Code example source: origin: cloudera-labs/envelope

// Build an HBase filter list requiring equality on each non-rowkey column present in the filter row
private FilterList getColumnValueFilters(Row row) {
 FilterList filterList = new FilterList(Operator.MUST_PASS_ALL);
 Set<String> filterColumnNames = Sets.newHashSet(row.schema().fieldNames());
 
 for (Map.Entry<String, ColumnDef> column : columns.entrySet()) {
  if (!column.getValue().cf.equals("rowkey")) {
   if (filterColumnNames.contains(column.getKey())) {
    byte[] value = getColumnValueAsBytes(column.getValue().name, column.getValue().type, row);
    if (value != null) {
     SingleColumnValueFilter columnValueFilter = new SingleColumnValueFilter(
       Bytes.toBytes(column.getValue().cf),
       Bytes.toBytes(column.getValue().name),
       CompareFilter.CompareOp.EQUAL,
       value
     );
     filterList.addFilter(columnValueFilter);
    }
   }
  }
 }
 
 return filterList;
}

Code example source: origin: cloudera-labs/envelope

// Return a copy of the row with the given field's value replaced
public static Row set(Row row, String fieldName, Object replacement) {
 Object[] values = new Object[row.length()];
 for (int i = 0; i < row.schema().fields().length; i++) {
  if (i == row.fieldIndex(fieldName)) {
   values[i] = replacement;
  } else {
   values[i] = row.get(i);
  }
 }
 return new RowWithSchema(row.schema(), values);
}

Code example source: origin: cloudera-labs/envelope

// True if the row matches the filter on every field the filter defines
private boolean matchesValueFilter(Row row, Row filter) {
 for (String filterFieldName : filter.schema().fieldNames()) {
  Object rowValue = row.get(row.fieldIndex(filterFieldName));
  Object filterValue = RowUtils.get(filter, filterFieldName);
  
  if (!rowValue.equals(filterValue)) {
   return false;
  }
 }
 
 return true;
}

Code example source: origin: cloudera-labs/envelope

// When carry-forward is enabled in the config, fill null fields in 'into' with the corresponding values from 'from'
private Row carryForwardWhenNull(Row into, Row from) {
 if (!config.hasPath(CARRY_FORWARD_CONFIG_NAME) || !config.getBoolean(CARRY_FORWARD_CONFIG_NAME)) {
  return into;
 }
 for (StructField field : into.schema().fields()) {
  String fieldName = field.name();
  if (RowUtils.get(into, fieldName) == null && RowUtils.get(from, fieldName) != null) {
   into = RowUtils.set(into, fieldName, RowUtils.get(from, fieldName));
  }
 }
 return into;
}

Code example source: origin: cloudera-labs/envelope

// Return a new row with the given field and value appended
public static Row append(Row row, String fieldName, DataType fieldType, Object value) {
 StructType appendedSchema = row.schema().add(fieldName, fieldType);
 Object[] appendedValues = ObjectArrays.concat(valuesFor(row), value);
 Row appendedRow = new RowWithSchema(appendedSchema, appendedValues);
 return appendedRow;
}

Code example source: origin: cloudera-labs/envelope

// Build a Kudu scanner with one IN-list predicate per filter field, collecting values across all filter rows
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
 List<Row> filtersList = Lists.newArrayList(filters);
 if (filtersList.size() == 0) {
  throw new RuntimeException("Kudu existing filter was not provided.");
 }
 
 if (filtersList.get(0).schema() == null) {
  throw new RuntimeException("Kudu existing filter did not contain a schema.");
 }
 
 if (hasAccumulators()) {
  accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
  accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filtersList.size());
 }
 
 KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);
 for (String fieldName : filtersList.get(0).schema().fieldNames()) {
  ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);
  List<Object> columnValues = Lists.newArrayList();
  for (Row filter : filtersList) {
   Object columnValue = RowUtils.get(filter, fieldName);
   columnValues.add(columnValue);
  }
  KuduPredicate predicate = KuduPredicate.newInListPredicate(columnSchema, columnValues);
  builder = builder.addPredicate(predicate);
 }
 KuduScanner scanner = builder.build();
 return scanner;
}

Code example source: origin: cloudera-labs/envelope

@Override
 public Iterator<Row> call(Row row) throws Exception {
  // Retrieve the Command pipeline via ThreadLocal
  Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId);
  if (null == pipeline) {
   pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true);
  }
  // Convert each Row into a Record
  StructType inputSchema = row.schema();
  if (null == inputSchema) {
   throw new RuntimeException("Row does not have an associated StructType schema");
  }
  Record inputRecord = new Record();
  String[] fieldNames = inputSchema.fieldNames();
  // TODO : Confirm nested object conversion
  for (int i = 0; i < fieldNames.length; i++) {
   inputRecord.put(fieldNames[i], row.get(i));
  }
  // Process each Record via the Command pipeline
  List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty);
  // Convert each Record into a new Row
  List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size());
  for (Record record : outputRecords) {
   outputRows.add(MorphlineUtils.convertToRow(outputSchema, record));
  }
  return outputRows.iterator();
 }
};

Code example source: origin: uk.gov.gchq.gaffer/spark-library

// Convert a Row into an Edge (when a source column is present) or an Entity, then copy its non-null properties
@Override
public Element _apply(final Row row) {
  final Element element;
  final String group = row.getAs(SchemaToStructTypeConverter.GROUP);
  final Object source = ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.SRC_COL_NAME) ? row.getAs(SchemaToStructTypeConverter.SRC_COL_NAME) : null;
  if (null != source) {
    final Object destination = row.getAs(SchemaToStructTypeConverter.DST_COL_NAME);
    final boolean directed = row.getAs(SchemaToStructTypeConverter.DIRECTED_COL_NAME);
    final MatchedVertex matchedVertex;
    if (ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME)) {
      final String matchedVertexStr = row.getAs(SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME);
      matchedVertex = null != matchedVertexStr ? MatchedVertex.valueOf(matchedVertexStr) : null;
    } else {
      matchedVertex = null;
    }
    element = new Edge(group, source, destination, directed, matchedVertex, null);
  } else {
    final Object vertex = ArrayUtils.contains(row.schema().fieldNames(), SchemaToStructTypeConverter.VERTEX_COL_NAME) ? row.getAs(SchemaToStructTypeConverter.VERTEX_COL_NAME) : row.getAs(SchemaToStructTypeConverter.ID);
    element = new Entity(group, vertex);
  }
  getPropertyNames(row).forEach(n -> {
    element.putProperty(n, row.getAs(n));
  });
  return element;
}

Code example source: origin: cloudera-labs/envelope

// Fill null fields in 'into' from 'from' when carry-forward is enabled
private Row carryForwardWhenNull(Row into, Row from) {
 if (!doesCarryForward()) {
  return into;
 }
 for (StructField field : into.schema().fields()) {
  String fieldName = field.name();
  if (RowUtils.get(into, fieldName) == null && RowUtils.get(from, fieldName) != null) {
   into = RowUtils.set(into, fieldName, RowUtils.get(from, fieldName));
  }
 }
 return into;
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testAppendMutationTypeField() {
 Row rowWithout = new RowWithSchema(schemaWithoutMT, "hello");
 Row rowWith = PlannerUtils.appendMutationTypeField(rowWithout);
 
 assertEquals(rowWith.schema(), schemaWithMT);
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testRemoveMutationTypeField() {
 Row rowWith = new RowWithSchema(schemaWithMT, "hello", MutationType.DELETE.toString());
 Row rowWithout = PlannerUtils.removeMutationTypeField(rowWith);
 
 assertEquals(rowWithout.schema(), schemaWithoutMT);
}

Code example source: origin: cloudera-labs/envelope

@Test
public void testToRowValueMapRowNested(
  final @Mocked Row inputRow,
  final @Mocked StructType innerSchema,
  final @Mocked StructType outerSchema
) {
 DataType field = DataTypes.createMapType(DataTypes.StringType,
   DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
 );
 Map<Object, Object> expectedInnerMap = Maps.newHashMap();
 expectedInnerMap.put("field1", 1);
 expectedInnerMap.put("field2", 2);
 Map<Object, Object> expectedOuterMap = Maps.newHashMap();
 expectedOuterMap.put("outer", expectedInnerMap);
 new Expectations() {{
  inputRow.schema(); returns(outerSchema, innerSchema);
  outerSchema.fieldNames(); result = new String[] {"outer"};
  innerSchema.fieldNames(); result = new String[] {"field1", "field2"};
  inputRow.get(0); returns(inputRow, 1);
  inputRow.get(1); result = 2;
 }};
 assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
