本文整理了 Java 中 org.apache.spark.sql.Row.schema 方法的一些代码示例,展示了 Row.schema 的具体用法。这些代码示例主要来源于 GitHub、Stack Overflow、Maven 等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Row.schema 方法的具体详情如下:
包路径:org.apache.spark.sql.Row
类名称:Row
方法名:schema
方法介绍:暂无
代码示例来源:origin: cloudera-labs/envelope
/**
 * Reports whether the schema of the given row declares the mutation type field.
 *
 * @param row the row whose schema field names are inspected
 * @return {@code true} if any field name equals
 *         {@code MutationType.MUTATION_TYPE_FIELD_NAME}; {@code false} otherwise
 */
public static boolean hasMutationTypeField(Row row) {
  String[] names = row.schema().fieldNames();
  for (int i = 0; i < names.length; i++) {
    if (names[i].equals(MutationType.MUTATION_TYPE_FIELD_NAME)) {
      return true;
    }
  }
  return false;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Gathers the distinct column families of the columns named by the row's schema.
 * Columns whose family is {@code "rowkey"} are left out of the result.
 *
 * @param row the row whose schema field names are looked up in {@code columns}
 * @return the set of column family names referenced by the row
 */
private Set<String> getColumnFamilies(Row row) {
  Set<String> result = Sets.newHashSet();
  String[] names = row.schema().fieldNames();
  for (int i = 0; i < names.length; i++) {
    // NOTE(review): a field name with no entry in `columns` would fail here with a
    // NullPointerException -- confirm callers only pass mapped columns.
    String family = columns.get(names[i]).cf;
    if (family.equals("rowkey")) {
      continue;
    }
    result.add(family);
  }
  return result;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * True if the set of field names in the row's schema equals the set formed by the first
 * N entries of {@code keyColumns}, where N is the number of distinct field names in the row.
 *
 * @param row the filter row whose schema field names are compared with the key columns
 * @return {@code true} if the row's field names are exactly a leading run of the key columns;
 *         {@code false} otherwise, including when the row names more columns than the key has
 */
private boolean filtersRowKeyPrefix(Row row) {
  Set<String> rowColumnNames = Sets.newHashSet(row.schema().fieldNames());

  // A row naming more distinct columns than the key defines cannot be a key prefix.
  // Without this guard, subList below would throw IndexOutOfBoundsException.
  if (rowColumnNames.size() > keyColumns.size()) {
    return false;
  }

  Set<String> prefixColumnNames = Sets.newHashSet(keyColumns.subList(0, rowColumnNames.size()));
  return rowColumnNames.equals(prefixColumnNames);
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * True if every entry of {@code keyColumns} appears among the field names of the row's schema.
 *
 * @param row the filter row whose schema field names are searched
 * @return {@code false} as soon as one key column is missing from the row; {@code true} otherwise
 */
private boolean filtersEntireRowKey(Row row) {
  for (String keyColumn : keyColumns) {
    boolean present = Arrays.asList(row.schema().fieldNames()).contains(keyColumn);
    if (present) {
      continue;
    }
    return false;
  }
  return true;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Renders the row as text of the form {@code /name=value/name=value...}, one segment per
 * schema field in schema order, and encodes that text as UTF-8.
 *
 * @param row the row to serialize
 * @return the UTF-8 bytes of the rendered text
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private byte[] serializeRow(Row row) throws IOException {
  StringBuilder text = new StringBuilder();
  for (StructField field : row.schema().fields()) {
    String name = field.name();
    text.append("/").append(name).append("=").append(RowUtils.get(row, name));
  }
  return text.toString().getBytes(Charsets.UTF_8);
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a new row that has the same fields and values as the given row, minus the named field.
 *
 * @param row the source row; it is not modified
 * @param fieldName the name of the field to drop, resolved through {@code row.fieldIndex}
 * @return a {@code RowWithSchema} holding the remaining fields and their values
 */
public static Row remove(Row row, String fieldName) {
  List<StructField> keptFields = Lists.newArrayList(row.schema().fields());
  int position = row.fieldIndex(fieldName);
  // position is a primitive int, so this is removal by index, not by element.
  keptFields.remove(position);
  StructType keptSchema = new StructType(keptFields.toArray(new StructField[0]));

  List<Object> keptValues = Lists.newArrayList(RowUtils.valuesFor(row));
  keptValues.remove(position);

  return new RowWithSchema(keptSchema, keptValues.toArray());
}
代码示例来源:origin: uk.gov.gchq.gaffer/spark-library
/**
 * Streams the schema field names of the row that are property columns with a value:
 * names accepted by {@code ReservedPropertyNames.contains} are dropped, as are fields
 * whose value in the row is null.
 *
 * @param row the row to inspect
 * @return a lazy stream of the remaining field names, in schema order
 */
private Stream<String> getPropertyNames(final Row row) {
  final Stream<String> fieldNames = Arrays.stream(row.schema().fieldNames());
  return fieldNames.filter(
      name -> !ReservedPropertyNames.contains(name) && !row.isNullAt(row.fieldIndex(name)));
}
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Extracts the key fields of an arrived row and adds the time spent doing so, in seconds,
 * to the {@code ACCUMULATOR_SECONDS_EXTRACTING_KEYS} accumulator.
 *
 * @param arrived the incoming row
 * @return the subset of {@code arrived} described by the key schema
 */
@Override
public Row call(Row arrived) throws Exception {
  final long begunAt = System.nanoTime();

  // The key schema is derived from the first row seen and reused afterwards.
  if (null == schema) {
    schema = RowUtils.subsetSchema(arrived.schema(), keyFieldNames);
  }
  final Row keyRow = RowUtils.subsetRow(arrived, schema);

  final long finishedAt = System.nanoTime();
  accumulators.getDoubleAccumulators().get(ACCUMULATOR_SECONDS_EXTRACTING_KEYS).add(
      (finishedAt - begunAt) / 1000.0 / 1000.0 / 1000.0);

  return keyRow;
}
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a must-pass-all filter list with one equality filter per configured column that
 * is not in the {@code "rowkey"} family, is named in the row's schema, and yields a
 * non-null byte value from {@code getColumnValueAsBytes}.
 *
 * @param row the row supplying the values to filter on
 * @return the assembled filter list; it has no filters when no column qualifies
 */
private FilterList getColumnValueFilters(Row row) {
  FilterList filters = new FilterList(Operator.MUST_PASS_ALL);
  Set<String> presentNames = Sets.newHashSet(row.schema().fieldNames());

  for (Map.Entry<String, ColumnDef> entry : columns.entrySet()) {
    ColumnDef def = entry.getValue();

    // Skip row key columns and columns the row does not carry.
    if (def.cf.equals("rowkey") || !presentNames.contains(entry.getKey())) {
      continue;
    }

    byte[] value = getColumnValueAsBytes(def.name, def.type, row);
    if (value == null) {
      continue;
    }

    filters.addFilter(new SingleColumnValueFilter(
        Bytes.toBytes(def.cf),
        Bytes.toBytes(def.name),
        CompareFilter.CompareOp.EQUAL,
        value
    ));
  }

  return filters;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a new row with the same schema and values as the given row, except that the
 * named field holds {@code replacement}.
 *
 * @param row the source row; it is not modified
 * @param fieldName the name of the field to overwrite, resolved through {@code row.fieldIndex}
 * @param replacement the value to place in that field; may be null
 * @return a {@code RowWithSchema} carrying the original schema and the updated values
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // Resolve the target position once instead of on every loop iteration.
  int targetIndex = row.fieldIndex(fieldName);

  // The loop bound and the array size share one source, so they cannot disagree.
  Object[] values = new Object[row.length()];
  for (int i = 0; i < values.length; i++) {
    values[i] = (i == targetIndex) ? replacement : row.get(i);
  }

  return new RowWithSchema(row.schema(), values);
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * True if, for every field named in the filter's schema, the row holds a value equal to
 * the filter's value for that field. Two null values are treated as equal; a null on one
 * side only is a mismatch.
 *
 * @param row the candidate row; it must contain every field the filter names
 * @param filter the row whose fields and values define the match criteria
 * @return {@code false} at the first differing field; {@code true} if all fields match
 */
private boolean matchesValueFilter(Row row, Row filter) {
  for (String filterFieldName : filter.schema().fieldNames()) {
    Object rowValue = row.get(row.fieldIndex(filterFieldName));
    Object filterValue = RowUtils.get(filter, filterFieldName);

    // Null-safe comparison: a null row value must not raise NullPointerException.
    boolean same = (rowValue == null) ? (filterValue == null) : rowValue.equals(filterValue);
    if (!same) {
      return false;
    }
  }
  return true;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * When carry-forward is enabled in the configuration, fills each null field of
 * {@code into} with the non-null value of the same-named field in {@code from}.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying replacement values
 * @return {@code into} unchanged when the config path is absent or false; otherwise the
 *         row produced by applying {@code RowUtils.set} for each carried value
 */
private Row carryForwardWhenNull(Row into, Row from) {
  boolean enabled =
      config.hasPath(CARRY_FORWARD_CONFIG_NAME) && config.getBoolean(CARRY_FORWARD_CONFIG_NAME);
  if (!enabled) {
    return into;
  }

  Row merged = into;
  for (StructField field : into.schema().fields()) {
    String name = field.name();
    if (RowUtils.get(merged, name) != null) {
      continue;
    }
    Object carried = RowUtils.get(from, name);
    if (carried != null) {
      merged = RowUtils.set(merged, name, carried);
    }
  }
  return merged;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a new row that extends the given row with one extra trailing field.
 *
 * @param row the source row; it is not modified
 * @param fieldName the name of the field to add to the schema
 * @param fieldType the data type of the added field
 * @param value the value for the added field
 * @return a {@code RowWithSchema} with the extended schema and the extended values
 */
public static Row append(Row row, String fieldName, DataType fieldType, Object value) {
  StructType widenedSchema = row.schema().add(fieldName, fieldType);
  return new RowWithSchema(widenedSchema, ObjectArrays.concat(valuesFor(row), value));
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Builds a Kudu scanner over the table with one in-list predicate per field of the first
 * filter row's schema; each predicate lists that field's value from every filter row.
 *
 * @param filters the filter rows; at least one is required and the first must carry a schema
 * @param table the Kudu table to scan
 * @return the built scanner
 * @throws RuntimeException if no filter is given or the first filter has no schema
 * @throws KuduException declared by the signature for the Kudu calls made here
 */
private KuduScanner scannerForFilters(Iterable<Row> filters, KuduTable table) throws KuduException {
  List<Row> filterRows = Lists.newArrayList(filters);

  if (filterRows.isEmpty()) {
    throw new RuntimeException("Kudu existing filter was not provided.");
  }

  Row firstFilter = filterRows.get(0);
  if (firstFilter.schema() == null) {
    throw new RuntimeException("Kudu existing filter did not contain a schema.");
  }

  if (hasAccumulators()) {
    accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_SCANNERS).add(1);
    accumulators.getLongAccumulators().get(ACCUMULATOR_NUMBER_OF_FILTERS_SCANNED).add(filterRows.size());
  }

  KuduScannerBuilder builder = getConnection().getClient().newScannerBuilder(table);

  // The first filter's schema decides which columns get a predicate.
  for (String fieldName : firstFilter.schema().fieldNames()) {
    ColumnSchema columnSchema = table.getSchema().getColumn(fieldName);

    List<Object> inValues = Lists.newArrayList();
    for (Row filterRow : filterRows) {
      inValues.add(RowUtils.get(filterRow, fieldName));
    }

    builder = builder.addPredicate(KuduPredicate.newInListPredicate(columnSchema, inValues));
  }

  return builder.build();
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Runs one row through the morphline pipeline: the row is copied into a {@code Record}
 * keyed by schema field name, the pipeline is executed, and every output record is
 * converted to a row using {@code outputSchema}.
 *
 * @param row the input row; it must carry a schema
 * @return an iterator over the converted output rows
 * @throws RuntimeException if the row has no schema
 */
@Override
public Iterator<Row> call(Row row) throws Exception {
  // Look up the pipeline, creating and registering it if none exists yet.
  Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId);
  if (pipeline == null) {
    pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true);
  }

  StructType inputSchema = row.schema();
  if (inputSchema == null) {
    throw new RuntimeException("Row does not have an associated StructType schema");
  }

  // Copy the row into a Record, pairing each field name with the value at the same position.
  // TODO : Confirm nested object conversion
  Record inputRecord = new Record();
  int position = 0;
  for (String name : inputSchema.fieldNames()) {
    inputRecord.put(name, row.get(position));
    position++;
  }

  List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty);

  List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size());
  for (Record outputRecord : outputRecords) {
    outputRows.add(MorphlineUtils.convertToRow(outputSchema, outputRecord));
  }
  return outputRows.iterator();
}
};
代码示例来源:origin: uk.gov.gchq.gaffer/spark-library
/**
 * Converts a row into an {@code Element}. If the row has a non-null source column value,
 * an {@code Edge} is built from the source, destination, directed and (when the column is
 * present) matched-vertex values; otherwise an {@code Entity} is built from the vertex
 * column, or from the id column when the vertex column is absent. Every name produced by
 * {@code getPropertyNames} is then copied onto the element as a property.
 *
 * @param row the row to convert
 * @return the edge or entity built from the row
 */
@Override
public Element _apply(final Row row) {
  final String group = row.getAs(SchemaToStructTypeConverter.GROUP);
  final String[] columnNames = row.schema().fieldNames();

  final Object source;
  if (ArrayUtils.contains(columnNames, SchemaToStructTypeConverter.SRC_COL_NAME)) {
    source = row.getAs(SchemaToStructTypeConverter.SRC_COL_NAME);
  } else {
    source = null;
  }

  final Element element;
  if (null == source) {
    final Object vertex;
    if (ArrayUtils.contains(columnNames, SchemaToStructTypeConverter.VERTEX_COL_NAME)) {
      vertex = row.getAs(SchemaToStructTypeConverter.VERTEX_COL_NAME);
    } else {
      vertex = row.getAs(SchemaToStructTypeConverter.ID);
    }
    element = new Entity(group, vertex);
  } else {
    final Object destination = row.getAs(SchemaToStructTypeConverter.DST_COL_NAME);
    final boolean directed = row.getAs(SchemaToStructTypeConverter.DIRECTED_COL_NAME);

    // The matched vertex stays null when its column is absent or holds null.
    MatchedVertex matchedVertex = null;
    if (ArrayUtils.contains(columnNames, SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME)) {
      final String matchedVertexStr = row.getAs(SchemaToStructTypeConverter.MATCHED_VERTEX_COL_NAME);
      if (null != matchedVertexStr) {
        matchedVertex = MatchedVertex.valueOf(matchedVertexStr);
      }
    }
    element = new Edge(group, source, destination, directed, matchedVertex, null);
  }

  getPropertyNames(row).forEach(name -> element.putProperty(name, row.getAs(name)));
  return element;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * When {@code doesCarryForward()} is true, fills each null field of {@code into} with the
 * non-null value of the same-named field in {@code from}.
 *
 * @param into the row whose null fields may be filled
 * @param from the row supplying replacement values
 * @return {@code into} unchanged when carry-forward is off; otherwise the row produced by
 *         applying {@code RowUtils.set} for each carried value
 */
private Row carryForwardWhenNull(Row into, Row from) {
  if (!doesCarryForward()) {
    return into;
  }

  Row merged = into;
  for (StructField field : into.schema().fields()) {
    String name = field.name();
    if (RowUtils.get(merged, name) != null) {
      continue;
    }
    Object carried = RowUtils.get(from, name);
    if (carried != null) {
      merged = RowUtils.set(merged, name, carried);
    }
  }
  return merged;
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Checks that appending the mutation type field to a row without it yields a row whose
 * schema equals {@code schemaWithMT}.
 */
@Test
public void testAppendMutationTypeField() {
  Row rowWithout = new RowWithSchema(schemaWithoutMT, "hello");
  Row rowWith = PlannerUtils.appendMutationTypeField(rowWithout);

  // assertEquals takes (expected, actual); this order keeps failure messages accurate.
  assertEquals(schemaWithMT, rowWith.schema());
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Checks that removing the mutation type field from a row that has it yields a row whose
 * schema equals {@code schemaWithoutMT}.
 */
@Test
public void testRemoveMutationTypeField() {
  Row rowWith = new RowWithSchema(schemaWithMT, "hello", MutationType.DELETE.toString());
  Row rowWithout = PlannerUtils.removeMutationTypeField(rowWith);

  // assertEquals takes (expected, actual); this order keeps failure messages accurate.
  assertEquals(schemaWithoutMT, rowWithout.schema());
}
代码示例来源:origin: cloudera-labs/envelope
/**
 * Checks that {@code RowUtils.toRowValue}, given a mocked row and a map-of-maps data type,
 * returns the expected nested map {@code {"outer": {"field1": 1, "field2": 2}}}.
 */
@Test
public void testToRowValueMapRowNested(
final @Mocked Row inputRow,
final @Mocked StructType innerSchema,
final @Mocked StructType outerSchema
) {
// Target type: map of string -> (map of string -> integer).
DataType field = DataTypes.createMapType(DataTypes.StringType,
DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
);
// Expected result: the inner map nested under the single key "outer".
Map<Object, Object> expectedInnerMap = Maps.newHashMap();
expectedInnerMap.put("field1", 1);
expectedInnerMap.put("field2", 2);
Map<Object, Object> expectedOuterMap = Maps.newHashMap();
expectedOuterMap.put("outer", expectedInnerMap);
// Recorded mock behaviour. The same mocked row stands in for both the outer and the
// nested row: successive schema() calls yield outerSchema then innerSchema, and
// successive get(0) calls yield the row itself and then 1.
new Expectations() {{
inputRow.schema(); returns(outerSchema, innerSchema);
outerSchema.fieldNames(); result = new String[] {"outer"};
innerSchema.fieldNames(); result = new String[] {"field1", "field2"};
inputRow.get(0); returns(inputRow, 1);
inputRow.get(1); result = 2;
}};
assertEquals("Invalid list of values", expectedOuterMap, RowUtils.toRowValue(inputRow, field));
}
内容来源于网络,如有侵权,请联系作者删除!