我在 PySpark 中有如下 Schema。当 ITEMS 数组的结构体元素中存在 item_platform_id 字段时,我需要把该字段从 ITEMS 数组的每个元素中删除。我用 drop 测试过,但它不起作用(drop 只能删除顶层列,不能删除数组内结构体的字段)。
root
|-- MISSION_ID: string (nullable = true)
|-- COUNTRY: string (nullable = true)
|-- SPONSORED_MISSION: string (nullable = true)
|-- MISSION_TYPE: string (nullable = true)
|-- SPONSORED_SEGMENTATION: string (nullable = true)
|-- START_DATE: timestamp (nullable = true)
|-- END_DATE: timestamp (nullable = true)
|-- CREATE_DATE: timestamp (nullable = true)
|-- UPDATE_DATE: timestamp (nullable = true)
|-- SPONSOR_PARTNER_ID: string (nullable = true)
|-- CONSIDER_DELIVERY_WINDOW: boolean (nullable = true)
|-- CONSIDER_BLOCK_LIST: boolean (nullable = true)
|-- DIGITALIZATION_LEVEL: string (nullable = true)
|-- ITEMS: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- _id: string (nullable = true)
| | |-- quantity: integer (nullable = true)
| | |-- item_platform_id: string (nullable = true)
|-- COMBOS: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- _id: string (nullable = true)
| | |-- comboId: integer (nullable = true)
| | |-- quantity: integer (nullable = true)
|-- ENABLED: boolean (nullable = true)
预期:
root
|-- MISSION_ID: string (nullable = true)
|-- COUNTRY: string (nullable = true)
|-- SPONSORED_MISSION: string (nullable = true)
|-- MISSION_TYPE: string (nullable = true)
|-- SPONSORED_SEGMENTATION: string (nullable = true)
|-- START_DATE: timestamp (nullable = true)
|-- END_DATE: timestamp (nullable = true)
|-- CREATE_DATE: timestamp (nullable = true)
|-- UPDATE_DATE: timestamp (nullable = true)
|-- SPONSOR_PARTNER_ID: string (nullable = true)
|-- CONSIDER_DELIVERY_WINDOW: boolean (nullable = true)
|-- CONSIDER_BLOCK_LIST: boolean (nullable = true)
|-- DIGITALIZATION_LEVEL: string (nullable = true)
|-- ITEMS: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- _id: string (nullable = true)
| | |-- quantity: integer (nullable = true)
|-- COMBOS: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- _id: string (nullable = true)
| | |-- comboId: integer (nullable = true)
| | |-- quantity: integer (nullable = true)
|-- ENABLED: boolean (nullable = true)
1条答案
按热度按时间qmb5sa221#
你可以先检查数组的结构体中是否存在该字段,然后使用 `dropFields` 从结构体中删除字段(自 Spark 3.1.0 起可用)。由于 ITEMS 是结构体数组,需要配合 `transform` 对数组中的每个元素调用 `dropFields`。范例: