I am running the simple query below on Spark 2.4.0-cdh6.3.3 and getting the error below while inserting data into the table. The same query runs fine on version 2.2.0.cloudera2.
The query is as follows:
spark.sql("""Select Id , OwnerId owner_id, IsDeleted is_deleted, Name name, RecordTypeId record_type_id, CreatedDate create_date, CreatedById created_by_id, LastModifiedDate last_modified_date, LastModifiedById last_modified_by_id, SystemModstamp system_mod_stamp, cast(MayEdit as string) may_edit, cast(IsLocked as string) is_locked, Account_Id_vod__c , Account_Name_vod__c , Call_Name_vod__c , Comments_vod__c , Confirmed_Quantity_vod__c , Distributor_vod__c , Quantity_vod__c , Sample_U_M__c , Sample_vod__c , Ship_Address_Line_1_vod__c ship_addr_line_1_vod__c, Ship_Address_Line_2_vod__c ship_addr_line_2_vod__c, Ship_City_vod__c , Ship_Country_vod__c , Ship_License_Expiration_Date_vod__c ship_license_exp_date_vod__c, Ship_License_Status_vod__c , Ship_License_vod__c , Ship_State_vod__c , Ship_Zip_vod__c , Shipment_Id_vod__c , Signature_Date_vod__c , Status_vod__c , Territory_vod__c , cast(Unlock_vod__c as string) , Call_Id_vod__c , Call_Sample_Id_vod__c , Call_Sample_Name_vod__c call_sample_name_vod__c, ASSMCA_vod__c assmca_vod__c, Account_vod__c , Call_Date_vod__c , Call_Datetime_vod__c , DEA_Expiration_Date_vod__c , DEA_vod__c, cast(Request_Receipt_vod__c as string), Sample_Card_Reason_vod__c , Sample_Send_Card_vod__c , Ship_Zip_4_vod__c , credentials_vod__c, Manufacturer_vod__c , Salutation_vod__c , Delivery_Status_vod__c , shipping_address__c shipping_addr__c, Sample_Request__c , Product_Id__c Product_Id__c, 263669 batch_id, current_timestamp() rec_insert_date, 'vod' rec_insert_by, '' reject_reason, 'Y' status_flag from ph_com_p_usa_veeva.Sample_Order_Transaction_vod__c where Item_status__c in ('Processing') and ingested_time > '2020-08-08 13:15:05' union all select id, owner_id, cast(is_deleted as boolean), name, record_type_id, create_date, created_by_id, last_modified_date, last_modified_by_id, system_mod_stamp, may_edit, is_locked, account_id_vod__c, account_name_vod__c, call_name_vod__c, comments_vod__c, confirmed_quantity_vod__c, distributor_vod__c, quantity_vod__c, sample_u_m__c, sample_vod__c, ship_addr_line_1_vod__c, ship_addr_line_2_vod__c, ship_city_vod__c, ship_country_vod__c, ship_license_exp_date_vod__c, ship_license_status_vod__c, ship_license_vod__c, ship_state_vod__c, ship_zip_vod__c, shipment_id_vod__c, signature_date_vod__c, status_vod__c, territory_vod__c, unlock_vod__c, call_id_vod__c, call_sample_id_vod__c, call_sample_name_vod__c, assmca_vod__c, account_vod__c, call_date_vod__c, call_datetime_vod__c,dea_expiration_date_vod__c, dea_vod__c, request_receipt_vod__c, sample_card_reason_vod__c, sample_send_card_vod__c, ship_zip_4_vod__c, credentials_vod__c, manufacturer_vod__c, salutation_vod__c, delivery_status_vod__c, shipping_addr__c, sample_request__c, product_id__c,263669 batch_id,CURRENT_TIMESTAMP() rec_insert_date, rec_insert_by, reject_reason,'Y' status_flag from all_all_r_usa_vntg.stg_vod_smpl_req_trsn stg_vod_smpl_req_trsn left anti join (select * from ph_com_p_usa_veeva.Sample_Order_Transaction_vod__c where Item_status__c in ('Processing') and ingested_time > '2020-08-08 13:15:05' ) b on (stg_vod_smpl_req_trsn.id=b.id) where status_flag!='Y' and rec_insert_date>'2020-08-08 13:15:05'""").createTempView("udldata")
spark.sql("""select Id, owner_id, is_deleted, name, record_type_id, create_date, created_by_id, last_modified_date, last_modified_by_id, system_mod_stamp, may_edit, is_locked, Account_Id_vod__c, Account_Name_vod__c, Call_Name_vod__c, Comments_vod__c, Confirmed_Quantity_vod__c, Distributor_vod__c, Quantity_vod__c, Sample_U_M__c, Sample_vod__c, ship_addr_line_1_vod__c, ship_addr_line_2_vod__c, Ship_City_vod__c, Ship_Country_vod__c, ship_license_exp_date_vod__c, Ship_License_Status_vod__c, Ship_License_vod__c, Ship_State_vod__c, Ship_Zip_vod__c, Shipment_Id_vod__c, Signature_Date_vod__c, Status_vod__c, Territory_vod__c, Unlock_vod__c, Call_Id_vod__c, Call_Sample_Id_vod__c, call_sample_name_vod__c, assmca_vod__c, Account_vod__c, Call_Date_vod__c, Call_Datetime_vod__c, DEA_Expiration_Date_vod__c, DEA_vod__c, Request_Receipt_vod__c, Sample_Card_Reason_vod__c, Sample_Send_Card_vod__c, Ship_Zip_4_vod__c, credentials_vod__c, Manufacturer_vod__c, Salutation_vod__c, Delivery_Status_vod__c, shipping_addr__c, Sample_Request__c, Product_Id__c, batch_id, rec_insert_date, rec_insert_by, 'Customer not present in VDW' reject_reason, 'R' status_flag FROM udldata ul WHERE ul.status_flag = 'Y' AND NOT EXISTS (SELECT 1 FROM all_all_b_usa_crmods.ODS_CONTACTS oc WHERE oc.sf_id = ul.ACCOUNT_ID_VOD__C AND UPPER(oc.inactive) = 'FALSE')""").createTempView("rejected")
spark.sql("""select * from udldata ul left anti join rejected rj on ul.id = rj.id""").createTempView("valid")
spark.sql("""insert into ALL_ALL_R_USA_VNTG.STG_VOD_SMPL_REQ_TRSN select * from valid union select * from rejected""")
Error:
java.lang.AssertionError: assertion failed
at scala.Predef$.assert(Predef.scala:156)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19$$anonfun$39.apply(Optimizer.scala:1038)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19$$anonfun$39.apply(Optimizer.scala:1033)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:296)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19.applyOrElse(Optimizer.scala:1033)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$$anonfun$apply$19.applyOrElse(Optimizer.scala:949)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:255)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$mapChild$2(TreeNode.scala:295)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4$$anonfun$apply$13.apply(TreeNode.scala:354)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:296)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:354)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:326)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
at org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:324)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:261)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29)
at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:245)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$.apply(Optimizer.scala:949)
at org.apache.spark.sql.catalyst.optimizer.PushDownPredicate$.apply(Optimizer.scala:948)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84)
at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:124)
at scala.collection.immutable.List.foldLeft(List.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84)
at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76)
at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66)
at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3359)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:651)
... 49 elided
1 Answer
Resolved by setting the following parameter. Set it before inserting the data into the table:
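The parameter itself is missing from the answer as posted. A plausible candidate, assumed here rather than confirmed by the original answer, is to exclude the optimizer rule named in the stack trace through spark.sql.optimizer.excludedRules, a configuration available since Spark 2.4. A minimal sketch:

// Assumed workaround, not confirmed by the original answer: skip the
// PushDownPredicate optimizer rule that raises the assertion, then run
// the insert as before.
spark.conf.set("spark.sql.optimizer.excludedRules",
  "org.apache.spark.sql.catalyst.optimizer.PushDownPredicate")
spark.sql("insert into ALL_ALL_R_USA_VNTG.STG_VOD_SMPL_REQ_TRSN select * from valid union select * from rejected")

Excluding a rule only skips one optimization pass, so the result rows are unchanged, though the plan may be less efficient. The setting is per-session and can be reverted with spark.conf.unset("spark.sql.optimizer.excludedRules") once on a Spark build where the assertion is fixed.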