我有一个配置单元表tweets存储为文本,我正试图写入另一个表tweetsorc,即orc。两者结构相同:
col_name data_type comment
racist boolean from deserializer
contributors string from deserializer
coordinates string from deserializer
created_at string from deserializer
entities struct<hashtags:array<string>,symbols:array<string>,urls:array<struct<display_url:string,expanded_url:string,indices:array<tinyint>,url:string>>,user_mentions:array<string>> from deserializer
favorite_count tinyint from deserializer
favorited boolean from deserializer
filter_level string from deserializer
geo string from deserializer
id bigint from deserializer
id_str string from deserializer
in_reply_to_screen_name string from deserializer
in_reply_to_status_id string from deserializer
in_reply_to_status_id_str string from deserializer
in_reply_to_user_id string from deserializer
in_reply_to_user_id_str string from deserializer
is_quote_status boolean from deserializer
lang string from deserializer
place string from deserializer
possibly_sensitive boolean from deserializer
retweet_count tinyint from deserializer
retweeted boolean from deserializer
source string from deserializer
text string from deserializer
timestamp_ms string from deserializer
truncated boolean from deserializer
user struct<contributors_enabled:boolean,created_at:string,default_profile:boolean,default_profile_image:boolean,description:string,favourites_count:tinyint,follow_request_sent:string,followers_count:tinyint,following:string,friends_count:tinyint,geo_enabled:boolean,id:bigint,id_str:string,is_translator:boolean,lang:string,listed_count:tinyint,location:string,name:string,notifications:string,profile_background_color:string,profile_background_image_url:string,profile_background_image_url_https:string,profile_background_tile:boolean,profile_image_url:string,profile_image_url_https:string,profile_link_color:string,profile_sidebar_border_color:string,profile_sidebar_fill_color:string,profile_text_color:string,profile_use_background_image:boolean,protected:boolean,screen_name:string,statuses_count:smallint,time_zone:string,url:string,utc_offset:string,verified:boolean> from deserializer
当我尝试从tweets插入tweetsorc时,我得到:
INSERT OVERWRITE TABLE tweetsORC SELECT * FROM tweets;
FAILED: NoMatchingMethodException No matching method for class org.apache.hadoop.hive.ql.udf.UDFToString with (struct<hashtags:array<string>,symbols:array<string>,urls:array<struct<display_url:string,expanded_url:string,indices:array<tinyint>,url:string>>,user_mentions:array<string>>). Possible choices: _FUNC_(bigint) _FUNC_(binary) _FUNC_(boolean) _FUNC_(date) _FUNC_(decimal(38,18)) _FUNC_(double) _FUNC_(float) _FUNC_(int) _FUNC_(smallint) _FUNC_(string) _FUNC_(timestamp) _FUNC_(tinyint) _FUNC_(void)
我在这类问题上找到的唯一帮助是让一个udf使用基元类型,但我没有使用udf!非常感谢您的帮助!
仅供参考:配置单元版本:
配置单元1.2.1000.2.4.2.0-258 subversiongit://u12-slave-5708dfcd-10/grid/0/jenkins/workspace/hdp-build-ubuntu12/bigtop/output/hive/hive-1.2.1000.2.4.2.0 -r 240760457150036e13035cbb82bcda0c65362f3a型
编辑:创建表和示例数据:
create table tweets (
contributors string,
coordinates string,
created_at string,
entities struct <
hashtags: array <string>,
symbols: array <string>,
urls: array <struct <
display_url: string,
expanded_url: string,
indices: array <tinyint>,
url: string>>,
user_mentions: array <string>>,
favorite_count tinyint,
favorited boolean,
filter_level string,
geo string,
id bigint,
id_str string,
in_reply_to_screen_name string,
in_reply_to_status_id string,
in_reply_to_status_id_str string,
in_reply_to_user_id string,
in_reply_to_user_id_str string,
is_quote_status boolean,
lang string,
place string,
possibly_sensitive boolean,
retweet_count tinyint,
retweeted boolean,
source string,
text string,
timestamp_ms string,
truncated boolean,
`user` struct <
contributors_enabled: boolean,
created_at: string,
default_profile: boolean,
default_profile_image: boolean,
description: string,
favourites_count: tinyint,
follow_request_sent: string,
followers_count: tinyint,
`following`: string,
friends_count: tinyint,
geo_enabled: boolean,
id: bigint,
id_str: string,
is_translator: boolean,
lang: string,
listed_count: tinyint,
location: string,
name: string,
notifications: string,
profile_background_color: string,
profile_background_image_url: string,
profile_background_image_url_https: string,
profile_background_tile: boolean,
profile_image_url: string,
profile_image_url_https: string,
profile_link_color: string,
profile_sidebar_border_color: string,
profile_sidebar_fill_color: string,
profile_text_color: string,
profile_use_background_image: boolean,
protected: boolean,
screen_name: string,
statuses_count: smallint,
time_zone: string,
url: string,
utc_offset: string,
verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/home/ed/Downloads/hive-json-master/1abbo.txt' OVERWRITE INTO TABLE tweets;
create table tweetsORC (
racist boolean,
contributors string,
coordinates string,
created_at string,
entities struct <
hashtags: array <string>,
symbols: array <string>,
urls: array <struct <
display_url: string,
expanded_url: string,
indices: array <tinyint>,
url: string>>,
user_mentions: array <string>>,
favorite_count tinyint,
favorited boolean,
filter_level string,
geo string,
id bigint,
id_str string,
in_reply_to_screen_name string,
in_reply_to_status_id string,
in_reply_to_status_id_str string,
in_reply_to_user_id string,
in_reply_to_user_id_str string,
is_quote_status boolean,
lang string,
place string,
possibly_sensitive boolean,
retweet_count tinyint,
retweeted boolean,
source string,
text string,
timestamp_ms string,
truncated boolean,
`user` struct <
contributors_enabled: boolean,
created_at: string,
default_profile: boolean,
default_profile_image: boolean,
description: string,
favourites_count: tinyint,
follow_request_sent: string,
followers_count: tinyint,
`following`: string,
friends_count: tinyint,
geo_enabled: boolean,
id: bigint,
id_str: string,
is_translator: boolean,
lang: string,
listed_count: tinyint,
location: string,
name: string,
notifications: string,
profile_background_color: string,
profile_background_image_url: string,
profile_background_image_url_https: string,
profile_background_tile: boolean,
profile_image_url: string,
profile_image_url_https: string,
profile_link_color: string,
profile_sidebar_border_color: string,
profile_sidebar_fill_color: string,
profile_text_color: string,
profile_use_background_image: boolean,
protected: boolean,
screen_name: string,
statuses_count: smallint,
time_zone: string,
url: string,
utc_offset: string,
verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS ORC tblproperties ("orc.compress"="ZLIB");
数据在这里。
2条答案
按热度按时间zsbz8rwp1#
我没有使用select*而是按名称列出字段,然后错误就消失了。
brjng4g32#
数据类型不匹配:要插入的数据类型与相应数据表中的字段类型不一致。例如,如果创建表时声明的字段类型是string,但插入的字段类型确实是list类型,则会抛出此错误。