我正在尝试使用 Hive JSON SerDe 将 Twitter JSON 数据导入 Hive 表中。我首先将 JSON 导入一个用 ROW FORMAT SERDE 定义的表,然后再将其导入另一个存储为 RCFile 的表。这在一定程度上可行,但我遇到了如下的 ClassCastException:
java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
at org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaDoubleObjectInspector.get(JavaDoubleObjectInspector.java:40)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:259)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:307)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:354)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:220)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:667)
at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:141)
at org.apache.hadoop
下面是我用来定义serde表的模式:
-- gh_raw: external table over raw Twitter status JSON stored under
-- /user/ahanna/gh_raw, deserialized by org.openx.data.jsonserde.JsonSerDe.
--
-- Numeric Twitter ids (user.id and user_mentions.id, both at the top level and
-- inside retweeted_status) are declared bigint: Twitter ids are 64-bit values
-- and do not fit in Hive's 32-bit int.
--
-- NOTE(review): a "java.lang.Integer cannot be cast to java.lang.Double" error
-- on the array<double> coordinate fields is presumably caused by the SerDe
-- build rather than by this schema -- confirm against a newer SerDe release.
CREATE EXTERNAL TABLE gh_raw (
    coordinates struct <
        coordinates: array <double>,
        type: string>,
    created_at string,
    entities struct <
        hashtags: array <struct <text: string>>,
        media: array <struct <
            display_url: string,
            expanded_url: string,
            media_url: string,
            media_url_https: string,
            sizes: struct <
                large: struct <
                    h: int,
                    resize: string,
                    w: int>,
                medium: struct <
                    h: int,
                    resize: string,
                    w: int>,
                small: struct <
                    h: int,
                    resize: string,
                    w: int>,
                thumb: struct <
                    h: int,
                    resize: string,
                    w: int>>,
            type: string,
            url: string>>,
        urls: array <struct <
            display_url: string,
            expanded_url: string,
            url: string>>,
        user_mentions: array <struct <
            id: bigint,
            name: string,
            screen_name: string>>>,
    geo struct <
        coordinates: array <double>,
        type: string>,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id_str string,
    place struct <
        attributes: struct <
            locality: string,
            region: string,
            street_address: string>,
        bounding_box: struct <
            coordinates: array <array <array <double>>>,
            type: string>,
        country: string,
        country_code: string,
        full_name: string,
        name: string,
        place_type: string,
        url: string>,
    possibly_sensitive boolean,
    retweeted_status struct <
        coordinates: struct <
            coordinates: array <double>,
            type: string>,
        created_at: string,
        entities: struct <
            hashtags: array <struct <
                text: string>>,
            media: array <struct <
                display_url: string,
                expanded_url: string,
                media_url: string,
                media_url_https: string,
                sizes: struct <
                    large: struct <
                        h: int,
                        resize: string,
                        w: int>,
                    medium: struct <
                        h: int,
                        resize: string,
                        w: int>,
                    small: struct <
                        h: int,
                        resize: string,
                        w: int>,
                    thumb: struct <
                        h: int,
                        resize: string,
                        w: int>>,
                type: string,
                url: string>>,
            urls: array <struct <
                display_url: string,
                expanded_url: string,
                url: string>>,
            user_mentions: array <struct <
                id: bigint,
                name: string,
                screen_name: string>>>,
        favorited: boolean,
        geo: struct <
            coordinates: array <double>,
            type: string>,
        id_str: string,
        in_reply_to_screen_name: string,
        in_reply_to_status_id_str: string,
        in_reply_to_user_id_str: string,
        place: struct <
            attributes: struct <
                locality: string,
                region: string,
                street_address: string>,
            bounding_box: struct <
                coordinates: array <array <array <double>>>,
                type: string>,
            country: string,
            country_code: string,
            full_name: string,
            name: string,
            place_type: string,
            url: string>,
        possibly_sensitive: boolean,
        scopes: struct <
            followers: boolean>,
        source: string,
        text: string,
        truncated: boolean,
        user: struct <
            contributors_enabled: boolean,
            created_at: string,
            default_profile: boolean,
            default_profile_image: boolean,
            description: string,
            favourites_count: int,
            followers_count: int,
            friends_count: int,
            geo_enabled: boolean,
            id: bigint,
            id_str: string,
            is_translator: boolean,
            lang: string,
            listed_count: int,
            `location`: string,
            name: string,
            profile_background_color: string,
            profile_background_image_url: string,
            profile_background_image_url_https: string,
            profile_background_tile: boolean,
            profile_banner_url: string,
            profile_image_url: string,
            profile_image_url_https: string,
            profile_link_color: string,
            profile_sidebar_border_color: string,
            profile_sidebar_fill_color: string,
            profile_text_color: string,
            profile_use_background_image: boolean,
            protected: boolean,
            screen_name: string,
            statuses_count: int,
            time_zone: string,
            url: string,
            utc_offset: int,
            verified: boolean>>,
    source string,
    text string,
    truncated boolean,
    user struct <
        contributors_enabled: boolean,
        created_at: string,
        default_profile: boolean,
        default_profile_image: boolean,
        description: string,
        favourites_count: int,
        followers_count: int,
        friends_count: int,
        geo_enabled: boolean,
        id: bigint,
        id_str: string,
        is_translator: boolean,
        lang: string,
        listed_count: int,
        `location`: string,
        name: string,
        profile_background_color: string,
        profile_background_image_url: string,
        profile_background_image_url_https: string,
        profile_background_tile: boolean,
        profile_banner_url: string,
        profile_image_url: string,
        profile_image_url_https: string,
        profile_link_color: string,
        profile_sidebar_border_color: string,
        profile_sidebar_fill_color: string,
        profile_text_color: string,
        profile_use_background_image: boolean,
        protected: boolean,
        screen_name: string,
        statuses_count: int,
        time_zone: string,
        url: string,
        utc_offset: int,
        verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION '/user/ahanna/gh_raw';
我猜它是在遇到一组坐标(coordinates)或边界框(bounding_box)时崩溃的。
我认为这是我所用 JSON SerDe 的一个 bug,但我不确定。我使用的版本是从源码自行编译的,其作者称已经修复了这个问题:https://github.com/brndnmtthws/hive-json-serde
2条答案
按热度按时间5cnsuln71#
试试这个:https://github.com/rcongiu/hive-json-serde 。我在尝试读取 tweet 中的坐标时也遇到了同样的异常,换用它之后问题就解决了!
这里提供了已编译好的二进制文件,所以你不需要自己构建:http://www.congiu.net/hive-json-serde/
djmepvbi2#
试试用 bigint 代替 int。这对我有效。