-- Create the retail_db database only when it is missing, so that this script
-- can be re-run without failing, then make it the current database.
create database if not exists retail_db;
use retail_db;
-- External table over the comma-delimited text files stored under the
-- categories import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
create external table if not exists categories (
    category_id int,
    category_department_id int,
    category_name string
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/categories';
-- External table over the comma-delimited text files stored under the
-- customers import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
-- customer_zipcode is kept as a string (not a number) exactly as in the
-- original definition.
create external table if not exists customers (
    customer_id int,
    customer_fname string,
    customer_lname string,
    customer_email string,
    customer_password string,
    customer_street string,
    customer_city string,
    customer_state string,
    customer_zipcode string
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/customers';
-- External table over the comma-delimited text files stored under the
-- departments import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
create external table if not exists departments (
    department_id int,
    department_name string
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/departments';
-- External table over the comma-delimited text files stored under the
-- order_items import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
-- The two monetary columns are declared as decimal(10,2) instead of float so
-- that sums over them are exact rather than accumulating binary rounding noise.
-- NOTE(review): assumes the stored amounts have at most two fractional digits
-- and fit in ten digits overall - confirm against the imported data.
create external table if not exists order_items (
    order_item_id int,
    order_item_order_id int,
    order_item_product_id int,
    order_item_quantity int,
    order_item_subtotal decimal(10,2),
    order_item_product_price decimal(10,2)
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/order_items';
-- External table over the comma-delimited text files stored under the
-- orders import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
-- order_date stays a string here, so grouping on it groups on the raw text
-- exactly as stored in the files.
create external table if not exists orders (
    order_id int,
    order_date string,
    order_customer_id int,
    order_status string
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/orders';
-- External table over the comma-delimited text files stored under the
-- products import directory. The if-not-exists guard keeps the statement
-- re-runnable when the table is already defined.
-- product_price is declared as decimal(10,2) instead of float so that price
-- arithmetic is exact.
-- NOTE(review): assumes prices have at most two fractional digits - confirm
-- against the imported data.
create external table if not exists products (
    product_id int,
    product_category_id int,
    product_name string,
    product_description string,
    product_price decimal(10,2),
    product_image string
)
row format delimited
    fields terminated by ','
stored as textfile
location '/user/cloudera/sqoop/import-all-tables-text/products';
步骤3:执行连接查询
-- Show the current database name in the Hive CLI prompt.
set hive.cli.print.current.db=true;

-- Sum of order item subtotals per order date, joining each order to its items.
-- No ordering is requested, so the 10 returned groups are whichever ones the
-- engine produces first.
select
    o.order_date,
    sum(oi.order_item_subtotal)
from orders o
inner join order_items oi
    on o.order_id = oi.order_item_order_id
group by
    o.order_date
limit 10;
以上查询报出了以下错误:
Query ID = cloudera_20171029182323_6eedd682-256b-466c-b2e5-58ea100715fb Total jobs = 1 FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
步骤4:通过在 Hive 提示符下执行以下语句解决了上述问题:
-- Turn off automatic join conversion for this session. The surrounding notes
-- report that the join query stopped failing in MapredLocalTask once this was set.
set hive.auto.convert.join=false;
步骤5:查询结果
-- Sum of order item subtotals per order date, joining each order to its items.
-- No ordering is requested, so the 10 returned groups are whichever ones the
-- engine produces first.
select
    o.order_date,
    sum(oi.order_item_subtotal)
from orders o
inner join order_items oi
    on o.order_id = oi.order_item_order_id
group by
    o.order_date
limit 10;
Query ID = cloudera_20171029182525_cfc70553-89d2-4c61-8a14-4bbeecadb3cf
Total jobs = 2
Launching Job 1 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1509278183296_0005, Tracking URL = http://quickstart.cloudera:8088/proxy/application_1509278183296_0005/
Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1509278183296_0005
Hadoop job information for Stage-1: number of mappers: 2; number of reducers: 1
2017-10-29 18:25:19,861 Stage-1 map = 0%, reduce = 0%
2017-10-29 18:25:26,181 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 2.72 sec
2017-10-29 18:25:27,240 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 5.42 sec
2017-10-29 18:25:32,479 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 8.01 sec
MapReduce Total cumulative CPU time: 8 seconds 10 msec
Ended Job = job_1509278183296_0005
Launching Job 2 out of 2
Number of reduce tasks not specified. Estimated from input data size: 1
In order to change the average load for a reducer (in bytes):
set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
set mapreduce.job.reduces=<number>
Starting Job = job_1509278183296_0006, Tracking URL = http://quickstart.cloudera:8088/proxy/application_1509278183296_0006/
Kill Command = /usr/lib/hadoop/bin/hadoop job -kill job_1509278183296_0006
Hadoop job information for Stage-2: number of mappers: 1; number of reducers: 1
2017-10-29 18:25:38,676 Stage-2 map = 0%, reduce = 0%
2017-10-29 18:25:43,925 Stage-2 map = 100%, reduce = 0%, Cumulative CPU 0.85 sec
2017-10-29 18:25:49,142 Stage-2 map = 100%, reduce = 100%, Cumulative CPU 2.13 sec
MapReduce Total cumulative CPU time: 2 seconds 130 msec
Ended Job = job_1509278183296_0006
MapReduce Jobs Launched:
Stage-Stage-1: Map: 2 Reduce: 1 Cumulative CPU: 8.01 sec HDFS Read: 8422614 HDFS Write: 17364 SUCCESS
Stage-Stage-2: Map: 1 Reduce: 1 Cumulative CPU: 2.13 sec HDFS Read: 22571 HDFS Write: 407 SUCCESS
Total MapReduce CPU Time Spent: 10 seconds 140 msec
OK
2013-07-25 00:00:00.0 68153.83132743835
2013-07-26 00:00:00.0 136520.17266082764
2013-07-27 00:00:00.0 101074.34193611145
2013-07-28 00:00:00.0 87123.08192253113
2013-07-29 00:00:00.0 137287.09244918823
2013-07-30 00:00:00.0 102745.62186431885
2013-07-31 00:00:00.0 131878.06256484985
2013-08-01 00:00:00.0 129001.62241744995
2013-08-02 00:00:00.0 109347.00200462341
2013-08-03 00:00:00.0 95266.89186286926
Time taken: 35.721 seconds, Fetched: 10 row(s)
6条答案
按热度按时间eni9jsuy1#
在我的例子中,问题是没有设置队列,所以我做了以下工作:
set mapred.job.queue.name=<队列名称>;
这解决了我的问题。希望这对某人有帮助。
3j86kqsm2#
在运行我的查询之前,我刚刚添加了以下内容。
uurity8g3#
在我的例子中,为 execute 添加 configuration 参数就可以解决这个问题。这个问题是由写访问冲突引起的,你应该使用 configuration 来确保自己拥有写入权限。
bfhwhh0e4#
我在 Cloudera QuickStart VM 5.12 上也遇到了这个问题,在 Hive 提示符下执行以下语句后得以解决:SET hive.auto.convert.join=false;
我希望以下信息更有用:
步骤1:从 MySQL 零售数据库导入所有表
步骤2:在 Hive 中创建名为 retail_db 的数据库和所需的表
步骤3:执行连接查询
以上查询报出了以下错误:
Query ID = cloudera_20171029182323_6eedd682-256b-466c-b2e5-58ea100715fb Total jobs = 1 FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
步骤4:通过在 Hive 提示符下执行以下语句解决了上述问题:SET hive.auto.convert.join=false;
步骤5:查询结果
plupiseo5#
尝试在连接上设置authmech参数
我已经将其设置为2并定义了用户名
解决了我在CTA上的问题
你好,奥肯
szqfcxe26#
只需将此命令置于查询之前:
这绝对管用!