A = LOAD 'dataA' USING PigStorage('\t') AS (aid:long, atuple : tuple(af1:long, af2:long));
B = LOAD 'dataB' USING PigStorage('\t') AS (bid:long, btuple : tuple(bf1:chararray, bf2:chararray, bf3:chararray, bf4:chararray));
C = JOIN A BY aid, B BY bid;
D = FOREACH C GENERATE aid AS id, FLATTEN(atuple) AS (af1:long, af2:long) , FLATTEN(btuple) AS (bf1:chararray, bf2:chararray, bf3:chararray, bf4:chararray);
E = FOREACH D GENERATE id, (af1..bf4);
DUMP E;
1条答案
按热度按时间rbl8hiat1#
一个值得深思的想法。。。
输入:
数据A:
数据库:
Pig脚本:
输出:转储e: