如何在hive中实现数据范围的左连接

jdgnovmf  于 2021-05-29  发布在  Hadoop
关注(0)|答案(3)|浏览(424)

我想把下面的oracle逻辑转换成hive。逻辑:

Select a.id,a.name,b.desc from table a left join table b on
a.num between b.min_num and b.max_num;

有谁能帮我在Hive里实现上述逻辑吗。

rsaldnfx

rsaldnfx1#

select  a.id
       ,a.name
       ,b.desc

from                table_a as a 

        left join  (select  b.min_num + pe.pos  as num
                           ,b.desc

                    from    table_b as b
                                lateral view 
                                    posexplode(split(space(b.max_num-b.min_num),' ')) pe
                    ) b                    

        on          b.num   =
                    a.num
;
l3zydbqr

l3zydbqr2#

使用此解决方案,您可以控制性能。 b 范围正在被拆分为子范围,如您所愿( x ).
太大了 x 实际上会引起交叉连接。
太小了 x 可能会从 b ( x =1将生成所有 b 范围值)。

set hivevar:x=100;

select  a.id
       ,a.name
       ,b.desc

from        table_a as a 

        left join  

            (select  a.id
                   ,b.desc

            from        table_a as a

                    inner join 

                       (select   b.min_num div ${hivevar:x} + pe.pos    as sub_range_id
                                ,b.*

                        from        table_b as b
                                lateral view 
                                    posexplode(split(space(cast (b.max_num div ${hivevar:x} - b.min_num div ${hivevar:x} as int)),' ')) pe
                        ) as b

                    on          a.num div ${hivevar:x}  =
                                b.sub_range_id

            where   a.num between b.min_num and b.max_num
            ) b                    

        on          b.id   =
                    a.id
;
6yjfywim

6yjfywim3#

select  a.id
       ,a.name
       ,b.desc

from                table_a as a 

        left join  (select  a.id
                           ,b.desc

                    from                table_a as a
                            cross join  table_b as b

                    where   a.num between b.min_num and b.max_num
                    ) b                    

        on          b.id   =
                    a.id
;

相关问题