将带有日期字段的两个表合并到一个表中,而不重叠日期

snz8szmq  于 2021-07-26  发布在  Java
关注(0)|答案(1)|浏览(325)

我们从产品的遗留应用程序接收数据,表的数据如下所示

Table 1  (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG)

'1',    'abc',  '3/2/2020 12:00:00 AM', '3/3/2020 6:01:12 PM',   'N'
'2',    'abc',  '3/3/2020  6:01:13 PM', '3/3/2020 6:01:14 PM',   'N'
'3',    'abc',  '3/3/2020  6:01:15 PM', '3/3/2020 6:01:15 PM',   'N'
'4',    'abc',  '3/3/2020  6:01:16 PM', '3/3/2020 6:01:51 PM',   'N'
'5',    'abc',  '3/3/2020  6:01:52 PM', '3/4/2020 4:28:59 PM',   'N'
'6',    'abc',  '3/4/2020  4:29:00 PM', '3/4/2020 4:31:40 PM',   'N'
'7',    'abc',  '3/4/2020  4:31:41 PM', '12/31/4712',            'Y'
'8',    'bbc',  '3/19/2020 2:47:08 PM', '3/19/2020 2:50:36 PM',  'N'
'9',    'bbc',  '3/19/2020 2:50:37 PM', '3/19/2020 2:56:23 PM',  'N'
'10',   'bbc',  '3/19/2020 2:56:24 PM', '12/31/4712',            'Y'

Table 2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)
'1',    'abc',  '3/3/2020 12:00:00 AM', '3/3/2020 6:01:13 PM', 'N'
'2',    'abc',  '3/3/2020 6:01:14  PM'  '3/3/2020 6:01:14 PM', 'N'
'3',    'abc',  '3/3/2020 6:01:15  PM'  '3/4/2020 4:28:59 PM', 'N'
'4',    'abc',  '3/4/2020 4:29:00  PM'  '12/31/4712',          'Y'
'5',    'bbc',  '3/4/2020 4:31:41  PM'  '3/19/2020 2:47:07 PM','N'
'6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM','N'
'7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM','N'
'8',    'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',          'Y'

我们需要为两个表创建一个超集,其中包含两个表中所有不同的有效起始日期,并且记录结束日期将自动比下一个记录短1秒,这样就不会有重叠的日期或间隔。最具挑战性的部分是为这个时间段找到合适的关键点。
输出表如下所示。我试过联合,但效果不尽如人意。看看下面是如何实现的。

Table 2  (KEY1,KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)

'1','NULL', 'abc', '3/2/2020 12:00:00 AM',  '3/2/2020 11:59:59 PM',   'N'
'1','1',    'abc', '3/3/2020 12:00:00 AM'   '3/2/2020 6:01:12 PM',    'N' 
'2','1',    'abc', '3/3/2020 6:01:13  PM'   '3/3/2020 6:01:13 PM',    'N'
'2','2',    'abc', '3/3/2020 6:01:14  PM'   '3/3/2020 6:01:14 PM',    'N'
'3','3',    'abc', '3/3/2020 6:01:15  PM'   '3/3/2020 6:01:15 PM',    'N'
'4','3',    'abc', '3/3/2020 6:01:16  PM'   '3/3/2020 6:01:51 PM',    'N'
'5','3',    'abc', '3/3/2020  6:01:52 PM',  '3/4/2020 4:28:59 PM',    'N'
'6','4',    'abc', '3/4/2020  4:29:00 PM',  '3/4/2020 4:31:40 PM',    'N'
'7','4',    'abc', '3/4/2020  4:31:41 PM',  '12/31/4712',             'Y'
'NULL','5', 'bbc', '3/4/2020 4:31:41  PM'   '3/19/2020 2:47:07 PM',   'N'
'8','6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM',   'N'
'9','7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM',   'N'
'10','8',   'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',             'Y'
z9gpfhce

z9gpfhce1#

看起来这就是你需要的:

with tabs as (
   -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
   select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
   union all
   select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
)
,time_points as (
   -- using UNPIVOT to get all time points from all intervals from both tables
   select distinct
      id,
      --pt_type,IS_LATEST_FLAG,
      case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
   from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
   unpivot (
     pt for pt_type in (Valid_From, Valid_To)
   )
)
,intervals as (
   -- combining time points into intervals:
   select 
     id,row_number()over(partition by id order by pit) rnk
    ,pit as Valid_From
    ,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
   from time_points
)
-- main query:
-- simply joining table1 and table2 to our intervals:
select
   i.id,
   i.rnk,
   i.valid_from,
   i.valid_to,
   t1.key1,
   t2.key2,
   least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
from intervals i
     left join table_1 t1
          on  i.id = t1.id
          and i.valid_from between t1.valid_from and t1.valid_to
     left join table_2 t2
          on  i.id = t2.id
          and i.valid_from between t2.valid_from and t2.valid_to
where i.valid_to is not null
order by i.id,i.valid_from,i.valid_to;

完整示例和示例数据:

with Table_1 (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
   select '1',    'abc',  to_date('3/2/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:12 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '2',    'abc',  to_date('3/3/2020  6:01:13 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '3',    'abc',  to_date('3/3/2020  6:01:15 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:15 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '4',    'abc',  to_date('3/3/2020  6:01:16 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:51 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '5',    'abc',  to_date('3/3/2020  6:01:52 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '6',    'abc',  to_date('3/4/2020  4:29:00 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:31:40 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '7',    'abc',  to_date('3/4/2020  4:31:41 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual union all
   select '8',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '9',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
   select '10',   'bbc',  to_date('3/19/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual 
)
,Table_2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
   select '1',    'abc',  to_date('3/3/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:13 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '2',    'abc',  to_date('3/3/2020 6:01:14  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '3',    'abc',  to_date('3/3/2020 6:01:15  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '4',    'abc',  to_date('3/4/2020 4:29:00  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual union all
   select '5',    'bbc',  to_date('3/4/2020 4:31:41  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:47:07 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '6',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '7',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/20/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
   select '8',    'bbc',  to_date('3/20/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual 
)
,tabs as (
   -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
   select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
   union all
   select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
)
,time_points as (
   -- using UNPIVOT to get all time points from all intervals from both tables
   select distinct
  id,
  --pt_type,IS_LATEST_FLAG,
  case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
   from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
   unpivot (
 pt for pt_type in (Valid_From, Valid_To)
   )
)
,intervals as (
   -- combining time points into intervals:
   select 
 id,row_number()over(partition by id order by pit) rnk
,pit as Valid_From
,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
   from time_points
)
-- main query:
-- simply joining table1 and table2 to our intervals:
select
   i.id,
   i.rnk,
   i.valid_from,
   i.valid_to,
   t1.key1,
   t2.key2,
   least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
from intervals i
 left join table_1 t1
      on  i.id = t1.id
      and i.valid_from between t1.valid_from and t1.valid_to
 left join table_2 t2
      on  i.id = t2.id
      and i.valid_from between t2.valid_from and t2.valid_to
where i.valid_to is not null
order by i.id,i.valid_from,i.valid_to;

结果:

ID         RNK VALID_FROM          VALID_TO            KEY1  KEY2  IS_LATEST_FL
--- ---------- ------------------- ------------------- ----- ----- ------------
abc          1 2020-03-02 00:00:00 2020-03-02 23:59:59 1           N
abc          2 2020-03-03 00:00:00 2020-03-03 18:01:12 1     1     N
abc          3 2020-03-03 18:01:13 2020-03-03 18:01:13 2     1     N
abc          4 2020-03-03 18:01:14 2020-03-03 18:01:14 2     2     N
abc          5 2020-03-03 18:01:15 2020-03-03 18:01:15 3     3     N
abc          6 2020-03-03 18:01:16 2020-03-03 18:01:51 4     3     N
abc          7 2020-03-03 18:01:52 2020-03-04 16:28:59 5     3     N
abc          8 2020-03-04 16:29:00 2020-03-04 16:31:40 6     4     N
abc          9 2020-03-04 16:31:41 4712-12-31 00:00:00 7     4     Y
bbc          1 2020-03-04 16:31:41 2020-03-19 14:47:07       5     N
bbc          2 2020-03-19 14:47:08 2020-03-19 14:50:36 8     6     N
bbc          3 2020-03-19 14:50:37 2020-03-19 14:56:23 9     7     N
bbc          4 2020-03-19 14:56:24 2020-03-20 14:56:23 10    7     N
bbc          5 2020-03-20 14:56:24 4712-12-31 00:00:00 10    8     Y

14 rows selected.

相关问题