SQL Server covered days with multiple contracts

hrirmatl  于 2023-05-16  发布在  其他
关注(0)|答案(2)|浏览(170)

I wrote a query which calculated the difference in days between the startdate of a contract and the enddate , given that no new contract was concluded.
In some cases, however, there is a new contract that directly follows the old one. Therefore, there is no gap in between.
My query is wrong in exactly these cases.

As an example here is the problem with ID 1
| id | year | startdate | enddate | drop_out | covered_days |
| ------------ | ------------ | ------------ | ------------ | ------------ | ------------ |
| 1 | 2015 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2016 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2017 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2018 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2019 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2020 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2021 | 2021-01-01 | 2021-04-20 | 2021-04-20 | 109 |

Because there is no gap between the enddate of the first contract and the startdate of the second contract the calculation should be

SELECT DATEDIFF(DAY, '2015-03-01', '2021-04-20') AS covered_days

instead of

SELECT DATEDIFF(DAY, '2021-01-01', '2021-04-20') AS covered_days

Query:

WITH t AS (
  SELECT 
    id, 
    enddate, 
    startdate,
    LEAD(startdate) OVER (PARTITION BY id ORDER BY startdate) AS next_startdate
  FROM 
    tab2
),
t_max AS (
  SELECT 
    id, 
    MAX(enddate) AS max_enddate 
  FROM 
    tab2 
  GROUP BY 
    id
)
SELECT 
  tab1.id,
  YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) AS year,
  tab2.startdate,
  tab2.enddate,
  CASE 
    WHEN DATEDIFF(day, tab2.enddate, t.next_startdate) > 1 
       AND tab2.enddate <> '2222-01-01' 
       AND YEAR(tab2.enddate) = YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) THEN tab2.enddate
    WHEN tab2.enddate = t_max.max_enddate 
       AND tab2.enddate <> '2222-01-01' 
       AND YEAR(tab2.enddate) = YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) THEN t_max.max_enddate
    ELSE NULL 
  END AS drop_out,

DATEDIFF(DAY, tab2.startdate, CASE
  WHEN DATEDIFF(DAY, tab2.enddate, t.next_startdate) > 1
    AND tab2.enddate <> '2222-01-01' 
    AND YEAR(tab2.enddate) = YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) 
    THEN tab2.enddate
  WHEN tab2.enddate = t_max.max_enddate
    AND tab2.enddate <> '2222-01-01' 
    AND YEAR(tab2.enddate) = YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) 
    THEN t_max.max_enddate
  ELSE NULL
END) AS days_insured
  
FROM 
  tab1
JOIN tab2 
  ON tab1.id = tab2.id 
  AND YEAR(tab2.startdate) <= YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE)) 
  AND YEAR(tab2.enddate) >= YEAR(CAST(DATEADD(YEAR, CAST(tab1.year AS INT) - 1900, '19000101') AS DATE))
LEFT JOIN t 
  ON tab2.id = t.id 
  AND tab2.startdate >= t.startdate
  AND tab2.startdate < t.next_startdate
LEFT JOIN t_max 
  ON tab2.id = t_max.id;

For data and query see: db<>fiddle

Expected Output:
| id | year | startdate | enddate | drop_out | covered_days |
| ------------ | ------------ | ------------ | ------------ | ------------ | ------------ |
| 1 | 2015 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2016 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2017 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2018 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2019 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2020 | 2015-03-01 | 2020-12-31 | (null) | (null) |
| 1 | 2021 | 2021-01-01 | 2021-04-20 | 2021-04-20 | 2242 |
| 3 | 2014 | 2014-03-01 | 2019-08-09 | (null) | (null) |
| 3 | 2015 | 2014-03-01 | 2019-08-09 | (null) | (null) |
| 3 | 2016 | 2014-03-01 | 2019-08-09 | (null) | (null) |
| 3 | 2017 | 2014-03-01 | 2019-08-09 | (null) | (null) |
| 3 | 2018 | 2014-03-01 | 2019-08-09 | (null) | (null) |
| 3 | 2019 | 2014-03-01 | 2019-08-09 | 2019-08-09 | 1987 |
| 3 | 2019 | 2019-08-12 | 2020-01-31 | (null) | (null) |
| 3 | 2020 | 2019-08-12 | 2020-01-31 | 2020-01-31 | 172 |
| 5 | 2015 | 2015-03-01 | 2015-03-31 | (null) | (null) |
| 5 | 2015 | 2015-04-01 | 2015-04-09 | 2015-04-09 | 8 |
| 5 | 2015 | 2015-04-11 | 2015-04-16 | 2015-04-16 | 5 |
| 5 | 2015 | 2015-04-18 | 2015-04-23 | 2015-04-23 | 5 |
| 5 | 2016 | 2016-06-01 | 2016-07-30 | (null) | (null) |
| 5 | 2016 | 2016-07-31 | 2017-02-03 | (null) | (null) |
| 5 | 2017 | 2016-07-31 | 2017-02-03 | (null) | (null) |
| 5 | 2017 | 2017-02-04 | 2017-09-13 | 2017-09-13 | 469 |
| 5 | 2017 | 2017-09-15 | 2017-09-17 | 2017-09-17 | 2 |
| 5 | 2017 | 2017-09-19 | 2019-04-08 | (null) | (null) |
| 5 | 2018 | 2017-09-19 | 2019-04-08 | (null) | (null) |
| 5 | 2019 | 2017-09-19 | 2019-04-08 | 2019-04-08 | 566 |
| 5 | 2019 | 2019-04-10 | 2019-04-26 | 2019-04-26 | 16 |
| 5 | 2019 | 2019-04-28 | 2020-12-31 | (null) | (null) |
| 5 | 2020 | 2019-04-28 | 2020-12-31 | 2020-12-31 | 613 |

kqqjbcuj

kqqjbcuj1#

Your expected data bugged a bit for me, but here's my solution:

CREATE TABLE tab2 (
    id int,
    startdate date,
    enddate date
);

INSERT INTO tab2 (id, startdate, enddate)
VALUES 
  (1,'2015-03-01', '2020-12-31'),
       (1,'2021-01-01', '2021-04-20'),
  (3, '2014-03-01', '2019-08-09'),
       (3, '2019-08-12', '2020-01-31'),
  (5, '2015-03-01', '2015-03-31'),
      (5, '2015-04-01', '2015-04-09'), 
      (5, '2015-04-11', '2015-04-16'), 
      (5, '2015-04-18', '2015-04-23'), 
      (5, '2016-06-01', '2016-07-30'),       
      (5, '2016-07-31', '2017-02-03'), 
      (5, '2017-02-04', '2017-09-13'), 
      (5, '2017-09-15', '2017-09-17'), 
      (5, '2017-09-19', '2019-04-08'),          
      (5, '2019-04-10', '2019-04-26'),  
      (5, '2019-04-28', '2020-12-31')
      
SELECT  id, year(startdate) + y.years, startdate, enddate
,   CASE WHEN year(enddate) = year(startdate) + y.years THEN enddate END AS dropoff
,   CASE WHEN year(enddate) = year(startdate) + y.years THEN datediff(day, startdate, enddate) END AS dropoffdays
FROM    (
    SELECT  id, miN(startdate) AS startdate, max(enddate) AS enddate
    ,   changes
    FROM    (
        SELECT  *
        ,   SUM(groupChange) OVER(partition BY id ORDER BY startdate) AS changes
        FROM    (
                SELECT  *
                ,   CASE WHEN datediff(day, lag(enddate) OVER(partition BY id ORDER BY startdate),startdate) = 1 THEN 0 ELSE 1 END AS groupChange
                FROM    tab2 t
            ) x
        ) x
    GROUP BY id, changes
    ) x
CROSS APPLY (
     SELECT row_number() OVER(ORDER BY id) -1 AS years
    FROM    (
        VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)
    ) x(id)
) y
WHERE   years BETWEEN 0 AND year(enddate) - year(startdate)
ORDER BY id, startdate

I skipped year table and generate data on the fly.

It's pretty standard stuff otherwise, you check previous date to see if it's 1 day or more off, then you build a group field by using a rolling SUM, this allows you to find the islands.

Then by joining in the years, you can "explode" the start / enddate rows into each year's specific rows.

8mmmxcuj

8mmmxcuj2#

with flagged as (
    select *,
        case when dateadd(day, -1, startdate)
                    > lag(enddate) over (partition by id order by startdate)
             then 1 end as flag /* break in contract period */
    from tab2
), grouped as (
    select *,
        count(flag) over (partition by id order by startdate) as grp
    from flagged
), exploded as (
    select "year", t2.*,
        row_number()   over (partition by t2.id, grp
                             order by startdate desc, "year" desc) as rn,
        min(startdate) over (partition by t2.id, grp) as contract_start,
        max(enddate)   over (partition by t2.id, grp) as drop_out  
    from tab1 t1 inner join grouped t2
        on t2.id = t1.id and "year" between year(startdate) and year(enddate)
)
select id, "year", grp, startdate, enddate,
      case when rn = 1 then drop_out end as drop_out,  
      case when rn = 1 then datediff(day, contract_start, drop_out) end as covered_days
from exploded
order by id, "year", startdate;

https://dbfiddle.uk/RNalulfk?hide=2

相关问题