SQL Server SQL Find consecutive numbers in groups

piwo6bdm  于 2024-01-05  发布在  其他
关注(0)|答案(5)|浏览(138)

I have a table similar to the one shown. It contains a list of user ids, the hour value for each hour of the day and an Avail flag to determine if that user is available on that hour.

I need to list all User ids which are available for a number of consecutive hours defined as @n

#####################
# UID # Avail # Hour#
#####################
# 123 #   1   #  0  #
# 123 #   1   #  1  #
# 123 #   0   #  2  #
# 123 #   0   #  3  #
# 123 #   0   #  4  #
# 123 #   1   #  5  #
# 123 #   1   #  6  #
# 123 #   1   #  7  #
# 123 #   1   #  8  #
# 341 #   1   #  0  #
# 341 #   1   #  1  #
# 341 #   0   #  2  #
# 341 #   1   #  3  #
# 341 #   1   #  4  #
# 341 #   0   #  5  #
# 341 #   1   #  6  # 
# 341 #   1   #  7  #
# 341 #   0   #  8  #
######################

This should result in the following output for @n=3

#######
# UID #
#######
# 123 #
#######

I have attempted to use the ROW_NUMBER() over (partition by UID,Avail ORDER BY UID,Hour) to assign a number to each row partitioned by the UID and Whether or not they are flagged as available. However this does not work as the periods of availability may change multiple times a day and the ROW_NUMBER() function was only keeping two counts per user based on the Avail flag.

3htmauhk

3htmauhk1#

If you're using SQL Server 2012+ you could using a windowed SUM, but you have to specify the number of rows in the window frame in advance as it won't accept variables so it's not that flexible:

;with cte as 
(
    select distinct 
       UID, 
       SUM(avail) over (partition by uid 
                        order by hour 
                        rows between current row and 2 following
                       ) count 
    from table1
)
select uid from cte where count = 3;

If you want flexibility you could make it a stored procedure and use dynamic SQL to build and execute the statement, something like this:

create procedure testproc (@n int) as
declare @sql nvarchar(max)
set @sql = concat('
    ;with cte as 
    (
       select distinct 
          UID, 
          SUM(avail) over (partition by uid 
                        order by hour 
                        rows between current row and ', @n - 1 , ' following
                        ) count 
       from table1
    )
    select uid from cte where count = ' , @n , ';')
exec sp_executesql @sql

and execute it using execute testproc 3

An even more inflexible solution is to use correlated subqueries, but then you have to add another subquery for each added count:

select distinct uid 
from Table1 t1
where Avail = 1
  and exists (select 1 from Table1 where Avail = 1 and UID = t1.UID and Hour = t1.Hour + 1)
  and exists (select 1 from Table1 where Avail = 1 and UID = t1.UID and Hour = t1.Hour + 2);

And yet another way, using row_number to find islands and then filtering by sum of avail for each island:

;with c as (
    select 
       uid, avail, 
       row_number() over (partition by uid order by hour) 
       - row_number() over (partition by uid, avail order by hour) grp
from table1
)

select uid from c
group by uid, grp
having sum(avail) >= 3
l5tcr1uw

l5tcr1uw2#

This works... It does a self join on userID and anything in 2nd table with in @n (3hr) then returns only those records having a count of 3 records.

SELECT A.UID
FROM foo A
INNER JOIN foo B
 on A.UId = B.UID
 and A.Hour+3 <= B.Hour
 and A.Avail= 1 and B.Avail=1
GROUP BY A.UID
HAVING count(distinct B.hour) =3

http://sqlfiddle.com/#!6/f97ee

xxe27gdn

xxe27gdn3#

Didn't have time to polish this ... but this is one option.

  • First CTE (c) creates the new column Id
  • Second CTE (mx) gets the max row number since you cannot use aggregates in recursive CTEs
  • Final CTE (rc) is where the meat is.
;WITH c AS (
    SELECT ROW_NUMBER() OVER (ORDER BY [UID],[Hour]) Id, 
        [UID],Avail,[Hour]
    FROM #tmp
), mx AS (
    SELECT MAX(Id) MaxRowCount FROM c
), rc AS (

    SELECT Id, [UID], Avail, [Hour], c.Avail AS CummulativeHour
    FROM c
    WHERE Id = 1

    UNION ALL

    SELECT c.Id, c.[UID], c.Avail, c.[Hour], CASE WHEN rc.Avail = 0 OR c.Avail = 0 OR rc.[UID] <> c.[UID] THEN c.Avail
                                                WHEN rc. Avail = 1 AND c.Avail = 1 THEN rc.CummulativeHour + 1 END AS CummulativeHour
    FROM rc
    JOIN c
        ON rc.Id + 1 = c.Id
    WHERE c.Id <= (SELECT mx.MaxRowCount FROM mx)

)
SELECT * FROM rc

Here is the sample data creation...

CREATE TABLE #tmp ([UID] INT, Avail INT, [Hour] INT)

INSERT INTO #tmp
        ( UID, Avail, Hour )
VALUES  (123,1,0),
(123,1,1),
(123,0,2),
(123,0,3),
(123,0,4),
(123,1,5),
(123,1,7),
(123,1,8),
(341,1,0),
(341,0,2),
(341,1,3),
(341,1,4),
(341,0,5),
(341,1,6),
(341,1,7),
(341,0,8)
nwlls2ji

nwlls2ji4#

The main query with several CTE below give you several possibility in order to show what you need (max per user, user with N hours, etc.). Just update the last query below the CTE.

Create table and data:

declare @hours table(
uid int
, avail bit
, h tinyint
)
insert into @hours(uid, avail, h) values 
(123, 1, 0),
(123, 1, 1),
(123, 0, 2),
(123, 0, 3),
(123, 0, 4),
(123, 1, 5),
(123, 1, 6),
(123, 1, 7),
(123, 1, 8),
(341, 1, 0),
(341, 1, 1),
(341, 0, 2),
(341, 1, 3),
(341, 1, 4),
(341, 0, 5),
(341, 1, 6), 
(341, 1, 7),
(341, 0, 8),
(341, 1, 23) -- loop through midnight

Last row has been added to show that it can detect continuous hours around midnight (see back cte). i.e. 23 => 2AM for uid 341

Query MAX continous hours per user:

-- remove duplicate, wrong hours and not available hours
;with hs as (
    Select distinct uid, h from @hours where avail = 1 and h < 24 
), loop(uid, first, last, diff) as (
    --loop through successive hours
    select uid, cast(h as tinyint), cast(h+1 as int), cast(1 as int) from hs
    union all
    select l.uid, l.first, l.last+1, l.diff+1 from loop as l
    inner join hs as h on l.uid = h.uid and l.last = h.h
), back(uid, first, last, diff) as (
    -- search for successive hours around midnight
    select uid, first, last, diff from loop
    union
    select l1.uid, l1.first, l2.last, l1.diff+l2.diff from loop as l1
    inner join loop as l2 on l1.uid = l2.uid and l1.last = 24 and l2.first = 0
), mx(uid, diff) as (
    -- get max continuous hours per user
    select uid, max(diff) from back group by uid
)
-- main query, change it based on what you need (see below)
select b.uid, b.first, b.last, b.diff from back as b
inner join mx as m on m.uid = b.uid and m.diff = b.diff
order by uid, diff desc

results:

uid first   last    diff
123 5       9       4
341 23      2       3 <= present because I added 341/1/23. Would not be here otherwise

Get user with at least 3 continuous hours (replace last select with this one):

select distinct uid from back where diff >= 3 -- @n goes here

Please not that I considered that (123, 1, 5) give 1 available hour from 5 to 6. Therefore 5 to 8 gives you 4 available hours from 5 to 9.

dxxyhpgq

dxxyhpgq5#

SQL CODE

create temporary table rank_test(
val numeric)

insert into rank_test select 1;
insert into rank_test select 2;
insert into rank_test select 3;
insert into rank_test select 6;
insert into rank_test select 7;
insert into rank_test select 9;
insert into rank_test select 11;
insert into rank_test select 14;
insert into rank_test select 15;

select val,row_number() over (partition by vno order by val) from (
select val,val-row_number() over (order by val) as vno
from rank_test)a

Output

A   | Consecutiveness
--------+--------
    1   | 1
    2   | 2
    4   | 1
    6   | 1
    7   | 2
    9   | 1
   11   | 1
   14   | 1
   15   | 2

相关问题