SQL Server How to convert employee punch data to a 15-minute interval matrix?

svmlkihl  于 2023-06-04  发布在  其他
关注(0)|答案(1)|浏览(102)

I am attempting to take a table similar to the below raw data example with punch-in and punch-out times and convert it to a table (desired format below) which will allow the data to be easily used in Power BI area charts (this data will be plotted with transaction data). Essentially, I'd like to take raw punch time data and place it on a matrix to then count the number of employees "on the clock" per 15-minute interval.

I am open to the toolset to complete this in the simplest manner. Excel, Python, SQL Server, and Power BI are my strongest platforms. Also, open to a better way of achieving this if there's one out there.

Raw Data:
| Employee ID | Punch In | Punch Out | Break In | Break Out |
| ------------ | ------------ | ------------ | ------------ | ------------ |
| 1234 | 9 am | 4 pm | 12:30 pm | 1 pm |
| 1235 | 9:30 am | 5 pm | null | null |
| 1236 | 8 am | 4 pm | 11 am | 11:45 am |

Desired Format:

Employee ID8 am8:15 am8:30 am8:45 am9 am9:15 am9:30 am9:45 am
12341111
123511
123611111111
edqdpe6u

edqdpe6u1#

Python Solution

import pandas as pd
from dateutil import parser

# Create a sample DataFrame with the raw punch data
data = {
    'Employee ID': [1234, 1235, 1236],
    'Punch In': ['9 am', '9:30 am', '8 am'],
    'Punch Out': ['4 pm', '5 pm', '4 pm'],
    'Break In': ['12:30 pm', None, '11 am'],
    'Break Out': ['1 pm', None, '11:45 am']
}

df = pd.DataFrame(data)

# Define the time range for the matrix
start_time = pd.to_datetime('8:00 am').time()
end_time = pd.to_datetime('5:00 pm').time()

# Convert start and end times to Timestamp objects
start_timestamp = pd.Timestamp.combine(pd.to_datetime('today').date(), start_time)
end_timestamp = pd.Timestamp.combine(pd.to_datetime('today').date(), end_time)

# Create a time range with 15-minute intervals
time_range = pd.date_range(start=start_timestamp, end=end_timestamp, freq='15min').time

# Create an empty matrix DataFrame with time intervals as columns
matrix = pd.DataFrame(columns=time_range)

# Iterate over each row in the raw data DataFrame
for _, row in df.iterrows():
    employee_id = row['Employee ID']

    # Get the punch in and punch out times for the employee
    try:
        punch_in = parser.parse(row['Punch In']).time()
        punch_out = parser.parse(row['Punch Out']).time()
    except ValueError:
        print(f"Invalid time format for Employee ID {employee_id}")
        continue

    # Get the break in and break out times for the employee
    break_in = parser.parse(row['Break In']).time() if row['Break In'] else None
    break_out = parser.parse(row['Break Out']).time() if row['Break Out'] else None

    # Create an empty row for the employee in the matrix
    matrix_row = pd.Series(index=matrix.columns)

    # Iterate over each time interval in the matrix
    for time_interval in matrix.columns:
        # Check if the employee is within the time interval
        if punch_in <= time_interval < punch_out and (break_in is None or break_in <= time_interval < break_out):
            matrix_row[time_interval] = 1

    # Append the row to the matrix DataFrame
    matrix_row.name = employee_id
    matrix = matrix.append(matrix_row)

# Fill the NaN values with empty strings
matrix = matrix.fillna('')

# Show all columns
pd.set_option('display.max_columns', None)

# Show table of 1's
print(matrix)

SQL Solution

-- Create the PunchData table
CREATE TABLE PunchData (
    EmployeeID  INT
    ,PunchIn    VARCHAR(10)
    ,PunchOut   VARCHAR(10)
    ,BreakIn    VARCHAR(10)
    ,BreakOut   VARCHAR(10)
);

-- Insert sample data into the PunchData table
INSERT INTO PunchData (EmployeeID, PunchIn, PunchOut, BreakIn, BreakOut)
VALUES
    (1234, '9 am', '4 pm', '12:30 pm', '1 pm')
    ,(1235, '9:30 am', '5 pm', NULL, NULL)
    ,(1236, '8 am', '4 pm', '11 am', '11:45 am');

-- Define a CTE called TimeSlots that generates time values in 15-minute increments
WITH TimeSlots AS (
    SELECT CAST('00:00' AS TIME) AS [_Time]
    UNION ALL
    SELECT DATEADD(minute, 15, [_Time])
    FROM TimeSlots
    WHERE [_Time] < CAST('23:45' AS TIME)
),

-- Define a CTE called Matrix that combines PunchData and TimeSlots
Matrix AS (
    SELECT
        pd.EmployeeID
        ,ts.[_Time]
        ,CASE
            -- Check if PunchIn and PunchOut fall within the time slot
            -- Check if BreakIn and BreakOut (if not NULL) fall within the time slot
            WHEN (
                (pd.PunchIn <= ts.[_Time] 
                    AND pd.PunchOut > ts.[_Time])  
                AND (
                    pd.BreakIn IS NULL 
                    OR (
                        pd.BreakIn <= ts.[_Time] 
                        AND pd.BreakOut > ts.[_Time]
                    )
                ) 
            ) 
            THEN '1'
            ELSE '0'
        END AS SlotValue
    FROM
        PunchData AS pd
    CROSS JOIN
        TimeSlots AS ts
)

-- Pivot the Matrix data to generate desired columns for each time slot
SELECT
    EmployeeID

    ,MAX(CASE WHEN [_Time] = '09:00' THEN SlotValue ELSE '' END) AS [09:00]
    ,MAX(CASE WHEN [_Time] = '09:15' THEN SlotValue ELSE '' END) AS [09:15]
    ,MAX(CASE WHEN [_Time] = '09:30' THEN SlotValue ELSE '' END) AS [09:30]
    ,MAX(CASE WHEN [_Time] = '09:45' THEN SlotValue ELSE '' END) AS [09:45]

    ,MAX(CASE WHEN [_Time] = '10:00' THEN SlotValue ELSE '' END) AS [10:00]
    ,MAX(CASE WHEN [_Time] = '10:15' THEN SlotValue ELSE '' END) AS [10:15]
    ,MAX(CASE WHEN [_Time] = '10:30' THEN SlotValue ELSE '' END) AS [10:30]
    ,MAX(CASE WHEN [_Time] = '10:45' THEN SlotValue ELSE '' END) AS [10:45]

    ,MAX(CASE WHEN [_Time] = '11:00' THEN SlotValue ELSE '' END) AS [11:00]
    ,MAX(CASE WHEN [_Time] = '11:15' THEN SlotValue ELSE '' END) AS [11:15]
    ,MAX(CASE WHEN [_Time] = '11:30' THEN SlotValue ELSE '' END) AS [11:30]
    ,MAX(CASE WHEN [_Time] = '11:45' THEN SlotValue ELSE '' END) AS [11:45]

    ,MAX(CASE WHEN [_Time] = '12:00' THEN SlotValue ELSE '' END) AS [12:00]
    ,MAX(CASE WHEN [_Time] = '12:15' THEN SlotValue ELSE '' END) AS [12:15]
    ,MAX(CASE WHEN [_Time] = '12:30' THEN SlotValue ELSE '' END) AS [12:30]
    ,MAX(CASE WHEN [_Time] = '12:45' THEN SlotValue ELSE '' END) AS [12:45]

    ,MAX(CASE WHEN [_Time] = '13:00' THEN SlotValue ELSE '' END) AS [13:00]
    ,MAX(CASE WHEN [_Time] = '13:15' THEN SlotValue ELSE '' END) AS [13:15]
    ,MAX(CASE WHEN [_Time] = '13:30' THEN SlotValue ELSE '' END) AS [13:30]
    ,MAX(CASE WHEN [_Time] = '13:45' THEN SlotValue ELSE '' END) AS [13:45]

    ,MAX(CASE WHEN [_Time] = '14:00' THEN SlotValue ELSE '' END) AS [14:00]
    ,MAX(CASE WHEN [_Time] = '14:15' THEN SlotValue ELSE '' END) AS [14:15]
    ,MAX(CASE WHEN [_Time] = '14:30' THEN SlotValue ELSE '' END) AS [14:30]
    ,MAX(CASE WHEN [_Time] = '14:45' THEN SlotValue ELSE '' END) AS [14:45]

    ,MAX(CASE WHEN [_Time] = '15:00' THEN SlotValue ELSE '' END) AS [15:00]
    ,MAX(CASE WHEN [_Time] = '15:15' THEN SlotValue ELSE '' END) AS [15:15]
    ,MAX(CASE WHEN [_Time] = '15:30' THEN SlotValue ELSE '' END) AS [15:30]
    ,MAX(CASE WHEN [_Time] = '15:45' THEN SlotValue ELSE '' END) AS [15:45]

    ,MAX(CASE WHEN [_Time] = '16:00' THEN SlotValue ELSE '' END) AS [16:00]
    ,MAX(CASE WHEN [_Time] = '16:15' THEN SlotValue ELSE '' END) AS [16:15]
    ,MAX(CASE WHEN [_Time] = '16:30' THEN SlotValue ELSE '' END) AS [16:30]
    ,MAX(CASE WHEN [_Time] = '16:45' THEN SlotValue ELSE '' END) AS [16:45]

    ,MAX(CASE WHEN [_Time] = '17:00' THEN SlotValue ELSE '' END) AS [17:00]
    ,MAX(CASE WHEN [_Time] = '17:15' THEN SlotValue ELSE '' END) AS [17:15]
    ,MAX(CASE WHEN [_Time] = '17:30' THEN SlotValue ELSE '' END) AS [17:30]
    ,MAX(CASE WHEN [_Time] = '17:45' THEN SlotValue ELSE '' END) AS [17:45]

-- Add more case statements for each 15-minute increment before/after if needed...
FROM
    Matrix
GROUP BY
    EmployeeID;

相关问题