在R中,对于每个重复的ID-time组合,如何保持特定列的最高值?

46scxncf  于 2023-05-20  发布在  其他
关注(0)|答案(3)|浏览(59)

我有一个包含工资记录的df。对于每个重复的PersonID-CalendarYear组合,我希望保持WagesQ 1:WagesQ 4列中的最高值。我还希望这些值都在同一行中。
例如,PersonID#49在2017年有3行。我想在每个Wages列中保留最高值,以便最后有一行用于PersonID#49和2017年。
我有什么:

我想要的:

到目前为止,我尝试了以下方法,但这将每个PersonID-CalendarYear组合重复4次,这根本不是我想要的。

test <- Emp %>% 
              group_by(PersonID, CalendarYear) %>%
              slice(which.max(WagesQ1), 
              which.max(WagesQ2), 
              which.max(WagesQ3), 
              which.max(WagesQ4))

我在这里分享df的前50行:

structure(list(PersonID = c("30", "30", "30", "30", "30", "30", 
"30", "30", "30", "30", "30", "30", "30", "30", "30", "30", "30", 
"30", "30", "30", "30", "30", "36", "49", "49", "49", "49", "49", 
"49", "49", "49", "49", "49", "49", "49", "49", "49", "49", "49", 
"49", "49", "49", "49", "49", "49", "49", "49", "49", "49", "49"
), CalendarYear = c(2003L, 2004L, 2005L, 2006L, 2006L, 2007L, 
2008L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 2015L, 
2016L, 2017L, 2017L, 2018L, 2019L, 2020L, 2021L, 2018L, 2003L, 
2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 
2012L, 2013L, 2014L, 2014L, 2015L, 2015L, 2015L, 2016L, 2016L, 
2016L, 2017L, 2017L, 2017L, 2018L, 2018L, 2019L, 2020L), County = c("181", 
"181", "181", "53", "175", "175", "53", "175", "53", "53", "53", 
"53", "53", "53", "53", "53", "53", "53", "53", "53", "53", "53", 
"9", "169", "169", "169", "169", "169", "169", "169", "169", 
"169", "169", "169", "169", "169", "169", "157", "169", "169", 
"157", "169", "169", "157", "169", "169", "169", "169", "169", 
"169"), industryID = c(844L, 844L, 844L, 418L, 829L, 829L, 418L, 
829L, 418L, 418L, 418L, 418L, 418L, 418L, 418L, 418L, 411L, 418L, 
411L, 411L, 411L, 411L, 10L, 410L, 1037L, 1037L, 1037L, 1037L, 
1037L, 1037L, 1037L, 1037L, 1037L, 1037L, 1037L, 892L, 1037L, 
892L, 892L, 1037L, 892L, 892L, 1037L, 892L, 892L, 1037L, 892L, 
1037L, 1037L, 1037L), OwnershipCode_Qtr1 = c("50", "50", "50", 
"50", NA, "50", NA, "50", "50", "50", "50", "50", "50", "50", 
"50", "50", NA, "50", "50", "50", "50", "50", NA, "50", NA, "30", 
"30", "30", "30", "30", "30", "30", "30", "30", "30", "50", "30", 
NA, "50", "30", NA, "50", "30", NA, "50", "30", "50", "30", "30", 
"30"), EmployeeQuarterlyHoursWorked_Qtr1 = c("560", "480", "520", 
"520", NA, "520", NA, "520", "480", "432", "480", "480", "417", 
"371", "404", "384", NA, "488", "479", "454", "476", "428", NA, 
"520", NA, "369", "386", "449", "457", "416", "448", "384", "389", 
"384", "388", "23", "386", NA, "1", "384", NA, "6", "384", NA, 
"6", "560", "60", "560", "560", "560"), EmployeeQuarterlyWages_Qtr1 = c("25974.00", 
"21961.00", "40279.00", "28924.00", "0", "47695.00", "0", "51586.00", 
"53059.00", "34615.00", "51267.00", "65367.00", "65935.00", "66764.00", 
"68044.00", "67817.00", "0", "68835.00", "70923.00", "67671.00", 
"80230.00", "54924.00", "0", "6977.00", "0", "6106.00", "7207.00", 
"8640.00", "8976.00", "8576.00", "9801.00", "8275.00", "8275.00", 
"8275.00", "8474.00", "376.00", "8677.00", "0", "750.00", "8883.00", 
"0", "500.00", "9356.00", "0", "441.00", "16433.00", "490.00", 
"17652.00", "18791.00", "20521.00"), EmployeeTenure_Qtr1 = c("3", 
"7", "11", "1", NA, "3", NA, "7", "4", "8", "12", "16", "20", 
"24", "28", "32", NA, "36", "2", "6", "10", "14", NA, "16", NA, 
"2", "6", "10", "14", "18", "22", "26", "30", "34", "38", "1", 
"42", NA, "1", "46", NA, "2", "50", NA, "2", "54", "2", "58", 
"62", "66"), EmployerLocationTotalWagesPaid_Qtr1 = c("2705239.00", 
"2655546.00", "3060808.00", "56494722.00", NA, "11577972.00", 
NA, "7905850.00", "67523518.00", "43414753.00", "59875257.00", 
"70830126.00", "77209774.00", "57210242.00", "72471506.00", "78008581.00", 
NA, "69864117.00", "12839820.00", "14455450.00", "16005038.00", 
"11267450.00", NA, "7107073.00", NA, "1900914.00", "2031601.00", 
"2495287.00", "2637867.00", "2678876.00", "2780822.00", "2219695.00", 
"2268540.00", "2120709.00", "2231025.00", "672803.00", "2234705.00", 
NA, "734496.00", "2228583.00", NA, "611993.00", "2408250.00", 
NA, "646954.00", "2993537.00", "622919.00", "3193565.00", "3395497.00", 
"3553823.00"), QuarterlyAverageEmployment_Qtr1 = c("97", "98", 
"105", "2385", NA, "362", NA, "240", "1952", "1760", "1715", 
"1710", "1617", "1640", "1606", "1595", NA, "1470", "389", "443", 
"461", "402", NA, "762", NA, "260", "251", "254", "265", "269", 
"259", "235", "241", "235", "233", "106", "227", NA, "97", "224", 
NA, "97", "221", NA, "94", "238", "92", "236", "239", "241"), 
    OwnershipCode_Qtr2 = c("50", "50", "50", "50", NA, "50", 
    "50", NA, "50", "50", "50", "50", "50", "50", "50", "50", 
    NA, "50", "50", "50", "50", NA, NA, "50", NA, "30", "30", 
    "30", "30", "30", "30", "30", "30", "30", "30", NA, "30", 
    "30", NA, "30", "30", "50", "30", "30", "50", "30", "50", 
    "30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr2 = c("480", 
    "520", "520", "520", NA, "520", "520", NA, "448", "464", 
    "464", "508", "536", "489", "495", "492", NA, "422", "423", 
    "424", "384", NA, NA, "520", NA, "448", "450", "386", "386", 
    "255", "387", "387", "385", "449", "451", NA, "456", "1", 
    NA, "455", "3", "2", "457", "6", "5", "480", "20", "480", 
    "480", "480"), EmployeeQuarterlyWages_Qtr2 = c("21911.00", 
    "22970.00", "29353.00", "28490.00", "0", "29964.00", "34491.00", 
    "0", "32600.00", "32250.00", "37819.00", "41553.00", "42701.00", 
    "44442.00", "49143.00", "46457.00", "0", "41058.00", "42066.00", 
    "41826.00", "34726.00", "0", "0", "7182.00", "0", "7934.00", 
    "8408.00", "7426.00", "7726.00", "5293.00", "8275.00", "8275.00", 
    "8275.00", "9830.00", "9896.00", "0", "10147.00", "80.00", 
    "0", "10423.00", "80.00", "166.00", "11007.00", "50.00", 
    "367.00", "14214.00", "163.00", "15188.00", "16168.00", "18067.00"
    ), EmployeeTenure_Qtr2 = c("4", "8", "12", "2", NA, "4", 
    "1", NA, "5", "9", "13", "17", "21", "25", "29", "33", NA, 
    "37", "3", "7", "11", NA, NA, "17", NA, "3", "7", "11", "15", 
    "19", "23", "27", "31", "35", "39", NA, "43", "1", NA, "47", 
    "1", "3", "51", "1", "3", "55", "3", "59", "63", "67"), EmployerLocationTotalWagesPaid_Qtr2 = c("2273677.00", 
    "2360550.00", "2501736.00", "46874830.00", NA, "8604560.00", 
    "42606565.00", NA, "36796585.00", "36680417.00", "44117859.00", 
    "46342635.00", "49910970.00", "59437204.00", "53698147.00", 
    "54838150.00", NA, "39797895.00", "10375288.00", "11095997.00", 
    "10321467.00", NA, NA, "6344850.00", NA, "2282968.00", "2439921.00", 
    "2159125.00", "2375421.00", "2373332.00", "2368912.00", "2203507.00", 
    "2184714.00", "2570868.00", "2605639.00", NA, "2692559.00", 
    "968147.00", NA, "2782204.00", "966568.00", "638154.00", 
    "2802058.00", "1024873.00", "683685.00", "2645041.00", "660148.00", 
    "2811118.00", "2986976.00", "3153488.00"), QuarterlyAverageEmployment_Qtr2 = c("98", 
    "100", "103", "2379", NA, "358", "1935", NA, "1776", "1682", 
    "1739", "1703", "1660", "1658", "1586", "1606", NA, "1459", 
    "414", "438", "450", NA, NA, "715", NA, "276", "258", "283", 
    "300", "305", "285", "264", "261", "273", "275", NA, "273", 
    "103", NA, "268", "104", "88", "269", "115", "85", "274", 
    "80", "278", "268", "208"), OwnershipCode_Qtr3 = c("50", 
    "50", "50", NA, "50", "50", "50", NA, "50", "50", "50", "50", 
    "50", "50", "50", "50", NA, "50", "50", "50", "50", NA, "50", 
    "50", NA, "30", "30", "30", "30", "30", "30", "30", "30", 
    "30", "30", NA, "30", NA, NA, "30", NA, NA, "30", NA, NA, 
    "30", NA, "30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr3 = c("520", 
    "520", "520", NA, "520", "520", "520", NA, "488", "544", 
    "560", "399", "426", "396", "443", "415", NA, "489", "489", 
    "524", "488", NA, "839", "520", NA, "384", "449", "446", 
    "451", "418", "452", "450", "449", "390", "389", NA, "384", 
    NA, NA, "384", NA, NA, "545", NA, NA, "560", NA, "560", "560", 
    "560"), EmployeeQuarterlyWages_Qtr3 = c("25563.00", "26941.00", 
    "29161.00", "0", "33522.00", "37714.00", "37519.00", "0", 
    "33862.00", "37625.00", "42420.00", "37400.00", "36718.00", 
    "38339.00", "39777.00", "43092.00", "0", "48008.00", "49174.00", 
    "48985.00", "45297.00", "0", "12678.00", "3644.00", "0", 
    "7065.00", "8494.00", "8737.00", "9013.00", "8619.00", "9654.00", 
    "9654.00", "9654.00", "8394.00", "8521.00", "0", "8697.00", 
    "0", "0", "8912.00", "0", "0", "14892.00", "0", "0", "17151.00", 
    "0", "18305.00", "19518.00", "21078.00"), EmployeeTenure_Qtr3 = c("5", 
    "9", "13", NA, "1", "5", "2", NA, "6", "10", "14", "18", 
    "22", "26", "30", "34", NA, "38", "4", "8", "12", NA, "1", 
    "18", NA, "4", "8", "12", "16", "20", "24", "28", "32", "36", 
    "40", NA, "44", NA, NA, "48", NA, NA, "52", NA, NA, "56", 
    NA, "60", "64", "68"), EmployerLocationTotalWagesPaid_Qtr3 = c("2561942.00", 
    "2836345.00", "2901631.00", NA, "6006025.00", "9244633.00", 
    "45594699.00", NA, "40660417.00", "42996310.00", "46205456.00", 
    "41682396.00", "41350115.00", "43793052.00", "42007503.00", 
    "54221071.00", NA, "44139475.00", "12277604.00", "12920820.00", 
    "12261562.00", NA, "841949.00", "5433154.00", NA, "2077482.00", 
    "2588350.00", "2625862.00", "2810502.00", "2888481.00", "2855988.00", 
    "2737480.00", "2717807.00", "2322919.00", "2346778.00", NA, 
    "2349394.00", NA, NA, "2424185.00", NA, NA, "3049769.00", 
    NA, NA, "3227870.00", NA, "3388103.00", "3536607.00", "3555916.00"
    ), QuarterlyAverageEmployment_Qtr3 = c("102", "110", "104", 
    NA, "292", "355", "1962", NA, "1814", "1730", "1742", "1703", 
    "1645", "1663", "1624", "1622", NA, "1461", "419", "443", 
    "433", NA, "96", "665", NA, "330", "335", "339", "354", "358", 
    "339", "356", "321", "334", "334", NA, "326", NA, NA, "322", 
    NA, NA, "336", NA, NA, "332", NA, "342", "340", "231"), OwnershipCode_Qtr4 = c("50", 
    "50", "50", NA, "50", "50", "50", NA, "50", "50", "50", "50", 
    "50", "50", "50", "50", "50", NA, "50", "50", "50", NA, "50", 
    "50", "30", "30", "30", "30", "30", "30", "30", "30", "30", 
    "30", "30", NA, "30", NA, "50", "30", NA, "50", "30", NA, 
    "50", "30", "50", "30", "30", "30"), EmployeeQuarterlyHoursWorked_Qtr4 = c("520", 
    "520", "520", NA, "520", "520", "520", NA, "520", "536", 
    "480", "499", "429", "490", "506", "426", "430", NA, "375", 
    "416", "500", NA, "772", "520", "293", "458", "395", "400", 
    "394", "387", "457", "450", "449", "460", "458", NA, "451", 
    NA, "4", "454", NA, "6", "480", NA, "30", "480", "0", "480", 
    "480", "560"), EmployeeQuarterlyWages_Qtr4 = c("25588.00", 
    "28944.00", "31548.00", "0", "35997.00", "35253.00", "37625.00", 
    "0", "43087.00", "38574.00", "34017.00", "61003.00", "43721.00", 
    "44729.00", "46407.00", "39777.00", "41149.00", "0", "42559.00", 
    "47402.00", "49078.00", "0", "5737.00", "1634.00", "4758.00", 
    "8287.00", "7280.00", "7499.00", "7726.00", "7956.00", "9654.00", 
    "10013.00", "10013.00", "10160.00", "10314.00", "0", "10524.00", 
    "0", "333.00", "10784.00", "0", "441.00", "13338.00", "0", 
    "245.00", "14782.00", "145.00", "15774.00", "16900.00", "21078.00"
    ), EmployeeTenure_Qtr4 = c("6", "10", "14", NA, "2", "6", 
    "3", NA, "7", "11", "15", "19", "23", "27", "31", "35", "1", 
    NA, "5", "9", "13", NA, "2", "19", "1", "5", "9", "13", "17", 
    "21", "25", "29", "33", "37", "41", NA, "45", NA, "1", "49", 
    NA, "1", "53", NA, "1", "57", "1", "61", "65", "69"), EmployerLocationTotalWagesPaid_Qtr4 = c("2592689.00", 
    "2786088.00", "2805669.00", NA, "5740894.00", "6721161.00", 
    "46794858.00", NA, "42277545.00", "47369971.00", "45689406.00", 
    "52769353.00", "51278020.00", "50198997.00", "53838385.00", 
    "43169822.00", "8467677.00", NA, "10719468.00", "11308861.00", 
    "12149650.00", NA, "316437.00", "6584282.00", "2353924.00", 
    "2497289.00", "2250699.00", "2328109.00", "2456573.00", "2497301.00", 
    "2844437.00", "2796565.00", "2739956.00", "2725955.00", "2746522.00", 
    NA, "2805607.00", NA, "621933.00", "2831090.00", NA, "628393.00", 
    "2498316.00", NA, "647813.00", "2589627.00", "656515.00", 
    "2813896.00", "2945372.00", "2961453.00"), QuarterlyAverageEmployment_Qtr4 = c("100", 
    "106", "100", NA, "279", "300", "1944", NA, "1767", "1738", 
    "1743", "1704", "1646", "1579", "1595", "1538", "354", NA, 
    "423", "446", "418", NA, "25", "659", "247", "255", "263", 
    "259", "276", "277", "254", "291", "257", "244", "250", NA, 
    "233", NA, "97", "238", NA, "92", "243", NA, "91", "239", 
    "91", "237", "245", "220"), CountyName = c("Statewide (i.e. no fixed location)", 
    "Statewide (i.e. no fixed location)", "Statewide (i.e. no fixed location)", 
    "Hennepin County", "City of Minneapolis", "City of Minneapolis", 
    "Hennepin County", "City of Minneapolis", "Hennepin County", 
    "Hennepin County", "Hennepin County", "Hennepin County", 
    "Hennepin County", "Hennepin County", "Hennepin County", 
    "Hennepin County", "Hennepin County", "Hennepin County", 
    "Hennepin County", "Hennepin County", "Hennepin County", 
    "Hennepin County", "Benton County", "Winona County", "Winona County", 
    "Winona County", "Winona County", "Winona County", "Winona County", 
    "Winona County", "Winona County", "Winona County", "Winona County", 
    "Winona County", "Winona County", "Winona County", "Winona County", 
    "Wabasha County", "Winona County", "Winona County", "Wabasha County", 
    "Winona County", "Winona County", "Wabasha County", "Winona County", 
    "Winona County", "Winona County", "Winona County", "Winona County", 
    "Winona County"), mncounty = c(NA, NA, NA, "27", NA, NA, 
    "27", NA, "27", "27", "27", "27", "27", "27", "27", "27", 
    "27", "27", "27", "27", "27", "27", "5", "85", "85", "85", 
    "85", "85", "85", "85", "85", "85", "85", "85", "85", "85", 
    "85", "79", "85", "85", "79", "85", "85", "79", "85", "85", 
    "85", "85", "85", "85"), IndustryGroup = c("Other Professional, Scientific, and Technical Services", 
    "Other Professional, Scientific, and Technical Services", 
    "Other Professional, Scientific, and Technical Services", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Scientific Research and Development Svc", "Scientific Research and Development Svc", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Scientific Research and Development Svc", "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Vegetable and Melon Farming", "Semiconductor and Other Electronic Component Manufacturing", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Elementary and Secondary Schools", "Executive, Legislative, and Other General Government Support ", 
    "Elementary and Secondary Schools", "Elementary and Secondary Schools", 
    "Executive, Legislative, and Other General Government Support ", 
    "Elementary and Secondary Schools", "Elementary and Secondary Schools", 
    "Executive, Legislative, and Other General Government Support ", 
    "Elementary and Secondary Schools", "Elementary and Secondary Schools", 
    "Executive, Legislative, and Other General Government Support ", 
    "Elementary and Secondary Schools", "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support "
    ), industry = c("All Other Professional, Scientific, and Technical Services", 
    "All Other Professional, Scientific, and Technical Services", 
    "All Other Professional, Scientific, and Technical Services", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Physical/Engineering/Biological Research", "Physical/Engineering/Biological Research", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Physical/Engineering/Biological Research", "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Navigational, Measuring, Electromedical, and Control Instruments Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Semiconductor and Other Electronic Component Manufacturing", 
    "Vegetable and Melon Farming", "Semiconductor and Other Electronic Component Manufacturing", 
    "Executive Offices ", "Executive Offices ", "Executive Offices ", 
    "Executive Offices ", "Executive Offices ", "Executive Offices ", 
    "Executive Offices ", "Executive Offices ", "Executive Offices ", 
    "Executive Offices ", "Executive Offices ", "Elementary and Secondary Schools ", 
    "Executive Offices ", "Elementary and Secondary Schools ", 
    "Elementary and Secondary Schools ", "Executive Offices ", 
    "Elementary and Secondary Schools ", "Elementary and Secondary Schools ", 
    "Executive Offices ", "Elementary and Secondary Schools ", 
    "Elementary and Secondary Schools ", "Executive Offices ", 
    "Elementary and Secondary Schools ", "Executive Offices ", 
    "Executive Offices ", "Executive Offices "), sector = c("Professional, Scientific, and Technical Services", 
    "Professional, Scientific, and Technical Services", "Professional, Scientific, and Technical Services", 
    "Manufacturing", "Professional, Scientific, and Technical Services", 
    "Professional, Scientific, and Technical Services", "Manufacturing", 
    "Professional, Scientific, and Technical Services", "Manufacturing", 
    "Manufacturing", "Manufacturing", "Manufacturing", "Manufacturing", 
    "Manufacturing", "Manufacturing", "Manufacturing", "Manufacturing", 
    "Manufacturing", "Manufacturing", "Manufacturing", "Manufacturing", 
    "Manufacturing", "Agriculture, Forestry, Fishing and Hunting", 
    "Manufacturing", "Public Administration", "Public Administration", 
    "Public Administration", "Public Administration", "Public Administration", 
    "Public Administration", "Public Administration", "Public Administration", 
    "Public Administration", "Public Administration", "Public Administration", 
    "Educational Services", "Public Administration", "Educational Services", 
    "Educational Services", "Public Administration", "Educational Services", 
    "Educational Services", "Public Administration", "Educational Services", 
    "Educational Services", "Public Administration", "Educational Services", 
    "Public Administration", "Public Administration", "Public Administration"
    ), SectorCode = c("54", "54", "54", "33", "54", "54", "33", 
    "54", "33", "33", "33", "33", "33", "33", "33", "33", "33", 
    "33", "33", "33", "33", "33", "11", "33", "92", "92", "92", 
    "92", "92", "92", "92", "92", "92", "92", "92", "61", "92", 
    "61", "61", "92", "61", "61", "92", "61", "61", "92", "61", 
    "92", "92", "92"), SubSector = c("Professional, Scientific, and Technical Services", 
    "Professional, Scientific, and Technical Services", "Professional, Scientific, and Technical Services", 
    "Computer and Electronic Product Manufacturing", "Professional and Technical Services", 
    "Professional and Technical Services", "Computer and Electronic Product Manufacturing", 
    "Professional and Technical Services", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Computer and Electronic Product Manufacturing", 
    "Computer and Electronic Product Manufacturing", "Crop Production", 
    "Computer and Electronic Product Manufacturing", "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Educational Services", "Executive, Legislative, and Other General Government Support ", 
    "Educational Services", "Educational Services", "Executive, Legislative, and Other General Government Support ", 
    "Educational Services", "Educational Services", "Executive, Legislative, and Other General Government Support ", 
    "Educational Services", "Educational Services", "Executive, Legislative, and Other General Government Support ", 
    "Educational Services", "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support ", 
    "Executive, Legislative, and Other General Government Support "
    ), SubSectorCode = c(541L, 541L, 541L, 334L, 541L, 541L, 
    334L, 541L, 334L, 334L, 334L, 334L, 334L, 334L, 334L, 334L, 
    334L, 334L, 334L, 334L, 334L, 334L, 111L, 334L, 921L, 921L, 
    921L, 921L, 921L, 921L, 921L, 921L, 921L, 921L, 921L, 611L, 
    921L, 611L, 611L, 921L, 611L, 611L, 921L, 611L, 611L, 921L, 
    611L, 921L, 921L, 921L), TotalWages = c(99036, 100816, 130341, 
    57414, 69519, 150626, 109635, 51586, 162608, 143064, 165523, 
    205323, 189075, 194274, 203371, 197143, 41149, 157901, 204722, 
    205884, 209331, 54924, 18415, 19437, 4758, 29392, 31389, 
    32302, 33441, 30444, 37384, 36217, 36217, 36659, 37205, 376, 
    38045, 80, 1083, 39002, 80, 1107, 48593, 50, 1053, 62580, 
    798, 66919, 71377, 80744)), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -50L), groups = structure(list(
    PersonID = c("30", "36", "49"), .rows = structure(list(1:22, 
        23L, 24:50), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))
fafcakar

fafcakar1#

您可以在这里使用reframe(),并使用across()

reframe(
  ungroup(df),
  across(contains("QuarterlyWages"), ~max(as.numeric(.x))),
  .by=PersonID:CalendarYear
)

输出:

# A tibble: 38 × 6
   PersonID CalendarYear EmployeeQuarterlyWages_Qtr1 EmployeeQuarterlyWages_Qtr2 EmployeeQuarterlyWages_…¹
   <chr>           <int>                       <dbl>                       <dbl>                     <dbl>
 1 30               2003                       25974                       21911                     25563
 2 30               2004                       21961                       22970                     26941
 3 30               2005                       40279                       29353                     29161
 4 30               2006                       28924                       28490                     33522
 5 30               2007                       47695                       29964                     37714
 6 30               2008                       51586                       34491                     37519
 7 30               2009                       53059                       32600                     33862
 8 30               2010                       34615                       32250                     37625
 9 30               2011                       51267                       37819                     42420
10 30               2012                       65367                       41553                     37400
# ℹ 28 more rows
# ℹ abbreviated name: ¹​EmployeeQuarterlyWages_Qtr3
# ℹ 1 more variable: EmployeeQuarterlyWages_Qtr4 <dbl>
# ℹ Use `print(n = ...)` to see more rows
hlswsv35

hlswsv352#

我们可以提取每个人每年季度的最高工资,方法是对数据进行更长时间的整形,提取每个PersonID/Year/Qtr中的最高值,然后再次整形。
编辑:已将工资列转换为数字,以便正确排序。

library(dplyr); library(tidyr)
df %>%
  ungroup() %>%
  select(PersonID, Year = CalendarYear, contains("QuarterlyWages")) %>%
  pivot_longer(3:6, values_transform = as.numeric, names_to = "Qtr") %>% 
  slice_max(value, by = c(PersonID, Year, Qtr)) %>%
  pivot_wider(names_from = name, values_from = value)

结果

# A tibble: 38 × 6
   PersonID  Year EmployeeQuarterlyWages_Qtr1 EmployeeQuarterlyWages_Qtr2 EmployeeQuarterlyWages_Qtr3 EmployeeQuarterlyWages_Qtr4
   <chr>    <int>                       <dbl>                       <dbl>                       <dbl>                       <dbl>
 1 30        2003                       25974                       21911                       25563                       25588
 2 30        2004                       21961                       22970                       26941                       28944
 3 30        2005                       40279                       29353                       29161                       31548
 4 30        2006                       28924                       28490                       33522                       35997
 5 30        2007                       47695                       29964                       37714                       35253
 6 30        2008                       51586                       34491                       37519                       37625
 7 30        2009                       53059                       32600                       33862                       43087
 8 30        2010                       34615                       32250                       37625                       38574
 9 30        2011                       51267                       37819                       42420                       34017
10 30        2012                       65367                       41553                       37400                       61003
# ℹ 28 more rows

对OP中的示例进行过滤:(注意,2015年的预期产出似乎不完整)

# A tibble: 3 × 6
  PersonID  Year EmployeeQuarterlyWages_Qtr1 EmployeeQuarterlyWages_Qtr2 EmployeeQuarterlyWages_Qtr3 EmployeeQuarterlyWages_Qtr4
  <chr>    <int>                       <dbl>                       <dbl>                       <dbl>                       <dbl>
1 49        2015                        8883                       10423                        8912                       10784
2 49        2016                        9356                       11007                       14892                       13338
3 49        2017                       16433                       14214                       17151                       14782
wfveoks0

wfveoks03#

使用across而不使用reframe的替代(但类似)方法:

library(dplyr)

df %>% 
  group_by(PersonID, CalendarYear) %>% 
  transmute(across(contains("QuarterlyWages"), ~max(as.numeric(.), na.rm = TRUE))) %>% 
  slice(1) %>% 
  ungroup()

 PersonID CalendarYear EmployeeQuarterlyWages_Qtr1 EmployeeQuarterlyWages_Qtr2 EmployeeQ…¹ Emplo…²
   <chr>           <int>                       <dbl>                       <dbl>       <dbl>   <dbl>
 1 30               2003                       25974                       21911       25563   25588
 2 30               2004                       21961                       22970       26941   28944
 3 30               2005                       40279                       29353       29161   31548
 4 30               2006                       28924                       28490       33522   35997
 5 30               2007                       47695                       29964       37714   35253
 6 30               2008                       51586                       34491       37519   37625
 7 30               2009                       53059                       32600       33862   43087
 8 30               2010                       34615                       32250       37625   38574
 9 30               2011                       51267                       37819       42420   34017
10 30               2012                       65367                       41553       37400   61003
# … with 28 more rows, and abbreviated variable names ¹​EmployeeQuarterlyWages_Qtr3,
#   ²​EmployeeQuarterlyWages_Qtr4
# ℹ Use `print(n = ...)` to see more rows

相关问题