R语言 查找具有重复列值的行的平均值

v8wbuo2f  于 2023-02-17  发布在  其他
关注(0)|答案(2)|浏览(433)

如果某一行的GeneSymbol是重复的(即前面的行已经出现过相同的GeneSymbol字符串),我想对这些重复行的其他各列计算平均值。然后,我想将meth.kirp.cpg$GeneSymbol列指定为meth.kirp.symbol的新行名称。

# Try to collapse rows that share a GeneSymbol by averaging each column.
# NOTE(review): presumably this is the call that raises the error quoted
# below. `by` is passed a bare vector here, whereas aggregate()'s data-frame
# method documents `by` as a list of grouping elements, and `data =` belongs
# to the formula interface -- likely causes, confirm against ?aggregate.
meth.kirp.symbol <- aggregate(meth.kirp.cpg, by=meth.kirp.cpg$GeneSymbol,data=meth.kirp.cpg,FUN=mean)
# Drop every row that still contains an NA.
meth.kirp.symbol <- na.omit(meth.kirp.symbol)

报错信息:Error in !meth.kirp.cpg$GeneSymbol : invalid argument type(参数类型无效)

# Promote the gene symbols to row names, then remove the now-redundant
# GeneSymbol column from the data frame.
gene_labels <- meth.kirp.symbol$GeneSymbol
row.names(meth.kirp.symbol) <- gene_labels
meth.kirp.symbol[["GeneSymbol"]] <- NULL

样本数据:

> dput(meth.kirp.cpg[1:100,200:203])
structure(list(TCGA.Y8.A8RZ.01A = c(0.497271965133314, 0.369704160054987, 
0.891551980644717, 0.53916519146516, 0.452596179682145, 0.763243369017172, 
0.158949338942062, 0.0114350980370701, 0.857172998292539, 0.934966165863031, 
0.0472399882616577, 0.0198027126658891, 0.0537032844435588, 0.564211104629996, 
0.927550968549496, 0.797624950816491, 0.0290697007131178, 0.595912681963104, 
0.174701858916678, 0.882333378501306, 0.857440598542643, 0.937145001009176, 
0.159643935623585, 0.0516599385847632, 0.0440610541422886, 0.986742471430344, 
0.0164534273018356, 0.905466185196924, 0.831233179669209, 0.945308723924202, 
0.889966942114764, 0.354918054240825, 0.013300356676493, 0.830128502759263, 
0.823700653779667, 0.10271041258008, 0.0287034526533831, 0.0206566535596095, 
0.600278705481019, 0.875119985046439, 0.0371692028405492, 0.0222508063515825, 
0.93666315025643, 0.928345505993255, 0.901317044941454, 0.765949109446722, 
0.0581920996836425, 0.0430643414149486, 0.90591121556885, 0.951186809441601, 
0.980658657952396, 0.0808689550165884, 0.572734025228151, 0.0463712698649506, 
0.192938671458161, 0.905133179842298, 0.154186184934303, 0.585848485208317, 
0.898651062830721, 0.936272438882973, 0.448635246131194, 0.283554776533025, 
0.0309419633482652, 0.861391852247259, 0.0658397100529213, 0.0675173265786392, 
0.96281794820265, 0.0313479382790672, 0.0866228017859603, 0.929772217122431, 
0.200029728957143, 0.706267849864433, 0.94823325183122, 0.0543243613691732, 
0.809705102714619, 0.910219965210065, 0.953735039953166, 0.868080342290672, 
0.332725938100749, 0.84324363592612, 0.198505346878334, 0.992801413007608, 
0.0503582852070818, 0.475444599242399, 0.988297216865074, 0.926321491575251, 
0.0243299898333789, 0.10772567979535, 0.892537448190976, 0.98896599725299, 
0.305816605549349, 0.696841353119351, 0.807770532814146, 0.115817690804427, 
0.0130874570078787, 0.837153421174282, 0.917049247300387, 0.0122380520755151, 
0.912364270697772, 0.951585664581661), TCGA.Y8.A8S0.01A = c(0.264547845506278, 
0.155993443463906, 0.90708922263186, 0.756216481105085, 0.740439258566013, 
0.791640201668772, 0.455406433078148, 0.0140503539973426, 0.898152971615672, 
0.942017289363471, 0.049036339456109, 0.0165762503059443, 0.0593335909473265, 
0.459771444740498, 0.929336066827294, 0.948532354182067, 0.0181370789479238, 
0.309340792232534, 0.444549689057808, 0.968706245954783, 0.911532818633905, 
0.922085999840623, 0.439367515136192, 0.0341088658899809, 0.259555790896829, 
0.987081295221313, 0.013467632667194, 0.935890204938304, 0.749182228512838, 
0.955266815776283, 0.854718619922343, 0.192270767250957, 0.0103294109383117, 
0.814778997430484, 0.884929086289906, 0.364141121626961, 0.0261130123662795, 
0.0201970054665062, 0.613121306641491, 0.867830249077504, 0.0313157213491265, 
0.0247935393251212, 0.911488850004792, 0.895214160236747, 0.52514961950261, 
0.88376413256428, 0.0384672039105036, 0.0294663841757698, 0.957910054231064, 
0.955637967662581, 0.980805007180895, 0.056939960969146, 0.737932777954196, 
0.0318060005100734, 0.0397987294622703, 0.914995576026559, 0.238482151213353, 
0.767237616032529, 0.939069872404913, 0.938081858296652, 0.440484138576205, 
0.114954872159159, 0.0244136454763111, 0.846540100826359, 0.200658220716674, 
0.0687237453669147, 0.974841732977847, 0.0278561439566069, 0.114556259287869, 
0.944131549328849, 0.493585389693977, 0.480768780057962, 0.932741163713172, 
0.216131694873378, 0.814313163672161, 0.928672085649515, 0.962443725743837, 
0.74077045094838, 0.133424847325993, 0.862916554456606, 0.145580720195214, 
0.992046334661351, 0.0436393256442202, 0.712662114242391, 0.990103320899525, 
0.880978917772909, 0.0225238271112234, 0.116230435240584, 0.841150918912896, 
0.987985943353706, 0.127354758970553, 0.898121601977617, 0.906792865660564, 
0.0737140541126039, 0.009384930132621, 0.793174978739833, 0.912006490158477, 
0.0139855016500958, 0.899386699555785, 0.954405724569427), TCGA.Y8.A8S1.01A = c(0.581694254362101, 
0.18542567310602, 0.867542022665212, 0.581696896530406, 0.78744743737146, 
0.389767299224826, 0.216960001070295, 0.0124187105565375, 0.856565918074359, 
0.936457155497046, 0.0442067173962602, 0.0140296565558842, 0.249846574394466, 
0.476357170192695, 0.919768286971888, 0.947576649507426, 0.0180436058702339, 
0.832021246626068, 0.417883168882454, 0.97668892894152, 0.858942374669228, 
0.908701674774902, 0.360605175695457, 0.0300050878088438, 0.0152722989237764, 
0.986911029436886, 0.017968868244694, 0.916403999667678, 0.665473802275997, 
0.894770076029211, 0.815937481683747, 0.505075619070648, 0.0100265844940471, 
0.807220745685876, 0.808764855317654, 0.316721084000246, 0.0221128261277136, 
0.0159144104679436, 0.841207551595894, 0.832097056965122, 0.0307025272327261, 
0.0185995888430839, 0.916475132262134, 0.866934314703349, 0.877454940064029, 
0.833363153320509, 0.0381805807655402, 0.0273586863112515, 0.941134508841013, 
0.912364696768614, 0.979496335094356, 0.0978730283287029, 0.525086161575951, 
0.0261062766734918, 0.0320956400558761, 0.853299258800261, 0.131541990462738, 
0.52940082480104, 0.85502275403225, 0.894518042164208, 0.535297530625847, 
0.4749856970718, 0.0169015303648868, 0.853076664003846, 0.147470852267928, 
0.0467328218099492, 0.978302229003696, 0.0283296096497759, 0.0728218303634117, 
0.87648102880048, 0.334033090095117, 0.680802308868236, 0.927680442837916, 
0.0707104696817582, 0.770195537274174, 0.868009087459515, 0.951475963819618, 
0.797689168093036, 0.683015763740223, 0.803432908458056, 0.191347383851541, 
0.991395963746777, 0.0604147893723364, 0.874483310648752, 0.98062171660084, 
0.83805259736689, 0.0247465538486677, 0.170932965706481, 0.842567160630419, 
0.983614792589478, 0.800795162965799, 0.860170927275085, 0.890545098059859, 
0.298079674760789, 0.0084611909392357, 0.73227051610062, 0.831245875309251, 
0.0115215601617515, 0.784943936721062, 0.835800768909104), GeneSymbol = c("RBL2", 
NA, "VDAC3", "ACTN1", "ATP2A1", "SFRP1", NA, "NIPA2", "MAN1B1", 
"LRRC16A", "CNBP", "DDX55", "FAM81A", "KCNQ1", NA, "NPHP4", "MRPS25", 
NA, NA, "MAEL", "PROX1", "ELOVL1", "LILRA6", "LOC283050", "NR5A2", 
"CDK10", NA, "TMEM182", NA, "DNAJA2", "ATOH7", "LRFN1", "MRPL12", 
"COL6A3", NA, "C8orf31", "RTTN", "CD2BP2", "SLMAP", NA, "NOV", 
"MXD4", "SND1", "MUSTN1", "TAS1R3", "ITGAD", "SMARCC2", "C1orf114", 
NA, "C1orf65", "DNAH17", "DAB1", NA, "SLBP", "CHCHD4", NA, "TNNT2", 
"CASZ1", "CASZ1", "C3orf16", NA, "WFIKKN2", "CCDC45", NA, "MEOX2", 
"CKLF", "TRANK1", "ZFP36", "SLC2A9", "MXRA7", "LOXL4", NA, "P2RX6P", 
"SFRS7", NA, "EHMT1", "AGPAT3", "CDSN", NA, NA, "AGA", "LDHD", 
"C14orf181", "LOC729176", "SH2B3", "MTMR7", "MT1F", "RSPO3", 
"ANKRD11", "TDRD6", "WWP2", "OR1F2P", "SERPINB12", "DOK7", "SRCAP", 
"AMDHD2", "DRG2", "TCTE3", "EFNB1", "FAM180B")), row.names = c("cg00000029", 
"cg00000165", "cg00000236", "cg00000289", "cg00000292", "cg00000321", 
"cg00000363", "cg00000622", "cg00000658", "cg00000721", "cg00000734", 
"cg00000769", "cg00000905", "cg00000924", "cg00000948", "cg00000957", 
"cg00001245", "cg00001249", "cg00001261", "cg00001349", "cg00001364", 
"cg00001446", "cg00001510", "cg00001582", "cg00001583", "cg00001687", 
"cg00001747", "cg00001791", "cg00001809", "cg00001854", "cg00001874", 
"cg00002033", "cg00002116", "cg00002145", "cg00002190", "cg00002224", 
"cg00002236", "cg00002406", "cg00002426", "cg00002449", "cg00002464", 
"cg00002490", "cg00002531", "cg00002591", "cg00002593", "cg00002597", 
"cg00002660", "cg00002719", "cg00002769", "cg00002808", "cg00002809", 
"cg00002810", "cg00002837", "cg00003091", "cg00003173", "cg00003181", 
"cg00003287", "cg00003345", "cg00003513", "cg00003529", "cg00003578", 
"cg00003625", "cg00003784", "cg00003969", "cg00003994", "cg00004055", 
"cg00004067", "cg00004072", "cg00004082", "cg00004089", "cg00004105", 
"cg00004121", "cg00004192", "cg00004207", "cg00004209", "cg00004429", 
"cg00004533", "cg00004562", "cg00004608", "cg00004773", "cg00004818", 
"cg00004883", "cg00004939", "cg00004963", "cg00004979", "cg00004996", 
"cg00005010", "cg00005040", "cg00005072", "cg00005083", "cg00005112", 
"cg00005166", "cg00005215", "cg00005297", "cg00005306", "cg00005390", 
"cg00005437", "cg00005543", "cg00005617", "cg00005619"), class = "data.frame")
anauzrmj

anauzrmj1#

由于GeneSymbol中有一些NA,因此不能将其用作行名称,除非同时删除NA。
这段代码先找出GeneSymbol重复或为NA的行并计算这些行的平均值,然后将它们从数据框中删除,最后将GeneSymbol指定为行名称。

# Flag the rows to set aside: GeneSymbol is NA, or the same symbol already
# appeared in an earlier row.
symbol_missing <- is.na(meth.kirp.cpg$GeneSymbol)
set_aside <- duplicated(meth.kirp.cpg$GeneSymbol) | symbol_missing

# For the flagged rows, record the per-row mean over every column except
# GeneSymbol, plus whether the row was flagged because its symbol is NA.
value_columns <- names(meth.kirp.cpg) != "GeneSymbol"
flagged_values <- meth.kirp.cpg[set_aside, value_columns, drop = FALSE]
meth.kirp.duplicated.na <- data.frame(
  mean = apply(flagged_values, MARGIN = 1, mean),
  is.na = symbol_missing[set_aside]
)

# Keep only the first occurrence of each non-missing symbol ...
meth.kirp.cpg <- meth.kirp.cpg[!set_aside, , drop = FALSE]

# ... so the remaining symbols are unique and can be used as row names.
rownames(meth.kirp.cpg) <- meth.kirp.cpg$GeneSymbol
mklgxw1f

mklgxw1f2#

如果我没有理解错的话,您可以循环遍历这些行,并仅在GeneSymbol重复时计算平均值。

# Add an `average` column holding the row mean of the numeric value columns,
# filled only for rows whose GeneSymbol occurs more than once; every other
# row (unique or NA symbol) keeps NA.
#
# Fix: the original called mean(a, b, c), but mean() averages only its first
# argument (the 2nd and 3rd are matched to `trim` and `na.rm`), so it simply
# returned column 1. rowMeans() averages across the columns instead, and the
# per-row loop is replaced by a vectorised mask (also safe for zero rows).
# The value columns are no longer hard-coded as columns 1-3: every numeric
# column is used, which is the same set for the 3-sample example data.
gene_symbol <- meth.kirp.cpg$GeneSymbol

# TRUE for every occurrence (first and later) of a repeated, non-NA symbol
is_repeated <- !is.na(gene_symbol) &
  (duplicated(gene_symbol) | duplicated(gene_symbol, fromLast = TRUE))

# Numeric measurement columns; skip `average` in case this block is rerun
is_numeric_col <- vapply(meth.kirp.cpg, is.numeric, logical(1))
value_cols <- setdiff(names(meth.kirp.cpg)[is_numeric_col], "average")

# create the column of averages, NA by default
meth.kirp.cpg$average <- rep(NA_real_, nrow(meth.kirp.cpg))

# fill it with the row average wherever `GeneSymbol` is a duplicate
meth.kirp.cpg$average[is_repeated] <- rowMeans(
  meth.kirp.cpg[is_repeated, value_cols, drop = FALSE]
)

相关问题