此问题已在此处有答案:
Standardize data columns in R(16个回答)
5天前关闭。
我正在尝试将我的数据集在0到1之间进行归一化。每列应独立归一化。我想输出一个新的dataframe,它保留了第一列(未规范化)和所有原始列标题。
这是我的数据的一个子集:
SEC <- structure(list(ml = c(0, 0.03, 0.06, 0.09, 0.12, 0.15, 0.18,
0.21, 0.24, 0.27), A1_280 = c(0.542, 0.322, 0.286, 0.261, 0.19,
-0.258, -0.272, -0.046, -0.005, 0.138), A1_420 = c(-0.06, -0.303,
-0.192, -0.381, 0.15, -0.268, -0.576, -0.016, -0.541, -0.41),
A2_280 = c(9.877, 27.637, 3.513, -0.882, -1.92, -1.251, -2.284,
-2.129, -3.131, -2.913), A2_420 = c(-0.445, 13.337, 1.075,
-1.402, -2.156, -2.263, -1.988, -2.105, -2.082, -2.61), A3_280 = c(8.782,
59.775, 56.769, 22.842, 9.086, 3.466, 2.256, 1.341, 0.946,
0.754), A3_420 = c(0.54, 30.736, 29.073, 12.277, 4.413, 1.77,
1.123, 0.488, 0.634, -0.011), B1_280 = c(14.95, 61.441, 37.189,
10.928, 4.316, 2.292, 0.757, 0.995, 0.997, -0.07), B1_420 = c(2.455,
30.966, 18.61, 4.779, 1.511, 0.74, 0.267, 0.533, 0.149, -0.551
), B2_280 = c(-0.288, -0.304, -0.006, -0.158, -0.284, -0.131,
-0.443, -0.081, -0.387, -0.04), B2_420 = c(-0.074, -0.256,
0.022, 0.104, -0.287, -0.139, -0.015, 0.1, -0.021, -0.146
), B3_280 = c(0.084, 0.043, 0.061, 0.032, 0.038, 0.072, 0.03,
0.128, 0.077, 0.098), B3_420 = c(-0.056, 0.095, 0.05, -0.015,
-0.106, 0.106, -0.017, -0.001, 0.036, 0.139), AB1_280 = c(1.599,
1.908, 0.735, 0.49, 0.708, 0.109, 0.702, -0.487, -0.009,
-0.196), AB1_420 = c(0.199, 1.218, 0.469, 0.564, 0.498, -0.2,
-0.322, 0.294, 0.367, -0.281), AB2_280 = c(-1.46, -1.2, -1.977,
-2.736, -2.087, -2.144, -2.246, -2.84, -2.304, -3.106), AB2_420 = c(-1,
-0.468, -0.459, -0.345, -1.145, -0.924, -1.622, -0.869, -1.028,
-1.183), AB3_280 = c(0.306, 1.392, -2.248, -3.247, -3.715,
-2.699, -3.896, -2.744, -3.653, -3.387), AB3_420 = c(-0.899,
0.817, -1.41, -1.162, -1.258, -1.409, -1.7, -1.309, -1.946,
-1.658), AB4_280 = c(6.847, 55.721, 51.163, 21.166, 8.441,
3.105, 2.631, 1.265, -0.184, 0.529), AB4_420 = c(-0.861,
27.465, 25.185, 10.767, 4.136, 1.414, 0.545, -0.098, 0.242,
-0.509)), row.names = c(NA, -10L), spec = structure(list(
cols = list(ml = structure(list(), class = c("collector_double",
"collector")), A1_280 = structure(list(), class = c("collector_double",
"collector")), A1_420 = structure(list(), class = c("collector_double",
"collector")), A2_280 = structure(list(), class = c("collector_double",
"collector")), A2_420 = structure(list(), class = c("collector_double",
"collector")), A3_280 = structure(list(), class = c("collector_double",
"collector")), A3_420 = structure(list(), class = c("collector_double",
"collector")), B1_280 = structure(list(), class = c("collector_double",
"collector")), B1_420 = structure(list(), class = c("collector_double",
"collector")), B2_280 = structure(list(), class = c("collector_double",
"collector")), B2_420 = structure(list(), class = c("collector_double",
"collector")), B3_280 = structure(list(), class = c("collector_double",
"collector")), B3_420 = structure(list(), class = c("collector_double",
"collector")), AB1_280 = structure(list(), class = c("collector_double",
"collector")), AB1_420 = structure(list(), class = c("collector_double",
"collector")), AB2_280 = structure(list(), class = c("collector_double",
"collector")), AB2_420 = structure(list(), class = c("collector_double",
"collector")), AB3_280 = structure(list(), class = c("collector_double",
"collector")), AB3_420 = structure(list(), class = c("collector_double",
"collector")), AB4_280 = structure(list(), class = c("collector_double",
"collector")), AB4_420 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x5606ec29c390>, class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
字符串
以下是我到目前为止的代码:
normalize_0_to_1_columnwise <- function(SEC) {
normalized_SEC <- data.frame(ml = SEC$ml) # Copy the first column as it is
# Apply normalization for each column (excluding the first column 'ml')
for (col in names(SEC)[-1]) {
normalized_SEC[[col]] <- (SEC[[col]] - min(SEC[[col]])) / (max(SEC[[col]]) - min(SEC[[col]]))
}
# Preserve the original column headers
colnames(normalized_SEC)[-1] <- colnames(SEC)[-1]
}
# Output normalized dataframe
normalized_SEC
型
这用于保留第一列“ml”和所有列标题,但 Dataframe 中的所有“值”都是NA。我哪里做错了?
我知道还有其他类似的问题,但我不能让他们为我的数据和所需的输出工作。
3条答案
按热度按时间np8igboo1#
在tidyverse中,您可以:
字符串
i34xakig2#
函数返回最后一行。函数内部的最后一行是
colnames(SEC)[-1]
。在函数关闭}
之后有normalized_SEC
,但它需要作为函数定义中的最后一行才能正确返回。如果我做了这个修正,你的函数就可以很好地处理你的样本数据了。但是如果它仍然不能处理完整的数据,并且您正在获得
NA
输出,则输入中可能有NA
值,并且您需要将na.rm = TRUE
参数添加到min()
和max()
调用中。也就是说,我可以从您的
tbl_df
类中看到您正在使用一些tidyverse函数。一个不错的dplyr
方法是这样的:字符串
使用此示例数据(
dput()
来自问题,删除了pointer
和spec_*
内容):型
wn9m85ua3#
请原谅我将您的数据从tibble转换为data.frame,因为我在重新创建结构时遇到了问题。下面是基R,所以它也适用于tibles,但会返回data.frame。
首先,数据:
字符串
现在,为缩放创建一个(矢量化)函数,并将其
apply
到数据框列。如果传递正确的center
和scale
值,也可以使用scale
的基本版本,但它足够简单,我们可以滚动自己的:型
现在简单地说:
型