将3个连续公式的函数应用于具有相同变量的稳健 Dataframe

vi4fp9gy  于 2024-01-03  发布在  其他
关注(0)|答案(2)|浏览(74)

为了重现这个问题,我使用了以下 Dataframe :

library(tidyverse)
library(lubridate)

#Step 1. Load data frame and libraries
df <- data.frame(
  stringsAsFactors = FALSE,
  check.names = FALSE,
  Date = c("01/11/1876","01/12/1876",
           "01/01/1877","01/02/1877","01/03/1877",
           "01/04/1877","01/05/1877","01/06/1877",
           "01/07/1877","01/08/1877","01/09/1877",
           "01/10/1877","01/11/1877","01/12/1877",
           "01/01/1878"),
  `Att-Bissen P [mm]` = c(48.5,111.2,29.7,139.4,90.1,25.9,
                          216,94.6,40.5,NA,64.4,68.8,44.7,
                          34.8,71.9),
  `Att-Bissen PET [mm]` = c(88.4,88.3,80.5,53.4,36.7,20.2,
                            21.6,21.7,21.3,37.6,46.1,66.5,89.8,
                            121.5,87.7),
  `Att-Bissen Q [mm]` = c(13.5,12.6,11.3,12.9,44.6,21.3,
                          194.9,NA,49.1,46.7,63.6,25.4,19.8,
                          15.3,16),
  `Rau. Merl P [mm]` = c(43.7,104.2,25.5,131.3,83.7,21.9,
                         205.2,88.1,35.9,61,59,63.2,40,
                         30.4,66.2),
  `Rau. Merl PET [mm]` = c(91.4,91.3,83.2,54.9,37.5,20.3,
                           21.8,21.8,21.4,38.4,47.3,68.6,NA,
                           125.9,90.7),
  `Rau. Merl Q [mm]` = c(8.7,10.6,8.4,14.3,23.7,14.1,
                         131.6,106.7,40.1,42.4,50.3,24.6,16.7,
                         11.3,13.7),
  `Syre Felsmuhle/Mertert P [mm]` = c(37.8,89.5,22.3,112.7,72,19.2,
                                      175.8,75.8,31.2,52.6,50.9,54.5,34.7,
                                      26.5,57.1),
  `Syre Felsmuhle/Mertert PET [mm]` = c(95.6,95.6,86.9,57.2,38.8,20.7,
                                        22.3,22.3,21.9,39.8,49.2,71.6,97.2,
                                        132,94.9),
  `Syre Felsmuhle/Mertert Q [mm]` = c(16,22,17.9,24,23.1,11.4,91,NA,
                                      NA,45.2,65.6,NA,NA,NA,NA),
  `Wiltz-Winseler P [mm]` = c(50.1,106.9,33,132.4,87.7,29.7,
                              201.8,91.8,42.8,66.4,64.5,68.5,46.7,
                              37.7,71.3),
  `Wiltz-Winseler PET [mm]` = c(87.4,87.3,79.5,52.5,35.8,19.4,
                                20.8,20.8,20.4,36.7,NA,NA,88.8,
                                120.4,86.7),
  `Wiltz-Winseler Q [mm]` = c(7.2,6.3,5,8.6,33.9,32.2,234.2,
                              148.1,68.5,51.5,101.4,25.7,18.7,
                              14.3,12.1))

字符串
数据框由四个部位组成,每个部位有三个参数:P、PET和Q。在步骤2中,我创建了一个函数,其中包含三个公式,我需要将它们应用于每个部位。请记住,这些公式应用于每个时间步。

# Step 2: Create Anomalies
# Calculate anomalies for P, PET, and Q
formula_1 <- function(P, PET, Q) {
  Anomaly_P = P - mean(P, na.rm = TRUE)
  Anomaly_PET = PET - mean(PET, na.rm = TRUE)
  Anomaly_Q = Q - mean(Q, na.rm = TRUE)
  return(list(Anomaly_P = Anomaly_P, Anomaly_PET = Anomaly_PET, Anomaly_Q = Anomaly_Q))
}


第三步为每个站点的名称设置子集

#Step 3: Extract the site names from the column names
site_names <- sub(" P \\[mm\\]| PET \\[mm\\]| Q \\[mm\\]", "", names(df)[-1]) |>
  unique()
site_names
#Step 4: Loop through each site and calculate the formula

results <- list()
for (site in site_names) {
  site_data <- df[, grepl(site, names(df))]
  results[[site]] <- formula_1(site_data[[paste0(site, " P [mm]")]], 
                                  site_data[[paste0(site, " PET [mm]")]], 
                                  site_data[[paste0(site, " Q [mm]")]])
}
#Step 5: unlist results
results_sum <- data.frame(Site = names(results), unlist(results))

我不知道我在哪里犯了错误。这段代码的结果是一个只有2列和180个条目的数据框架。我想得到的是一个数据框架,其中每个站点增加了三列,包含PPETQ的异常(每个时间步)。
任何帮助都将非常感激。

  • 编辑 * 以下是我想结束的:一个 Dataframe ,其中P,PET和Q(每个时间步)的异常被添加到每个站点之后。(棕色/红色列是异常计算的结果= x-均值(xn)x1c 0d1x
kjthegm6

kjthegm61#

使用代码:

data.frame(scale(df[-1], center = TRUE, scale = FALSE))

字符串
将上面代码的结果与步骤4中的列表结果进行比较。
在第4步的代码中,执行data.frame(lapply(results, data.frame))这将给予您想要的内容。但您所做的一切都不是必需的,因为您可以使用上面所示的scale函数。
现在,您可以通过以下方式实现您的目标:

site_names <- sub(" (P|PET|Q) \\[mm\\]", "", names(df)[-1])
site_names_uq <- unique(site_names)
site_vars <- sub(".* (P|PET|Q) \\[mm\\]", "\\1", names(df)[-1])

data.frame(scale(df[-1], TRUE, FALSE))|>
    setNames(site_vars)|>
    split.default(site_names)|>
    structure(dim = length(site_names_uq), 
                        dimnames = list(Site = site_names_uq))|>
    array2DF() |>
    data.frame(row.names = NULL)

                    Site          P        PET          Q
1              Att-Bissen -28.678571  29.646667 -25.571429
2              Att-Bissen  34.021429  29.546667 -26.471429
3              Att-Bissen -47.478571  21.746667 -27.771429
4              Att-Bissen  62.221429  -5.353333 -26.171429
5              Att-Bissen  12.921429 -22.053333   5.528571
6              Att-Bissen -51.278571 -38.553333 -17.771429
7              Att-Bissen 138.821429 -37.153333 155.828571
8              Att-Bissen  17.421429 -37.053333         NA
9              Att-Bissen -36.678571 -37.453333  10.028571
10             Att-Bissen         NA -21.153333   7.628571
11             Att-Bissen -12.778571 -12.653333  24.528571
12             Att-Bissen  -8.378571   7.746667 -13.671429


在tidyverse,在这种情况下,你想枢轴更长的结果,做:

library(tidyverse)
df %>%
    pivot_longer(-Date, names_to = c('Site', '.value'), 
                             names_pattern ='(\\S+) (\\S+) \\S+$') %>%
    mutate(across(c(P,PET,Q),~.x - mean(.x, na.rm = TRUE),
         names = 'Anomaly_{col}'), .by = Site, .keep = 'unused')

 A tibble: 60 × 5
   Date       Site              Anomaly_P Anomaly_PET Anomaly_Q
   <chr>      <chr>                 <dbl>       <dbl>     <dbl>
 1 01/11/1876 Att-Bissen            -28.7        29.6     -25.6
 2 01/11/1876 Merl                  -26.9        33.2     -25.8
 3 01/11/1876 Felsmuhle/Mertert     -23.0        32.5     -19.1
 4 01/11/1876 Wiltz-Winseler        -25.3        29.2     -44.0
 5 01/12/1876 Att-Bissen             34.0        29.5     -26.5
 6 01/12/1876 Merl                   33.6        33.1     -23.9
 7 01/12/1876 Felsmuhle/Mertert      28.7        32.5     -13.1
 8 01/12/1876 Wiltz-Winseler         31.5        29.1     -44.9
 9 01/01/1877 Att-Bissen            -47.5        21.7     -27.8
10 01/01/1877 Merl                  -45.1        25.0     -26.1

hujrc8aj

hujrc8aj2#

# In step 2: Change the formula to get back a data frame:
formula_1 <- function(P, PET, Q) {
  Anomaly_P = P - mean(P, na.rm = TRUE)
  Anomaly_PET = PET - mean(PET, na.rm = TRUE)
  Anomaly_Q = Q - mean(Q, na.rm = TRUE)
  return(data.frame(Anomaly_P = Anomaly_P, Anomaly_PET = Anomaly_PET, Anomaly_Q = Anomaly_Q))
}

# Step 3
site_names <- sub(" P \\[mm\\]| PET \\[mm\\]| Q \\[mm\\]", "", names(df)[-1]) |>
  unique()
site_names

# In step 4 store results in your data frame
results <- list()
for (site in site_names) {
  site_data <- df[, grepl(site, names(df))]
  anomalies <- formula_1(site_data[[paste0(site, " P [mm]")]], 
                         site_data[[paste0(site, " PET [mm]")]], 
                         site_data[[paste0(site, " Q [mm]")]])
  anomalies$Date = df$Date # Add the Date column to each site's anomalies
  anomalies$Site = site # Add the Site column to each site's anomalies
  results[[site]] <- anomalies
}

# Step 5: combine all results
do.call(rbind, results)

个字符

相关问题