ggplot2::facet_grid 'big'数据错误的解决方法?

46scxncf  于 12个月前  发布在  其他
关注(0)|答案(1)|浏览(79)

更像是“中等数据”,因为仅仅 16k 行就足以复现这个问题。

可以正常工作的示例:

library(ggplot2)
library(ggh4x)  # provides facet_nested(); without it the call below is undefined

outputdir <- "/tmp"

# Build the test data: 100 stacked copies of the selected mtcars columns,
# each with independent Gaussian noise (sd = 10) added to disp, hp and wt.
# The replicates are collected in a list and bound once, instead of growing
# the data frame with rbind() on every iteration. Noise is still drawn in the
# same order (disp, hp, wt per replicate) as before.
keep_cols <- c("disp", "hp", "wt", "gear", "carb", "mpg", "qsec", "vs", "am")
replicates <- lapply(seq_len(100), function(i) {
  mti <- mtcars[, keep_cols]
  mti$disp <- mti$disp + 10 * rnorm(length(mti$disp))
  mti$hp <- mti$hp + 10 * rnorm(length(mti$hp))
  mti$wt <- mti$wt + 10 * rnorm(length(mti$wt))
  mti
})
mtdf <- do.call(rbind, replicates)

# Scatter plot of hp against disp, coloured by wt, facetted by gear (rows)
# and am (columns) with free scales in every panel.
p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_nested(gear ~ am, scales = "free")

# Spell out `filename` rather than relying on partial matching of `file`.
ggsave(
  filename = file.path(outputdir, "facet_scatter.pdf"),
  plot = p,
  width = 10,
  height = 10
)

无法正常工作的示例(尝试了几种替代写法):

加载的包版本见后文 sessionInfo() 的输出。这个 MWE 在 Linux 上运行。

library(ggplot2)
library(ggpubr)
library(ggh4x)

outputdir <- "/tmp"

# Build the test data: 500 stacked copies of the selected mtcars columns,
# each with independent Gaussian noise (sd = 10) added to disp, hp and wt.
# The replicates are collected in a list and bound once, instead of growing
# the data frame with rbind() on every iteration. Noise is still drawn in the
# same order (disp, hp, wt per replicate) as before.
keep_cols <- c("disp", "hp", "wt", "gear", "carb", "mpg", "qsec", "vs", "am")
replicates <- lapply(seq_len(500), function(i) {
  mti <- mtcars[, keep_cols]
  mti$disp <- mti$disp + 10 * rnorm(length(mti$disp))
  mti$hp <- mti$hp + 10 * rnorm(length(mti$hp))
  mti$wt <- mti$wt + 10 * rnorm(length(mti$wt))
  mti
})
mtdf <- do.call(rbind, replicates)

# All three attempts write to the same output path, as in the original script.
out_png <- file.path(outputdir, "facet_scatter.png")

# Attempt 1: ggh4x::facet_nested() with free scales.
p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_nested(gear ~ am, scales = "free")
ggsave(filename = out_png, plot = p, width = 10, height = 10)

# Attempt 2: plain ggplot2::facet_grid() with free scales.
p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_grid(gear ~ am, scales = "free")
ggsave(filename = out_png, plot = p, width = 10, height = 10)

# Attempt 3: ggpubr::ggscatter() followed by ggpubr::facet().
p <- ggscatter(mtdf, x = "disp", y = "hp", color = "wt")
p <- facet(p, facet.by = c("gear", "am"))
ggsave(filename = out_png, plot = p, width = 10, height = 10)

三次尝试均会产生以下错误:

R version 4.1.1 (2021-08-10) -- "Kick Things"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

source("/mnt/md0/insshare/mkarikom/Active_Project_Backup/DURIAN/DURIAN/slurm/test.R", encoding = "UTF-8")
Error: `scale_id` must not be `NA`
Run `rlang::last_error()` to see where the error occurred.
> sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.3 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libmkl_rt.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggh4x_0.2.1   ggpubr_0.4.0  ggplot2_3.3.5

loaded via a namespace (and not attached):
 [1] pillar_1.6.4     compiler_4.1.1   tools_4.1.1      jsonlite_1.7.2  
 [5] lifecycle_1.0.1  tibble_3.1.6     gtable_0.3.0     pkgconfig_2.0.3 
 [9] rlang_0.4.12     DBI_1.1.2        withr_2.4.3      dplyr_1.0.7     
[13] generics_0.1.1   vctrs_0.3.8      grid_4.1.1       tidyselect_1.1.1
[17] glue_1.6.0       R6_2.5.1         rstatix_0.7.0    fansi_1.0.0     
[21] carData_3.0-5    purrr_0.3.4      tidyr_1.1.4      farver_2.1.0    
[25] car_3.0-12       magrittr_2.0.1   scales_1.1.1     backports_1.4.1 
[29] ellipsis_0.3.2   assertthat_0.2.1 abind_1.4-5      colorspace_2.0-2
[33] ggsignif_0.6.3   utf8_1.2.2       munsell_0.5.0    broom_0.7.11    
[37] crayon_1.4.2

rlang::last_error()

如错误消息所示:

> rlang::last_error()
<error/rlang_error>
`scale_id` must not be `NA`
Backtrace:
  1. ggplot2::ggsave(...)
  3. ggplot2:::grid.draw.ggplot(plot)
  5. ggplot2:::print.ggplot(x)
  7. ggplot2:::ggplot_build.ggplot(x)
  8. layout$train_position(data, scale_x(), scale_y())
  9. ggplot2:::f(..., self = self)
 10. self$facet$train_scales(...)
 11. ggplot2:::f(...)
 12. ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)
Run `rlang::last_trace()` to see the full context.

rlang::last_trace()

如 rlang::last_error() 的输出所示:

> rlang::last_trace()
<error/rlang_error>
`scale_id` must not be `NA`
Backtrace:
     █
  1. └─ggplot2::ggsave(...)
  2.   ├─grid::grid.draw(plot)
  3.   └─ggplot2:::grid.draw.ggplot(plot)
  4.     ├─base::print(x)
  5.     └─ggplot2:::print.ggplot(x)
  6.       ├─ggplot2::ggplot_build(x)
  7.       └─ggplot2:::ggplot_build.ggplot(x)
  8.         └─layout$train_position(data, scale_x(), scale_y())
  9.           └─ggplot2:::f(..., self = self)
 10.             └─self$facet$train_scales(...)
 11.               └─ggplot2:::f(...)
 12.                 └─ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)

更新:

这可能是由 scales 1.1.1 包造成的。

r7xajy2e

r7xajy2e1#

这个问题与libmkl有关.
我可以重现上面提到的失败,并通过从MKL切换到openblas来避免它(update-alternatives --config libblas.so.3-x86_64-linux-gnu)。
重现问题的简单代码

library(ggplot2)
sessionInfo()

# Minimal reproduction: draw a facetted scatter plot over 4 * n rows.
#
# n: multiplier for the data size; the data frame has 4 * n rows.
# Returns the ggplot object (it is rendered when printed at top level).
f <- function(n) {
  n_rows <- 4 * n

  # `v` and `p` have four entries each and are recycled by data.frame()
  # to the length of the `l` / `t` sequences.
  points <- data.frame(
    v = c("V1", "V2", "V1", "V2"),
    l = seq(1, n_rows),
    t = seq(n_rows, 1),
    p = c("P_1", "P2", "P3", "P_1")
  )

  scatter <- ggplot(data = points, aes(l, t)) + geom_point()
  scatter + facet_grid(rows = vars(p), cols = vars(v))
}

# Small case, then the large case that is reported to fail under MKL.
f(100)
f(10000)

使用openblas运行良好,使用MKL时失败。

> library(ggplot2)
> sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 12 (bookworm)

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libmkl_rt.so;  LAPACK version 3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Europe/Berlin
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.4.3

loaded via a namespace (and not attached):
 [1] utf8_1.2.3       R6_2.5.1         tidyselect_1.2.0 magrittr_2.0.3  
 [5] gtable_0.3.4     glue_1.6.2       tibble_3.2.1     pkgconfig_2.0.3 
 [9] generics_0.1.3   dplyr_1.1.3      lifecycle_1.0.3  cli_3.6.1       
[13] fansi_1.0.4      scales_1.2.1     grid_4.3.1       vctrs_0.6.3     
[17] withr_2.5.0      compiler_4.3.1   munsell_0.5.0    pillar_1.9.0    
[21] colorspace_2.1-0 rlang_1.1.1     
> 
> f <- function(n) 
+ {
+   df <- data.frame(v = c("V1","V2", "V1", "V2"),
+                  l = seq(1, 4*n),
+                  t = seq(4*n, 1), 
+                  p = c("P_1", "P2", "P3", "P_1"))
+   ggplot(data = df, aes(l, t)) + 
+     geom_point() + 
+     facet_grid(rows = vars(p), cols = vars(v))
+ }
> 
> f(100)
> f(10000)
Error in `scale_apply()`:
! `scale_id` must not contain any "NA"
Backtrace:
     ▆
  1. ├─base (local) `<fn>`(x)
  2. └─ggplot2:::print.ggplot(x)
  3.   ├─ggplot2::ggplot_build(x)
  4.   └─ggplot2:::ggplot_build.ggplot(x)
  5.     └─layout$train_position(data, scale_x(), scale_y())
  6.       └─ggplot2 (local) train_position(..., self = self)
  7.         └─self$facet$train_scales(...)
  8.           └─ggplot2 (local) train_scales(...)
  9.             └─ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)
 10.               └─cli::cli_abort("{.arg scale_id} must not contain any {.val NA}")
 11.                 └─rlang::abort(...)
Execution halted

相关问题