将3D气候数据导出到R中的netCDF文件

r7knjye2  于 2023-05-20  发布在  Etcd
关注(0)|答案(1)|浏览(205)

我在将网格化的每日气候数据导出到netCDF时遇到问题。我首先创建一些随机数据,将其导出为netCDF,然后重新打开它并绘制导出前后的时间序列。两行都应该匹配,但有一行始终是常量。在我编写netCDF文件的过程中一定有一个bug,但我就是找不到解决方案,尽管我已经分析了网上可用的大多数示例。

library(ncdf4)
library(lubridate)
library(reshape2)
library(dplyr)
library(ggplot2)

# create some example data
dates = seq(as.Date("1950-01-01"), length.out = 365, by="1 day")
lon = seq(-180.00, 180.00, by = 10)
lat = seq(0,90, by = 10)
tmp_df <- expand.grid(date = dates, lon = lon, lat = lat)
tmp_df$t2m <- rnorm(15, 15, n = nrow(tmp_df)) + 272.15   # some random Kelvins 

# export data as netCDF  
xvals <- unique(tmp_df$lon)
nx <- length(xvals)
  
yvals <- unique(tmp_df$lat) 
ny <- length(yvals)
  
lon1 <- ncdim_def("longitude", "degrees_east", xvals)
lat2 <- ncdim_def("latitude", "degrees_north", yvals)
  
time <-unique(tmp_df$date)
n_time <- length(time)
dimtime <- ncdim_def("time", "days since 1970-01-01 00:00:00", as.numeric(time),
                     unlim=T, calendar="gregorian")

# different sorting options, none of them works properly
vals <- dplyr::arrange(tmp_df, lon, lat, date) 
# vals <- dplyr::arrange(tmp_df, date, lon, lat) 
# vals <- dplyr::arrange(tmp_df, lat, lon, date)
vals <- vals$t2m
  
mv <- -999 #missing value to use
var_temp <- ncvar_def("t2m", "K", list(lon1, lat2, dimtime), longname = "test.nc", mv) 
ncnew <- nc_create("test.nc", list(var_temp))
ncvar_put(ncnew, var_temp, as.array(vals), start=c(1,1,1), count=c(nx,ny,n_time))
nc_close(ncnew)
  
###############################################################################

# save data from one random grid point for the comparison  
ts1 <- dplyr::filter(tmp_df, lon < 1, lon > -1, lat < 1)
ts1$source <- 'initial'
  
##############################################################################
# Open the nc file  
nc2 <- nc_open("test.nc")
  
# get longitude and latitude
lon <- ncvar_get(nc2,"longitude")
nlon <- dim(lon)
  
lat <- ncvar_get(nc2,"latitude")
nlat <- dim(lat)
  
time <- ncvar_get(nc2,"time")
tunits <- ncatt_get(nc2,"time","units")
nt <- dim(time)

tmp_array <- ncvar_get( nc2, "t2m", start= c(1, 1, 1), 
                          count = c(nlon, nlat, nt))
  
tmp_vec_long <- as.vector(tmp_array)
tmp_mat <- matrix(tmp_vec_long, nrow=nlon*nlat, ncol=nt)
lonlat <- as.matrix(expand.grid(lon,lat))
  
tmp_df02 <- data.frame(cbind(lonlat,tmp_mat))
names(tmp_df02)[1:2] <- c("lon","lat")
colnames(tmp_df02)[3:ncol(tmp_df02)] <- time
  
tmp_df02 <- reshape2::melt(tmp_df02, id.vars = c("lon", "lat"))

# convert back to Date
tmp_df02$date <- as.Date(as.POSIXct(time*24*60*60, origin = "1970-01-01"))
tmp_df02$variable <- NULL

nc_close(nc2)
  
##############################################################################
# extract the same point data as before
ts2 <- dplyr::filter(tmp_df02, lon < 1, lon > -1, lat < 1)
ts2$source <- 'after'

# rbind now and before  
ts <- rbind(ts1,rename(ts2, "t2m" = "value"))

# plot initial and after values: both lines should overlap
ggplot(ts, aes(x = date, y = t2m, col = source)) + geom_line()

我做错了什么?

cpjpxq1n

cpjpxq1n1#

我发现了问题:有一个与经度、纬度和时间变量的安排有关的问题。工作脚本张贴在下面。

library(ncdf4)
library(lubridate)
library(reshape2)
library(dplyr)
library(ggplot2)

# create some example data
dates = seq(as.Date("1950-01-01"), length.out = 365, by="1 day")
lon = seq(-180.00, 180.00, by = 10)
lat = seq(0,90, by = 10)
tmp_df <- expand.grid(date = dates, lon = lon, lat = lat)
tmp_df$t2m <- rnorm(15, 15, n = nrow(tmp_df)) + 272.15   # some random Kelvins 

# export data as netCDF  
xvals <- unique(tmp_df$lon)
nx <- length(xvals)
  
yvals <- unique(tmp_df$lat) 
ny <- length(yvals)
  
lon1 <- ncdim_def("longitude", "degrees_east", xvals)
lat2 <- ncdim_def("latitude", "degrees_north", yvals)
  
time <-unique(tmp_df$date)
n_time <- length(time)
dimtime <- ncdim_def("time", "days since 1970-01-01 00:00:00", as.numeric(time),
                     unlim=T, calendar="gregorian")

vals <- dplyr::arrange(tmp_df, date, lat, lon) 
vals <- vals$t2m
mat1 <- array(vals, c(nx, ny, n_time));  

mv <- -999 #missing value to use
var_temp <- ncvar_def("t2m", "K", list(lon1, lat2, dimtime), longname = "test.nc", mv) 
ncnew <- nc_create("test.nc", list(var_temp))
ncvar_put(ncnew, var_temp, mat1, start=c(1,1,1), count=c(nx,ny,n_time))
nc_close(ncnew)
  
###############################################################################

# save data from one random grid point for the comparison  
ts1 <- dplyr::filter(tmp_df, lon < 1, lon > -1, lat < 1)
ts1$source <- 'initial'
  
##############################################################################
# Open the nc file  
nc2 <- nc_open("test.nc")
  
# get longitude and latitude
lon <- ncvar_get(nc2,"longitude")
nlon <- dim(lon)
  
lat <- ncvar_get(nc2,"latitude")
nlat <- dim(lat)
  
time <- ncvar_get(nc2,"time")
tunits <- ncatt_get(nc2,"time","units")
nt <- dim(time)

tmp_array <- ncvar_get( nc2, "t2m", start= c(1, 1, 1), 
                          count = c(nlon, nlat, nt))
  
tmp_vec_long <- as.vector(tmp_array)

tmp_df02 <- melt(mat1)

mat1 <- array(tmp_vec_long, c(nlon, nlat, nt));  
tmp_mat <- matrix(tmp_vec_long, nrow=nlon*nlat, ncol=nt)
lonlat <- as.matrix(expand.grid(lon,lat))
tmp_df02 <- data.frame(cbind(lonlat,tmp_mat))
names(tmp_df02)[1:2] <- c("lon","lat")
colnames(tmp_df02)[3:ncol(tmp_df02)] <- time
tmp_df02 <- reshape2::melt(tmp_df02, id.vars = c("lon", "lat"))
colnames(tmp_df02)[1:3] <- c("lon", "lat", "date")

# convert back to Date
tmp_df02$date <- as.Date(as.POSIXct(as.numeric(as.character(tmp_df02$date))*24*60*60, origin = "1970-01-01"))

# tmp_df02$variable <- NULL
nc_close(nc2)
  
##############################################################################
# extract the same point data as before
ts2 <- dplyr::filter(tmp_df02, lon < 1, lon > -1, lat < 1)
ts2$source <- 'after'

# rbind now and before  
ts <- rbind(ts1,rename(ts2, "t2m" = "value"))

# plot initial and after values: both lines should overlap
ggplot(ts, aes(x = date, y = t2m, col = source)) + geom_line()

# another test
dcast(ts, formula = date ~ source, value.var = "t2m")

相关问题