我在将网格化的每日气候数据导出到netCDF时遇到问题。我首先创建一些随机数据,将其导出为netCDF,然后重新打开它并绘制导出前后的时间序列。两行都应该匹配,但有一行始终是常量。在我编写netCDF文件的过程中一定有一个bug,但我就是找不到解决方案,尽管我已经分析了网上可用的大多数示例。
library(ncdf4)
library(lubridate)
library(reshape2)
library(dplyr)
library(ggplot2)
# create some example data
dates = seq(as.Date("1950-01-01"), length.out = 365, by="1 day")
lon = seq(-180.00, 180.00, by = 10)
lat = seq(0,90, by = 10)
tmp_df <- expand.grid(date = dates, lon = lon, lat = lat)
tmp_df$t2m <- rnorm(15, 15, n = nrow(tmp_df)) + 272.15 # some random Kelvins
# export data as netCDF
xvals <- unique(tmp_df$lon)
nx <- length(xvals)
yvals <- unique(tmp_df$lat)
ny <- length(yvals)
lon1 <- ncdim_def("longitude", "degrees_east", xvals)
lat2 <- ncdim_def("latitude", "degrees_north", yvals)
time <-unique(tmp_df$date)
n_time <- length(time)
dimtime <- ncdim_def("time", "days since 1970-01-01 00:00:00", as.numeric(time),
unlim=T, calendar="gregorian")
# different sorting options, none of them works properly
vals <- dplyr::arrange(tmp_df, lon, lat, date)
# vals <- dplyr::arrange(tmp_df, date, lon, lat)
# vals <- dplyr::arrange(tmp_df, lat, lon, date)
vals <- vals$t2m
mv <- -999 #missing value to use
var_temp <- ncvar_def("t2m", "K", list(lon1, lat2, dimtime), longname = "test.nc", mv)
ncnew <- nc_create("test.nc", list(var_temp))
ncvar_put(ncnew, var_temp, as.array(vals), start=c(1,1,1), count=c(nx,ny,n_time))
nc_close(ncnew)
###############################################################################
# save data from one random grid point for the comparison
ts1 <- dplyr::filter(tmp_df, lon < 1, lon > -1, lat < 1)
ts1$source <- 'initial'
##############################################################################
# Open the nc file
nc2 <- nc_open("test.nc")
# get longitude and latitude
lon <- ncvar_get(nc2,"longitude")
nlon <- dim(lon)
lat <- ncvar_get(nc2,"latitude")
nlat <- dim(lat)
time <- ncvar_get(nc2,"time")
tunits <- ncatt_get(nc2,"time","units")
nt <- dim(time)
tmp_array <- ncvar_get( nc2, "t2m", start= c(1, 1, 1),
count = c(nlon, nlat, nt))
tmp_vec_long <- as.vector(tmp_array)
tmp_mat <- matrix(tmp_vec_long, nrow=nlon*nlat, ncol=nt)
lonlat <- as.matrix(expand.grid(lon,lat))
tmp_df02 <- data.frame(cbind(lonlat,tmp_mat))
names(tmp_df02)[1:2] <- c("lon","lat")
colnames(tmp_df02)[3:ncol(tmp_df02)] <- time
tmp_df02 <- reshape2::melt(tmp_df02, id.vars = c("lon", "lat"))
# convert back to Date
tmp_df02$date <- as.Date(as.POSIXct(time*24*60*60, origin = "1970-01-01"))
tmp_df02$variable <- NULL
nc_close(nc2)
##############################################################################
# extract the same point data as before
ts2 <- dplyr::filter(tmp_df02, lon < 1, lon > -1, lat < 1)
ts2$source <- 'after'
# rbind now and before
ts <- rbind(ts1,rename(ts2, "t2m" = "value"))
# plot initial and after values: both lines should overlap
ggplot(ts, aes(x = date, y = t2m, col = source)) + geom_line()
我做错了什么?
1条答案
按热度按时间cpjpxq1n1#
我发现了问题:有一个与经度、纬度和时间变量的安排有关的问题。工作脚本张贴在下面。