pandas: extracting GRIB2 data with xarray and cfgrib is very slow, how can I improve the code?

carvr3hs · asked 2023-02-02

This code takes about 20 minutes to load one month of data for each variable (the daily 00 and 12 UTC cycles, 168 time steps). Saving to CSV takes even longer: it has been running for almost a day and still has not written a single station's file. How can I improve the code below?

#!/usr/bin/env python
# coding: utf-8

# In[3]:

import os, sys
import xarray as xr
import pandas as pd
import numpy as np

import datetime
import warnings
warnings.filterwarnings('ignore')

import glob

from eccodes import *

from dask.distributed import Client

def run():
    client = Client()

path_list = '/scratch/projetos/chp6/gfs/brasil/202012*/gfs*.grib2'

# In[8]:

start_time = datetime.datetime.now()

# Extract the surface variables

surface_var = xr.open_mfdataset(path_list,
                     concat_dim='valid_time',
                     combine="nested", 
                     decode_times=False,
                     parallel=True,
                     chunks={'time':'500mb'},
                     engine='cfgrib',
                     decode_cf=False,           
                     backend_kwargs={'filter_by_keys': {'shortName': 'hpbl', 'typeOfLevel': 'surface'},'indexpath':''})

print('surface_var carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

start_time = datetime.datetime.now()

# Extract the zonal wind component at 10 m
u_10 = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'u10',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('u_10 carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

start_time = datetime.datetime.now()

# Extract the meridional wind component at 10 m
v_10 = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'v10',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('v_10 carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

start_time = datetime.datetime.now()

# Extract the zonal wind component at 100 m
u_100 = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':1}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'u100',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('u_100 carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the meridional wind component at 100 m
v_100 = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'v100',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('v_100 carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the zonal wind component at 20, 30, 40, 50 and 80 m
u_ground = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'u',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('u_ground carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the meridional wind component at 20, 30, 40, 50 and 80 m
v_ground = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{ 'cfVarName': 'v',  'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('v_ground carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the 2 m temperature
temp_2m = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':1}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 't2m', 'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('temp_2m carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the temperature at 80 and 100 m
temp_ground = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':1}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 't', 'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('temp_ground carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the 2 m relative humidity
rh_2m  = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':1}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'shortName': '2r', 'typeOfLevel':'heightAboveGround'},'indexpath':''})

print('rh_2m carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the temperature on the 1000, 975, 950, 925, 900, 850 and 800 hPa pressure levels
t_hPa = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 't', 'typeOfLevel':'isobaricInhPa'},'indexpath':''})

print('t_hPa carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

r_hPa = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 'r', 'typeOfLevel':'isobaricInhPa'},'indexpath':''})

print('r_hPa carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the zonal wind component on the same pressure levels
u_hPa = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 'u', 'typeOfLevel':'isobaricInhPa'},'indexpath':''})

print('u_hPa carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

start_time = datetime.datetime.now()

# Extract the meridional wind component on the same pressure levels
v_hPa = xr.open_mfdataset(path_list, concat_dim='valid_time', decode_times=False, combine='nested', parallel=True, chunks={'time':'500mb'}, decode_cf=False, engine='cfgrib',
                                   backend_kwargs={ 'filter_by_keys':{'cfVarName': 'v', 'typeOfLevel':'isobaricInhPa'},'indexpath':''})

print('v_hPa carregado')
end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))

# In[ ]:

# Convert the longitudes from 0-360 to the range -180 to 180 and sort each dataset

surface_var.coords['longitude'] = ((surface_var.coords['longitude']+180) % 360) - 180
surface_var = surface_var.sortby(surface_var.longitude)

rh_2m.coords['longitude'] = ((rh_2m.coords['longitude']+180) % 360) - 180
rh_2m = rh_2m.sortby(rh_2m.longitude)

temp_2m.coords['longitude'] = ((temp_2m.coords['longitude']+180) % 360) - 180
temp_2m = temp_2m.sortby(temp_2m.longitude)

temp_ground.coords['longitude'] = ((temp_ground.coords['longitude']+180) % 360) - 180
temp_ground = temp_ground.sortby(temp_ground.longitude)

v_hPa.coords['longitude'] = ((v_hPa.coords['longitude']+180) % 360) - 180
v_hPa = v_hPa.sortby(v_hPa.longitude)

u_hPa.coords['longitude'] = ((u_hPa.coords['longitude']+180) % 360) - 180
u_hPa = u_hPa.sortby(u_hPa.longitude)

r_hPa.coords['longitude'] = ((r_hPa.coords['longitude']+180) % 360) - 180
r_hPa = r_hPa.sortby(r_hPa.longitude)

t_hPa.coords['longitude'] = ((t_hPa.coords['longitude']+180) % 360) - 180
t_hPa = t_hPa.sortby(t_hPa.longitude)

u_10.coords['longitude'] = ((u_10.coords['longitude']+180) % 360) - 180
u_10 = u_10.sortby(u_10.longitude)

v_10.coords['longitude'] = ((v_10.coords['longitude']+180) % 360) - 180
v_10 = v_10.sortby(v_10.longitude)

u_100.coords['longitude'] = ((u_100.coords['longitude']+180) % 360) - 180
u_100 = u_100.sortby(u_100.longitude)

v_100.coords['longitude'] = ((v_100.coords['longitude']+180) % 360) - 180
v_100 = v_100.sortby(v_100.longitude)

u_ground.coords['longitude'] = ((u_ground.coords['longitude']+180) % 360) - 180
u_ground = u_ground.sortby(u_ground.longitude)

v_ground.coords['longitude'] = ((v_ground.coords['longitude']+180) % 360) - 180
v_ground = v_ground.sortby(v_ground.longitude)

# In[ ]:

# PBL (planetary boundary layer) height in metres

hpbl = surface_var['hpbl']

# Accessing the variables at individual pressure levels

# u wind component on pressure levels

u_1000hPa = u_hPa['u'][:,0,:,:]
u_975hPa = u_hPa['u'][:,1,:,:]
u_950hPa = u_hPa['u'][:,2,:,:]
u_925hPa = u_hPa['u'][:,3,:,:]
u_900hPa = u_hPa['u'][:,4,:,:]
u_850hPa = u_hPa['u'][:,5,:,:]
u_800hPa = u_hPa['u'][:,6,:,:]

# v wind component on pressure levels

v_1000hPa = v_hPa['v'][:,0,:,:]
v_975hPa = v_hPa['v'][:,1,:,:]
v_950hPa = v_hPa['v'][:,2,:,:]
v_925hPa = v_hPa['v'][:,3,:,:]
v_900hPa = v_hPa['v'][:,4,:,:]
v_850hPa = v_hPa['v'][:,5,:,:]
v_800hPa = v_hPa['v'][:,6,:,:]

# Air temperature at 2, 80 and 100 m

t_2m  = temp_2m['t2m']
t_80m = temp_ground['t'][:,0,:,:]
t_100m = temp_ground['t'][:,1,:,:]

# Air temperature on pressure levels

t_1000hPa = t_hPa['t'][:,0,:,:]
t_975hPa = t_hPa['t'][:,1,:,:]
t_950hPa = t_hPa['t'][:,2,:,:]
t_925hPa = t_hPa['t'][:,3,:,:]
t_900hPa = t_hPa['t'][:,4,:,:]
t_850hPa = t_hPa['t'][:,5,:,:]
t_800hPa = t_hPa['t'][:,6,:,:]

# Relative humidity on pressure levels

r_1000hPa = r_hPa['r'][:,0,:,:]
r_975hPa = r_hPa['r'][:,1,:,:]
r_950hPa = r_hPa['r'][:,2,:,:]
r_925hPa = r_hPa['r'][:,3,:,:]
r_900hPa = r_hPa['r'][:,4,:,:]
r_850hPa = r_hPa['r'][:,5,:,:]
r_800hPa = r_hPa['r'][:,6,:,:]

# In[ ]:

# Calculating the wind speed from the u and v components

wind_speed = (u_ground.u**2 + v_ground.v**2)**(0.5)

wind_speed_10 = (u_10.u10**2 + v_10.v10**2)**(0.5)

wind_speed_100 = (u_100.u100**2 + v_100.v100**2)**(0.5)

wind_speed_1000hPa = (u_1000hPa**2 + v_1000hPa**2)**(0.5)

wind_speed_975hPa = (u_975hPa**2 + v_975hPa**2)**(0.5)

wind_speed_950hPa = (u_950hPa**2 + v_950hPa**2)**(0.5)

wind_speed_925hPa = (u_925hPa**2 + v_925hPa**2)**(0.5)

wind_speed_900hPa = (u_900hPa**2 + v_900hPa**2)**(0.5)

wind_speed_850hPa = (u_850hPa**2 + v_850hPa**2)**(0.5)

wind_speed_800hPa = (u_800hPa**2 + v_800hPa**2)**(0.5)

# In[ ]:

# Zonal component at 10 m
U10 = u_10['u10'][:,:,:]

# Zonal component at 20 m
U20 = u_ground['u'][:,0,:,:]

# Zonal component at 30 m
U30 = u_ground['u'][:,1,:,:]

# Zonal component at 40 m
U40 = u_ground['u'][:,2,:,:]

# Zonal component at 50 m
U50 = u_ground['u'][:,3,:,:]

# Zonal component at 80 m
U80 = u_ground['u'][:,4,:,:]

# Zonal component at 100 m
U100 = u_100['u100'][:,:,:]

# Meridional component at 10 m
V10 = v_10['v10'][:,:,:]

# Meridional component at 20 m
V20 = v_ground['v'][:,0,:,:]

# Meridional component at 30 m
V30 = v_ground['v'][:,1,:,:]

# Meridional component at 40 m
V40 = v_ground['v'][:,2,:,:]

# Meridional component at 50 m
V50 = v_ground['v'][:,3,:,:]

# Meridional component at 80 m
V80 = v_ground['v'][:,4,:,:]

# Meridional component at 100 m
V100 = v_100['v100'][:,:,:]

# In[ ]:

# Wind speed at 10 m
WS10 = wind_speed_10[:,:,:]

# Wind speed at 20 m
WS20 = wind_speed[:,0,:,:]

# Wind speed at 30 m
WS30 = wind_speed[:,1,:,:]

# Wind speed at 40 m
WS40 = wind_speed[:,2,:,:]

# Wind speed at 50 m
WS50 = wind_speed[:,3,:,:]

# Wind speed at 80 m
WS80 = wind_speed[:,4,:,:]

# Wind speed at 100 m
WS100 = wind_speed_100[:,:,:]

# Wind speed at 1000 hPa
WS1000hPa = wind_speed_1000hPa

# Wind speed at 975 hPa
WS975hPa = wind_speed_975hPa

# Wind speed at 950 hPa
WS950hPa = wind_speed_950hPa

# Wind speed at 925 hPa
WS925hPa = wind_speed_925hPa

# Wind speed at 900 hPa
WS900hPa = wind_speed_900hPa

# Wind speed at 850 hPa
WS850hPa = wind_speed_850hPa

# Wind speed at 800 hPa
WS800hPa = wind_speed_800hPa

# In[ ]:

start_time = datetime.datetime.now()

print("\n! !Iniciando a gravação dos dataframes!!\n{}")

dir_out = '/scratch/projetos/chp6/GFS_William/Dados_Extraidos_CHESF/202012'
if not os.path.exists(dir_out):
    os.makedirs(dir_out)

print("\nReading the observation station names:\n")

# Edit the name, latitude and longitude of the locations whose time series you want to extract in the stations CSV file
stations = pd.read_csv(r"/scratch/projetos/chp6/GFS_William/Coordenadas_CHESF.csv",index_col=0, sep=';')

print(stations)

lat   = u_ground['latitude'][:]
lon   = u_ground['longitude'][:]

# Calendar / units of the time coordinate
unit  = u_ground['time'].units

# Forecast lead time
step  = u_ground['step']

# valid_time, shared by the other variables
times = u_ground['valid_time'][:]

# Physical unit of the wind
unitu = u_ground['u'].units

for key, value in stations.iterrows():
    #print(key,value[0], value[1], value[2])
    station = value[0]
    file_name = "{}{}".format(station+'All_Vars',".csv")
    #print(file_name)
    lon_point = value[1]
    lat_point = value[2]

    # Finding the latitude/longitude grid point closest to the station
    
    # Squared difference of lat and lon
    sq_diff_lat = (lat - lat_point)**2
    sq_diff_lon = (lon - lon_point)**2
    
    # Identifying the index of the minimum value for lat and lon
    min_index_lat = sq_diff_lat.argmin()
    min_index_lon = sq_diff_lon.argmin()
    print("Generating time series for station {}".format(station))
    ref_date   = datetime.datetime(int(unit[14:18]),int(unit[19:21]),int(unit[22:24]),int(unit[25:27]))
    
    date_range  = list()
    step_data   = list()
    ws10_data   = list()
    ws20_data   = list()
    ws30_data   = list()
    ws40_data   = list()
    ws50_data   = list()
    ws80_data   = list()
    ws100_data  = list()
    ws1000hPa_data  = list()
    ws975hPa_data   = list()
    ws950hPa_data   = list()
    ws925hPa_data   = list()
    ws900hPa_data   = list()
    ws850hPa_data   = list()
    ws800hPa_data   = list()
    u10_data   = list()
    u20_data   = list()
    u30_data   = list()
    u40_data   = list()
    u50_data   = list()
    u80_data   = list()
    u100_data  = list()
    u_1000hPa_data  = list()
    u_975hPa_data   = list()
    u_950hPa_data   = list()
    u_925hPa_data   = list()
    u_900hPa_data   = list()
    u_850hPa_data   = list()
    u_800hPa_data   = list()
    v10_data   = list()
    v20_data   = list()
    v30_data   = list()
    v40_data   = list()
    v50_data   = list()
    v80_data   = list()
    v100_data  = list()
    v_1000hPa_data  = list()
    v_975hPa_data   = list()
    v_950hPa_data   = list()
    v_925hPa_data   = list()
    v_900hPa_data   = list()
    v_850hPa_data   = list()
    v_800hPa_data   = list()
    t2_data         = list()
    t80_data        = list()
    t100_data       = list()
    t_1000hPa_data  = list()
    t_975hPa_data   = list()
    t_950hPa_data   = list()
    t_925hPa_data   = list()
    t_900hPa_data   = list()
    t_850hPa_data   = list()
    t_800hPa_data   = list()
    r2_data         = list()
    r_1000hPa_data  = list()
    r_975hPa_data   = list()
    r_950hPa_data   = list()
    r_925hPa_data   = list()
    r_900hPa_data   = list()
    r_850hPa_data   = list()
    r_800hPa_data   = list()
    pblh_data   = list()
    precip_data = list()
 
    for index, time in enumerate(times):
        date_time = ref_date+datetime.timedelta(seconds=int(time))
        date_range.append(date_time)
        step_data.append(step[index].values)
        ws10_data.append(WS10[index, min_index_lat, min_index_lon].values)
        ws20_data.append(WS20[index, min_index_lat, min_index_lon].values)
        ws30_data.append(WS30[index, min_index_lat, min_index_lon].values)
        ws40_data.append(WS40[index, min_index_lat, min_index_lon].values)
        ws50_data.append(WS50[index, min_index_lat, min_index_lon].values)
        ws80_data.append(WS80[index, min_index_lat, min_index_lon].values)
        ws100_data.append(WS100[index, min_index_lat, min_index_lon].values)
        ws1000hPa_data.append(WS1000hPa[index, min_index_lat, min_index_lon].values)
        ws975hPa_data.append(WS975hPa[index, min_index_lat, min_index_lon].values)
        ws950hPa_data.append(WS950hPa[index, min_index_lat, min_index_lon].values)
        ws925hPa_data.append(WS925hPa[index, min_index_lat, min_index_lon].values)
        ws900hPa_data.append(WS900hPa[index, min_index_lat, min_index_lon].values)
        ws850hPa_data.append(WS850hPa[index, min_index_lat, min_index_lon].values)
        ws800hPa_data.append(WS800hPa[index, min_index_lat, min_index_lon].values)
        u10_data.append(U10[index, min_index_lat, min_index_lon].values)
        u20_data.append(U20[index, min_index_lat, min_index_lon].values)
        u30_data.append(U30[index, min_index_lat, min_index_lon].values)
        u40_data.append(U40[index, min_index_lat, min_index_lon].values)
        u50_data.append(U50[index, min_index_lat, min_index_lon].values)
        u80_data.append(U80[index, min_index_lat, min_index_lon].values)
        u100_data.append(U100[index, min_index_lat, min_index_lon].values)
        u_1000hPa_data.append(u_1000hPa[index, min_index_lat, min_index_lon].values)
        u_975hPa_data.append(u_975hPa[index, min_index_lat, min_index_lon].values)
        u_950hPa_data.append(u_950hPa[index, min_index_lat, min_index_lon].values)
        u_925hPa_data.append(u_925hPa[index, min_index_lat, min_index_lon].values)
        u_900hPa_data.append(u_900hPa[index, min_index_lat, min_index_lon].values)
        u_850hPa_data.append(u_850hPa[index, min_index_lat, min_index_lon].values)
        u_800hPa_data.append(u_800hPa[index, min_index_lat, min_index_lon].values)
        v10_data.append(V10[index, min_index_lat, min_index_lon].values)
        v20_data.append(V20[index, min_index_lat, min_index_lon].values)
        v30_data.append(V30[index, min_index_lat, min_index_lon].values)
        v40_data.append(V40[index, min_index_lat, min_index_lon].values)
        v50_data.append(V50[index, min_index_lat, min_index_lon].values)
        v80_data.append(V80[index, min_index_lat, min_index_lon].values)
        v100_data.append(V100[index, min_index_lat, min_index_lon].values)
        v_1000hPa_data.append(v_1000hPa[index, min_index_lat, min_index_lon].values)
        v_975hPa_data.append(v_975hPa[index, min_index_lat, min_index_lon].values)
        v_950hPa_data.append(v_950hPa[index, min_index_lat, min_index_lon].values)
        v_925hPa_data.append(v_925hPa[index, min_index_lat, min_index_lon].values)
        v_900hPa_data.append(v_900hPa[index, min_index_lat, min_index_lon].values)
        v_850hPa_data.append(v_850hPa[index, min_index_lat, min_index_lon].values)
        v_800hPa_data.append(v_800hPa[index, min_index_lat, min_index_lon].values)
        t2_data.append(t_2m[index, min_index_lat, min_index_lon].values)
        t80_data.append(t_80m[index, min_index_lat, min_index_lon].values)
        t100_data.append(t_100m[index, min_index_lat, min_index_lon].values)
        t_1000hPa_data.append(t_1000hPa[index, min_index_lat, min_index_lon].values)
        t_975hPa_data.append(t_975hPa[index, min_index_lat, min_index_lon].values)
        t_950hPa_data.append(t_950hPa[index, min_index_lat, min_index_lon].values)
        t_925hPa_data.append(t_925hPa[index, min_index_lat, min_index_lon].values)
        t_900hPa_data.append(t_900hPa[index, min_index_lat, min_index_lon].values)
        t_850hPa_data.append(t_850hPa[index, min_index_lat, min_index_lon].values)
        t_800hPa_data.append(t_800hPa[index, min_index_lat, min_index_lon].values)
        r_1000hPa_data.append(r_1000hPa[index, min_index_lat, min_index_lon].values)
        r_975hPa_data.append(r_975hPa[index, min_index_lat, min_index_lon].values)
        r_950hPa_data.append(r_950hPa[index, min_index_lat, min_index_lon].values)
        r_925hPa_data.append(r_925hPa[index, min_index_lat, min_index_lon].values)
        r_900hPa_data.append(r_900hPa[index, min_index_lat, min_index_lon].values)
        r_850hPa_data.append(r_850hPa[index, min_index_lat, min_index_lon].values)
        r_800hPa_data.append(r_800hPa[index, min_index_lat, min_index_lon].values)
        pblh_data.append(hpbl[index, min_index_lat, min_index_lon].values)
        
    #print(date_range)
    
    df = pd.DataFrame(date_range, columns = ["Date-Time"])
    df["Date-Time"] = date_range
    df = df.set_index(["Date-Time"])
    df["Forecast ({})".format('valid time')] = step_data
    df["WS10m  ({})".format('m/s')] = ws10_data
    df["WS20m  ({})".format('m/s')] = ws20_data
    df["WS30m  ({})".format('m/s')] = ws30_data
    df["WS40m  ({})".format('m/s')] = ws40_data
    df["WS50m  ({})".format('m/s')] = ws50_data
    df["WS80m  ({})".format('m/s')] = ws80_data
    df["WS100m ({})".format('m/s')] = ws100_data
    df["WS1000hPa ({})".format('m/s')] = ws1000hPa_data
    df["WS975hPa ({})".format('m/s')] = ws975hPa_data
    df["WS950hPa ({})".format('m/s')] = ws950hPa_data
    df["WS925hPa ({})".format('m/s')] = ws925hPa_data
    df["WS900hPa ({})".format('m/s')] = ws900hPa_data
    df["WS850hPa ({})".format('m/s')] = ws850hPa_data
    df["WS800hPa ({})".format('m/s')] = ws800hPa_data
    df["U10  ({})".format('m/s')] = u10_data
    df["U20  ({})".format('m/s')] = u20_data
    df["U30  ({})".format('m/s')] = u30_data
    df["U40  ({})".format('m/s')] = u40_data
    df["U50  ({})".format('m/s')] = u50_data
    df["U80  ({})".format('m/s')] = u80_data
    df["U100 ({})".format('m/s')] = u100_data
    df["U_1000hPa ({})".format('m/s')] = u_1000hPa_data
    df["U_975hPa ({})".format('m/s')] = u_975hPa_data
    df["U_950hPa ({})".format('m/s')] = u_950hPa_data
    df["U_925hPa ({})".format('m/s')] = u_925hPa_data
    df["U_900hPa ({})".format('m/s')] = u_900hPa_data
    df["U_850hPa ({})".format('m/s')] = u_850hPa_data
    df["U_800hPa ({})".format('m/s')] = u_800hPa_data
    df["V10 ({})".format('m/s')] = v10_data
    df["V20 ({})".format('m/s')] = v20_data
    df["V30 ({})".format('m/s')] = v30_data
    df["V40 ({})".format('m/s')] = v40_data
    df["V50 ({})".format('m/s')] = v50_data
    df["V80 ({})".format('m/s')] = v80_data
    df["V100 ({})".format('m/s')] = v100_data
    df["V_1000hPa ({})".format('m/s')] = v_1000hPa_data
    df["V_975hPa ({})".format('m/s')] = v_975hPa_data
    df["V_950hPa ({})".format('m/s')] = v_950hPa_data
    df["V_925hPa ({})".format('m/s')] = v_925hPa_data
    df["V_900hPa ({})".format('m/s')] = v_900hPa_data
    df["V_850hPa ({})".format('m/s')] = v_850hPa_data
    df["V_800hPa ({})".format('m/s')] = v_800hPa_data
    df["Tair_2m ({})".format('K')]  = t2_data
    df["Tair_80m ({})".format('K')] = t80_data
    df["Tair_100m ({})".format('K')] = t100_data
    df["Tair_1000hPa ({})".format('K')] = t_1000hPa_data
    df["Tair_975hPa ({})".format('K')] = t_975hPa_data
    df["Tair_950hPa ({})".format('K')] = t_950hPa_data
    df["Tair_925hPa ({})".format('K')] = t_925hPa_data
    df["Tair_900hPa ({})".format('K')] = t_900hPa_data
    df["Tair_850hPa ({})".format('K')] = t_850hPa_data
    df["Tair_800hPa ({})".format('K')] = t_800hPa_data
    df["RH_1000hPa ({})".format('%')] = r_1000hPa_data
    df["RH_975hPa ({})".format('%')] = r_975hPa_data
    df["RH_950hPa ({})".format('%')] = r_950hPa_data
    df["RH_925hPa ({})".format('%')] = r_925hPa_data
    df["RH_900hPa ({})".format('%')] = r_900hPa_data
    df["RH_850hPa ({})".format('%')] = r_850hPa_data
    df["RH_800hPa ({})".format('%')] = r_800hPa_data
    df["PBLH ({})".format('m')] = pblh_data
    
    df.to_csv(os.path.join(dir_out,file_name), sep=';',encoding="utf-8", index=True)

print("\n! !Successfuly saved all the Time Series the output Directory!!\n{}".format(dir_out))

end_time = datetime.datetime.now()
print('Duration: {}'.format(end_time - start_time))
    
if __name__ == "__main__":
    run()

mkshixfv

Reading the .grib files with xr.open_mfdataset() and cfgrib:

I can confirm that reading GRIB files with xr.open_mfdataset() is slow. I had a similar task in which I read many GRIB files with xarray, and it took a very long time. Others have run into the same problem (see here).
According to the issue raised here, "cfgrib is not optimized to deal with files with a large number of fields, even if they are small."
The thing that worked best for me was to convert as many GRIB files as possible into one (or a few) netCDF files, and then read the newly created netCDF files into xarray. Here is a link that shows several different ways to do this; I used the grib_to_netcdf command from the ecCodes tools.
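A minimal sketch of that workflow (my own illustration, not code from the question: it reuses the question's GRIB path, while the netCDF output directory is a hypothetical placeholder; also note that grib_to_netcdf expects fairly uniform fields, so heterogeneous GFS files may first need to be split or filtered, for example with grib_copy):

import glob
import os
import subprocess

import xarray as xr

# Convert each GRIB2 file to netCDF once, then read the netCDF files with xarray
grib_files = sorted(glob.glob('/scratch/projetos/chp6/gfs/brasil/202012*/gfs*.grib2'))
nc_dir = '/scratch/projetos/chp6/gfs/brasil/netcdf'  # hypothetical output directory
os.makedirs(nc_dir, exist_ok=True)

for grib_path in grib_files:
    nc_path = os.path.join(nc_dir, os.path.basename(grib_path) + '.nc')
    if not os.path.exists(nc_path):
        # grib_to_netcdf ships with ecCodes; -o names the output file
        subprocess.run(['grib_to_netcdf', '-o', nc_path, grib_path], check=True)

# The converted files open much faster and still give lazy, dask-backed arrays
ds = xr.open_mfdataset(os.path.join(nc_dir, '*.nc'),
                       combine='nested', concat_dim='valid_time', parallel=True)

The conversion cost is paid once per file; every later read then goes through the much faster netCDF path instead of decoding GRIB messages again.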
To sum up, I would start by converting the GRIB files to netCDF, since that should let you read the data into xarray in a much more performant way, and then you can focus on the other optimizations in your code.
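As one example of such a follow-up optimization (again only a sketch, not part of the original script: it assumes the stations file has the same name/longitude/latitude column order as in the question, and the output file names are illustrative), the per-time-step .values loop can be replaced by a single vectorized nearest-neighbour selection over all stations:

import pandas as pd
import xarray as xr

stations = pd.read_csv('/scratch/projetos/chp6/GFS_William/Coordenadas_CHESF.csv',
                       index_col=0, sep=';')
names = stations.iloc[:, 0].values   # station name
lons  = stations.iloc[:, 1].values   # longitude
lats  = stations.iloc[:, 2].values   # latitude

ds = u_10  # any of the datasets opened above, after the longitude conversion

# Pick every station grid point in one call instead of indexing cell by cell
points = ds.sel(latitude=xr.DataArray(lats, dims='station'),
                longitude=xr.DataArray(lons, dims='station'),
                method='nearest')
points = points.assign_coords(station=('station', names))

# One to_dataframe() per dataset, then one CSV per station
df = points.to_dataframe()
for name, station_df in df.groupby(level='station'):
    station_df.to_csv(f'{name}_All_Vars.csv', sep=';')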
Hope this helps!
