Source code for ppcpy.io.readMeteo


#import netCDF4
import glob
import re
import datetime
import os
import logging
import xarray as xr
#from scipy.interpolate import griddata
import numpy as np


[docs] class Meteo: """ Picasso docstring ----------------- % LOADMETEOR read meteorological data. % % USAGE: % [temp, pres, relh, wins, wind, meteorAttri] = loadMeteor(mTime, asl) % % INPUTS: % mTime: array % query time. % asl: array % height above sea level. (m) % % KEYWORDS: % meteorDataSource: str % meteorological data type. % e.g., 'gdas1'(default), 'standard_atmosphere', 'websonde', 'radiosonde', 'nc_cloudnet' % gdas1Site: str % the GDAS1 site for the current campaign. % meteo_folder: str % the main folder of the GDAS1 profiles (or the cloudnet profiles). % radiosondeSitenum: integer % site number, which can be found in % doc/radiosonde-station-list.txt. % radiosondeFolder: str % the folder of the sonding files. % radiosondeType: integer % file type of the radiosonde file. % 1: radiosonde file for MOSAiC (default) % 2: radiosonde file for MUA % flagReadLess: logical % flag to determine whether access meteorological data by certain time % interval. (default: false) % method: char % Interpolation method. (default: 'nearest') % isUseLatestGDAS: logical % whether to search the latest available GDAS profile (default: false). % % OUTPUTS: % temp: matrix (time * height) % temperature for each range bin. [°C] % pres: matrix (time * height) % pressure for each range bin. [hPa] % relh: matrix (time * height) % relative humidity for each range bin. [%] % wins: matrix (time * height) % wind speed. (m/s) % meteorAttri: struct % dataSource: cell % The data source used in the data processing for each cloud-free group. % URL: cell % The data file info for each cloud-free group. % datetime: array % datetime label for the meteorlogical data. % % HISTORY: % - 2021-05-22: first edition by Zhenping % % .. Authors: - zhenping@tropos.de Typical call in Picasso ----------------------- [temp, pres, relh, ~, ~, data.meteorAttri] = loadMeteor(clFreGrpTimes, data.alt, ... 'meteorDataSource', PollyConfig.meteorDataSource, ... 'gdas1Site', PollyConfig.gdas1Site, ... 'meteo_folder', PollyConfig.meteo_folder, ... 'radiosondeSitenum', PollyConfig.radiosondeSitenum, ... 'radiosondeFolder', PollyConfig.radiosondeFolder, ... 'radiosondeType', PollyConfig.radiosondeType, ... 'method', 'linear', ... 'isUseLatestGDAS', PollyConfig.flagUseLatestGDAS); """ def __init__(self, meteorDataSource:str, meteo_folder:str, meteo_file:str): """Initialise Meteo object. Parameters ---------- meteorDataSource : str Meteorological data source name eg. "nc_cloudnet". meteo_folder : str Path to meteorological data folder. meteo_file : str Meteorological data file name. Notes ----- **History** - 2021-05-22: First edition by Zhenping. """ assert meteorDataSource == 'nc_cloudnet', "Other meteo sources are not implemented yet" self.reader = MeteoNcCloudnet(meteo_folder, meteo_file)
[docs] def load(self, times:float, heights:np.ndarray, asl:float, flagPicassoComparison:bool=False): """Load the data and resample to 15 minute intervals. Parameters ---------- times : float Date of measurement [Unix time]. heights : np.ndarray Height above ground [m]. asl : float Altitude of measurement site (station) [m.a.s.l.]. flagPicassoComparison : bool If true, use Picasso values and logic. """ self.ds = self.reader.load(times, heights, asl, flagPicassoComparison) self.ds = self.ds.resample(time='15min').interpolate()
[docs] def get_mean_profiles(self, time_slice:list) -> list: """Get the mean meteorological profiles. Parameters ---------- time_slice : list Nested list of time slices in np.datetime [[start time, end time], ..] Returns ------- mean_profiles : list Mean meteorological profiles for each time slice. """ mean_profiles = [] for t in time_slice: mean_profiles.append(self.ds.sel(time=slice(*t)).mean(dim='time')) print(f"get_mean_profiles(time_slice: {type(time_slice)}) -> {type(mean_profiles)}") return mean_profiles
[docs] class MeteoNcCloudnet: """ TODO for now only one filename define preferred model """ radiusEarth:float = 6371200.0 # [m] acelerationEarth:float = 9.80665 # [m/s^2] def __init__(self, basepath, filepattern): """Initialize MeteoNcCloudnet object. Parameters ---------- basepath : str Path to meteorological data folder. filepattern : str Meteorological data file name. Notes ----- **History** - xxxx-xx-xx: First edition by ... """ if not '/' == basepath[-1]: basepath = basepath + '/' self.basepath = basepath self.filepattern = filepattern
[docs] def find_path_for_time(self, time:float) -> str: """Find the files for a given time. Parametes --------- time : float Date of measurement [Unix time]. Returns ------- str Path to the file for the given time. """ candidates = glob.glob(self.basepath + "**", recursive=True) #print('candidates ', candidates) dt = datetime.datetime.fromtimestamp(time) regex = re.compile(self.filepattern.format(dt)) #print('regex ', regex) filename = [s for s in candidates if regex.search(s) ] #print('filename ', filename) assert len(filename) == 1, f"{os.getcwd()}, {self.basepath} found {candidates} reduced to filenames {filename}" return filename[0]
[docs] def load(self, time:float, height_grid:np.ndarray, station_altitude:float, flagPicassoComparison:bool=False) -> xr.Dataset: """Load the data. Parameters ---------- time : float Date of measurement [Unix time]. height_grid : float Height above ground [m]. station_altitude : float Altitude of measurement site (station) [m.a.s.l.]. flagPicassoComparison : bool If True, use Picasso values and logic. Returns ------- ds_new : Dataset Dataset of meteorology profiles (temperature, pressure, relative humidity, specific humidity). Notes ----- not quite sure on the interface yet ``` met.load(data_cube.retrievals_highres['time'][0]) met.load(datetime.datetime.timestamp(datetime.datetime.strptime(data_cube.date, '%Y%m%d'))) ``` Recipie: - load - select variables? - rename? - regrid from (time, level) to (time, lidar heights) .. TODO:: clarify the above ground above sea level issues. .. TODO:: Check how the interpolation handles negative grid points .. TODO:: Check if we can interpolate several variables at once """ filename = self.find_path_for_time(time) ds = xr.load_dataset(filename) variables_to_select = [ 'height', 'temperature', 'pressure', 'rh', 'q', 'sfc_geopotential' ] ds = ds[variables_to_select] height_2d = ds.height.values.copy() time = ds.time.values.astype('datetime64[s]').astype(int).copy() ## Model height correction: logging.info('Performing model height correction.') logging.info(f'Uncorrected model height[:,0]:\n{height_2d[:, 0]}') geopotential_surface_height = ds['sfc_geopotential'].values / self.acelerationEarth surface_altitude = self.radiusEarth * geopotential_surface_height/ \ (self.radiusEarth - geopotential_surface_height) height_shift = surface_altitude - station_altitude height_2d = height_2d + height_shift[:, np.newaxis] logging.info(f'Geopotenital surface height:\n{geopotential_surface_height}') logging.info(f'Model surface altitude:\n{surface_altitude}') logging.info(f'Station altitude: {station_altitude}') logging.info(f'Heigth shift:\n{height_shift}') logging.info(f'Corrected model height[:,0]:\n{height_2d[:, 0]}') if flagPicassoComparison: height_2d = ds.height.values height_grid = height_grid + station_altitude # for some reasons this interpolation provides strange results in the lowermost layers # zi = griddata((np.repeat(time, ds_dash.height.shape[1], axis=0), # height_2d.ravel()), # ds_dash['temperature'].values.ravel(), # (time[None,:], height_grid[:,None]), # method='linear') temp = np.zeros((time.shape[0], height_grid.shape[0])) p = np.zeros((time.shape[0], height_grid.shape[0])) rh = np.zeros((time.shape[0], height_grid.shape[0])) q = np.zeros((time.shape[0], height_grid.shape[0])) for i in range(time.shape[0]): temp[i, :] = np.interp(height_grid, height_2d[i, :], ds['temperature'].values[i, :]) p[i, :] = np.interp(height_grid, height_2d[i, :], ds['pressure'].values[i, :]) rh[i, :] = np.interp(height_grid, height_2d[i, :], ds['rh'].values[i, :]) q[i, :] = np.interp(height_grid, height_2d[i, :], ds['q'].values[i, :]) ds_new = xr.Dataset( data_vars=dict( temperature=(["time", "height"], temp, ds['temperature'].attrs), pressure=(["time", "height"], p, ds['pressure'].attrs), rh=(["time", "height"], rh, ds['rh'].attrs), q=(["time", "height"], q, ds['q'].attrs), ), coords=dict( time=("time", ds.time.values), height=("height", height_grid), ), attrs=ds.attrs, ) return ds_new