Source code for ppcpy.qc.overlapEst


import logging
import numpy as np

from ppcpy.retrievals.collection import calc_snr
from ppcpy.misc.helper import mean_stable, uniform_filter
from scipy.ndimage import uniform_filter1d

from pathlib import Path

[docs] def run_frnr_cldFreeGrps(data_cube, collect_debug:bool=True) -> list: """Estimate overlap function using far-range-near-range method. Parameters ---------- data_cube : object Main PicassoProc object. collect_debug : bool If true, collect debug information. Default is True. Returns ------- overlap : list of dicts Per channel per cloud free period: - overlap : ndarray Overlap function. - overlapStd : ndarray Standard deviation of overlap function. - sigRatio : float Signal ratio between near-range and far-range signals. - normRange : list Height index of the signal normalization range. Notes ----- - The matlab version preforms some convertio from PC to PCR after calculating the overlap function (this might just be relevent at all) - The matlab version also uses calculates 1 overlap function for each channel while we calculate 1 per clFreeGrp per channel... - The frnr overlap calculations are very unstable and frequently results in values outside the range [0,1]. """ height = data_cube.retrievals_highres['range'] logging.warning(f'rayleighfit seems to use range in matlab, but the met data should be in height >> RECHECK!') logging.warning(f'at 10km height this is a difference of about 4 indices') config_dict = data_cube.polly_config_dict overlap = [{} for i in range(len(data_cube.clFreeGrps))] print('Starting Overlap retrieval') for i, cldFree in enumerate(data_cube.clFreeGrps): print('cldFree', i, cldFree) cldFree = cldFree[0], cldFree[1] + 1 print('cldFree mod', cldFree) for wv in [355, 387, 532, 607]: if np.any(data_cube.gf(wv, 'total', 'FR')) and np.any(data_cube.gf(wv, 'total', 'NR')): print(wv, 'both telescopes available') sigFR = np.squeeze(data_cube.retrievals_profile['sigTCor'][i, :, data_cube.gf(wv, 'total', 'FR')]) bgFR = np.squeeze(data_cube.retrievals_profile['BGTCor'][i, data_cube.gf(wv, 'total', 'FR')]) sigNR = np.squeeze(data_cube.retrievals_profile['sigTCor'][i, :, data_cube.gf(wv, 'total', 'NR')]) bgNR = np.squeeze(data_cube.retrievals_profile['BGTCor'][i, data_cube.gf(wv, 'total', 'NR')]) hFullOverlap = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, 'total', 'FR')][0] ol = overlapCalc(height=height, sigFR=sigFR, bgFR=bgFR, sigNR=sigNR, bgNR=bgNR, hFullOverlap=hFullOverlap, collect_debug=collect_debug) overlap[i][f"{wv}_total_FR"] = ol return overlap
[docs] def overlapCalc(height:np.ndarray, sigFR:np.ndarray, bgFR:np.ndarray, sigNR:np.ndarray, bgNR:np.ndarray, hFullOverlap:float=600, collect_debug=False) -> dict: """Calculate overlap function. Parameters ---------- height : ndarray Height above ground (m). sigFR : ndarray Far-field signal (photon count). bgFR : ndarray Background of far-field signal (photon count). sigNR : ndarray Near-field signal (photon count). bgNR : ndarray Background of near-field signal (photon count). Other Parameters ---------------- hFullOverlap : float, optional Minimum height with full overlap for far-range signal (default: 600 m). Returns ------- overlap : ndarray Overlap function. overlapStd : ndarray Standard deviation of overlap function. sigRatio : float Signal ratio between near-range and far-range signals. normRange : list Height index of the signal normalization range. History ------- - 2021-05-18: First edition by Zhenping """ if sigNR.shape[0] > 0 and sigFR.shape[0] > 0: # Find the height index with full overlap full_overlap_index = np.where(height >= hFullOverlap)[0] if full_overlap_index.size == 0: raise ValueError("The index with full overlap cannot be found.") full_overlap_index = full_overlap_index[0] # Calculate the channel ratio of near and far range total signals sigRatio, normRange, _ = mean_stable(sigNR / sigFR , 40, full_overlap_index, len(sigNR), 0.1) # print('is normRange index or slice?', normRange) # -> is list of indices # Calculate the overlap of the far-range channel if normRange.shape[0] > 0: if collect_debug: print("bgFR.shape", bgFR.shape) print("bgNR.shape", bgNR.shape) print("normRange.shape", normRange.shape) print("normRange", normRange) print("bgFR*normRange.shape[0]", bgFR*normRange.shape[0]) print("bgFR*normRange.shape[0]", bgFR*normRange.shape[0]) SNRnormRangeFR = calc_snr(np.sum(sigFR[normRange], keepdims=True), bgFR*normRange.shape[0]) SNRnormRangeNR = calc_snr(np.sum(sigNR[normRange], keepdims=True), bgNR*normRange.shape[0]) sigRatioStd = sigRatio * np.sqrt(1 / (SNRnormRangeFR**2) + 1 / (SNRnormRangeNR**2)) overlap = (sigFR / (sigNR + 1e-6)) * sigRatio overlapStd = overlap * np.sqrt((sigRatioStd / sigRatio)**2 + 1 / (sigFR + 1e-6)**2 + 1 / (sigNR + 1e-6)**2) ret = {'olFunc': overlap, 'olFuncStd': overlapStd, 'sigRatio': sigRatio, 'normRange': normRange} return ret
[docs] def run_raman_cldFreeGrps(data_cube, collect_debug:bool=True) -> list: """Estimate overlap function using Raman method. Parameters ---------- data_cube : object Main PicassoProc object. collect_debug : bool If true, collect debug information. Default is True. Returns ------- overlap : list of dicts Per channel per cloud free period: - olFunc : ndarray Overlap function. - olStd : float Standard deviation of overlap function. - olFunc0 : ndarray Overlap function with no smoothing. """ height = data_cube.retrievals_highres['range'] hres = data_cube.rawdata_dict['measurement_height_resolution']['var_data'] logging.warning(f'rayleighfit seems to use range in matlab, but the met data should be in height >> RECHECK!') logging.warning(f'at 10km height this is a difference of about 4 indices') config_dict = data_cube.polly_config_dict overlap = [{} for i in range(len(data_cube.clFreeGrps))] print('Starting Raman Overlap retrieval') for i, cldFree in enumerate(data_cube.clFreeGrps): print('cldFree ', i, cldFree) cldFree = cldFree[0], cldFree[1] + 1 print('cldFree mod', cldFree) channels = [((532, 'total', 'FR'), (607, 'total', 'FR')),((355, 'total', 'FR'), (387, 'total', 'FR'))] for (wv, t, tel), (wv_r, t_r, tel_r) in channels: if np.any(data_cube.gf(wv, t, tel)) and np.any(data_cube.gf(wv_r, t_r, tel_r)): print(wv, wv_r, 'both wavelengths available') sig = np.squeeze(data_cube.retrievals_profile['sigTCor'][i, :, data_cube.gf(wv, t, tel)]) # bg = np.nansum(np.squeeze(data_cube.retrievals_highres['BGTCor'][slice(*cldFree), data_cube.gf(wv, t, tel)]), axis=0) molBsc = data_cube.mol_profiles[f'mBsc_{wv}'][i, :] molExt = data_cube.mol_profiles[f'mExt_{wv}'][i, :] if f"{wv}_{t}_{tel}" in data_cube.retrievals_profile['raman'][i].keys(): pass else: continue if 'aerBsc' in data_cube.retrievals_profile['raman'][i][f"{wv}_{t}_{tel}"].keys(): pass else: continue aerBsc = data_cube.retrievals_profile['raman'][i][f"{wv}_{t}_{tel}"]['aerBsc'] sig_r = np.squeeze(data_cube.retrievals_profile['sigTCor'][i, :, data_cube.gf(wv_r, t, tel)]) # bg_r = np.nansum(np.squeeze(data_cube.retrievals_highres['BGTCor'][slice(*cldFree), data_cube.gf(wv_r, t, tel)]), axis=0) molBsc_r = data_cube.mol_profiles[f'mBsc_{wv_r}'][i, :] molExt_r = data_cube.mol_profiles[f'mExt_{wv_r}'][i, :] hFullOverlap = np.array(config_dict['heightFullOverlap'])[data_cube.gf(wv, t, tel)][0] ol = overlapCalcRaman( Lambda_el=wv, Lambda_Ra=wv_r, height=height, sigFRel=sig, sigFRRa=sig_r, molExt=molExt, molBsc_r=molBsc_r, molExt_r=molExt_r, aerBsc=aerBsc, hFullOverlap=hFullOverlap, smoothbins=config_dict['overlapSmoothBins']-3, # Hardcoded???? AE=config_dict['angstrexp'], hres=hres, collect_debug=collect_debug ) overlap[i][f"{wv}_{t}_{tel}"] = ol return overlap
[docs] def overlapCalcRaman( Lambda_el:float, Lambda_Ra:float, height:np.ndarray, sigFRel:np.ndarray, sigFRRa:np.ndarray, molExt:np.ndarray, molBsc_r:np.ndarray, molExt_r:np.ndarray, aerBsc:np.ndarray, hFullOverlap:float=600, smoothbins:float=1, AE:float=1, hres:float=150, collect_debug=False ) -> dict: """Calculate overlap function from Polly measurements based on Wandinger and Ansmann 2002 https://doi.org/10.1364/AO.41.000511 Parameters ---------- Lambda_el : float Elastic wavelength. Lambda_Ra : float Raman wavelength. height : ndarray Height above ground [m]. sigFRel : ndarray Far-field elastic signal. sigFRRa : ndarray Far-field Raman signal. molExt : ndarray Molecular extinction (elastic). molBsc_r : ndarray Molecular Backscatter (inelastic). molExt_r : ndarray Molecular extinction (inelastic). aerBsc : ndarray Particle backscattering derived with the Raman method [m^{-1}]. hFullOverlap : float, optional Minimum height with complete overlap [m]. Default is 600. smoothbins : int, optional Number of bins for smoothing. Default is 1. AE : float, optional Angstroem exponent. Default is 1. hres : float, optional Instrument height resolution [m]. Default is 150. Returns ------- olFunc : ndarray Overlap function. olStd : float Standard deviation of overlap function. olFunc0 : ndarray Overlap function with no smoothing. History ------- - 2023-06-06: First edition by Cristofer Notes ----- - TODO: What is returned by the function and what is described in the docstring does not corresponed. - TODO: This function uses a mix of ppcpy.misc.helper.unifrom_filter and scipy.ndimage.uniform_filter1d for smoothing. This is done to avoid NaN-value related errors. A better more cohesive solution should be precude in the future. - TODO: Be cearfull with NaN values! scipy.ndimage.uniform_filter1d used for smoothing in this module will propagate any NaN values present in the signal throughot the rest of the smoothed signal. Additionaly, here we are using mode 'reflect' for padding (see scipy.ndimage.uniform_filter1d documentation). This might not be the most optimal mode, the other availabel mode should also be considered. Optimally, should we designe our own filter for this purpuse that do not have the issue with propagating NaN values, Like what is used in the rest of the modules. However, without reducing dimension or filling in NaN values. """ if len(aerBsc) > 0: sigFRRa0 = sigFRRa.copy() for _ in range(5): sigFRRa = uniform_filter1d(sigFRRa, smoothbins) sigFRel = uniform_filter1d(sigFRel, smoothbins) aerBsc = aerBsc.copy() if len(aerBsc) > 0: aerBsc[:5] = aerBsc[5] else: aerBsc = 0 aerBsc0 = aerBsc.copy() # Use ppcpy.misc.helper.unifrom_filter here to avoid propagating the NaN-values in aerBse througout the smoothed array. aerBsc = uniform_filter(aerBsc, smoothbins) LR0 = np.arange(30, 82, 2) # LR array to search best LR. diff_norm = [] for ii in range(len(LR0) + 1): if ii == len(LR0): indx_min = np.argmin(diff_norm) LR = LR0[indx_min] else: LR = LR0[ii] # Overlap calculation (direct version) transRa = np.exp(-np.nancumsum((molExt_r + LR * aerBsc * (Lambda_el / Lambda_Ra) ** AE) * np.concatenate(([height[0]], np.diff(height))))) transel = np.exp(-np.nancumsum((molExt + LR * aerBsc) * np.concatenate(([height[0]], np.diff(height))))) transRa0 = np.exp(-np.nancumsum((molExt_r + LR * aerBsc0 * (Lambda_el / Lambda_Ra) ** AE) * np.concatenate(([height[0]], np.diff(height))))) transel0 = np.exp(-np.nancumsum((molExt + LR * aerBsc0) * np.concatenate(([height[0]], np.diff(height))))) if sigFRRa.shape[0] > 0 and sigFRel.shape[0] > 0: fullOverlapIndx = np.searchsorted(height, hFullOverlap) if fullOverlapIndx == len(height): raise ValueError('The index with full overlap cannot be found.') olFunc = sigFRRa * height ** 2 / molBsc_r / transel / transRa olFunc0 = sigFRRa0 * height ** 2 / molBsc_r / transel0 / transRa0 for _ in range(5): olFunc = uniform_filter1d(olFunc, 3) ovl_norm, normRange, _ = mean_stable(olFunc, 40, fullOverlapIndx - round(37.5 / hres), fullOverlapIndx + round(2250 / hres), 0.1) ovl_norm0, normRange0, _ = mean_stable(olFunc0, 40, fullOverlapIndx - round(37.5 / hres), fullOverlapIndx + round(2250 / hres), 0.1) if ovl_norm.size == 1: olFunc /= ovl_norm else: olFunc /= np.nanmean(olFunc[fullOverlapIndx + round(150 / hres):fullOverlapIndx + round(1500 / hres)]) if ovl_norm0.size == 1: olFunc0 /= ovl_norm0 else: olFunc0 /= np.nanmean(olFunc0[fullOverlapIndx + round(150 / hres):fullOverlapIndx + round(1500 / hres)]) # first bin to start searching full overlap height. % Please replace the 180 by a paramter in future. bin_ini = int(np.ceil(180 / hres)) full_ovl_indx = np.argmax(np.diff(olFunc[bin_ini:]) <= 0) + bin_ini if full_ovl_indx == 0: full_ovl_indx = fullOverlapIndx diff_norm.append(np.nansum(np.abs(1 - olFunc[full_ovl_indx:full_ovl_indx + round(1500 / hres)]))) olFunc[full_ovl_indx:] = olFunc[full_ovl_indx] olFunc /= olFunc[full_ovl_indx] # renormalization if (full_ovl_indx - bin_ini) < 1: full_ovl_indx = bin_ini + 1 norm_index0 = np.argmax(olFunc[full_ovl_indx - bin_ini:full_ovl_indx + bin_ini * 3]) norm_index = norm_index0 + full_ovl_indx - bin_ini - 1 olFunc /= np.mean(olFunc[norm_index - 1:norm_index + 2]) half_ovl_indx = np.argmax(olFunc >= 0.95) if half_ovl_indx == 0: half_ovl_indx = full_ovl_indx - int(np.floor(180 / hres)) # smoothing before full overlap to avoid oscilations on that part. for _ in range(6): # smoothing before full overlap to avoid S-shape near to the # full overlap. olFunc[half_ovl_indx:norm_index + round(bin_ini / 2)] = uniform_filter1d(olFunc[half_ovl_indx:norm_index + round(bin_ini / 2)], 5) olFunc[olFunc < 1e-5] = 1e-5 #olFunc = olFunc.T #olFunc0 = olFunc0.T ret = {'olFunc': olFunc, 'olFunc_raw': olFunc0, 'LR': LR, 'normRange': normRange} return ret
[docs] def load(data_cube) -> list: """read the overlap function from files into a structure similar to the others. Parameters ---------- data_cube : object Main PicassoProc object. Returns ------- overlap : list of dicts """ print(data_cube.picasso_config_dict['defaultFile_folder']) print(data_cube.polly_config_dict) print(data_cube.polly_default_dict) ovl_files_for = [k for k in data_cube.polly_default_dict.keys() if 'overlapFile_' in k] print(ovl_files_for) height = data_cube.retrievals_highres['range'] polly_default = data_cube.polly_default_dict folder = Path(data_cube.picasso_config_dict['defaultFile_folder']) #data_cube.retrievals_profile['overlap']['raman'][i][f'{wv}_total_NR']['olFunc'] overlap = [{}] for k in ovl_files_for: print(k) full_f = folder.joinpath(polly_default[k]) print(full_f) if polly_default[k] == "": print('empty string') continue dat = np.loadtxt(full_f, skiprows=1, delimiter=',') print(dat.shape) h_ovl = dat[:, 0] ovl = dat[:, 1] print(h_ovl.shape, h_ovl[:10], h_ovl[-10:]) print(ovl.shape, ovl[:20]) if height.shape != h_ovl.shape or not np.all(np.isclose(height, h_ovl)): logging.warning('Heights not equal, need interpolating') ovl = np.interp(height, h_ovl, ovl, left=0, right=1) overlap[0][k.replace('overlapFile_', '')] = {} overlap[0][k.replace('overlapFile_', '')]['olFunc'] = ovl return overlap