def standard_dewpoint_depression(data, tvar='t', dpdvar='dpd', vpvar='vp', tdvar='td', update=False, replace=False,
                                 method='murphy_koop', inplace=False, report=None, verbose=0, **kwargs):
    """ Convert water vapor pressure to dewpoint depression

    Derives the dewpoint depression (dpdvar) and the dewpoint (tdvar) from
    temperature and water vapor pressure, range checks both and records
    failures in the `qual` flag column:

        D   dewpoint depression below 0 or above 60
        Y   dewpoint larger than temperature

    Parameters
    ----------
    data        pd.DataFrame / pd.Panel     Input Radiosonde Data
    tvar        str             Temperature variable to use: t, t_cor
    dpdvar      str             Dewpoint Dep. variable to use: dpd, dpd_mcor
    vpvar       str             Water vapor pressure: vp
    tdvar       str             Dewpoint variable that is written: td
    update      bool            recompute dpd and fill non finite values of an existing dpd
    replace     bool            set flagged values (dpd, vp, td) to NaN
    method      str             saturation water vapor formulation (passed to dewpoint)
    inplace     bool            apply directly to input / no copy ?
    report      -               passed on to journal
    verbose     int             verboseness
    kwargs      dict            accepted, but not used

    Returns
    -------
    pd.DataFrame / same as input (add columns), nothing if inplace

    Raises
    ------
    ValueError      wrong input type, missing vp or missing temperature variable
    """
    from raso.met.conversion import dewpoint
    dpd_absmax = 60.
    ############################################################################
    funcid = "[SV] "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a Dataframe or Panel")

    if not inplace:
        data = data.copy()

    if not hasnames(data, 'qual'):
        data['qual'] = ''

    if not hasnames(data, vpvar):
        raise ValueError(funcid + " Requires a humidity variable: vp")

    if not hasnames(data, tvar):
        raise ValueError(funcid + " Requires variable: t")
    ############################################################################
    #
    # Dewpoint / for IGRA mostly
    #
    if not hasnames(data, dpdvar) or update:
        dpd = data[tvar] - dewpoint(data[vpvar], method=method)
        if hasnames(data, dpdvar):
            # only fill gaps: take the new value where it is finite and the old one is not
            logic = (np.isfinite(dpd) & ~np.isfinite(data[dpdvar].values))  # Update? GOOD, BAD
            merged = np.where(logic, dpd, data[dpdvar].values)  # Update? NEW, OLD
            data[dpdvar] = merged
            data[tdvar] = data[tvar].values - merged
        else:
            data[dpdvar] = dpd
            data[tdvar] = data[tvar].values - dpd

        journal(funcid + "DPD (%s, %s) from vp (%s), replace: %s" % (dpdvar, tvar, vpvar, replace), report, verbose)

    elif not hasnames(data, tdvar):
        # dpd is kept untouched, but the checks below read the dewpoint;
        # without this the function failed with a KeyError on data[tdvar]
        data[tdvar] = data[tvar].values - data[dpdvar].values

    #
    # DPD valid range ?
    #
    logic = ((data[dpdvar].values < 0) | (data[dpdvar].values > dpd_absmax))  # BAD, GOOD
    if replace:
        data[dpdvar] = np.where(logic, np.nan, data[dpdvar].values)  # set dpd
        data[vpvar] = np.where(logic, np.nan, data[vpvar].values)  # set vp
        data[tdvar] = np.where(logic, np.nan, data[tdvar].values)  # set Td

    # NOTE(review): .replace('D', '') matches whole values, so only an entry that is
    # exactly 'D' is cleared, combined flags keep their 'D' -- confirm this is intended
    data['qual'] = np.where(logic, data.qual.values + 'D', data.qual.replace('D', '').values)  # FLAG: D
    #
    # Dewpoint larger than Temperature!
    #
    logic = (data[tdvar].values > data[tvar].values)  # BAD, GOOD
    if replace:
        data[dpdvar] = np.where(logic, np.nan, data[dpdvar].values)  # set dpd
        data[tdvar] = np.where(logic, np.nan, data[tdvar].values)  # set Td
        data[vpvar] = np.where(logic, np.nan, data[vpvar].values)  # set vp

    data['qual'] = np.where(logic, data.qual.values + 'Y', data.qual.replace('Y', '').values)  # FLAG: Y
    ############################################################################
    #
    # Unique Flags
    #
    data['qual'] = unique_flags(data['qual'])
    if not inplace:
        return data
def standard_water_vapor(data, tvar='t', dpdvar='dpd', vpvar='vp', rvar='r', qvar='q', update=False, replace=False,
                         method='murphy_koop', inplace=False, report=None, verbose=0, **kwargs):
    """ Convert humidity variables to water vapor pressure

    The first available humidity variable is used, in this order:
    1. RH (Esat)
    2. Q (P)
    3. DPD (Esat)
    --> VP

    The result is checked against pressure dependent limits and values outside
    are flagged with 'V' in the `qual` column.
    Limits are from the RTTOV Coefficient file table 54 levels

    Notes
    -----
    http://nwpsaf.eu/oldsite/deliverables/rtm/rttov11_coefficients.html#54L_reg_limits

    Conversion and quality control only run if vpvar is missing or update is set.

    Parameters
    ----------
    data        DataFrame / Panel   Input database
    tvar        str             Temperature variable
    dpdvar      str             Dewpoint depression variable
    vpvar       str             Water vapor pressure variable (written)
    rvar        str             Relative humidity variable
    qvar        str             Specific humidity variable
    update      bool            recompute even if vpvar is present
    replace     bool            set flagged values to NaN
    method      str             name of the saturation function in raso.met.esat_functions
    inplace     bool            apply directly to input / no copy ?
    report      -               passed on to journal
    verbose     int             verboseness
    kwargs      **              accepted, but not used

    Returns
    -------
    data (nothing if inplace)

    Raises
    ------
    ValueError      wrong input type or missing input variables
    RuntimeError    none of r, q, dpd could be used
    """
    from raso.met.conversion import sh2vap
    from raso.met import esat_functions
    from raso.qc import profile_limits
    ############################################################################
    rt = profile_limits(tohpa=True, simple_names=True)  # RTTOV Variable Limits
    rt['p'] *= 100.  # hPa to Pa
    rt['vpmin'] *= 100.  # hPa to Pa
    rt['vpmax'] *= 100.  # hPa to Pa
    ############################################################################
    funcid = "[SV] "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a Dataframe or Panel")

    if not inplace:
        data = data.copy()

    if not hasnames(data, 'qual'):
        data['qual'] = ''

    if hasnames(data, [rvar, qvar, dpdvar], value=0):
        raise ValueError(funcid + " Requires a humidity variable: r, q or dpd")

    if hasnames(data, tvar, value=0):
        raise ValueError(funcid + " Requires variable: t")

    vpfunc = getattr(esat_functions, method)

    if not hasnames(data, vpvar) or update:
        remove_pressure = False
        if isinstance(data, pd.Panel):
            data['p'] = 0.  # add minor_axis as p
            data.loc['p', :, :] = np.asarray(data.minor_axis)[np.newaxis, np.newaxis, :]
            remove_pressure = True  # temporary item, removed again at the end
            ndates = data.shape[1] * data.shape[2]
        else:
            ndates = data.shape[0]
        ############################################################################
        # First use R, which includes Q
        # R
        #
        if hasnames(data, rvar):
            journal(funcid + "Using r (%s) and t (%s) for vp (%s)" % (rvar, tvar, vpvar), report, verbose)
            data[vpvar] = data[rvar].values * vpfunc(data[tvar].values)  # Convert r,t to vp
        ############################################################################
        #
        # Q
        #
        elif hasnames(data, qvar):  # was hasnames(qvar, data): arguments swapped
            journal(funcid + "Using q (%s) and p to fill up gaps for vp (%s)" % (qvar, vpvar), report, verbose)
            vp = sh2vap(data[qvar].values, data['p'].values)  # Convert q,p to vp
            if hasnames(data, vpvar):
                logic = (np.isfinite(vp) & ~np.isfinite(data[vpvar].values))  # Update? GOOD, BAD
                data[vpvar] = np.where(logic, vp, data[vpvar].values)  # Update? NEW, OLD
            else:
                # formerly missing: without it vpvar did not exist for the checks below
                data[vpvar] = vp
        ############################################################################
        #
        # Second use DPD (IGRA)
        # DPD
        elif hasnames(data, dpdvar):
            journal(funcid + "Warning using dpd (%s) for vp (%s)" % (dpdvar, vpvar), report, verbose)
            vp = vpfunc((data[tvar] - data[dpdvar]).values)  # Convert Td to vp
            if hasnames(data, vpvar):
                logic = (np.isfinite(vp) & ~np.isfinite(data[vpvar].values))  # Update? GOOD, BAD
                data[vpvar] = np.where(logic, vp, data[vpvar].values)  # Update? NEW, OLD
            else:
                data[vpvar] = vp
        else:
            raise RuntimeError("No humidity variable found!")
        ############################################################################
        #
        # Quality control
        #
        # limits are interpolated in log(p) to the pressure of every value
        vpmins = np.interp(np.log(data.p.values), np.log(rt.p.values), rt.vpmin.values,
                           left=rt.vpmin.min(), right=rt.vpmin.max())  # Interpolate Minimum
        vpmaxs = np.interp(np.log(data.p.values), np.log(rt.p.values), rt.vpmax.values,
                           left=rt.vpmax.min(), right=rt.vpmax.max())  # Interpolate Maximum
        # Range? BAD, GOOD
        logic = ((data[vpvar].values < vpmins) | (data[vpvar].values > vpmaxs)) & np.isfinite(data[vpvar].values)
        # NOTE(review): .replace('V', '') matches whole values only, an entry such as
        # 'DV' keeps its 'V' -- confirm this is intended
        data['qual'] = np.where(logic, data.qual.values + 'V', data.qual.replace('V', '').values)  # FLAG: V
        journal(funcid + "#%8d V flagged. (%d)" % (np.sum(np.sum(flag_inside(data.qual, 'V'))), ndates), report,
                verbose)
        if replace:
            data[vpvar] = np.where(logic, np.nan, data[vpvar].values)  # Apply? BAD, GOOD
        ############################################################################
        #
        # Unique Flags
        #
        data['qual'] = unique_flags(data['qual'])
        ############################################################################
        #
        # Pressure
        #
        if remove_pressure:
            del data['p']

    if not inplace:
        return data
def standard_rel_humidity(data, rvar='r', tvar='t', dpdvar='dpd', qvar='q', update=False, replace=False,
                          method='murphy_koop', inplace=False, report=None, verbose=0):
    """ convert humidity variables to relative humidity

    1. r to vp (if present)
    2. q to vp, fill gaps
    3. dpd to vp, fill gaps
    4. vp to rh

    set quality flags in the `qual` column:

        R   vp from r outside the RTTOV limits, or final rh outside [0, 1]
        Q   vp from q outside the RTTOV limits
        D   vp from dpd outside the RTTOV limits

    Conversion and quality control only run if rvar is missing or update is set.

    Parameters
    ----------
    data        DataFrame / Panel   Input database
    rvar        str             Relative humidity variable (written)
    tvar        str             Temperature variable
    dpdvar      str             Dewpoint depression variable
    qvar        str             Specific humidity variable
    update      bool            Update existing
    replace     bool            set flagged to nan
    method      str             Saturation water vapor, function name in raso.met.esat_functions
    inplace     bool            apply directly to input / no copy ?
    report      -               passed on to journal
    verbose     int             verboseness

    Returns
    -------
    data (nothing if inplace)

    Raises
    ------
    ValueError      wrong input type or missing input variables
    """
    from raso.met.conversion import sh2vap
    from raso.met import esat_functions
    from raso.qc import profile_limits
    ############################################################################
    rt = profile_limits(tohpa=True, simple_names=True)  # RTTOV Variable Limits
    rt['p'] *= 100.  # hPa to Pa
    rt['vpmin'] *= 100.  # hPa to Pa
    rt['vpmax'] *= 100.  # hPa to Pa
    ############################################################################
    funcid = "[SV] "
    r_absmin = 0
    r_absmax = 1
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a Dataframe or Panel")

    if not inplace:
        data = data.copy()

    if not hasnames(data, 'qual'):  # was hasnames('qual', data): arguments swapped
        data['qual'] = ''

    if hasnames(data, [rvar, qvar, dpdvar], value=0):
        raise ValueError(funcid + " Requires a humidity variable: %s, %s or %s" % (rvar, qvar, dpdvar))

    if hasnames(data, tvar, value=0):
        raise ValueError(funcid + " Requires variable: %s" % tvar)

    vpfunc = getattr(esat_functions, method)

    if not hasnames(data, rvar) or update:
        remove_pressure = False
        if isinstance(data, pd.Panel):
            data['p'] = 0.  # add minor_axis as p
            data.loc['p', :, :] = np.asarray(data.minor_axis)[np.newaxis, np.newaxis, :]
            remove_pressure = True  # temporary item, removed again at the end

        journal(funcid + "rel. humidity (%s) update: %s" % (rvar, update), report, verbose)
        # Quality control
        # limits are interpolated in log(p) to the pressure of every value
        vpmins = np.interp(np.log(data.p.values), np.log(rt.p.values), rt.vpmin.values,
                           left=rt.vpmin.min(), right=rt.vpmin.max())  # Interpolate Minimum
        vpmaxs = np.interp(np.log(data.p.values), np.log(rt.p.values), rt.vpmax.values,
                           left=rt.vpmax.min(), right=rt.vpmax.max())  # Interpolate Maximum
        vpsat = vpfunc(data[tvar].values)

        if hasnames(data, rvar):
            vp = data[rvar].values * vpsat  # Convert r,t to vp
            logic = ((vp < vpmins) | (vp > vpmaxs)) & np.isfinite(vp)
            data['qual'] = np.where(logic, data.qual.values + 'R', data.qual.replace('R', '').values)  # FLAG: R
            journal(funcid + "rel. humidity (%s) available (replace: %s)" % (rvar, replace), report, verbose)
            if replace:
                vp = np.where(logic, np.nan, vp)  # Apply? BAD, GOOD
        else:
            vp = np.full(data[tvar].shape, np.nan)  # nothing yet, filled from q / dpd below

        if hasnames(data, qvar):
            qvp = sh2vap(data[qvar].values, data['p'].values)  # only a formula no approximation
            logic = ((qvp < vpmins) | (qvp > vpmaxs)) & np.isfinite(qvp)
            data['qual'] = np.where(logic, data.qual.values + 'Q', data.qual.replace('Q', '').values)  # FLAG: Q
            journal(funcid + "spec. humidity (%s) available (replace: %s)" % (qvar, replace), report, verbose)
            if replace:
                qvp = np.where(logic, np.nan, qvp)  # Apply? BAD, GOOD
            # Fill Gaps
            logic = (np.isfinite(qvp) & (~np.isfinite(vp)))  # GOOD, BAD
            vp = np.where(logic, qvp, vp)  # UPDATE, OLD

        if hasnames(data, dpdvar):
            dvp = vpfunc((data[tvar] - data[dpdvar]))  # Dewpoint -> vp
            logic = ((dvp < vpmins) | (dvp > vpmaxs)) & np.isfinite(dvp)
            data['qual'] = np.where(logic, data.qual.values + 'D', data.qual.replace('D', '').values)  # FLAG: D
            journal(funcid + "DPD (%s) available (replace: %s)" % (dpdvar, replace), report, verbose)
            if replace:
                dvp = np.where(logic, np.nan, dvp)  # Apply? BAD, GOOD
            # fill gaps
            logic = (np.isfinite(dvp) & (~np.isfinite(vp)))  # GOOD, BAD
            vp = np.where(logic, dvp, vp)  # UPDATE, OLD
        ############################################################################
        #
        # Convert VP to RH
        #
        data[rvar] = vp / vpsat  # rel. Humidity
        logic = ((data[rvar].values < r_absmin) | (data[rvar].values > r_absmax)) & np.isfinite(
            data[rvar].values)  # Range? BAD, GOOD
        # NOTE(review): .replace('R', '') matches whole values only, combined flags keep
        # their 'R' (same for 'Q' and 'D' above) -- confirm this is intended
        data['qual'] = np.where(logic, data.qual.values + 'R', data.qual.replace('R', '').values)  # FLAG: R
        if replace:
            # was data.r.values, which ignored a non default rvar
            data[rvar] = np.where(logic, np.nan, data[rvar].values)  # Apply? BAD, GOOD
        ############################################################################
        #
        # Unique Flags
        #
        data['qual'] = unique_flags(data['qual'])
        ############################################################################
        #
        # Pressure
        #
        if remove_pressure:
            del data['p']

    if not inplace:
        return data
def detect_and_correct_daynight(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None,
                                levels=None, sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics (00Z and 12Z separately)

    Breakpoints are detected in the departures (var - var_era) for valid time 0
    and valid time 12 independently. Each subset with breakpoints is adjusted
    with a mean and / or a quantile correction, the break markers of the 00Z
    subset are multiplied by -1 and both subsets are joined again.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data        pd.DataFrame    Radiosonde Database (a missing var_dep is added to it)
    var         str             Variable: t, dpd
    thres       int             SNHT Threshold
    correct_m   bool            Correct Mean Adjust ?
    correct_q   bool            Correct Quantile Adjust ? (incl. ERA quantile correction)
    quantilen   list/array      Quantile Ranges, default: 0, 10, ..., 100
    levels      list            Pressure levels
    sample_size int             minimum Sample size
    borders     int             biased sample before and after a break
    bounded     tuple           passed on to the correction routines
    verbose     int             verboseness
    kwargs      dict            breakpoint.detection

    Returns
    -------
    joined data, dict with the break keys of '00Z' and '12Z'

    Raises
    ------
    ValueError      no departures and var / var_era not available
    """
    funcid = "[DC] Data "
    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    breaks_name = '%s_dep_breaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    quiet = verbose - 1  # sub routines report one level less
    # Detect in Departures, once per launch time
    # might not be data for both ?
    ibreaks1, night = breakpoint.detection(data, dep_name, thres=thres, levels=levels, valid_times=[0],
                                           freq='24h', verbose=quiet, **kwargs)
    ibreaks2, noon = breakpoint.detection(data, dep_name, thres=thres, levels=levels, valid_times=[12],
                                          freq='24h', verbose=quiet, **kwargs)
    # new variables:
    # *_breaks, *_snht
    shared = dict(sample_size=sample_size, bounded=bounded, verbose=quiet)

    def _correct(subset):
        # Correct absolute values of one subset, the last correction defines the returned breaks
        # NOTE(review): the ERA quantile correction is taken to be part of the
        # correct_q branch -- confirm against the un-collapsed source
        found = {}
        if correct_m:
            # Mean Correction
            found, subset = breakpoint.mean_correction(subset, var, breaks_name, borders=borders, **shared)

        if correct_q:
            # Quantile Correction
            found, subset = breakpoint.quantile_correction(subset, var, breaks_name, quantilen=quantilen,
                                                           borders=borders, **shared)
            # ERA Quantile Correction (statistics are not used any further)
            _, subset = breakpoint.quantile_era_correction(subset, var, era_name, breaks_name,
                                                           quantilen=quantilen, borders=None, **shared)
        return found, subset

    night_breaks, day_breaks = {}, {}
    if ibreaks1[dep_name]:
        night_breaks, night = _correct(night)

    if ibreaks2[dep_name]:
        day_breaks, noon = _correct(noon)

    # night breaks will be negative
    night[breaks_name] *= -1
    # recombine day and night
    new = pd.concat([night, noon], axis=1, join='outer')
    return new, {'00Z': night_breaks.keys(), '12Z': day_breaks.keys()}
def detect_and_correct_loop(data, var='dpd', thres=50, iteration=1, correct_m=True, correct_q=True, quantilen=None,
                            levels=None, sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics, repeatedly

    Breakpoints are detected in the departures (var - var_era). The initial
    results are kept as var_dep_initbreaks / var_dep_initsnht. Then, for every
    iteration, a mean correction followed by a new detection on the corrected
    departures and / or a quantile correction is applied.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data        DataFrame/Panel Radiosonde Database (a missing var_dep is added to it)
    var         str             Variable: t, dpd
    thres       int             SNHT Threshold
    iteration   int             Number of Detect and Correct Loops
    correct_m   bool            Correct Mean Adjust ?
    correct_q   bool            Correct Quantile Adjust ? (incl. ERA quantile correction)
    quantilen   list/array      Quantile Ranges, default: 0, 10, ..., 100
    levels      list            Pressure levels
    sample_size int             minimum Sample size
    borders     int             biased sample before and after a break
    bounded     tuple           passed on to the correction routines
    verbose     int             verboseness
    kwargs      dict            breakpoint.detection

    Returns
    -------
    Panel, dict with the break keys of the last correction ('breaks')

    Raises
    ------
    ValueError      wrong input type, or no departures and var / var_era not available
    """
    funcid = "[DCL] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    init_breaks_name = '%s_dep_initbreaks' % var
    mcor_breaks_name = '%s_mcor_dep_breaks' % var
    mcor_dep_name = '%s_mcor_dep' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    quiet = verbose - 1  # sub routines report one level less
    # Detect in Departures (First Blood)
    # Detect breakpoints > to panel
    ibreaks, new = breakpoint.detection(data, dep_name, thres=thres, levels=levels, verbose=quiet, **kwargs)
    breakpoints = []
    # new variables:
    # *_breaks, *_snht  >> kept under *_initbreaks, *_initsnht
    new.rename(items={'%s_dep_breaks' % var: init_breaks_name,
                      '%s_dep_snht' % var: '%s_dep_initsnht' % var},
               inplace=True)

    if ibreaks[dep_name]:
        first_breaks = sorted(np.where((new[init_breaks_name] > 0).any(1))[0])
        mcor_breaks = len(first_breaks)
        qcor_breaks = len(first_breaks)
        for step in range(iteration):
            # Correction
            # Mean Correction
            if correct_m and mcor_breaks > 0:
                if step == 0:
                    new[mcor_breaks_name] = new[init_breaks_name]  # copy breakpoints

                # Same call as everywhere
                bmstat, new = breakpoint.mean_correction(new, var, mcor_breaks_name, sample_size=sample_size,
                                                         borders=borders, bounded=bounded,
                                                         varcopy=False,  # reuse mcor
                                                         verbose=quiet)
                # works on >> var_mcor
                breakpoints = bmstat.keys()
                # create Departures for Detection
                new[mcor_dep_name] = new["%s_mcor" % var] - new[era_name]
                # Detection
                ibreaks, new = breakpoint.detection(new, mcor_dep_name, thres=thres, levels=levels,
                                                    verbose=quiet, **kwargs)
                # NOTE(review): ibreaks is indexed by variable name above, so this looks
                # like the number of entries and not the number of breaks -- confirm
                mcor_breaks = len(ibreaks)

            if correct_q and qcor_breaks > 0:
                if step == 0:
                    new['%s_qcor_dep_breaks' % var] = new[init_breaks_name]  # copy breakpoints

                # Quantile Correction
                # NOTE(review): uses '%s_dep_breaks', which was renamed to *_initbreaks above,
                # and not the *_qcor_dep_breaks copy -- presumably unfinished, verify
                bqstat, new = breakpoint.quantile_correction(new, var, '%s_dep_breaks' % var,
                                                             quantilen=quantilen, sample_size=sample_size,
                                                             borders=borders, bounded=bounded, verbose=quiet)
                breakpoints = bqstat.keys()
                # individual or not ?
                # ERA Quantile Correction (statistics are not used any further)
                # NOTE(review): taken to be part of the correct_q branch -- confirm
                # against the un-collapsed source
                _, new = breakpoint.quantile_era_correction(new, var, era_name, '%s_dep_breaks' % var,
                                                            quantilen=quantilen, sample_size=sample_size,
                                                            borders=None, bounded=bounded, verbose=quiet)

    return new, {'breaks': breakpoints}
def detect_and_correct(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None, levels=None,
                       sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics

    Breakpoints are detected in the departures (var - var_era). If there are
    any, the data is adjusted with a mean and / or a quantile correction.

    uses raso.timeseries.breakpoint.detection / correction

    Parameters
    ----------
    data        DataFrame/Panel Radiosonde Database (a missing var_dep is added to it)
    var         str             Variable: t, dpd
    thres       int             SNHT Threshold
    correct_m   bool            Correct Mean Adjust ?
    correct_q   bool            Correct Quantile Adjust ? (incl. ERA quantile correction)
    quantilen   list/array      Quantile Ranges, default: 0, 10, ..., 100
    levels      list            Pressure levels
    sample_size int             minimum Sample size
    borders     int             biased sample before and after a break
    bounded     tuple           passed on to the correction routines
    verbose     int             verboseness
    kwargs      dict            breakpoint.detection

    Returns
    -------
    Panel, dict with the break keys of the last correction ('breaks')

    Raises
    ------
    ValueError      wrong input type, or no departures and var / var_era not available
    """
    funcid = "[DC] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    breaks_name = '%s_dep_breaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    quiet = verbose - 1  # sub routines report one level less
    # Detect in Departures
    # Detect breakpoints > to panel
    ibreaks, new = breakpoint.detection(data, dep_name, thres=thres, levels=levels, verbose=quiet, **kwargs)
    breakpoints = []
    # new variables:
    # *_breaks, *_snht
    if not ibreaks[dep_name]:
        # nothing detected, nothing to correct
        return new, {'breaks': breakpoints}

    # Correct absolute Values
    shared = dict(sample_size=sample_size, bounded=bounded, verbose=quiet)
    if correct_m:
        # Mean Correction
        bmstat, new = breakpoint.mean_correction(new, var, breaks_name, borders=borders, **shared)
        breakpoints = bmstat.keys()

    if correct_q:
        # Quantile Correction
        bqstat, new = breakpoint.quantile_correction(new, var, breaks_name, quantilen=quantilen,
                                                     borders=borders, **shared)
        breakpoints = bqstat.keys()
        # ERA Quantile Correction (statistics are not used any further)
        # NOTE(review): taken to be part of the correct_q branch -- confirm
        # against the un-collapsed source
        _, new = breakpoint.quantile_era_correction(new, var, era_name, breaks_name, quantilen=quantilen,
                                                    borders=None, **shared)

    return new, {'breaks': breakpoints}