def _correct_daynight_sample(sample, var, correct_m, correct_q, quantilen, sample_size, borders, bounded, verbose):
    """Run the enabled break corrections on the sample of one launch time.

    Parameters
    ----------
    sample          object          second return value of breakpoint.detection
    var             str             Variable: t, dpd
    correct_m       bool            Correct Mean Adjust ?
    correct_q       bool            Correct Quantile Adjust ?
    quantilen       list/array      Quantile Ranges
    sample_size     int             minimum Sample size
    borders         int             biased sample before and after a break
    bounded         tuple           handed on to the correction routines
    verbose         int             verboseness

    Returns
    -------
    break statistics of the last correction that ran ({} if none ran), corrected sample
    """
    break_name = '%s_dep_breaks' % var
    stats = {}
    if correct_m:
        # Mean Correction
        stats, sample = breakpoint.mean_correction(sample, var, break_name,
                                                   sample_size=sample_size,
                                                   borders=borders,
                                                   bounded=bounded,
                                                   verbose=verbose)
    if correct_q:
        # Quantile Correction (its statistics replace the ones of the mean correction)
        stats, sample = breakpoint.quantile_correction(sample, var, break_name,
                                                       quantilen=quantilen,
                                                       sample_size=sample_size,
                                                       borders=borders,
                                                       bounded=bounded,
                                                       verbose=verbose)
        # ERA Quantile Correction, always called with borders=None; its statistics are not used
        _, sample = breakpoint.quantile_era_correction(sample, var, '%s_era' % var, break_name,
                                                       quantilen=quantilen,
                                                       sample_size=sample_size,
                                                       borders=None,
                                                       bounded=bounded,
                                                       verbose=verbose)
    return stats, sample


def detect_and_correct_daynight(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None,
                                levels=None, sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics (00Z and 12Z separately)

    Use ERA-Interim departures to detect breakpoints and correct these with a mean and a quantile
    adjustment going back in time. Detection and correction run once with valid_times=[0] and once
    with valid_times=[12]; both results are concatenated afterwards.

    uses raso.timeseries.breakpoint.detection / correction

    If data has no '*_dep' column yet, it is computed as var - var_era and stored in data (in place).

    Parameters
    ----------
    data            pd.DataFrame    Radiosonde Database
    var             str             Variable: t, dpd
    thres           int             SNHT Threshold
    correct_m       bool            Correct Mean Adjust ?
    correct_q       bool            Correct Quantile Adjust ?
    quantilen       list/array      Quantile Ranges (default: np.arange(0, 101, 10))
    levels          list            Pressure levels
    sample_size     int             minimum Sample size
    borders         int             biased sample before and after a break
    bounded         tuple           handed on to the correction routines
    verbose         int             verboseness
    kwargs          dict            breakpoint.detection

    Returns
    -------
    new, {'00Z': breaks, '12Z': breaks}
        new is pd.concat([night, noon], axis=1, join='outer'); the '*_dep_breaks' values of the
        00Z sample are multiplied by -1 before concatenation.

    Raises
    ------
    ValueError      neither '*_dep' nor both var and '*_era' are available in data
    """
    funcid = "[DC] Data "
    dep_name = '%s_dep' % var

    if not hasnames(data, dep_name):
        if not hasnames(data, [var, '%s_era' % var]):
            raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

        data[dep_name] = data[var] - data['%s_era' % var]  # Departures
    else:
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Detect in Departures
    # Detect breakpoints > to panel
    # might not be data for both ?
    ibreaks1, night = breakpoint.detection(data, dep_name, thres=thres, levels=levels, valid_times=[0],
                                           freq='24h', verbose=verbose - 1, **kwargs)
    ibreaks2, noon = breakpoint.detection(data, dep_name, thres=thres, levels=levels, valid_times=[12],
                                          freq='24h', verbose=verbose - 1, **kwargs)
    # new variables:
    # *_breaks, *_snht
    night_breaks = {}
    day_breaks = {}

    # Correct absolute Values, only where the detection reported something
    if ibreaks1[dep_name]:
        night_breaks, night = _correct_daynight_sample(night, var, correct_m, correct_q, quantilen,
                                                       sample_size, borders, bounded, verbose - 1)

    if ibreaks2[dep_name]:
        day_breaks, noon = _correct_daynight_sample(noon, var, correct_m, correct_q, quantilen,
                                                    sample_size, borders, bounded, verbose - 1)

    # night breaks will be negative
    night['%s_dep_breaks' % var] *= -1
    # recombine day and night
    new = pd.concat([night, noon], axis=1, join='outer')
    # real lists, so the result does not hold dict views on Python 3
    return new, {'00Z': list(night_breaks.keys()), '12Z': list(day_breaks.keys())}
def detect_and_correct_loop(data, var='dpd', thres=50, iteration=1, correct_m=True, correct_q=True, quantilen=None,
                            levels=None, sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Iteratively detect and correct Radiosonde biases from Departure Statistics

    Breakpoints are detected in the ERA-Interim departures and then corrected with a mean and / or
    a quantile adjustment. The correction step is repeated `iteration` times; after every mean
    correction the detection is run again on the corrected departures.

    uses raso.timeseries.breakpoint.detection / correction

    If data has no '*_dep' column yet, it is computed as var - var_era and stored in data (in place).

    Parameters
    ----------
    data            DataFrame/Panel     Radiosonde Database
    var             str                 Variable: t, dpd
    thres           int                 SNHT Threshold
    iteration       int                 Number of Detect and Correct Loops
    correct_m       bool                Correct Mean Adjust ?
    correct_q       bool                Correct Quantile Adjust ?
    quantilen       list/array          Quantile Ranges (default: np.arange(0, 101, 10))
    levels          list                Pressure levels
    sample_size     int                 minimum Sample size
    borders         int                 biased sample before and after a break
    bounded         tuple               handed on to the correction routines
    verbose         int                 verboseness
    kwargs          dict                breakpoint.detection

    Returns
    -------
    new, {'breaks': breakpoints}
        breakpoints is [] if nothing was detected or corrected, otherwise the keys of the
        statistics of the last correction that ran.

    Raises
    ------
    ValueError      data is no DataFrame / Panel, or the departures can not be computed
    """
    funcid = "[DCL] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    init_name = '%s_dep_initbreaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Initial detection in the departures; keep its results under *_initbreaks / *_initsnht
    ibreaks, new = breakpoint.detection(data, dep_name, thres=thres, levels=levels, verbose=verbose - 1, **kwargs)
    breakpoints = []
    new.rename(items={'%s_dep_breaks' % var: init_name,
                      '%s_dep_snht' % var: '%s_dep_initsnht' % var},
               inplace=True)

    if not ibreaks[dep_name]:
        # nothing detected, nothing to correct
        return new, {'breaks': breakpoints}

    # number of positions at which any column of the initial breaks is > 0
    has_break = (new[init_name] > 0).any(1)
    n_initial = len(sorted(np.where(has_break)[0]))
    mcor_breaks = n_initial
    qcor_breaks = n_initial

    for irun in range(iteration):
        first_run = (irun == 0)

        # Mean Correction
        if correct_m and mcor_breaks > 0:
            if first_run:
                new['%s_mcor_dep_breaks' % var] = new[init_name]  # copy breakpoints

            bmstat, new = breakpoint.mean_correction(new, var, '%s_mcor_dep_breaks' % var,
                                                     sample_size=sample_size,
                                                     borders=borders,
                                                     bounded=bounded,
                                                     varcopy=False,  # reuse mcor
                                                     verbose=verbose - 1)
            breakpoints = bmstat.keys()
            # Departures of the mean corrected variable, used for the next detection
            new["%s_mcor_dep" % var] = new["%s_mcor" % var] - new[era_name]
            ibreaks, new = breakpoint.detection(new, '%s_mcor_dep' % var, thres=thres, levels=levels,
                                                verbose=verbose - 1, **kwargs)
            # NOTE(review): ibreaks is indexed by variable name above, so len() presumably counts
            # variables and not breaks - confirm the intended stop condition
            mcor_breaks = len(ibreaks)

        # Quantile Correction
        if correct_q and qcor_breaks > 0:
            if first_run:
                new['%s_qcor_dep_breaks' % var] = new[init_name]  # copy breakpoints

            # NOTE(review): the corrections below are given *_dep_breaks (renamed to *_dep_initbreaks
            # after the initial detection) and not the *_qcor_dep_breaks copy - confirm
            bqstat, new = breakpoint.quantile_correction(new, var, '%s_dep_breaks' % var,
                                                         quantilen=quantilen,
                                                         sample_size=sample_size,
                                                         borders=borders,
                                                         bounded=bounded,
                                                         verbose=verbose - 1)
            breakpoints = bqstat.keys()
            # ERA Quantile Correction, always called with borders=None; its statistics are not used
            _, new = breakpoint.quantile_era_correction(new, var, era_name, '%s_dep_breaks' % var,
                                                        quantilen=quantilen,
                                                        sample_size=sample_size,
                                                        borders=None,
                                                        bounded=bounded,
                                                        verbose=verbose - 1)

    return new, {'breaks': breakpoints}
def clim(data, anomaly=False, per_month=None, attach=False, how='mean', period=None, verbose=0):
    """Calculate the climatology of data

    A Series is handed to clim_timeseries directly. For a DataFrame, clim_timeseries is applied to
    every numeric column; a DataFrame with a 'p' column holding more than one distinct value is
    converted to a Panel (index: date, p) first. A Panel is processed item by item.

    Parameters
    ----------
    data        Series / DataFrame / Panel      Input data
    anomaly     bool                            climate anomaly
    per_month   str                             return monthly values: clim, count, all
    attach      bool                            attach to data (not possible together with per_month)
    how         str                             climatological moment
    period                                      forwarded to clim_timeseries
    verbose     int                             verboseness

    Returns
    -------
    Series / DataFrame / Panel
        For Panel input: a dict of the per item results if they can not be combined into a Panel
        (the error is printed in that case).

    Raises
    ------
    ValueError      unknown per_month or unsupported type of data
    RuntimeError    attach requested together with per_month
    """
    if per_month is not None and per_month not in ('clim', 'count', 'all'):
        raise ValueError("per_month: clim, count or all")

    if attach and per_month is not None:
        raise RuntimeError("[CLIM] Can not attach a reduced shape to original data")

    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError("[CLIM] Require a Series, DataFrame or Panel")

    data = data.copy()

    # For Series
    if isinstance(data, pd.Series):
        return clim_timeseries(data, anomaly=anomaly, per_month=per_month, attach=attach, how=how,
                               period=period, verbose=verbose - 1)

    # For DataFrame
    if isinstance(data, pd.DataFrame):
        if 'p' in data.columns and len(data['p'].unique()) > 1:
            print_verbose("[CLIM] database detected converting to panel", verbose)
            data = data.reset_index().set_index(['date', 'p']).to_panel()  # continues as Panel below
        else:
            tmp = data.select_dtypes(include=['number']).apply(clim_timeseries, axis=0,
                                                               per_month=per_month,
                                                               anomaly=anomaly,
                                                               how=how,
                                                               attach=attach,
                                                               period=period,
                                                               verbose=verbose - 1)
            if attach:
                # Re-attach exactly the columns that were left out above. Selecting them with the
                # same 'number' criterion keeps numeric columns of other widths (e.g. int32) from
                # showing up on both sides of the join.
                others = data.select_dtypes(exclude=['number'])
                if not others.empty:
                    tmp = tmp.join(others)
            return tmp

    # Panel: climatology per item
    tmp = {}
    for it in data.items:
        tmp[it] = clim(data[it], anomaly=anomaly, per_month=per_month, attach=attach, how=how, period=period,
                       verbose=verbose - 1)

    try:
        tmp = pd.Panel(tmp)
    except Exception as e:
        # best effort: report the problem and hand back the per item results as dict
        print(repr(e))

    return tmp
def detect_and_correct(data, var='dpd', thres=50, correct_m=True, correct_q=True, quantilen=None, levels=None,
                       sample_size=730, borders=180, bounded=(0, 60), verbose=0, **kwargs):
    """Detect and Correct Radiosonde biases from Departure Statistics

    Breakpoints are detected once in the ERA-Interim departures. If any are reported, the variable
    is corrected with a mean and / or a quantile adjustment (the quantile step is followed by an
    ERA quantile adjustment).

    uses raso.timeseries.breakpoint.detection / correction

    If data has no '*_dep' column yet, it is computed as var - var_era and stored in data (in place).

    Parameters
    ----------
    data            DataFrame/Panel     Radiosonde Database
    var             str                 Variable: t, dpd
    thres           int                 SNHT Threshold
    correct_m       bool                Correct Mean Adjust ?
    correct_q       bool                Correct Quantile Adjust ?
    quantilen       list/array          Quantile Ranges (default: np.arange(0, 101, 10))
    levels          list                Pressure levels
    sample_size     int                 minimum Sample size
    borders         int                 biased sample before and after a break
    bounded         tuple               handed on to the correction routines
    verbose         int                 verboseness
    kwargs          dict                breakpoint.detection

    Returns
    -------
    new, {'breaks': breakpoints}
        breakpoints is [] if nothing was detected or corrected, otherwise the keys of the
        statistics of the last correction that ran.

    Raises
    ------
    ValueError      data is no DataFrame / Panel, or the departures can not be computed
    """
    funcid = "[DC] Data "
    if not isinstance(data, (pd.DataFrame, pd.Panel)):
        raise ValueError(funcid + "Requires a DataFrame or Panel")

    dep_name = '%s_dep' % var
    era_name = '%s_era' % var
    break_name = '%s_dep_breaks' % var

    if hasnames(data, dep_name):
        print_verbose(funcid + "Departures: %s_dep used!" % var, verbose)
    elif hasnames(data, [var, era_name]):
        data[dep_name] = data[var] - data[era_name]  # Departures
    else:
        raise ValueError(funcid + "Missing variables for departures: %s %s_era" % (var, var))

    if quantilen is None:
        quantilen = np.arange(0, 101, 10)

    # Detect in Departures; adds *_breaks, *_snht
    ibreaks, new = breakpoint.detection(data, dep_name, thres=thres, levels=levels, verbose=verbose - 1, **kwargs)
    breakpoints = []

    if not ibreaks[dep_name]:
        # nothing detected, nothing to correct
        return new, {'breaks': breakpoints}

    # keyword arguments every correction routine receives
    shared = dict(sample_size=sample_size, bounded=bounded, verbose=verbose - 1)

    if correct_m:
        # Mean Correction
        bmstat, new = breakpoint.mean_correction(new, var, break_name, borders=borders, **shared)
        breakpoints = bmstat.keys()

    if correct_q:
        # Quantile Correction (its keys replace the ones of the mean correction)
        bqstat, new = breakpoint.quantile_correction(new, var, break_name, quantilen=quantilen, borders=borders,
                                                     **shared)
        breakpoints = bqstat.keys()
        # ERA Quantile Correction, always called with borders=None; its statistics are not used
        _, new = breakpoint.quantile_era_correction(new, var, era_name, break_name, quantilen=quantilen,
                                                    borders=None, **shared)

    return new, {'breaks': breakpoints}
def trend(data, timeframe, deg=1, fit=False, anomaly=True, per_month=False, pmiss=0.3, freq='12h', verbose=0,
          debug=False, **kwargs):
    """Calculate poly-linear trend per level and variable

    The data is cut to timeframe and reindexed to pd.date_range(start, stop, freq) before
    trend_timeseries (or trend_fit_timeseries if fit is set) is applied. A DataFrame with a 'p'
    column is processed per pressure level; only levels passing the count > 10 filter are used.
    A Panel is processed by calling trend on every slice along axis=(1, 2).

    Parameters
    ----------
    data        Series / DataFrame / Panel      Input Data
    timeframe   slice                           Timeslice (None: first to last year of the data)
    deg         int                             Polygon degree
    fit         bool                            return residuum
    anomaly     bool                            remove climatology
    per_month   bool                            calculate trends per month
    pmiss       float                           percentage of missing values allowed
    freq        str                             resampling frequency
    verbose     int                             verboseness
    debug       bool                            debug
    kwargs      dict                            to trend_timeseries / trend_fit_timeseries

    Returns
    -------
    result of trend_timeseries / trend_fit_timeseries for a Series, otherwise DataFrame or Panel

    Raises
    ------
    ValueError          timeframe is no slice or data has an unsupported type
    RuntimeWarning      more than 60 pressure levels in a DataFrame with a 'p' column

    Notes
    -----
    per_month is only forwarded to the per slice calls for a Panel, it is not used otherwise.
    NOTE(review): the Series path hands neither deg nor debug to the timeseries functions, while
    the DataFrame paths pass poly_deg=deg and debug=debug - confirm whether that is intended.
    """
    # Validate the arguments before the data is touched, so that invalid input raises the
    # documented ValueError and nothing is copied for a call that is rejected anyway.
    if timeframe is not None and not isinstance(timeframe, slice):
        raise ValueError("Requires a slice")

    if not isinstance(data, (pd.Series, pd.DataFrame, pd.Panel)):
        raise ValueError("Requires a Series, DataFrame or Panel")

    data = data.copy()

    if isinstance(data, pd.Series):
        print_verbose("[TREND] Series", verbose)
        if timeframe is None:
            timeframe = slice(str(data.index[0].year), str(data.index[-1].year))

        stamps = pd.date_range(timeframe.start, timeframe.stop, freq=freq)
        miss = len(stamps) * (1 - pmiss)
        data = data[timeframe]  # Select
        data = data.reindex(stamps)  # resample to right size
        if fit:
            return trend_fit_timeseries(data, miss=miss, **kwargs)
        return trend_timeseries(data, anomaly=anomaly, **kwargs)

    if isinstance(data, pd.DataFrame):
        print_verbose("[TREND] DataFrame", verbose)
        if timeframe is None:
            timeframe = slice(str(data.index[0].year), str(data.index[-1].year))

        stamps = pd.date_range(timeframe.start, timeframe.stop, freq=freq)
        miss = len(stamps) * (1 - pmiss)
        data = data.ix[timeframe, :]
        data = data.reindex(stamps)  # resample to right size

        # DATABASE ?
        if 'p' in data.columns and len(data['p'].unique()) > 0:
            out = {}
            # levels where every column has more than 10 values
            plevels = filter_series((data.groupby('p').count() > 10).all(1))
            plevels = plevels.tolist()
            if len(plevels) > 60:
                raise RuntimeWarning("[TREND] more than 60 pressure levels ? %d" % len(plevels))

            for ip in plevels:
                tmp = data.query('p==%d' % ip).drop('p', 1)
                if fit:
                    out[ip] = tmp.apply(trend_fit_timeseries, axis=0, poly_deg=deg, miss=miss, debug=debug,
                                        **kwargs)
                else:
                    out[ip] = tmp.apply(trend_timeseries, axis=0, poly_deg=deg, anomaly=anomaly, debug=debug,
                                        **kwargs)

            out = pd.Panel(out).squeeze()
            if isinstance(out, pd.Panel):
                return out.swapaxes(0, 2)
            return out

        # PANEL SLICE
        print_verbose("[TREND] whole DataFrame", verbose)
        if fit:
            return data.apply(trend_fit_timeseries, axis=0, poly_deg=deg, miss=miss, debug=debug, **kwargs)
        return data.apply(trend_timeseries, axis=0, poly_deg=deg, anomaly=anomaly, debug=debug, **kwargs)

    # Panel
    print_verbose("[TREND] Panel", verbose)
    if timeframe is None:
        timeframe = slice(str(data.major_axis[0].year), str(data.major_axis[-1].year))

    # everyone has the same timeframe
    return data.apply(lambda x: trend(x, timeframe, deg=deg, fit=fit, anomaly=anomaly, per_month=per_month,
                                      pmiss=pmiss, freq=freq, debug=debug, **kwargs),
                      axis=(1, 2))