def process(self, df_gw, df_w, E):
    if df_gw is None or df_w is None:
        raise Exception('Hydrograph data not found in terminals `df_gw` or `df_w`')
    if E in [None, nan]:
        raise Exception('Tidal efficiency is invalid: E={0}'.format(E))

    self.CW().param('tlag_grp', 'tlag = ').setValue('?')

    colname = [col for col in df_gw.columns if not isNumpyDatetime(df_gw[col].dtype)]
    self.CW().param('gw').setLimits(colname)
    colname = [col for col in df_gw.columns if isNumpyDatetime(df_gw[col].dtype)]
    self.CW().param('gw_dtime').setLimits(colname)
    colname = [col for col in df_w.columns if not isNumpyDatetime(df_w[col].dtype)]
    self.CW().param('river').setLimits(colname)
    colname = [col for col in df_w.columns if isNumpyDatetime(df_w[col].dtype)]
    self.CW().param('river_dtime').setLimits(colname)

    if not self.CW().param('E_grp', 'manual_E').value():
        self.CW().disconnect_valueChanged2upd(self.CW().param('E_grp', 'E'))
        self.CW().param('E_grp', 'E').setValue(E)  # this may trigger process() once again
        self.CW().connect_valueChanged2upd(self.CW().param('E_grp', 'E'))

    kwargs = self.CW().prepareInputArguments()
    E = kwargs['E']  # signals would have to be blocked here as well...

    with BusyCursor():
        if kwargs['method'] == '1) Erskine 1991':
            tlag = timelag_erskine1991_method(df_gw, kwargs['gw'], kwargs['gw_dtime'],
                                              df_w, kwargs['river'], kwargs['river_dtime'],
                                              E,
                                              tlag_tuple=(kwargs['t1'], kwargs['t2'], kwargs['t_step']),
                                              log=True)
        else:
            raise Exception('Method <%s> not yet implemented' % kwargs['method'])

    self.CW().param('tlag_grp', 'tlag = ').setValue(str(tlag))
    return {'tlag': tlag}
def on_pushButton_viewPlot_clicked(self):
    """ Open a nice graphic representation of our data."""
    with BusyCursor():
        try:
            df = self.parent().getPandasDataModel().df
            columns = self.parent().getPandasHeaderModel().selectedColumns()  # consider only the selected columns
            datetime_cols = [col for col in columns if isNumpyDatetime(df[col].dtype)]
            numeric_cols = [col for col in columns if isNumpyNumeric(df[col].dtype)]

            datetime_col = datetime_cols[0] if len(datetime_cols) > 0 else None  # plot with x=datetime if possible

            if self.checkBox_separateSubplots.isChecked() and len(numeric_cols) > 1:
                # Plot each selected numerical column on an individual subplot
                f, axes = plt.subplots(len(numeric_cols), sharex=True)
                for ax, numeric_col in zip(axes, numeric_cols):
                    df.plot(x=datetime_col, y=numeric_col, ax=ax)
                    legend = ax.legend(shadow=True)
                # Fine-tune figure: make subplots close to each other and
                # hide x-ticks for all but the bottom plot.
                # f.subplots_adjust(hspace=0)
                plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
            else:
                # Plot all selected numerical columns together on a single subplot
                f, ax = plt.subplots(1)
                for numeric_col in numeric_cols:
                    df.plot(x=datetime_col, y=numeric_col, ax=ax)
                    legend = ax.legend(shadow=True)
            f.show()
        except Exception as exp:
            self._parent.setException(exp)
            return
def process(self, In):
    gc.collect()
    # populate the `Apply to columns` param, but only when an item is
    # received, not when we click a button
    if not self._ctrlWidget.calculateNAllowed() and not self._ctrlWidget.applyAllowed():
        self._ctrlWidget.param('Apply to columns').clearChildren()

    with BusyCursor():
        df = copy.deepcopy(In)
        # check out http://docs.scipy.org/doc/numpy-dev/neps/datetime-proposal.html
        colnames = [col for col in df.columns if isNumpyDatetime(df[col].dtype)] + [None]
        self._ctrlWidget.param('datetime').setLimits(colnames)
        self._ctrlWidget.param('datetime').setValue(colnames[0])

        # populate the `Apply to columns` param, but only when an item is
        # received, not when we click a button
        if not self._ctrlWidget.calculateNAllowed() and not self._ctrlWidget.applyAllowed():
            colnames = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
            for col_name in colnames:  # cycle through each column...
                self._ctrlWidget.param('Apply to columns').addChild({'name': col_name, 'type': 'bool', 'value': True})

        kwargs = self.ctrlWidget().prepareInputArguments()

        if self._ctrlWidget.calculateNAllowed():
            N = serfes.get_number_of_measurements_per_day(df, datetime=kwargs['datetime'], log=kwargs['log'])
            self._ctrlWidget.param('N').setValue(N)

        if self._ctrlWidget.applyAllowed():
            if kwargs['N'] in [None, '']:
                QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName),
                                          'First set number of measurements per day in parameter `N`')
                raise ValueError('First set number of measurements per day in parameter `N`')
            result = serfes.filter_wl_71h_serfes1991(df, **kwargs)
            return {'Out': result}
def process(self, df):
    if df is None:
        del self.item
        self.item = None
        return {'Curve': None, 'pd.Series': None}

    if self.item is None:
        self.item = PlotDataItem(clipToView=False)

    colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self._ctrlWidget.param('Y:signal').setLimits(colname)
    colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self._ctrlWidget.param('X:datetime').setLimits(colname)

    with BusyCursor():
        kwargs = self.ctrlWidget().prepareInputArguments()
        # self.item = PlotDataItem(clipToView=False)
        t = df[kwargs['X:datetime']].values

        # part 1: build the output time series
        timeSeries = pd.DataFrame(data=df[kwargs['Y:signal']].values, index=t,
                                  columns=[kwargs['Y:signal']])

        # part 2: convert time to epoch seconds, applying the user's timezone correction
        b = t.astype(np.dtype('datetime64[s]'))
        timeStamps = b.astype(np.int64) - kwargs['tz correct'] * 60 * 60 + time.timezone

        # now create the curve
        pen = fn.mkPen(color=kwargs['color'], width=kwargs['width'], style=kwargs['style'])
        self.item.setData(timeStamps, df[kwargs['Y:signal']].values, pen=pen, name=kwargs['Y:signal'])
        self.item.setSymbol(kwargs['symbol'])
        if kwargs['symbol'] is not None:
            self.item.setSymbolPen(kwargs['color'])
            self.item.setSymbolBrush(kwargs['color'])
            self.item.setSymbolSize(kwargs['symbolSize'])
    return {'Curve': self.item, 'pd.Series': timeSeries}
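# The following is an illustrative sketch added for clarity; it is not part of
# the original node. It reproduces the datetime64 -> epoch-seconds conversion
# performed in `process()` above; `tz_correct` stands in for the node's
# kwargs['tz correct'] setting (hours) and its value here is hypothetical.
def _demo_datetime_to_timestamps():
    import numpy as np
    import time

    t = np.array(['2015-01-01T00:00', '2015-01-01T06:00'], dtype='datetime64[m]')
    b = t.astype(np.dtype('datetime64[s]'))  # cast to 1-second resolution
    tz_correct = 0                           # hours; hypothetical user setting
    # integer seconds since the Unix epoch, shifted by the timezone correction
    timestamps = b.astype(np.int64) - tz_correct * 60 * 60 + time.timezone
    return timestamps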
def process(self, In):
    df = In
    if df is None:
        return
    self.CW().param('eq').setValue('')

    if self._df_id != id(df):
        # a new dataframe has arrived
        self._df_id = id(df)
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('sig'))
        colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self.CW().param('datetime').setLimits(colname)
        colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self.CW().param('sig').setLimits(colname)
        self.CW().connect_valueChanged2upd(self.CW().param('datetime'))
        self.CW().connect_valueChanged2upd(self.CW().param('sig'))

        # ------------------------------------------------------
        # now update our range selectors
        kwargs = self.CW().prepareInputArguments()
        t_vals = df[kwargs['datetime']].values
        t_min = pd.to_datetime(str(min(t_vals)))
        t_max = pd.to_datetime(str(max(t_vals)))

        self.CW().disconnect_valueChanged2upd(self.CW().param('t0'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('t1'))
        self.CW().param('t0').setValue(t_min.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t0').setDefault(t_min.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t1').setValue(t_max.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t1').setDefault(t_max.strftime('%Y-%m-%d %H:%M:%S'))
        if self.CW().p['ranges'] is True:
            self.CW().connect_valueChanged2upd(self.CW().param('t0'))
            self.CW().connect_valueChanged2upd(self.CW().param('t1'))

    # get params once again
    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------

    with BusyCursor():
        df_out, eq_str, function, self.fig = pandas_fourier_analysis(df, kwargs['sig'],
                                                                     date_name=kwargs['datetime'],
                                                                     ranges=kwargs['ranges'],
                                                                     N_MAX_POW=kwargs['N_MAX_POW'],
                                                                     generate_plot=True)
    self.CW().param('eq').setValue(eq_str)
    self._PLOT_REQUESTED = False
    return {'params': df_out, 'f(t)': function}
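# Illustrative usage sketch (an addition, not original code): how the
# `pandas_fourier_analysis` call above is wired. The column names, the
# `ranges` value and the `N_MAX_POW` value below are assumptions for
# demonstration only; only the call signature is taken from the node above.
def _demo_fourier_call(df):
    df_out, eq_str, function, fig = pandas_fourier_analysis(
        df, 'water_level',       # hypothetical signal column
        date_name='datetime',    # hypothetical datetime column
        ranges=None,             # assumed: analyse the full time range
        N_MAX_POW=5,             # assumed: number of harmonics to keep
        generate_plot=False)
    print(eq_str)  # human-readable equation of the fitted signal
    return df_out, function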
def process(self, df, md_peaks):
    E = None
    E_c = None
    self.CW().param('E = ').setValue(str(E))
    self.CW().param('gw').setWritable(True)

    if df is not None:
        for name in ['river', 'gw', 'datetime']:
            self.CW().disconnect_valueChanged2upd(self.CW().param(name))
        colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self.CW().param('river').setLimits(colname)
        self.CW().param('gw').setLimits(colname)
        colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self.CW().param('datetime').setLimits(colname)
        for name in ['river', 'gw', 'datetime']:
            self.CW().connect_valueChanged2upd(self.CW().param(name))

        kwargs = self.ctrlWidget().prepareInputArguments()

        if kwargs['method'] == '1) STD':
            E = tidalEfficiency_method1(df, kwargs['river'], kwargs['gw'])
            E_c = None

        elif kwargs['method'] == '2) Cyclic amplitude' or kwargs['method'] == '3) Cyclic STD':
            if md_peaks is None:
                msg = ('To use method `{0}` please provide "matched-peaks" data in terminal '
                       '`md_peaks` (a valid data-set can be created with node `Match Peaks`)').format(kwargs['method'])
                QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName), msg)
                raise ValueError(msg)

            self.CW().disconnect_valueChanged2upd(self.CW().param('gw'))
            self.CW().param('gw').setWritable(False)
            self.CW().param('gw').setLimits(['see matched peaks'])
            self.CW().connect_valueChanged2upd(self.CW().param('gw'))

            mPeaks_slice = md_peaks.loc[~md_peaks['md_N'].isin([np.nan, None])]  # select only valid cycles

            if kwargs['method'] == '2) Cyclic amplitude':
                E, E_cyclic = tidalEfficiency_method2(mPeaks_slice['tidal_range'],
                                                      mPeaks_slice['md_tidal_range'])
            elif kwargs['method'] == '3) Cyclic STD':
                with BusyCursor():
                    river_name = mPeaks_slice['name'][0]
                    well_name = mPeaks_slice['md_name'][0]
                    E, E_cyclic = tidalEfficiency_method3(df, river_name, well_name,
                                                          kwargs['datetime'],
                                                          mPeaks_slice['time_min'], mPeaks_slice['time_max'],
                                                          mPeaks_slice['md_time_min'], mPeaks_slice['md_time_max'])
            # now build a nice output table
            E_c = pd.DataFrame({'N': mPeaks_slice['N'],
                                'md_N': mPeaks_slice['md_N'],
                                'E_cyclic': E_cyclic,
                                })
        else:
            raise Exception('Method <%s> is not yet implemented' % kwargs['method'])

        self.CW().param('E = ').setValue('{0:.4f}'.format(E))
    return {'E': E, 'E_cyclic': E_c}
def on_data_recieved(self, df):
    """ Modify the TableWidget when data is received in the `data` terminal."""
    self._clear_comboboxes()
    self.clear(clearTable=False)
    if df is not None:
        colnamesNumeric = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        colnamesDatetime = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self._addItems_to_comboboxes(colnamesNumeric, colnamesDatetime)
        self.updateUI()
def process(self, In):
    df = In
    self._ctrlWidget.param("Period Check Params", "Warnings").setValue("?")

    colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self._ctrlWidget.param("column").setLimits(colname)
    colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self._ctrlWidget.param("datetime").setLimits(colname)

    kwargs = self._ctrlWidget.prepareInputArguments()

    with BusyCursor():
        peaks = detectPeaks_ts(df, kwargs.pop("column"), plot=self._plotRequired, **kwargs)
    self._ctrlWidget.param("Period Check Params", "Warnings").setValue(
        str(len(peaks[peaks["check"] == False])))
    return {"peaks": peaks}
def process(self, In):
    df = In
    self.CW().param('check_grp', 'MIN_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'MAX_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'ALL_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'warn_sum').setValue('?')
    self.CW().param('out_grp', 'raw_nmin').setValue('?')
    self.CW().param('out_grp', 'raw_nmax').setValue('?')
    self.CW().param('out_grp', 'raw_n_all').setValue('?')
    self.CW().param('out_grp', 'n_cycles').setValue('?')
    self.CW().param('Peak Detection Params', 'order').setValue('?')

    if df is None:
        return {'raw': None, 'peaks': None}

    colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self.CW().param('column').setLimits(colname)
    colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self.CW().param('datetime').setLimits(colname)

    kwargs = self.CW().prepareInputArguments()
    kwargs['split'] = True

    with BusyCursor():
        kwargs['order'] = prepare_order(kwargs['T'], kwargs['hMargin'],
                                        prepare_datetime(df, datetime=kwargs['datetime']))
        self.CW().param('Peak Detection Params', 'order').setValue(str(kwargs['order']))
        # peaks = detectPeaks_ts(df, kwargs.pop('column'), plot=self._plotRequired, **kwargs)
        extra, raw, peaks = full_peak_detection_routine(df,
                                                        col=kwargs.pop('column'),
                                                        date_col=kwargs.pop('datetime'),
                                                        IDs2mask=kwargs.pop('IDs2mask'),
                                                        valid_range=kwargs.pop('valid_range'),
                                                        plot=self._plotRequired,
                                                        **kwargs)
        n_warn_min = len(extra['warnings']['MIN'])
        n_warn_max = len(extra['warnings']['MAX'])
        n_warn_all = len(extra['warnings']['ALL'])
        self.CW().param('check_grp', 'MIN_grp', 'warn').setValue(n_warn_min)
        self.CW().param('check_grp', 'MAX_grp', 'warn').setValue(n_warn_max)
        self.CW().param('check_grp', 'ALL_grp', 'warn').setValue(n_warn_all)
        self.CW().param('check_grp', 'warn_sum').setValue(n_warn_min + n_warn_max + n_warn_all)
        self.CW().param('out_grp', 'raw_nmin').setValue(extra['raw_nmin'])
        self.CW().param('out_grp', 'raw_nmax').setValue(extra['raw_nmax'])
        if raw is not None:
            self.CW().param('out_grp', 'raw_n_all').setValue(len(raw.index))
        if peaks is not None:
            self.CW().param('out_grp', 'n_cycles').setValue(len(peaks.index))
    return {'raw': raw, 'peaks': peaks}
def process(self, W_peaks, GW_peaks):
    N_md = '?'
    df_w = W_peaks
    df_gw = GW_peaks

    colname = [col for col in df_w.columns if isNumpyDatetime(df_w[col].dtype)]
    self._ctrlWidget.param('Closest Time', 'Match Column').setLimits(colname)

    kwargs = self._ctrlWidget.prepareInputArguments()

    with BusyCursor():
        mode = kwargs.pop('Match Option')
        if mode == 'Closest Time':
            matched_peaks = match_peaks(df_w, df_gw, kwargs.pop('Match Column'), **kwargs)
            N_md = matched_peaks['md_N'].count()

    self._ctrlWidget.param('MATCHED/PEAKS').setValue('{0}/{1}'.format(N_md, len(df_w)))
    return {'matched': matched_peaks}
def process(self, coord, data):
    if data is not None:
        colname = [col for col in data.columns if isNumpyDatetime(data[col].dtype)]
        self._ctrlWidget.param('Datetime').setLimits(colname)
        self.data = data
    else:
        self.data = None
        return dict(this=None, All=self.All_out)

    if coord is not None:
        colname = [col for col in coord.columns if isNumpyNumeric(coord[col].dtype)]
        self._ctrlWidget.param('coords_grp', 'x').setLimits(colname)
        self._ctrlWidget.param('coords_grp', 'y').setLimits(colname)
        self.CW().disconnect_valueChanged2upd(self.CW().param('coords_grp', 'x'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('coords_grp', 'y'))
        self.CW().param('coords_grp', 'x').setValue(colname[0])
        self.CW().param('coords_grp', 'y').setValue(colname[1])
        self.CW().connect_valueChanged2upd(self.CW().param('coords_grp', 'x'))
        self.CW().connect_valueChanged2upd(self.CW().param('coords_grp', 'y'))
    else:
        return dict(this=None, All=self.All_out)

    # now make sure all wells specified in the `coord` dataframe are found in `data`
    well_names = coord.index.values
    for well_n in well_names:
        if well_n not in data.columns:
            raise ValueError('Well named `{0}` not found in `data` but is declared in `coords`'.format(well_n))

    kwargs = self.ctrlWidget().prepareInputArguments()

    # select the row with the user-specified datetime `timestep`
    row = data.loc[data[kwargs['datetime']] == kwargs['t']]
    if row.empty:
        raise IndexError('Selected timestep `{0}` not found in `data`s column {1}. Select a correct one'.format(kwargs['t'], kwargs['datetime']))

    # now prepare the dataframe for the Devlin calculations
    df = coord.copy()
    df['z'] = np.zeros(len(df.index))
    for well_n in well_names:
        df.loc[well_n, 'z'] = float(row[well_n])

    gradient, direction = devlin2003pandas(df, kwargs['x'], kwargs['y'], 'z')
    self.CW().param('grad').setValue(gradient)
    self.CW().param('angle').setValue(direction)

    # here we will generate a large dataset covering all timesteps
    if self.CW().CALCULATE_ALL:
        # now generate the long dataframe
        All = pd.DataFrame({kwargs['datetime']: data[kwargs['datetime']],
                            'gradient': np.zeros(len(data.index)),
                            'direction(degrees North)': np.zeros(len(data.index))})
        self.All_out = All  # pointer

        with pg.ProgressDialog("Calculating gradient for All timesteps {0}".format(len(All.index)),
                               0, len(All.index)) as dlg:
            for row_i in data.index:
                row = data.loc[row_i]
                z = np.zeros(len(coord.index))
                for i, well_n in enumerate(well_names):
                    z[i] = float(row[well_n])
                x = coord[kwargs['x']].values
                y = coord[kwargs['y']].values
                _, gradient, angle = devlin2003(np.matrix([x, y, z]).T)
                All.loc[row_i, 'gradient'] = gradient
                All.loc[row_i, 'direction(degrees North)'] = angle2bearing(angle, origin='N')[0]
                dlg += 1
                del z
                if dlg.wasCanceled():
                    del All
                    self.All_out = None
                    break
            # return dict(df=df, All=self.All_out)
            dlg += 1
    return dict(this=df, All=self.All_out)
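# Illustrative usage sketch (an addition, not original code): a minimal call of
# `devlin2003pandas` as used in `process()` above. Only the call signature is
# taken from the node; the well names, coordinates and heads are invented demo
# values.
def _demo_gradient_from_three_wells():
    import pandas as pd

    wells = pd.DataFrame({'x': [0.0, 100.0, 0.0],
                          'y': [0.0, 0.0, 100.0],
                          'z': [10.00, 9.95, 9.90]},  # hydraulic head per well
                         index=['well_A', 'well_B', 'well_C'])
    # returns the magnitude of the hydraulic gradient and its direction
    gradient, direction = devlin2003pandas(wells, 'x', 'y', 'z')
    return gradient, direction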
def filter_wl_71h_serfes1991(data, datetime=None, N=None, usecols=None,
                             keep_origin=True, verbose=False, log=False):
    """ Calculate mean water level according to Serfes (1991).

    Perform a column-wise time averaging in three iterations:
        1) the first sequence averages 24 hours of measurements,
        2) the second sequence averages 24 hours of the first sequence,
        3) the third sequence averages all values of the second sequence
           that were generated while the filter was applied to 71 hours.

    This function is a modified version of the original Serfes filter:
    it is not limited to hourly measurements.

    Args:
        data (pd.DataFrame): input data, where the indexes are datetime
            objects (see the `parse_dates` parameter of `pd.read_csv()`).
        datetime (Optional[str]): location of the datetime objects. Default
            is `None`, meaning that the datetime objects are located within
            `pd.DataFrame.index`. If not `None`, pass the name of the
            dataframe column where the datetime objects are located. This
            is needed to determine the number of measurements per day.
            Note: this argument is ignored if `N` is not `None`!
        N (Optional[int]): explicit number of measurements in 24 hours.
            Default is `None`, meaning that the script will try to determine
            the number of measurements per 24 hours based on the real
            datetime information provided with the `datetime` argument.
        usecols (Optional[List[str]]): explicitly pass the names of the
            columns to be evaluated. These columns must have a numerical
            dtype (i.e. int32, int64, float32, float64). Default is `None`,
            meaning that all numerical columns will be processed.
        keep_origin (Optional[bool]): if `True`, keep the original columns
            in the output dataframe. If `False`, return a dataframe that
            contains only the result columns and the original datetime
            columns.
        verbose (Optional[bool]): if `True`, keep all three iterations in
            the output; if `False`, save only the final (3rd) iteration.
            This may be useful for debugging or checking the filter.
        log (Optional[bool]): flag to print some progress information to
            the console.

    Returns:
        data (pd.DataFrame): input dataframe with the time-averaged values
            appended as new columns.
    """
    n = N  # for compatibility with the thesis description

    if usecols is None:
        # convert all columns: select only the numeric ones...
        numeric_columns = [col for col in data.columns if isNumpyNumeric(data[col].dtype)]
    else:
        # ...or convert only the user-defined (and numeric) columns
        numeric_columns = [col for col in data.columns
                           if (isNumpyNumeric(data[col].dtype) and col in usecols)]

    # if the user has not explicitly passed the number of measurements per day, find it out!
    if n is None:
        n = get_number_of_measurements_per_day(data, datetime=datetime, log=log)

    if log:
        print("All column names:", list(data.columns))
        print("Numeric columns:", numeric_columns)
        print("Will use the following number of entries per day:", n)

    if keep_origin:
        output = data
    else:
        output = pd.DataFrame()
        # copy the datetime columns
        datetime_columns = [col for col in data.columns if isNumpyDatetime(data[col].dtype)]
        for col in datetime_columns:
            output[col] = data[col]

    nX = int(n / 24.0 * 71 - (n - 1))  # number of elements in sequence_1
    nY = nX - (n - 1)                  # number of elements in sequence_2
    # print(n, nX, nY)

    for col_name in numeric_columns:
        if float(".".join(pd.__version__.split(".")[0:2])) < 0.18:
            # pandas older than 0.18 (old rolling API)
            output[col_name + "_sequence1"] = pd.rolling_mean(
                data[col_name], window=n, min_periods=n, center=True).values
            output[col_name + "_sequence2"] = pd.rolling_mean(
                output[col_name + "_sequence1"], window=n, min_periods=n, center=True).values
            output[col_name + "_mean"] = pd.rolling_mean(
                output[col_name + "_sequence2"], window=nY, min_periods=nY, center=True).values
        else:
            # new rolling API
            output[col_name + "_sequence1"] = data[col_name].rolling(
                window=n, min_periods=n, center=True).mean().values
            output[col_name + "_sequence2"] = output[col_name + "_sequence1"].rolling(
                window=n, min_periods=n, center=True).mean().values
            output[col_name + "_mean"] = output[col_name + "_sequence2"].rolling(
                window=nY, min_periods=nY, center=True).mean().values

        if not verbose:
            del output[col_name + "_sequence1"]
            del output[col_name + "_sequence2"]

    gc.collect()
    return output
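# Illustrative usage sketch (an addition, not original code). For hourly data
# (n = 24) the three passes use windows of n=24, n=24 and nY=25 samples:
# nX = 24/24 * 71 - 23 = 48 and nY = 48 - 23 = 25, so the combined filter
# spans 24 + 24 + 25 - 2 = 71 hours, as in the original Serfes (1991) scheme.
# The dataframe below is invented demo data.
def _demo_serfes_filter():
    import numpy as np
    import pandas as pd

    rng = pd.date_range('2015-01-01', periods=24 * 10, freq='H')  # 10 days of hourly data
    demo = pd.DataFrame({'datetime': rng,
                         'well_1': np.sin(np.arange(len(rng)) * 2. * np.pi / 12.42)})
    # appends the column 'well_1_mean' with the 71-hour averaged water level
    return filter_wl_71h_serfes1991(demo, datetime='datetime', log=True)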
def process(self, pattern, pickFrom):
    df1 = pickFrom
    df2 = pattern

    self.CW().disconnect_valueChanged2upd(self.CW().param('slice', 'Start'))
    self.CW().disconnect_valueChanged2upd(self.CW().param('slice', 'End'))
    if self.CW().p['slice'] is True:
        self.CW().connect_valueChanged2upd(self.CW().param('slice', 'Start'))
        self.CW().connect_valueChanged2upd(self.CW().param('slice', 'End'))

    if df1 is None:
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime <pickFrom>'))
        self.CW().param('datetime <pickFrom>').setLimits([None])
        self.CW().connect_valueChanged2upd(self.CW().param('datetime <pickFrom>'))
        return {'Out': None}
    else:
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime <pickFrom>'))
        colname = [col for col in df1.columns if isNumpyDatetime(df1[col].dtype)]
        self.CW().param('datetime <pickFrom>').setLimits(colname)
        self.CW().connect_valueChanged2upd(self.CW().param('datetime <pickFrom>'))

    if df2 is None:
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime <pattern>'))
        self.CW().param('datetime <pattern>').setLimits([None])
        self.CW().connect_valueChanged2upd(self.CW().param('datetime <pattern>'))
    else:
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime <pattern>'))
        colname = [col for col in df2.columns if isNumpyDatetime(df2[col].dtype)]
        self.CW().param('datetime <pattern>').setLimits(colname)
        self.CW().connect_valueChanged2upd(self.CW().param('datetime <pattern>'))

    if self._df1_id != id(df1):
        self._df1_id = id(df1)
        t_vals = df1[self.CW().p['datetime <pickFrom>']].values
        t_min, t_max = pd.to_datetime(str(min(t_vals))), pd.to_datetime(str(max(t_vals)))
        self.CW().param('slice', 'Start').setDefault(t_min.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('slice', 'End').setDefault(t_max.strftime('%Y-%m-%d %H:%M:%S'))

    kwargs = self.ctrlWidget().prepareInputArguments()

    # now actually slice
    if kwargs['slice']:
        df = df1.set_index(kwargs['datetime <pickFrom>'])
        start = df.index.searchsorted(kwargs['slice_start'], side='left')
        end = df.index.searchsorted(kwargs['slice_end'], side='right')
        del df
        df1 = df1[start:end].copy(deep=True)  # warning: pointer to a new DataFrame!

    # now pick only the dates that also appear in the other dataframe
    if kwargs['datetime <pattern>'] is not None and kwargs['datetime <pickFrom>'] is not None:
        selector = df1[kwargs['datetime <pickFrom>']].isin(df2[kwargs['datetime <pattern>']])
        df1 = df1[selector]

    gc.collect()
    return {'Out': df1}