def process(self, A, B):
    """Divide column `a` of A by optional column `b` of B and scalar `c`.

    Returns {'Out': DataFrame} with a modified deep copy of A, or
    {'Out': None} when terminal A is disconnected.
    """
    if A is None:
        # input removed -> drop the cached frame and propagate None
        del self.df
        self.df = None
        return {'Out': self.df}

    # refresh the `a` column selector without triggering an update loop
    self.CW().disconnect_valueChanged2upd(self.CW().param('a'))
    cols_A = [col for col in A.columns if isNumpyNumeric(A[col].dtype)]
    self.CW().param('a').setLimits(cols_A)
    self.CW().connect_valueChanged2upd(self.CW().param('a'))

    if B is not None:
        if len(A) != len(B):
            raise ValueError('Number of rows in both DataFrames must be equal. A has {0} rows, B has {1} rows'.format(len(A), len(B)))
        # leading `None` entry lets the user skip the B operand entirely
        self.CW().disconnect_valueChanged2upd(self.CW().param('b'))
        cols_B = [None] + [col for col in B.columns if isNumpyNumeric(B[col].dtype)]
        self.CW().param('b').setLimits(cols_B)
        self.CW().connect_valueChanged2upd(self.CW().param('b'))

    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------
    del self.df
    self.df = A.copy(deep=True)  # work on a copy, keep the caller's frame intact
    # actually do the divide operation
    if kwargs['b'] is None:
        self.df[kwargs['a']] = self.df[kwargs['a']] / kwargs['c']
    else:
        self.df[kwargs['a']] = self.df[kwargs['a']] / B[kwargs['b']] / kwargs['c']
    return {'Out': self.df}
def process(self, A, B):
    """Add optional column `b` of B and scalar `c` to column `a` of A.

    Returns {'Out': DataFrame} with a modified deep copy of A, or
    {'Out': None} when terminal A is disconnected.
    """
    if A is None:
        self.CW().param('a').setLimits([])
        del self.df
        self.df = None
        return {'Out': self.df}

    cols_A = [col for col in A.columns if isNumpyNumeric(A[col].dtype)]
    # touch the limits only when they really changed, to avoid update storms
    if self.CW().param('a').opts['limits'] != cols_A:
        self.CW().param('a').setLimits(cols_A)

    if B is not None:
        if len(A) != len(B):
            raise ValueError('Number of rows in both DataFrames must be equal. A has {0} rows, B has {1} rows'.format(len(A), len(B)))
        # SPACE is the "no column selected" placeholder entry
        cols_B = [SPACE] + [col for col in B.columns if isNumpyNumeric(B[col].dtype)]
        if self.CW().param('b').opts['limits'] != cols_B:
            # setLimits() may reset the current choice -> restore it afterwards
            cached = self.CW().param('b').value()
            self.CW().param('b').setLimits(cols_B)
            self.CW().param('b').setValue(cached)
    else:
        self.CW().param('b').setLimits([])

    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------
    del self.df
    self.df = A.copy(deep=True)  # operate on a deep copy of A
    # actually do the add operation
    if kwargs['b'] in [SPACE, None, '']:
        self.df[kwargs['a']] += kwargs['c']
    else:
        self.df[kwargs['a']] = self.df[kwargs['a']] + B[kwargs['b']] + kwargs['c']
    return {'Out': self.df}
def process(self, In):
    """Run the Serfes (1991) 71-hour moving-average filter on `In`.

    NOTE(review): the `with BusyCursor()` scope is assumed to cover the whole
    body so `df` exists on every path — confirm against the original layout.
    """
    gc.collect()
    # populate (Apply to columns) param only on item received, not on button click
    if not self._ctrlWidget.calculateNAllowed() and not self._ctrlWidget.applyAllowed():
        self._ctrlWidget.param('Apply to columns').clearChildren()
    with BusyCursor():
        df = copy.deepcopy(In)
        # check out http://docs.scipy.org/doc/numpy-dev/neps/datetime-proposal.html
        colnames = [col for col in df.columns if isNumpyDatetime(df[col].dtype)] + [None]
        self._ctrlWidget.param('datetime').setLimits(colnames)
        self._ctrlWidget.param('datetime').setValue(colnames[0])
        if not self._ctrlWidget.calculateNAllowed() and not self._ctrlWidget.applyAllowed():
            # one boolean child per numeric column, all enabled by default
            for col_name in [col for col in df.columns if isNumpyNumeric(df[col].dtype)]:
                self._ctrlWidget.param('Apply to columns').addChild(
                    {'name': col_name, 'type': 'bool', 'value': True})
        kwargs = self.ctrlWidget().prepareInputArguments()
        if self._ctrlWidget.calculateNAllowed():
            N = serfes.get_number_of_measurements_per_day(df, datetime=kwargs['datetime'], log=kwargs['log'])
            self._ctrlWidget.param('N').setValue(N)
        if self._ctrlWidget.applyAllowed():
            if kwargs['N'] in [None, '']:
                QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName),
                                          'First set number of measurements per day in parameter `N`')
                raise ValueError('First set number of measurements per day in parameter `N`')
            result = serfes.filter_wl_71h_serfes1991(df, **kwargs)
            return {'Out': result}
def on_tides_received(self, tides):
    """Populate the tide widgets with information from the DataFrame.

    Args:
        tides (pd.DataFrame): specially designed dataframe, see docs.
    """
    # block valueChanged while repopulating, to prevent premature updates
    self.on_update_disconnect()
    self.le_n_tides.setText(str(len(tides) - 1))
    colnames = [col for col in tides.columns if isNumpyNumeric(tides[col].dtype)]
    for combo in (self.cb_tides_A, self.cb_tides_omega, self.cb_tides_phi):
        combo.addItems(colnames)
    # default mapping: columns 0/1/2 -> A/omega/phi
    self.cb_tides_A.setCurrentIndex(0)
    self.cb_tides_omega.setCurrentIndex(1)
    self.cb_tides_phi.setCurrentIndex(2)
    W = tides[self.cb_tides_A.currentText()][0]  # 1st value from column `A`
    self.sb_main_const.setValue(W)
    self.on_update_connect()  # reconnect valueChanged signal
def on_tides_received(self, tides):
    """Fill the tide-related widgets from the received DataFrame.

    Args:
        tides (pd.DataFrame): specially designed dataframe, see docs.
    """
    self.on_update_disconnect()  # silence valueChanged during repopulation
    self.le_n_tides.setText(str(len(tides) - 1))
    numeric_columns = [c for c in tides.columns if isNumpyNumeric(tides[c].dtype)]
    self.cb_tides_A.addItems(numeric_columns)
    self.cb_tides_omega.addItems(numeric_columns)
    self.cb_tides_phi.addItems(numeric_columns)
    # pre-select the conventional column order A, omega, phi
    for index, combo in enumerate((self.cb_tides_A, self.cb_tides_omega, self.cb_tides_phi)):
        combo.setCurrentIndex(index)
    # seed the main constant with the first value of the `A` column
    first_A = tides[self.cb_tides_A.currentText()][0]
    self.sb_main_const.setValue(first_A)
    self.on_update_connect()  # re-enable valueChanged
def process(self, df):
    """Build a time-stamped curve item and a time-indexed series from `df`.

    Returns {'Curve': PlotDataItem, 'pd.Series': DataFrame}, or Nones when
    the input terminal is disconnected.
    """
    if df is None:
        del self.item
        self.item = None
        return {'Curve': None, 'pd.Series': None}
    if self.item is None:
        self.item = PlotDataItem(clipToView=False)

    # refresh the signal / datetime column selectors
    colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self._ctrlWidget.param('Y:signal').setLimits(colname)
    colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self._ctrlWidget.param('X:datetime').setLimits(colname)

    with BusyCursor():
        kwargs = self.ctrlWidget().prepareInputArguments()
        t = df[kwargs['X:datetime']].values
        # part 1: a time-indexed frame of the selected signal
        timeSeries = pd.DataFrame(data=df[kwargs['Y:signal']].values,
                                  index=t, columns=[kwargs['Y:signal']])
        # part 2: convert datetimes to epoch seconds with a timezone correction
        b = t.astype(np.dtype('datetime64[s]'))
        timeStamps = b.astype(np.int64) - kwargs['tz correct'] * 60 * 60 + time.timezone
        # now create the curve
        pen = fn.mkPen(color=kwargs['color'], width=kwargs['width'], style=kwargs['style'])
        self.item.setData(timeStamps, df[kwargs['Y:signal']].values,
                          pen=pen, name=kwargs['Y:signal'])
        self.item.setSymbol(kwargs['symbol'])
        if kwargs['symbol'] is not None:
            self.item.setSymbolPen(kwargs['color'])
            self.item.setSymbolBrush(kwargs['color'])
            self.item.setSymbolSize(kwargs['symbolSize'])
    return {'Curve': self.item, 'pd.Series': timeSeries}
def on_pushButton_viewPlot_clicked(self):
    """Open a nice graphic representation of our data."""
    with BusyCursor():
        try:
            df = self.parent().getPandasDataModel().df
            # consider only the selected columns
            columns = self.parent().getPandasHeaderModel().selectedColumns()
            datetime_cols = [col for col in columns if isNumpyDatetime(df[col].dtype)]
            numeric_cols = [col for col in columns if isNumpyNumeric(df[col].dtype)]
            # plot with x=datetime if possible
            datetime_col = datetime_cols[0] if datetime_cols else None
            if self.checkBox_separateSubplots.isChecked() and len(numeric_cols) > 1:
                # one subplot per selected numeric column, shared x-axis
                f, axes = plt.subplots(len(numeric_cols), sharex=True)
                for ax, numeric_col in zip(axes, numeric_cols):
                    df.plot(x=datetime_col, y=numeric_col, ax=ax)
                    legend = ax.legend(shadow=True)
                # fine-tune figure; hide x tick labels for all but the bottom subplot
                plt.setp([a.get_xticklabels() for a in f.axes[:-1]], visible=False)
            else:
                # all selected numeric columns together on a single subplot
                f, ax = plt.subplots(1)
                for numeric_col in numeric_cols:
                    df.plot(x=datetime_col, y=numeric_col, ax=ax)
                legend = ax.legend(shadow=True)
            f.show()
        except Exception as exp:
            # NOTE(review): other paths use self.parent(); confirm _parent is right
            self._parent.setException(exp)
            return
def process(self, In):
    """Show direction statistics and, when allowed, plot a direction rose."""
    df = In
    if df is None:
        # input disconnected -> clear the selector and every stat field
        self._ctrlWidget.param('Bearing').setLimits([''])
        for name in ('Max', 'Min', 'Mean', 'Median', 'STD', 'Bin Width'):
            self._ctrlWidget.param(name).setValue('')
        return

    if not self._ctrlWidget.plotAllowed():
        # new dataframe received -> update the selection list and statistics
        colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self._ctrlWidget.param('Bearing').setLimits(colname)
        column = self._ctrlWidget.param('Bearing').value()
        Max = df[column].max()
        Min = df[column].min()
        MU = df[column].mean()
        ME = df[column].median()
        STD = df[column].std()
        NBins = self._ctrlWidget.param('Bins').value()
        self._ctrlWidget.param('Max').setValue('{0:.3f}'.format(Max))
        self._ctrlWidget.param('Min').setValue('{0:.3f}'.format(Min))
        self._ctrlWidget.param('Mean').setValue('{0:.3f}'.format(MU))
        self._ctrlWidget.param('Median').setValue('{0:.3f}'.format(ME))
        self._ctrlWidget.param('STD').setValue('{0:.3f}'.format(STD))
        self._ctrlWidget.param('Bin Width').setValue('{0:.3f}'.format(360. / float(NBins)))

    if self._ctrlWidget.plotAllowed():
        kwargs = self.ctrlWidget().prepareInputArguments()
        # sanity check: bearings are expected to lie within [0:360]
        MIN = df[kwargs['Bearing']].min()
        MAX = df[kwargs['Bearing']].max()
        if MAX > 360. or MAX < 0. or MIN < 0. or MIN > 360.:
            msg = 'Note that the data you are going to plot is most likely not the directions, since the MIN={0} and MAX={1}. It is expected that the data-values fall in range [0:360].\nThe figure will be plotted anyway. Continuing...'.format(MIN, MAX)
            QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName), msg)
        plot_direction_rose(df, COLNAME=kwargs['Bearing'], N=kwargs['Bins'],
                            RELATIVE_HIST=kwargs['relative_hist'],
                            PLOT_LINE_SUBPLOT=kwargs['add_l_plt'],
                            R_FONTSIZE='x-small',
                            THETA_DIRECTION=kwargs['theta_dir'],
                            THETA_ZERO_LOCATION=kwargs['theta_0'],
                            THETA_GRIDS_VALUES=kwargs['theta_grid_values'],
                            THETA_GRIDS_LABELS=kwargs['theta_grid_labels'],
                            R_LABEL_POS=kwargs['r_label_pos'],
                            BOTTOM=kwargs['bottom'])
def process(self, In):
    """Fourier-analyse the selected signal; return fit params and f(t)."""
    df = In
    if df is None:
        # clear the equation display; setValue's return is propagated unchanged
        return self.CW().param('eq').setValue('')

    if self._df_id != id(df):
        # a new dataframe object arrived -> refresh selectors and time range
        self._df_id = id(df)
        self.CW().disconnect_valueChanged2upd(self.CW().param('datetime'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('sig'))
        dt_cols = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self.CW().param('datetime').setLimits(dt_cols)
        sig_cols = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self.CW().param('sig').setLimits(sig_cols)
        self.CW().connect_valueChanged2upd(self.CW().param('datetime'))
        self.CW().connect_valueChanged2upd(self.CW().param('sig'))
        # ------------------------------------------------------
        # now update our range selectors t0/t1 from the data extent
        kwargs = self.CW().prepareInputArguments()
        t_vals = df[kwargs['datetime']].values
        t_min = pd.to_datetime(str(min(t_vals)))
        t_max = pd.to_datetime(str(max(t_vals)))
        self.CW().disconnect_valueChanged2upd(self.CW().param('t0'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('t1'))
        self.CW().param('t0').setValue(t_min.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t0').setDefault(t_min.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t1').setValue(t_max.strftime('%Y-%m-%d %H:%M:%S'))
        self.CW().param('t1').setDefault(t_max.strftime('%Y-%m-%d %H:%M:%S'))
        if self.CW().p['ranges'] is True:
            self.CW().connect_valueChanged2upd(self.CW().param('t0'))
            self.CW().connect_valueChanged2upd(self.CW().param('t1'))

    # get params once again (t0/t1 may have been rewritten above)
    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------
    with BusyCursor():
        df_out, eq_str, function, self.fig = pandas_fourier_analysis(
            df, kwargs['sig'], date_name=kwargs['datetime'],
            ranges=kwargs['ranges'], N_MAX_POW=kwargs['N_MAX_POW'],
            generate_plot=True)
    self.CW().param('eq').setValue(eq_str)
    self._PLOT_REQUESTED = False
    return {'params': df_out, 'f(t)': function}
def process(self, df, md_peaks):
    """Compute tidal efficiency E with one of three selectable methods.

    Returns {'E': float|None, 'E_cyclic': DataFrame|None}.
    """
    E = None
    self.CW().param('E = ').setValue(str(E))
    self.CW().param('gw').setWritable(True)
    if df is not None:
        # refresh column selectors without firing parameter updates
        for name in ['river', 'gw', 'datetime']:
            self.CW().disconnect_valueChanged2upd(self.CW().param(name))
        num_cols = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self.CW().param('river').setLimits(num_cols)
        self.CW().param('gw').setLimits(num_cols)
        dt_cols = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self.CW().param('datetime').setLimits(dt_cols)
        for name in ['river', 'gw', 'datetime']:
            self.CW().connect_valueChanged2upd(self.CW().param(name))

        kwargs = self.ctrlWidget().prepareInputArguments()
        if kwargs['method'] == '1) STD':
            E = tidalEfficiency_method1(df, kwargs['river'], kwargs['gw'])
            E_c = None
        elif kwargs['method'] == '2) Cyclic amplitude' or kwargs['method'] == '3) Cyclic STD':
            if md_peaks is None:
                msg = 'To use method `{0}` please provide "matched-peaks" data in terminal `md_peaks` (a valid data-set can be created with node `Match Peaks`)'.format(kwargs['method'])
                QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName), msg)
                raise ValueError(msg)
            # the `gw` column is dictated by the matched-peaks data here
            self.CW().disconnect_valueChanged2upd(self.CW().param('gw'))
            self.CW().param('gw').setWritable(False)
            self.CW().param('gw').setLimits(['see matched peaks'])
            self.CW().connect_valueChanged2upd(self.CW().param('gw'))
            # select only valid cycles
            mPeaks_slice = md_peaks.loc[~md_peaks['md_N'].isin([np.nan, None])]
            if kwargs['method'] == '2) Cyclic amplitude':
                E, E_cyclic = tidalEfficiency_method2(mPeaks_slice['tidal_range'],
                                                      mPeaks_slice['md_tidal_range'])
            elif kwargs['method'] == '3) Cyclic STD':
                with BusyCursor():
                    river_name = mPeaks_slice['name'][0]
                    well_name = mPeaks_slice['md_name'][0]
                    E, E_cyclic = tidalEfficiency_method3(
                        df, river_name, well_name, kwargs['datetime'],
                        mPeaks_slice['time_min'], mPeaks_slice['time_max'],
                        mPeaks_slice['md_time_min'], mPeaks_slice['md_time_max'])
            # now do nice output table
            E_c = pd.DataFrame({'N': mPeaks_slice['N'],
                                'md_N': mPeaks_slice['md_N'],
                                'E_cyclic': E_cyclic,
                                })
        else:
            raise Exception('Method <%s> is not yet implemented' % kwargs['method'])
        self.CW().param('E = ').setValue('{0:.4f}'.format(E))
        return {'E': E, 'E_cyclic': E_c}
def on_data_recieved(self, df):
    """Modify the TableWidget when data is received in the `data` terminal."""
    self._clear_comboboxes()
    self.clear(clearTable=False)
    if df is not None:
        num_cols = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        dt_cols = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
        self._addItems_to_comboboxes(num_cols, dt_cols)
        self.updateUI()
def process(self, In):
    """Apply the Serfes (1991) 71h filter; compute/populate params as allowed.

    NOTE(review): `with BusyCursor()` is assumed to span the whole body so
    that `df` is defined on every path — confirm against the original layout.
    """
    gc.collect()
    widget = self._ctrlWidget
    receiving_item = (not widget.calculateNAllowed() and not widget.applyAllowed())
    # populate USE COLUMNS param only on item received, not on button clicks
    if receiving_item:
        widget.param('Apply to columns').clearChildren()
    with BusyCursor():
        df = copy.deepcopy(In)
        # check out http://docs.scipy.org/doc/numpy-dev/neps/datetime-proposal.html
        colnames = [c for c in df.columns if isNumpyDatetime(df[c].dtype)] + [None]
        widget.param('datetime').setLimits(colnames)
        widget.param('datetime').setValue(colnames[0])
        if receiving_item:
            # add one enabled boolean child per numeric column
            for col_name in [c for c in df.columns if isNumpyNumeric(df[c].dtype)]:
                widget.param('Apply to columns').addChild(
                    {'name': col_name, 'type': 'bool', 'value': True})
        kwargs = self.ctrlWidget().prepareInputArguments()
        if widget.calculateNAllowed():
            N = serfes.get_number_of_measurements_per_day(
                df, datetime=kwargs['datetime'], log=kwargs['log'])
            widget.param('N').setValue(N)
        if widget.applyAllowed():
            if kwargs['N'] in [None, '']:
                QtGui.QMessageBox.warning(
                    None, "Node: {0}".format(self.nodeName),
                    'First set number of measurements per day in parameter `N`')
                raise ValueError(
                    'First set number of measurements per day in parameter `N`')
            result = serfes.filter_wl_71h_serfes1991(df, **kwargs)
            return {'Out': result}
def process(self, A, B):
    """Subtract optional column `b` of B and scalar `c` from column `a` of A.

    Returns {'Out': DataFrame} with a modified deep copy of A, or
    {'Out': None} when terminal A is disconnected.
    """
    if A is None:
        del self.df
        self.df = None
        return {'Out': self.df}

    # refresh the `a` selector, preserving the user's current choice
    self.CW().disconnect_valueChanged2upd(self.CW().param('a'))
    cols_A = [col for col in A.columns if isNumpyNumeric(A[col].dtype)]
    self.CW().param('a').setLimits(cols_A)
    self.CW().param('a').setValue(self.CW().p['a'])
    self.CW().connect_valueChanged2upd(self.CW().param('a'))

    if B is not None:
        if len(A) != len(B):
            raise ValueError('Number of rows in both DataFrames must be equal. A has {0} rows, B has {1} rows'.format(len(A), len(B)))
        self.CW().disconnect_valueChanged2upd(self.CW().param('b'))
        cols_B = [None] + [col for col in B.columns if isNumpyNumeric(B[col].dtype)]
        self.CW().param('b').setLimits(cols_B)
        self.CW().param('b').setValue(self.CW().p['b'])
        self.CW().connect_valueChanged2upd(self.CW().param('b'))

    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------
    del self.df
    self.df = A.copy(deep=True)  # operate on a deep copy of A
    # actually do the subtract operation
    if kwargs['b'] is None:
        self.df[kwargs['a']] -= kwargs['c']
    else:
        self.df[kwargs['a']] = self.df[kwargs['a']] - B[kwargs['b']] - kwargs['c']
    return {'Out': self.df}
def process(self, In):
    """Update direction statistics; plot the rose diagram when requested."""
    df = In
    if df is None:
        # no input: reset selector and all read-only stat fields
        self._ctrlWidget.param('Bearing').setLimits([''])
        self._ctrlWidget.param('Max').setValue('')
        self._ctrlWidget.param('Min').setValue('')
        self._ctrlWidget.param('Mean').setValue('')
        self._ctrlWidget.param('Median').setValue('')
        self._ctrlWidget.param('STD').setValue('')
        self._ctrlWidget.param('Bin Width').setValue('')
        return

    if not self._ctrlWidget.plotAllowed():
        # new dataframe received into terminal -> refresh selection & stats
        numeric = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self._ctrlWidget.param('Bearing').setLimits(numeric)
        column = self._ctrlWidget.param('Bearing').value()
        series = df[column]
        n_bins = self._ctrlWidget.param('Bins').value()
        stats = (('Max', series.max()),
                 ('Min', series.min()),
                 ('Mean', series.mean()),
                 ('Median', series.median()),
                 ('STD', series.std()),
                 ('Bin Width', 360. / float(n_bins)))
        for name, value in stats:
            self._ctrlWidget.param(name).setValue('{0:.3f}'.format(value))

    if self._ctrlWidget.plotAllowed():
        kwargs = self.ctrlWidget().prepareInputArguments()
        # check that the data is in range [0 to 360]
        MIN = df[kwargs['Bearing']].min()
        MAX = df[kwargs['Bearing']].max()
        if MAX > 360. or MAX < 0. or MIN < 0. or MIN > 360.:
            msg = 'Note that the data you are going to plot is most likely not the directions, since the MIN={0} and MAX={1}. It is expected that the data-values fall in range [0:360].\nThe figure will be plotted anyway. Continuing...'.format(MIN, MAX)
            QtGui.QMessageBox.warning(None, "Node: {0}".format(self.nodeName), msg)
        plot_direction_rose(df,
                            COLNAME=kwargs['Bearing'],
                            N=kwargs['Bins'],
                            RELATIVE_HIST=kwargs['relative_hist'],
                            PLOT_LINE_SUBPLOT=kwargs['add_l_plt'],
                            R_FONTSIZE='x-small',
                            THETA_DIRECTION=kwargs['theta_dir'],
                            THETA_ZERO_LOCATION=kwargs['theta_0'],
                            THETA_GRIDS_VALUES=kwargs['theta_grid_values'],
                            THETA_GRIDS_LABELS=kwargs['theta_grid_labels'],
                            R_LABEL_POS=kwargs['r_label_pos'],
                            BOTTOM=kwargs['bottom'])
def on_data_recieved(self, df):
    """Refresh the TableWidget after data arrives at the `data` terminal."""
    self._clear_comboboxes()
    self.clear(clearTable=False)
    if df is not None:
        numeric_names = [c for c in df.columns if isNumpyNumeric(df[c].dtype)]
        datetime_names = [c for c in df.columns if isNumpyDatetime(df[c].dtype)]
        self._addItems_to_comboboxes(numeric_names, datetime_names)
        self.updateUI()
def process(self, In):
    """Detect peaks in the selected column and report failed period checks."""
    df = In
    self._ctrlWidget.param("Period Check Params", "Warnings").setValue("?")
    num_cols = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self._ctrlWidget.param("column").setLimits(num_cols)
    dt_cols = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self._ctrlWidget.param("datetime").setLimits(dt_cols)
    kwargs = self._ctrlWidget.prepareInputArguments()
    with BusyCursor():
        peaks = detectPeaks_ts(df, kwargs.pop("column"), plot=self._plotRequired, **kwargs)
        # elementwise comparison on purpose: count rows that failed the check
        n_failed = len(peaks[peaks["check"] == False])
        self._ctrlWidget.param("Period Check Params", "Warnings").setValue(str(n_failed))
    return {"peaks": peaks}
def process(self, In):
    """Update signal statistics; plot the histogram when requested."""
    df = In
    if df is None:
        # input disconnected: clear the selector and every stat field
        self._ctrlWidget.param('Signal').setLimits([''])
        for name in ('Max', 'Min', 'Mean', 'Median', 'STD', 'Bin Width'):
            self._ctrlWidget.param(name).setValue('')
        return

    if not self._ctrlWidget.plotAllowed():
        # new dataframe received into terminal -> refresh selection & stats
        colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self._ctrlWidget.param('Signal').setLimits(colname)
        column = self._ctrlWidget.param('Signal').value()
        Max = df[column].max()
        Min = df[column].min()
        MU = df[column].mean()
        ME = df[column].median()
        STD = df[column].std()
        NBins = self._ctrlWidget.param('Bins').value()
        self._ctrlWidget.param('Max').setValue('{0:.3f}'.format(Max))
        self._ctrlWidget.param('Min').setValue('{0:.3f}'.format(Min))
        self._ctrlWidget.param('Mean').setValue('{0:.3f}'.format(MU))
        self._ctrlWidget.param('Median').setValue('{0:.3f}'.format(ME))
        self._ctrlWidget.param('STD').setValue('{0:.3f}'.format(STD))
        self._ctrlWidget.param('Bin Width').setValue('{0:.3f}'.format((Max - Min) / float(NBins)))

    if self._ctrlWidget.plotAllowed():
        kwargs = self.ctrlWidget().prepareInputArguments()
        if df[kwargs['Signal']].count() != len(df.index):
            # NaNs break the histogram routine -> warn and bail out
            QtGui.QMessageBox.warning(
                None, 'Cannot Make Histogram',
                "The signal `{0}` contains NaN values. Can not create histogram while NaNs are presents. Remove them first".format(kwargs['Signal']),
                QtGui.QMessageBox.Ok)
            return
        plot_pandas.plot_statistical_analysis(
            df[kwargs['Signal']],
            plot_title='Original Signal: {0}'.format(kwargs['Signal']),
            bins=kwargs['Bins'],
            data_units=kwargs['Signal Units'],
            hist_type=kwargs['Histogram Type'])
def process(self, In):
    """Run the full peak-detection routine; return raw points and cycles."""
    df = In
    # reset every status field before doing any work
    self.CW().param('check_grp', 'MIN_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'MAX_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'ALL_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'warn_sum').setValue('?')
    self.CW().param('out_grp', 'raw_nmin').setValue('?')
    self.CW().param('out_grp', 'raw_nmax').setValue('?')
    self.CW().param('out_grp', 'raw_n_all').setValue('?')
    self.CW().param('out_grp', 'n_cycles').setValue('?')
    self.CW().param('Peak Detection Params', 'order').setValue('?')
    if df is None:
        return {'raw': None, 'peaks': None}

    num_cols = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self.CW().param('column').setLimits(num_cols)
    dt_cols = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self.CW().param('datetime').setLimits(dt_cols)

    kwargs = self.CW().prepareInputArguments()
    kwargs['split'] = True
    with BusyCursor():
        kwargs['order'] = prepare_order(kwargs['T'], kwargs['hMargin'],
                                        prepare_datetime(df, datetime=kwargs['datetime']))
        self.CW().param('Peak Detection Params', 'order').setValue(str(kwargs['order']))
        extra, raw, peaks = full_peak_detection_routine(
            df,
            col=kwargs.pop('column'),
            date_col=kwargs.pop('datetime'),
            IDs2mask=kwargs.pop('IDs2mask'),
            valid_range=kwargs.pop('valid_range'),
            plot=self._plotRequired,
            **kwargs)
        # report warning counts per group and their sum
        n_warn_min = len(extra['warnings']['MIN'])
        n_warn_max = len(extra['warnings']['MAX'])
        n_warn_all = len(extra['warnings']['ALL'])
        self.CW().param('check_grp', 'MIN_grp', 'warn').setValue(n_warn_min)
        self.CW().param('check_grp', 'MAX_grp', 'warn').setValue(n_warn_max)
        self.CW().param('check_grp', 'ALL_grp', 'warn').setValue(n_warn_all)
        self.CW().param('check_grp', 'warn_sum').setValue(n_warn_min + n_warn_max + n_warn_all)
        self.CW().param('out_grp', 'raw_nmin').setValue(extra['raw_nmin'])
        self.CW().param('out_grp', 'raw_nmax').setValue(extra['raw_nmax'])
    if raw is not None:
        self.CW().param('out_grp', 'raw_n_all').setValue(len(raw.index))
    if peaks is not None:
        self.CW().param('out_grp', 'n_cycles').setValue(len(peaks.index))
    return {'raw': raw, 'peaks': peaks}
def process(self, In):
    """Run a Fourier analysis of the chosen signal column.

    Returns {'params': DataFrame, 'f(t)': callable} for a valid input.
    """
    df = In
    if df is None:
        # clear the equation field; propagate setValue's return unchanged
        return self.CW().param('eq').setValue('')

    if self._df_id != id(df):
        # new dataframe object -> repopulate column selectors silently
        self._df_id = id(df)
        for pname in ('datetime', 'sig'):
            self.CW().disconnect_valueChanged2upd(self.CW().param(pname))
        self.CW().param('datetime').setLimits(
            [col for col in df.columns if isNumpyDatetime(df[col].dtype)])
        self.CW().param('sig').setLimits(
            [col for col in df.columns if isNumpyNumeric(df[col].dtype)])
        for pname in ('datetime', 'sig'):
            self.CW().connect_valueChanged2upd(self.CW().param(pname))
        # ------------------------------------------------------
        # now update our range selectors from the data's time extent
        kwargs = self.CW().prepareInputArguments()
        t_vals = df[kwargs['datetime']].values
        t_min = pd.to_datetime(str(min(t_vals)))
        t_max = pd.to_datetime(str(max(t_vals)))
        self.CW().disconnect_valueChanged2upd(self.CW().param('t0'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('t1'))
        fmt = '%Y-%m-%d %H:%M:%S'
        self.CW().param('t0').setValue(t_min.strftime(fmt))
        self.CW().param('t0').setDefault(t_min.strftime(fmt))
        self.CW().param('t1').setValue(t_max.strftime(fmt))
        self.CW().param('t1').setDefault(t_max.strftime(fmt))
        if self.CW().p['ranges'] is True:
            self.CW().connect_valueChanged2upd(self.CW().param('t0'))
            self.CW().connect_valueChanged2upd(self.CW().param('t1'))

    # get params once again (range selectors may have changed above)
    kwargs = self.CW().prepareInputArguments()
    # ------------------------------------------------------
    with BusyCursor():
        df_out, eq_str, function, self.fig = pandas_fourier_analysis(
            df, kwargs['sig'],
            date_name=kwargs['datetime'],
            ranges=kwargs['ranges'],
            N_MAX_POW=kwargs['N_MAX_POW'],
            generate_plot=True)
    self.CW().param('eq').setValue(eq_str)
    self._PLOT_REQUESTED = False
    return {'params': df_out, 'f(t)': function}
def on_pushButton_viewPlot_clicked(self):
    """Open a graphic representation of the currently selected columns."""
    with BusyCursor():
        try:
            df = self.parent().getPandasDataModel().df
            # consider only the selected columns
            columns = self.parent().getPandasHeaderModel().selectedColumns()
            dt_cols = [c for c in columns if isNumpyDatetime(df[c].dtype)]
            num_cols = [c for c in columns if isNumpyNumeric(df[c].dtype)]
            # plot with x=datetime if possible
            x_column = dt_cols[0] if len(dt_cols) > 0 else None
            separate = self.checkBox_separateSubplots.isChecked()
            if separate and len(num_cols) > 1:
                # each selected numerical column on an individual subplot
                fig, axes = plt.subplots(len(num_cols), sharex=True)
                for axis, col in zip(axes, num_cols):
                    df.plot(x=x_column, y=col, ax=axis)
                    legend = axis.legend(shadow=True)
                # hide x tick labels for every subplot but the bottom one
                plt.setp([a.get_xticklabels() for a in fig.axes[:-1]], visible=False)
            else:
                # all selected numerical columns on a single subplot
                fig, axis = plt.subplots(1)
                for col in num_cols:
                    df.plot(x=x_column, y=col, ax=axis)
                legend = axis.legend(shadow=True)
            fig.show()
        except Exception as exp:
            # NOTE(review): other paths use self.parent(); confirm _parent is right
            self._parent.setException(exp)
            return
def process(self, In):
    """Show statistics for the selected signal and optionally plot a histogram."""
    df = In
    if df is None:
        # no input: blank the selector and every read-only statistic
        self._ctrlWidget.param('Signal').setLimits([''])
        self._ctrlWidget.param('Max').setValue('')
        self._ctrlWidget.param('Min').setValue('')
        self._ctrlWidget.param('Mean').setValue('')
        self._ctrlWidget.param('Median').setValue('')
        self._ctrlWidget.param('STD').setValue('')
        self._ctrlWidget.param('Bin Width').setValue('')
        return

    if not self._ctrlWidget.plotAllowed():
        # a new dataframe arrived -> update selector and statistics
        numeric = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
        self._ctrlWidget.param('Signal').setLimits(numeric)
        series = df[self._ctrlWidget.param('Signal').value()]
        n_bins = self._ctrlWidget.param('Bins').value()
        hi, lo = series.max(), series.min()
        stats = (('Max', hi),
                 ('Min', lo),
                 ('Mean', series.mean()),
                 ('Median', series.median()),
                 ('STD', series.std()),
                 ('Bin Width', (hi - lo) / float(n_bins)))
        for name, value in stats:
            self._ctrlWidget.param(name).setValue('{0:.3f}'.format(value))

    if self._ctrlWidget.plotAllowed():
        kwargs = self.ctrlWidget().prepareInputArguments()
        if df[kwargs['Signal']].count() != len(df.index):
            # NaNs break the histogram routine -> warn the user and abort
            QtGui.QMessageBox.warning(
                None, 'Cannot Make Histogram',
                "The signal `{0}` contains NaN values. Can not create histogram while NaNs are presents. Remove them first".format(kwargs['Signal']),
                QtGui.QMessageBox.Ok)
            return
        plot_pandas.plot_statistical_analysis(
            df[kwargs['Signal']],
            plot_title='Original Signal: {0}'.format(kwargs['Signal']),
            bins=kwargs['Bins'],
            data_units=kwargs['Signal Units'],
            hist_type=kwargs['Histogram Type'])
def process(self, In):
    """Detect peaks in the chosen column; publish the failed-check count."""
    df = In
    self._ctrlWidget.param('Period Check Params', 'Warnings').setValue('?')
    self._ctrlWidget.param('column').setLimits(
        [col for col in df.columns if isNumpyNumeric(df[col].dtype)])
    self._ctrlWidget.param('datetime').setLimits(
        [col for col in df.columns if isNumpyDatetime(df[col].dtype)])
    kwargs = self._ctrlWidget.prepareInputArguments()
    with BusyCursor():
        peaks = detectPeaks_ts(df, kwargs.pop('column'), plot=self._plotRequired, **kwargs)
        # elementwise comparison on purpose: count rows that failed the check
        n_failed = len(peaks[peaks['check'] == False])
        self._ctrlWidget.param('Period Check Params', 'Warnings').setValue(str(n_failed))
    return {'peaks': peaks}
def process(self, In):
    """Scatter-plot the selected columns; optionally plot y-x "overheads".

    When `plot overheads` is enabled, a temporary difference column is added
    to the CALLER'S dataframe for plotting and removed again afterwards.
    The removal is done in a `finally` clause so the temporary column can
    never leak into the input dataframe if plotting raises.
    """
    df = In
    if df is not None:
        # when we receive a new dataframe into the terminal - update possible selection lists
        if not self._ctrlWidget.plotAllowed():
            colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
            self._ctrlWidget.param('y').setLimits(colname)
            self._ctrlWidget.param('x').setLimits(colname)
        if self._ctrlWidget.plotAllowed():
            kwargs = self.ctrlWidget().prepareInputArguments()
            with BusyCursor():
                plot_overheads = self._ctrlWidget.param('plot overheads').value() is True
                overhead_name = None
                try:
                    if plot_overheads:
                        y_name = kwargs['y'][0]
                        x_name = kwargs['x'][0]
                        overhead_name = y_name + ' - ' + x_name
                        df[overhead_name] = df[y_name] - df[x_name]
                        kwargs['y'] = [overhead_name]
                    plot_pandas.plot_pandas_scatter_special1(df, **kwargs)
                finally:
                    # BUGFIX: previously the temporary column leaked into the
                    # input dataframe when plotting raised an exception
                    if overhead_name is not None:
                        del df[overhead_name]
def process(self, tides):
    """Generate a synthetic tidal signal from a table of tide components.

    On the first arrival of a new `tides` DataFrame (detected via id()),
    the A/omega/phi column selectors and the reference level `W` are
    (re)initialized; signals are disconnected during the update to avoid
    recursive re-processing. Then a components dict is built and passed to
    generate_tide() with the selected equation ('tide', 'ferris' or 'xia').

    Returns:
        dict with key 'sig' -> generated DataFrame (None for unknown eq).
    """
    if tides is None:
        return
    if self._df_id != id(tides):
        # a new dataframe instance arrived -> re-init UI selectors once
        #print 'df new'
        self._df_id = id(tides)
        self.CW().param('tides_grp', 'n_sig').setValue(len(tides)-1)
        # disconnect update-signals while changing limits/values programmatically
        self.CW().disconnect_valueChanged2upd(self.CW().param('tides_grp', 'A'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('tides_grp', 'omega'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('tides_grp', 'phi'))
        colname = [col for col in tides.columns if isNumpyNumeric(tides[col].dtype)]
        self.CW().param('tides_grp', 'A').setLimits(colname)
        self.CW().param('tides_grp', 'omega').setLimits(colname)
        self.CW().param('tides_grp', 'phi').setLimits(colname)
        # default guess: first three numeric columns are A, omega, phi
        self.CW().param('tides_grp', 'A').setValue(colname[0])
        self.CW().param('tides_grp', 'omega').setValue(colname[1])
        self.CW().param('tides_grp', 'phi').setValue(colname[2])
        self.CW().connect_valueChanged2upd(self.CW().param('tides_grp', 'A'))
        self.CW().connect_valueChanged2upd(self.CW().param('tides_grp', 'omega'))
        self.CW().connect_valueChanged2upd(self.CW().param('tides_grp', 'phi'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('W'))
        W = tides[self.CW().p['tides_grp', 'A']][0]  # 1st value from column `A`
        self.CW().param('W').setValue(W)
        self.CW().param('W').setDefault(W)
        self.CW().connect_valueChanged2upd(self.CW().param('W'))
    kwargs = self.CW().prepareInputArguments()
    kwargs['tides'] = {}
    for i in xrange(len(tides)):
        # NOTE(review): skips rows with a valid amplitude but NaN frequency --
        # presumably the 0-frequency (mean level) term; confirm against data layout
        if not np.isnan(tides.iloc[i][kwargs['df_A']]) and np.isnan(tides.iloc[i][kwargs['df_omega']]):
            continue  #skipping 0-frequency amplitude
        kwargs['tides'][str(i)] = {}
        kwargs['tides'][str(i)]['A'] = tides.iloc[i][kwargs['df_A']]
        kwargs['tides'][str(i)]['omega'] = tides.iloc[i][kwargs['df_omega']]
        kwargs['tides'][str(i)]['phi'] = tides.iloc[i][kwargs['df_phi']]
        #print i, ': a={0}, omega={1}, phi={2}'.format(kwargs['tides'][str(i)]['A'], kwargs['tides'][str(i)]['omega'], kwargs['tides'][str(i)]['phi'] )
    with BusyCursor():
        # dispatch on the selected analytical equation
        if kwargs['eq'] == 'tide':
            df = generate_tide(kwargs['t0'], kwargs['dt'], kwargs['tend'],
                components=kwargs['tides'], W=kwargs['W'], F=kwargs['F'],
                label=kwargs['label'], equation=kwargs['eq'])
        elif kwargs['eq'] == 'ferris':
            df = generate_tide(kwargs['t0'], kwargs['dt'], kwargs['tend'],
                components=kwargs['tides'], W=kwargs['W'], F=kwargs['F'],
                label=kwargs['label'], equation=kwargs['eq'],
                D=kwargs['ferris']['D'], x=kwargs['ferris']['x'])
        elif kwargs['eq'] == 'xia':
            df = generate_tide(kwargs['t0'], kwargs['dt'], kwargs['tend'],
                components=kwargs['tides'], W=kwargs['W'], F=kwargs['F'],
                label=kwargs['label'], equation=kwargs['eq'],
                x=kwargs['xia']['x'], alpha=kwargs['xia']['alpha'],
                beta=kwargs['xia']['beta'], theta=kwargs['xia']['theta'],
                L=kwargs['xia']['L'], K1=kwargs['xia']['K1'],
                b1=kwargs['xia']['b1'], K=kwargs['xia']['K'],
                b=kwargs['xia']['b'], K_cap=kwargs['xia']['K_cap'],
                b_cap=kwargs['xia']['b_cap'])
        else:
            df = None
    return {'sig': df}
def process(self, coord, data):
    """Compute the groundwater gradient (Devlin 2003) for a user-selected
    timestep, and optionally for all timesteps.

    Args:
        coord: DataFrame indexed by well name with x/y coordinate columns.
        data:  DataFrame with one datetime column and one head column per well.

    Returns:
        dict(this=per-timestep result DataFrame, All=all-timesteps DataFrame
        or None).
    """
    if data is not None:
        colname = [
            col for col in data.columns if isNumpyDatetime(data[col].dtype)
        ]
        self._ctrlWidget.param('Datetime').setLimits(colname)
        self.data = data
    else:
        self.data = None
        return dict(this=None, All=self.All_out)
    if coord is not None:
        colname = [
            col for col in coord.columns if isNumpyNumeric(coord[col].dtype)
        ]
        self._ctrlWidget.param('coords_grp', 'x').setLimits(colname)
        self._ctrlWidget.param('coords_grp', 'y').setLimits(colname)
        # disconnect update-signals while setting defaults programmatically
        self.CW().disconnect_valueChanged2upd(self.CW().param(
            'coords_grp', 'x'))
        self.CW().disconnect_valueChanged2upd(self.CW().param(
            'coords_grp', 'y'))
        self.CW().param('coords_grp', 'x').setValue(colname[0])
        self.CW().param('coords_grp', 'y').setValue(colname[1])
        self.CW().connect_valueChanged2upd(self.CW().param(
            'coords_grp', 'x'))
        self.CW().connect_valueChanged2upd(self.CW().param(
            'coords_grp', 'y'))
    else:
        return dict(this=None, All=self.All_out)
    # now make sure All well specified in `coord` dataframe are found in `data`
    well_names = coord.index.values
    for well_n in well_names:
        if well_n not in data.columns:
            raise ValueError(
                'Well named `{0}` not found in `data` but is declared in `coords`'
                .format(well_n))
    kwargs = self.ctrlWidget().prepareInputArguments()
    # select row whith user-specified datetime `timestep`
    row = data.loc[data[kwargs['datetime']] == kwargs['t']]
    if row.empty:
        raise IndexError(
            'Selected timestep `{0}` not found in `data`s column {1}. Select correct one'
            .format(kwargs['t'], kwargs['datetime']))
    # now prepare dataframe for devlin calculations
    df = coord.copy()
    df['z'] = np.zeros(len(df.index))
    for well_n in well_names:
        df.loc[well_n, 'z'] = float(row[well_n])
    gradient, direction = devlin2003pandas(df, kwargs['x'], kwargs['y'], 'z')
    self.CW().param('grad').setValue(gradient)
    self.CW().param('angle').setValue(direction)
    # here we will generate large dataset of all timesteps
    if self.CW().CALCULATE_ALL:
        # now generate long dataframe
        All = pd.DataFrame({
            kwargs['datetime']: data[kwargs['datetime']],
            'gradient': np.zeros(len(data.index)),
            'direction(degrees North)': np.zeros(len(data.index))
        })
        self.All_out = All  # pointer
        with pg.ProgressDialog(
                "Calculating gradient for All timesteps {0}".format(
                    len(All.index)), 0, len(All.index)) as dlg:
            for row_i in data.index:
                row = data.loc[row_i]
                # collect head values of all wells for this timestep
                z = np.zeros(len(coord.index))
                for i, well_n in enumerate(well_names):
                    z[i] = float(row[well_n])
                x = coord[kwargs['x']].values
                y = coord[kwargs['y']].values
                _, gradient, angle = devlin2003(np.matrix([x, y, z]).T)
                All.loc[row_i, 'gradient'] = gradient
                All.loc[row_i, 'direction(degrees North)'] = angle2bearing(
                    angle, origin='N')[0]
                dlg += 1
                del z
                if dlg.wasCanceled():
                    # user aborted: discard the partial result
                    del All
                    self.All_out = None
                    break
            #return dict(df=df, All=self.All_out)
            dlg += 1
    return dict(this=df, All=self.All_out)
def process(self, coord, data):
    """Compute the groundwater gradient (Devlin 2003) for one selected
    timestep and optionally for all timesteps (duplicate of the wrapped
    variant above; logic is identical).

    Returns:
        dict(this=per-timestep result DataFrame, All=all-timesteps DataFrame
        or None).
    """
    if data is not None:
        colname = [col for col in data.columns if isNumpyDatetime(data[col].dtype)]
        self._ctrlWidget.param('Datetime').setLimits(colname)
        self.data = data
    else:
        self.data = None
        return dict(this=None, All=self.All_out)
    if coord is not None:
        colname = [col for col in coord.columns if isNumpyNumeric(coord[col].dtype)]
        self._ctrlWidget.param('coords_grp', 'x').setLimits(colname)
        self._ctrlWidget.param('coords_grp', 'y').setLimits(colname)
        # disconnect update-signals while setting defaults programmatically
        self.CW().disconnect_valueChanged2upd(self.CW().param('coords_grp', 'x'))
        self.CW().disconnect_valueChanged2upd(self.CW().param('coords_grp', 'y'))
        self.CW().param('coords_grp', 'x').setValue(colname[0])
        self.CW().param('coords_grp', 'y').setValue(colname[1])
        self.CW().connect_valueChanged2upd(self.CW().param('coords_grp', 'x'))
        self.CW().connect_valueChanged2upd(self.CW().param('coords_grp', 'y'))
    else:
        return dict(this=None, All=self.All_out)
    # now make sure All well specified in `coord` dataframe are found in `data`
    well_names = coord.index.values
    for well_n in well_names:
        if well_n not in data.columns:
            raise ValueError('Well named `{0}` not found in `data` but is declared in `coords`'.format(well_n))
    kwargs = self.ctrlWidget().prepareInputArguments()
    # select row whith user-specified datetime `timestep`
    row = data.loc[data[kwargs['datetime']] == kwargs['t']]
    if row.empty:
        raise IndexError('Selected timestep `{0}` not found in `data`s column {1}. Select correct one'.format(kwargs['t'], kwargs['datetime']))
    # now prepare dataframe for devlin calculations
    df = coord.copy()
    df['z'] = np.zeros(len(df.index))
    for well_n in well_names:
        df.loc[well_n, 'z'] = float(row[well_n])
    gradient, direction = devlin2003pandas(df, kwargs['x'], kwargs['y'], 'z')
    self.CW().param('grad').setValue(gradient)
    self.CW().param('angle').setValue(direction)
    # here we will generate large dataset of all timesteps
    if self.CW().CALCULATE_ALL:
        # now generate long dataframe
        All = pd.DataFrame({kwargs['datetime']: data[kwargs['datetime']],
                            'gradient': np.zeros(len(data.index)),
                            'direction(degrees North)': np.zeros(len(data.index))}
                           )
        self.All_out = All  # pointer
        with pg.ProgressDialog("Calculating gradient for All timesteps {0}".format(len(All.index)), 0, len(All.index)) as dlg:
            for row_i in data.index:
                row = data.loc[row_i]
                # collect head values of all wells for this timestep
                z = np.zeros(len(coord.index))
                for i, well_n in enumerate(well_names):
                    z[i] = float(row[well_n])
                x = coord[kwargs['x']].values
                y = coord[kwargs['y']].values
                _, gradient, angle = devlin2003(np.matrix([x, y, z]).T)
                All.loc[row_i, 'gradient'] = gradient
                All.loc[row_i, 'direction(degrees North)'] = angle2bearing(angle, origin='N')[0]
                dlg += 1
                del z
                if dlg.wasCanceled():
                    # user aborted: discard the partial result
                    del All
                    self.All_out = None
                    break
            #return dict(df=df, All=self.All_out)
            dlg += 1
    return dict(this=df, All=self.All_out)
def process(self, In):
    """Run the full peak-detection routine on the incoming timeseries and
    publish warning/result counters in the control widget.

    Returns:
        dict with keys 'raw' (all detected extrema) and 'peaks' (matched
        cycles); both None when no input is connected.
    """
    df = In
    # reset all counters to '?' while recomputing
    self.CW().param('check_grp', 'MIN_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'MAX_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'ALL_grp', 'warn').setValue('?')
    self.CW().param('check_grp', 'warn_sum').setValue('?')
    self.CW().param('out_grp', 'raw_nmin').setValue('?')
    self.CW().param('out_grp', 'raw_nmax').setValue('?')
    self.CW().param('out_grp', 'raw_n_all').setValue('?')
    self.CW().param('out_grp', 'n_cycles').setValue('?')
    self.CW().param('Peak Detection Params', 'order').setValue('?')
    if df is None:
        return {'raw': None, 'peaks': None}
    # numeric columns -> value selector; datetime columns -> timestamp selector
    colname = [col for col in df.columns if isNumpyNumeric(df[col].dtype)]
    self.CW().param('column').setLimits(colname)
    colname = [col for col in df.columns if isNumpyDatetime(df[col].dtype)]
    self.CW().param('datetime').setLimits(colname)
    kwargs = self.CW().prepareInputArguments()
    kwargs['split'] = True
    with BusyCursor():
        # derive the comparison window (`order`) from period T and margin
        kwargs['order'] = prepare_order(
            kwargs['T'], kwargs['hMargin'],
            prepare_datetime(df, datetime=kwargs['datetime']))
        self.CW().param('Peak Detection Params', 'order').setValue(str(kwargs['order']))
        #peaks = detectPeaks_ts(df, kwargs.pop('column'), plot=self._plotRequired, **kwargs)
        extra, raw, peaks = full_peak_detection_routine(
            df,
            col=kwargs.pop('column'),
            date_col=kwargs.pop('datetime'),
            IDs2mask=kwargs.pop('IDs2mask'),
            valid_range=kwargs.pop('valid_range'),
            plot=self._plotRequired,
            **kwargs)
    # publish warning counts per category plus their sum
    n_warn_min = len(extra['warnings']['MIN'])
    n_warn_max = len(extra['warnings']['MAX'])
    n_warn_all = len(extra['warnings']['ALL'])
    self.CW().param('check_grp', 'MIN_grp', 'warn').setValue(n_warn_min)
    self.CW().param('check_grp', 'MAX_grp', 'warn').setValue(n_warn_max)
    self.CW().param('check_grp', 'ALL_grp', 'warn').setValue(n_warn_all)
    self.CW().param('check_grp', 'warn_sum').setValue(n_warn_min + n_warn_max + n_warn_all)
    self.CW().param('out_grp', 'raw_nmin').setValue(extra['raw_nmin'])
    self.CW().param('out_grp', 'raw_nmax').setValue(extra['raw_nmax'])
    if raw is not None:
        self.CW().param('out_grp', 'raw_n_all').setValue(len(raw.index))
    if peaks is not None:
        self.CW().param('out_grp', 'n_cycles').setValue(len(peaks.index))
    return {'raw': raw, 'peaks': peaks}
def filter_wl_71h_serfes1991(data, datetime=None, N=None, usecols=None, keep_origin=True, verbose=False, log=False):
    """
    Calculate mean water-level according to Serfes1991. Perform a column-wise
    time averaging in three iterations.
        1) The first sequence averages 24 hours of measurements
        2) The second sequence averages 24 hours of first sequence
        3) The third sequence averages all values of second sequence that
           were generated when the filter was applied to 71h

    This function is a modified version of original Serfes filter: it is not
    limited to hourly measurements.

    Args:
        data (pd.DataFrame): input data, where indexes are Datetime objects,
            see `parse_dates` parameters of `pd.read_csv()`
        datetime (Optional[str]): Location of the datetime objects.
            By default is `None`, meaning that datetime objects are
            located within `pd.DataFrame.index`. If not `None` - pass the
            column-name of dataframe where datetime objects are located.
            This is needed to determine number of measurements per day.
            Note: this argument is ignored if `N` is not `None` !!!
        N (Optional[int]): explicit number of measurements in 24 hours.
            By default `N=None`, meaning that script will try to determine
            number of measurements per 24 hours based on real datetime
            information provided with `datetime` argument.
        usecols (Optional[List[str]]): explicitly pass the name of the columns
            that will be evaluated. These columns must have numerical dtype
            (i.e. int32, int64, float32, float64). Default value is `None`
            meaning that all numerical columns will be processed.
        keep_origin (Optional[bool]): if `True` - will keep original columns
            in the output dataframe (NOTE: the input `data` is then extended
            in place and returned). If `False` - will return a new dataframe
            which has only results columns and original DateTime columns.
        verbose (Optional[bool]): if `True` - will keep all three iterations
            in the output. If `False` - will save only final (3rd) iteration.
            This may useful for debugging, or checking this filter.
        log (Optional[bool]): flag to show some prints in console

    Returns:
        data (pd.DataFrame): input dataframe with appended time-averaged
            values. these values are appended into new columns
    """
    n = N  # for compatibility with thesis description

    # if convert all columns...
    if usecols is None:
        # select only numeric columns...
        numeric_columns = [col for col in data.columns if isNumpyNumeric(data[col].dtype)]
    # or covert only user defined columns....
    else:
        # select only numeric columns...
        numeric_columns = [col for col in data.columns if (isNumpyNumeric(data[col].dtype) and col in usecols)]

    # if user has not explicitly passed number of measurements in a day, find it out!
    if n is None:
        n = get_number_of_measurements_per_day(data, datetime=datetime, log=log)

    if log:
        print("All column names:", list(data.columns))
        print("Numeric colums:", numeric_columns)
        print("i will use following number of entries per day: ", n)

    if keep_origin:
        output = data
    else:
        output = pd.DataFrame()
        # copy datetime columns
        datetime_columns = [col for col in data.columns if isNumpyDatetime(data[col].dtype)]
        for col in datetime_columns:
            output[col] = data[col]

    nX = int(n / 24.0 * 71 - (n - 1))  # number of elements in sequence_1
    nY = nX - (n - 1)  # number of elements in sequence_2
    # print (n, nX, nY)

    # BUGFIX: the former check `float(".".join(pd.__version__.split(".")[0:2])) < 0.18`
    # misorders versions (e.g. "0.9" -> 0.9, which is NOT < 0.18, wrongly selecting
    # the new API on old pandas). Compare numeric (major, minor) tuples instead.
    pd_version = tuple(int(v) for v in pd.__version__.split(".")[0:2])
    for col_name in numeric_columns:
        if pd_version < (0, 18):
            # old API (pre-0.18): module-level rolling_mean
            output[col_name + "_sequence1"] = pd.rolling_mean(
                data[col_name], window=n, min_periods=n, center=True
            ).values
            output[col_name + "_sequence2"] = pd.rolling_mean(
                output[col_name + "_sequence1"], window=n, min_periods=n, center=True
            ).values
            output[col_name + "_mean"] = pd.rolling_mean(
                output[col_name + "_sequence2"], window=nY, min_periods=nY, center=True
            ).values
        else:
            # new API: Series.rolling(...).mean()
            output[col_name + "_sequence1"] = data[col_name].rolling(window=n, min_periods=n, center=True).mean().values
            output[col_name + "_sequence2"] = (
                output[col_name + "_sequence1"].rolling(window=n, min_periods=n, center=True).mean().values
            )
            output[col_name + "_mean"] = (
                output[col_name + "_sequence2"].rolling(window=nY, min_periods=nY, center=True).mean().values
            )
        # intermediate sequences are kept only in verbose mode
        if not verbose:
            del output[col_name + "_sequence1"]
        if not verbose:
            del output[col_name + "_sequence2"]
    gc.collect()
    return output
def process(self, df, md_peaks):
    """Compute tidal efficiency E with one of three methods selected in the
    control widget; methods 2 and 3 additionally require matched-peaks data.

    Args:
        df: DataFrame with river/groundwater head columns and a datetime column.
        md_peaks: matched-peaks DataFrame (required for methods 2 and 3).

    Returns:
        dict with keys 'E' (scalar efficiency) and 'E_cyclic' (per-cycle
        table, or None for method 1).
    """
    E = None
    self.CW().param('E = ').setValue(str(E))
    self.CW().param('gw').setWritable(True)
    if df is not None:
        # disconnect update-signals while refreshing the column selectors
        for name in ['river', 'gw', 'datetime']:
            self.CW().disconnect_valueChanged2upd(self.CW().param(name))
        colname = [
            col for col in df.columns if isNumpyNumeric(df[col].dtype)
        ]
        self.CW().param('river').setLimits(colname)
        self.CW().param('gw').setLimits(colname)
        colname = [
            col for col in df.columns if isNumpyDatetime(df[col].dtype)
        ]
        self.CW().param('datetime').setLimits(colname)
        for name in ['river', 'gw', 'datetime']:
            self.CW().connect_valueChanged2upd(self.CW().param(name))
        kwargs = self.ctrlWidget().prepareInputArguments()
        if kwargs['method'] == '1) STD':
            E = tidalEfficiency_method1(df, kwargs['river'], kwargs['gw'])
            E_c = None
        elif kwargs['method'] == '2) Cyclic amplitude' or kwargs[
                'method'] == '3) Cyclic STD':
            # both cyclic methods need matched-peaks input
            if md_peaks is None:
                msg = 'To use method `{0}` please provide "matched-peaks" data in terminal `md_peaks` (a valid data-set can be created with node `Match Peaks`)'.format(
                    kwargs['method'])
                QtGui.QMessageBox.warning(
                    None, "Node: {0}".format(self.nodeName), msg)
                raise ValueError(msg)
            # the `gw` selector is not used here -- lock it visually
            self.CW().disconnect_valueChanged2upd(self.CW().param('gw'))
            self.CW().param('gw').setWritable(False)
            self.CW().param('gw').setLimits(['see matched peaks'])
            self.CW().connect_valueChanged2upd(self.CW().param('gw'))
            mPeaks_slice = md_peaks.loc[~md_peaks['md_N'].isin(
                [np.nan, None])]  # select only valid cycles
            if kwargs['method'] == '2) Cyclic amplitude':
                E, E_cyclic = tidalEfficiency_method2(
                    mPeaks_slice['tidal_range'],
                    mPeaks_slice['md_tidal_range'])
            elif kwargs['method'] == '3) Cyclic STD':
                with BusyCursor():
                    river_name = mPeaks_slice['name'][0]
                    well_name = mPeaks_slice['md_name'][0]
                    E, E_cyclic = tidalEfficiency_method3(
                        df, river_name, well_name, kwargs['datetime'],
                        mPeaks_slice['time_min'], mPeaks_slice['time_max'],
                        mPeaks_slice['md_time_min'],
                        mPeaks_slice['md_time_max'])
            # now do nice output table
            E_c = pd.DataFrame({
                'N': mPeaks_slice['N'],
                'md_N': mPeaks_slice['md_N'],
                'E_cyclic': E_cyclic,
            })
        else:
            raise Exception('Method <%s> is not yet implemented' %
                            kwargs['method'])
        self.CW().param('E = ').setValue('{0:.4f}'.format(E))
        return {'E': E, 'E_cyclic': E_c}
def process(self, tides):
    """Generate a synthetic tidal signal from a table of tide components
    (duplicate of the compact variant above; logic is identical).

    Returns:
        dict with key 'sig' -> generated DataFrame (None for unknown eq).
    """
    if tides is None:
        return
    if self._df_id != id(tides):
        # a new dataframe instance arrived -> re-init UI selectors once
        # print 'df new'
        self._df_id = id(tides)
        self.CW().param("tides_grp", "n_sig").setValue(len(tides) - 1)
        # disconnect update-signals while changing limits/values programmatically
        self.CW().disconnect_valueChanged2upd(self.CW().param("tides_grp", "A"))
        self.CW().disconnect_valueChanged2upd(self.CW().param("tides_grp", "omega"))
        self.CW().disconnect_valueChanged2upd(self.CW().param("tides_grp", "phi"))
        colname = [col for col in tides.columns if isNumpyNumeric(tides[col].dtype)]
        self.CW().param("tides_grp", "A").setLimits(colname)
        self.CW().param("tides_grp", "omega").setLimits(colname)
        self.CW().param("tides_grp", "phi").setLimits(colname)
        # default guess: first three numeric columns are A, omega, phi
        self.CW().param("tides_grp", "A").setValue(colname[0])
        self.CW().param("tides_grp", "omega").setValue(colname[1])
        self.CW().param("tides_grp", "phi").setValue(colname[2])
        self.CW().connect_valueChanged2upd(self.CW().param("tides_grp", "A"))
        self.CW().connect_valueChanged2upd(self.CW().param("tides_grp", "omega"))
        self.CW().connect_valueChanged2upd(self.CW().param("tides_grp", "phi"))
        self.CW().disconnect_valueChanged2upd(self.CW().param("W"))
        W = tides[self.CW().p["tides_grp", "A"]][0]  # 1st value from column `A`
        self.CW().param("W").setValue(W)
        self.CW().param("W").setDefault(W)
        self.CW().connect_valueChanged2upd(self.CW().param("W"))
    kwargs = self.CW().prepareInputArguments()
    kwargs["tides"] = {}
    for i in xrange(len(tides)):
        # NOTE(review): skips rows with a valid amplitude but NaN frequency --
        # presumably the 0-frequency (mean level) term; confirm against data layout
        if not np.isnan(tides.iloc[i][kwargs["df_A"]]) and np.isnan(tides.iloc[i][kwargs["df_omega"]]):
            continue  # skipping 0-frequency amplitude
        kwargs["tides"][str(i)] = {}
        kwargs["tides"][str(i)]["A"] = tides.iloc[i][kwargs["df_A"]]
        kwargs["tides"][str(i)]["omega"] = tides.iloc[i][kwargs["df_omega"]]
        kwargs["tides"][str(i)]["phi"] = tides.iloc[i][kwargs["df_phi"]]
        # print i, ': a={0}, omega={1}, phi={2}'.format(kwargs['tides'][str(i)]['A'], kwargs['tides'][str(i)]['omega'], kwargs['tides'][str(i)]['phi'] )
    with BusyCursor():
        # dispatch on the selected analytical equation
        if kwargs["eq"] == "tide":
            df = generate_tide(
                kwargs["t0"],
                kwargs["dt"],
                kwargs["tend"],
                components=kwargs["tides"],
                W=kwargs["W"],
                F=kwargs["F"],
                label=kwargs["label"],
                equation=kwargs["eq"],
            )
        elif kwargs["eq"] == "ferris":
            df = generate_tide(
                kwargs["t0"],
                kwargs["dt"],
                kwargs["tend"],
                components=kwargs["tides"],
                W=kwargs["W"],
                F=kwargs["F"],
                label=kwargs["label"],
                equation=kwargs["eq"],
                D=kwargs["ferris"]["D"],
                x=kwargs["ferris"]["x"],
            )
        elif kwargs["eq"] == "xia":
            df = generate_tide(
                kwargs["t0"],
                kwargs["dt"],
                kwargs["tend"],
                components=kwargs["tides"],
                W=kwargs["W"],
                F=kwargs["F"],
                label=kwargs["label"],
                equation=kwargs["eq"],
                x=kwargs["xia"]["x"],
                alpha=kwargs["xia"]["alpha"],
                beta=kwargs["xia"]["beta"],
                theta=kwargs["xia"]["theta"],
                L=kwargs["xia"]["L"],
                K1=kwargs["xia"]["K1"],
                b1=kwargs["xia"]["b1"],
                K=kwargs["xia"]["K"],
                b=kwargs["xia"]["b"],
                K_cap=kwargs["xia"]["K_cap"],
                b_cap=kwargs["xia"]["b_cap"],
            )
        else:
            df = None
    return {"sig": df}
def filter_wl_71h_serfes1991(data, datetime=None, N=None, usecols=None, keep_origin=True, verbose=False, log=False):
    '''
    Calculate mean water-level according to Serfes1991. Perform a column-wise
    time averaging in three iterations.
        1) The first sequence averages 24 hours of measurements
        2) The second sequence averages 24 hours of first sequence
        3) The third sequence averages all values of second sequence that
           were generated when the filter was applied to 71h

    This function is a modified version of original Serfes filter: it is not
    limited to hourly measurements.

    Args:
        data (pd.DataFrame): input data, where indexes are Datetime objects,
            see `parse_dates` parameters of `pd.read_csv()`
        datetime (Optional[str]): Location of the datetime objects.
            By default is `None`, meaning that datetime objects are
            located within `pd.DataFrame.index`. If not `None` - pass the
            column-name of dataframe where datetime objects are located.
            This is needed to determine number of measurements per day.
            Note: this argument is ignored if `N` is not `None` !!!
        N (Optional[int]): explicit number of measurements in 24 hours.
            By default `N=None`, meaning that script will try to determine
            number of measurements per 24 hours based on real datetime
            information provided with `datetime` argument.
        usecols (Optional[List[str]]): explicitly pass the name of the columns
            that will be evaluated. These columns must have numerical dtype
            (i.e. int32, int64, float32, float64). Default value is `None`
            meaning that all numerical columns will be processed.
        keep_origin (Optional[bool]): if `True` - will keep original columns
            in the output dataframe. If `False` - will return dataframe which
            has only results columns and original DateTime columns
        verbose (Optional[bool]): if `True` - will keep all three iterations
            in the output. If `False` - will save only final (3rd) iteration.
            This may useful for debugging, or checking this filter.
        log (Optional[bool]): flag to show some prints in console

    Returns:
        data (pd.DataFrame): input dataframe with appended time-averaged
            values. these values are appended into new columns
    '''
    n = N  # for compatibility with thesis description

    # if convert all columns...
    if usecols is None:
        # select only numeric columns...
        numeric_columns = [
            col for col in data.columns if isNumpyNumeric(data[col].dtype)
        ]
    # or covert only user defined columns....
    else:
        # select only numeric columns...
        numeric_columns = [
            col for col in data.columns
            if (isNumpyNumeric(data[col].dtype) and col in usecols)
        ]

    #if user has not explicitly passed number of measurements in a day, find it out!
    if n is None:
        n = get_number_of_measurements_per_day(data, datetime=datetime, log=log)

    if log:
        print('All column names:', list(data.columns))
        print('Numeric colums:', numeric_columns)
        print('i will use following number of entries per day: ', n)

    if keep_origin:
        # NOTE(review): `output` aliases `data` here, so the input dataframe
        # is extended in place
        output = data
    else:
        output = pd.DataFrame()
        #copy datetime columns
        datetime_columns = [
            col for col in data.columns if isNumpyDatetime(data[col].dtype)
        ]
        for col in datetime_columns:
            output[col] = data[col]

    nX = int(n / 24. * 71 - (n - 1))  # number of elements in sequence_1
    nY = nX - (n - 1)  # number of elements in sequence_2
    #print (n, nX, nY)

    for col_name in numeric_columns:
        # NOTE(review): this float comparison misorders versions such as
        # '0.9' (0.9 > 0.18 numerically but the API is older) -- comparing
        # (major, minor) int tuples would be correct
        if float('.'.join(pd.__version__.split(
                '.')[0:2])) < 0.18:  # if version is less then 0.18 (OLD API)
            output[col_name + '_sequence1'] = pd.rolling_mean(
                data[col_name], window=n, min_periods=n, center=True).values
            output[col_name + '_sequence2'] = pd.rolling_mean(
                output[col_name + '_sequence1'],
                window=n,
                min_periods=n,
                center=True).values
            output[col_name + '_mean'] = pd.rolling_mean(output[col_name +
                                                                '_sequence2'],
                                                         window=nY,
                                                         min_periods=nY,
                                                         center=True).values
        else:
            # new API
            output[col_name + '_sequence1'] = data[col_name].rolling(
                window=n, min_periods=n, center=True).mean().values
            output[col_name +
                   '_sequence2'] = output[col_name + '_sequence1'].rolling(
                       window=n, min_periods=n, center=True).mean().values
            output[col_name +
                   '_mean'] = output[col_name + '_sequence2'].rolling(
                       window=nY, min_periods=nY, center=True).mean().values
        # intermediate sequences are kept only in verbose mode
        if not verbose:
            del output[col_name + '_sequence1']
        if not verbose:
            del output[col_name + '_sequence2']
    gc.collect()
    return output