class KB_plotter(object):
    """Interactive four-panel scatter viewer that steps through areas.

    The matplotlib toolbar's forward/back arrows are rebound (on the
    ``NavigationToolbar2`` class itself, so for every figure) to move to the
    next/previous row of ``roots_df`` and redraw.

    NOTE(review): a second class named ``KB_plotter`` is defined later in this
    file; at import time that later definition rebinds the name, so this one
    is only reachable before that point.

    :param xval: name of the column plotted on the x axis
    """

    def __init__(self, xval='date_float'):
        self.xval = xval
        self.rootNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        NavigationToolbar2.forward = self.next_button
        NavigationToolbar2.back = self.back_button

    def next_button(self, *args, **kwargs):
        """Advance to the next area and redraw; a no-op on the last area."""
        # ``<`` rather than ``!=`` so an empty roots_df cannot push the index
        # past the end.
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            self.show_plot()

    def back_button(self, *args, **kwargs):
        """Step back to the previous area and redraw; a no-op on the first."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self.show_plot()

    def get_root_id(self, index):
        """Return the ``id`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['id']

    def get_area_name(self, index):
        """Return the ``area_name`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['area_name']

    def show_plot(self):
        """Fetch the current area's data and draw the 2x2 grid of panels."""
        plt.clf()
        root_id = self.get_root_id(self.rootNumber)
        area_name = self.get_area_name(self.rootNumber)
        mdf = self.stof.get_kb_oodi_dd(root_id)
        print(mdf)
        plt.suptitle(area_name)
        # (subplot location, title, y column, draw sine fit?)
        panels = (
            (221, "MEP-aan", 'mep_aan', True),
            (222, "MEP-uit", 'mep_uit', True),
            (223, "Diff", 'difference', True),
            (224, "I-Flens", 'iflens', False),
        )
        for loc, title, column, with_sine in panels:
            self.sub_plot(loc, title, mdf[[self.xval, column]],
                          True, True, with_sine, True)
        plt.show()

    def sub_plot(self, loc, name, mdf, mean=False, std=False, sin=False,
                 lin=False):
        """Scatter one two-column frame with optional overlays.

        Rows whose y value lies outside mean +/- 2*std are dropped before
        plotting. The caller's frame is left untouched.

        :param loc: subplot location code passed to ``plt.subplot``
        :param name: panel title
        :param mdf: DataFrame whose first column is x and second column is y
        :param mean: draw a dashed line at the mean of y
        :param std: draw dotted lines at mean +/- one std of y
        :param sin: draw the curve returned by ``reg.fit_sin``
        :param lin: draw the prediction returned by ``reg.fit_lin``
        """
        # Work on a copy so renaming the columns never leaks to the caller.
        data = mdf.copy()
        data.columns = ['x', 'y']
        if data['y'].isnull().all():
            # Nothing to plot; same guard as the later KB_plotter revision.
            return
        m, s = stats.mean_std(data['y'])
        data = data[(data.y < m + 2 * s) & (data.y > m - 2 * s)]
        plt.subplot(loc)
        plt.title(name)
        if data.empty:
            # The strict 2-sigma filter removed every row (e.g. zero spread).
            return
        m, s = stats.mean_std(data['y'])
        plt.scatter(data['x'], data['y'])
        x_lo = np.min(data['x'])
        x_hi = np.max(data['x'])
        if mean:
            plt.plot([x_lo, x_hi], [m, m], 'g--')
        if std:
            plt.plot([x_lo, x_hi], [m + s, m + s], 'r:')
            plt.plot([x_lo, x_hi], [m - s, m - s], 'r:')
        if sin:
            ls = np.linspace(x_lo, x_hi, 100)
            sin_pred = reg.fit_sin(data['x'], data['y'])
            plt.plot(ls, sin_pred['fitfunc'](ls))
        if lin:
            plt.plot(data['x'], reg.fit_lin(data[['x']], data['y']))
class CoefsPlotter(object):
    """Interactive viewer of a polynomial fit per measure point.

    The toolbar's forward/back arrows step through areas (rows of
    ``roots_df``); two extra toolbar items step through the measure points of
    the current area. All bindings are installed on the
    ``NavigationToolbar2`` class itself, so they affect every figure.

    :param xval: name of the column used as x
    :param yval: name of the column used as y
    """

    def __init__(self, xval='date_float', yval='mep_uit'):
        self.xval = xval
        self.yval = yval
        self.rootNumber = 0
        self.mpNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self.polyInt = PolyInterpolation(precision=3)
        # One fetch only; sets area_df, mpNumber and meas_df.
        self._load_area()

        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.forward_mp = self.next_button_mp
        NavigationToolbar2.back_mp = self.back_button_mp
        extra_items = (
            ('Back mp', 'Back to previous mp', 'back', 'back_mp'),
            ('Forward mp', 'Forward to next mp', 'forward', 'forward_mp'),
        )
        # Skip items already present so a second instance does not add a
        # duplicate pair of buttons.
        current_items = tuple(NavigationToolbar2.toolitems)
        NavigationToolbar2.toolitems = current_items + tuple(
            item for item in extra_items if item not in current_items)
        plt.figure()
        plt.show()

    def _load_area(self):
        """Fetch the current area's data and select its first measure point."""
        root_id = self.get_root_id(self.rootNumber)
        self.area_df = self.stof.get_kb_oodi_dd(root_id)
        self.mpNumber = 0
        self._select_mp()

    def _select_mp(self):
        """Set ``meas_df`` to the prepared rows of measure point ``mpNumber``."""
        mp_ids = self.get_mp_ids()
        # len() instead of truthiness: the ids come back as an array.
        if len(mp_ids) == 0:
            self.meas_df = None
            return
        rows = self.area_df[
            self.area_df.measurepoint_id == mp_ids[self.mpNumber]]
        self.meas_df = self.prepare_meas_df(rows)

    def next_button_area(self, *args, **kwargs):
        """Move to the next area and redraw; a no-op on the last area."""
        if self.rootNumber < self.get_number_of_areas() - 1:
            self.rootNumber += 1
            self._load_area()
            self.show_plot()

    def back_button_area(self, *args, **kwargs):
        """Move to the previous area and redraw; a no-op on the first area."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self._load_area()
            self.show_plot()

    def next_button_mp(self, *args, **kwargs):
        """Move to the next measure point and redraw; a no-op on the last."""
        if self.mpNumber < self.get_number_of_mps() - 1:
            self.mpNumber += 1
            self._select_mp()
            self.show_plot()

    def back_button_mp(self, *args, **kwargs):
        """Move to the previous measure point and redraw; a no-op on the first."""
        if self.mpNumber > 0:
            self.mpNumber -= 1
            self._select_mp()
            self.show_plot()

    def get_root_id(self, index):
        """Return the ``id`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """Return the distinct ``measurepoint_id`` values of the loaded area."""
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """Return the ``area_name`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['area_name']

    def get_number_of_mps(self):
        """Return how many distinct measure points the loaded area has."""
        return len(self.get_mp_ids())

    def get_number_of_areas(self):
        """Return the number of rows in ``roots_df``."""
        return self.roots_df.shape[0]

    def prepare_meas_df(self, meas_df):
        """Reduce a measure-point frame to clean ``x``/``y`` columns.

        Rows with a missing x or y are dropped, then rows whose y lies outside
        mean +/- 2*std are dropped.

        :param meas_df: DataFrame holding at least the xval and yval columns
        :return: the cleaned frame, or None when two or fewer rows remain
                 either before or after the outlier filter
        """
        frame = meas_df[[self.xval, self.yval]].dropna()
        frame.columns = ['x', 'y']
        if frame.shape[0] <= 2:
            return None
        m, s = stats.mean_std(frame['y'])
        frame = frame[(frame.y < m + 2 * s) & (frame.y > m - 2 * s)]
        if frame.shape[0] <= 2:
            return None
        return frame

    def show_plot(self):
        """Draw the measurements of the current measure point and their fit."""
        plt.clf()
        if self.meas_df is None:
            print('Not enough points')
            return
        area_name = self.get_area_name(self.rootNumber)
        self.polyInt.set_t(np.array(self.meas_df['x']))
        self.polyInt.set_y(np.array(self.meas_df['y']))
        coefs = self.polyInt.find_coefs(10)
        t_hat = np.arange(-1, 1, 0.04)
        y_hat = self.polyInt.get_y_hat_for_range(coefs, t_hat)
        # Plot the measurements as a line ordered by t.
        points = sorted(zip(self.polyInt.t, self.polyInt.y),
                        key=lambda point: point[0])
        t_sorted, y_sorted = zip(*points)
        plt.plot(t_sorted, y_sorted)
        plt.plot(t_hat, y_hat)
        area_title = (area_name + ": " + str(self.rootNumber + 1) + "/" +
                      str(self.get_number_of_areas()))
        mp_title = ("mp: " + str(self.mpNumber + 1) + "/" +
                    str(self.get_number_of_mps()))
        plt.title(area_title + " " + mp_title)
        plt.draw()
class KB_plotter_avg_n(object):
    """Fit-error study over the polynomial order ``n`` for each area.

    The toolbar's forward/back arrows load the next/previous area; the home
    button draws the mean normalised error curve for the loaded area. The
    bindings are installed on the ``NavigationToolbar2`` class itself.

    :param xval: name of the column used as x
    :param yval: name of the column used as y
    """

    def __init__(self, xval='date_float', yval='mep_uit'):
        self.xval = xval
        self.yval = yval
        self.rootNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self._load_area(self.rootNumber)
        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.home = self.home_button

    def _load_area(self, index):
        """Fetch the data of the area at ``index`` into ``self.area_df``."""
        self.area_df = self.stof.get_kb_oodi_dd(self.get_root_id(index))

    def _mp_frame(self, mpid):
        """Return the NaN-free ``x``/``y`` rows of one measure point."""
        rows = self.area_df[self.area_df.measurepoint_id == mpid]
        frame = rows[[self.xval, self.yval]].dropna()
        frame.columns = ['x', 'y']
        return frame

    @staticmethod
    def _drop_outliers(frame):
        """Drop rows whose y lies outside mean +/- 2*std."""
        m, s = stats.mean_std(frame['y'])
        return frame[(frame.y < m + 2 * s) & (frame.y > m - 2 * s)]

    def next_button_area(self, *args, **kwargs):
        """Load the next area; a no-op on the last area."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            self._load_area(self.rootNumber)

    def back_button_area(self, *args, **kwargs):
        """Load the previous area; a no-op on the first area."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self._load_area(self.rootNumber)

    def home_button(self, *args, **kwargs):
        """Draw the error curve for the loaded area."""
        self.show_plot()

    def get_root_id(self, index):
        """Return the ``id`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """Return the distinct ``measurepoint_id`` values of the loaded area."""
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """Return the ``area_name`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['area_name']

    def save_errors(self, max_n=50):
        """Write one ``<area_name>.csv`` of fit errors per area.

        Each file holds a (measure points x max_n) matrix; entry ``[i, n]`` is
        ``avg_dist_reg`` of the order-``n`` coefficients for measure point
        ``i``. Skipped measure points keep a row of zeros. Overwrites
        ``self.area_df`` as it walks the areas.

        :param max_n: number of polynomial orders evaluated (0 .. max_n-1)
        """
        n_areas = self.roots_df.shape[0]
        for rootIdx in range(n_areas):
            print("area: " + str(rootIdx) + "/" + str(n_areas))
            self._load_area(rootIdx)
            area_name = self.get_area_name(rootIdx)
            mp_ids = self.get_mp_ids()
            e = np.zeros([len(mp_ids), max_n])
            polyInt = PolyInterpolation(precision=2)
            for idx, mpid in enumerate(mp_ids):
                frame = self._mp_frame(mpid)
                if frame.shape[0] <= 1:
                    continue
                frame = self._drop_outliers(frame)
                if frame.empty:
                    # Zero spread: the strict 2-sigma filter removed all rows.
                    continue
                polyInt.set_t(frame['x'])
                polyInt.set_y(frame['y'])
                for n in range(max_n):
                    coefs = polyInt.find_coefs(n)
                    e[idx, n] = polyInt.avg_dist_reg(coefs[0:n], precision=2)
            np.savetxt(area_name + '.csv', e, delimiter=',')

    def save_errors_LOOCV(self, start_idx=20, max_n=50):
        """Write one ``<area_name>.csv`` of leave-one-out errors per area.

        For every measure point, each measurement is held out in turn, the
        fit is recomputed on the rest and the absolute error at the held-out
        point is accumulated per order, then averaged over the measurements.
        Overwrites ``self.area_df`` as it walks the areas.

        :param start_idx: index of the first area processed; the default of
                          20 preserves the previously hard-coded start
        :param max_n: number of polynomial orders evaluated (0 .. max_n-1)
        """
        polyInt = PolyInterpolation(precision=2)
        n_areas = self.roots_df.shape[0]
        p = float(10. ** -2)
        # Abscissa matching the samples returned by get_y_hat(precision=2).
        grid = np.arange(-1.0, 1. + p, p)
        for rootIdx in range(start_idx, n_areas):
            print("")
            self._load_area(rootIdx)
            area_name = self.get_area_name(rootIdx)
            mp_ids = self.get_mp_ids()
            mpLen = len(mp_ids)
            e = np.zeros([mpLen, max_n])
            for mpointIdx, mpid in enumerate(mp_ids):
                label = (area_name + ": " + str(rootIdx + 1) + "/" +
                         str(n_areas) + ", mp: " + str(mpointIdx + 1) + "/" +
                         str(mpLen))
                frame = self._mp_frame(mpid)
                if frame.shape[0] > 2:
                    frame = self._drop_outliers(frame)
                if frame.shape[0] <= 2:
                    print(label + " - ommited, has too little measurements")
                    continue
                print(label)
                for measIdx in range(frame.shape[0]):
                    # Reload the full series, then hold out one measurement.
                    polyInt.set_t(np.array(frame['x']))
                    polyInt.set_y(np.array(frame['y']))
                    testT = polyInt.t[measIdx]
                    testY = np.array(polyInt.y)[measIdx]
                    polyInt.t = np.delete(polyInt.t, measIdx)
                    polyInt.y = np.delete(polyInt.y, measIdx)
                    for n in range(max_n):
                        coefs = polyInt.find_coefs(n)
                        yHat = polyInt.get_y_hat(coefs, precision=2)
                        yFit = np.interp(testT, grid, yHat)
                        e[mpointIdx, n] += np.abs(testY - yFit)
                e[mpointIdx, :] = e[mpointIdx, :] / float(frame.shape[0])
            np.savetxt(area_name + '.csv', e, delimiter=',')

    def show_plot(self, max_n=25):
        """Plot the mean normalised fit error against the polynomial order.

        Each measure point's error row is min-max normalised before the rows
        are averaged; skipped measure points contribute a row of zeros.

        :param max_n: number of polynomial orders evaluated (0 .. max_n-1)
        """
        area_name = self.get_area_name(self.rootNumber)
        mp_ids = self.get_mp_ids()
        mpLen = len(mp_ids)
        e = np.zeros([mpLen, max_n])
        polyInt = PolyInterpolation(precision=2)
        for idx, mpid in enumerate(mp_ids):
            print(str(idx) + "/" + str(mpLen))
            frame = self._mp_frame(mpid)
            if frame.shape[0] <= 1:
                continue
            frame = self._drop_outliers(frame)
            if frame.empty:
                continue
            polyInt.set_t(frame['x'])
            polyInt.set_y(frame['y'])
            coefs = polyInt.find_coefs(max_n)
            for n in range(max_n):
                e[idx, n] = polyInt.avg_dist_reg(coefs[0:n], precision=2)
            lowest = np.min(e[idx, :])
            span = np.max(e[idx, :]) - lowest
            if span == 0:
                # Constant row: avoid 0/0 turning the whole mean into NaN.
                e[idx, :] = 0.0
            else:
                e[idx, :] = (e[idx, :] - lowest) / span
        plt.plot(range(0, max_n), np.mean(e, axis=0))
        plt.title(area_name)
        plt.show()
class KB_plotter(object):
    """Interactive four-panel scatter viewer per measure point.

    The toolbar's forward/back arrows step through areas (rows of
    ``roots_df``); two extra toolbar items step through the measure points of
    the current area. All bindings are installed on the
    ``NavigationToolbar2`` class itself, so they affect every figure.

    NOTE(review): an earlier class in this file is also named ``KB_plotter``;
    this definition rebinds the name.

    :param xval: name of the column plotted on the x axis
    """

    def __init__(self, xval='date_float'):
        self.xval = xval
        self.rootNumber = 0
        self.mpNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self._load_area()

        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.forward_mp = self.next_button_mp
        NavigationToolbar2.back_mp = self.back_button_mp
        extra_items = (
            ('Back mp', 'Back to previous mp', 'back', 'back_mp'),
            ('Forward mp', 'Forward to next mp', 'forward', 'forward_mp'),
        )
        # Skip items already present so a second instance does not add a
        # duplicate pair of buttons.
        current_items = tuple(NavigationToolbar2.toolitems)
        NavigationToolbar2.toolitems = current_items + tuple(
            item for item in extra_items if item not in current_items)

    def _load_area(self):
        """Fetch the current area's data and select its first measure point."""
        root_id = self.get_root_id(self.rootNumber)
        self.area_df = self.stof.get_kb_oodi_dd(root_id)
        self.mpNumber = 0
        self._select_mp()

    def _select_mp(self):
        """Set ``meas_df`` to the rows of measure point ``mpNumber``."""
        mp_ids = self.get_mp_ids()
        # len() instead of truthiness: the ids come back as an array.
        if len(mp_ids) == 0:
            # Empty selection that still carries the area's columns.
            self.meas_df = self.area_df.iloc[0:0]
            return
        self.meas_df = self.area_df[
            self.area_df.measurepoint_id == mp_ids[self.mpNumber]]

    def next_button_area(self, *args, **kwargs):
        """Move to the next area and redraw; a no-op on the last area."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            self._load_area()
            self.show_plot()

    def back_button_area(self, *args, **kwargs):
        """Move to the previous area and redraw; a no-op on the first area."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self._load_area()
            self.show_plot()

    def next_button_mp(self, *args, **kwargs):
        """Move to the next measure point and redraw; a no-op on the last."""
        if self.mpNumber < len(self.get_mp_ids()) - 1:
            self.mpNumber += 1
            self._select_mp()
            self.show_plot()

    def back_button_mp(self, *args, **kwargs):
        """Move to the previous measure point and redraw; a no-op on the first."""
        if self.mpNumber > 0:
            self.mpNumber -= 1
            self._select_mp()
            self.show_plot()

    def get_root_id(self, index):
        """Return the ``id`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """Return the distinct ``measurepoint_id`` values of the loaded area."""
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """Return the ``area_name`` value of the ``roots_df`` row at ``index``."""
        return self.roots_df.iloc[index]['area_name']

    def show_plot(self):
        """Draw the 2x2 grid of panels for the current measure point."""
        plt.clf()
        mp_ids = self.get_mp_ids()
        if len(mp_ids) == 0:
            # The loaded area has no measure points at all.
            print('Not enough points')
            return
        print(self.meas_df['date_float'] - np.min(self.meas_df['date_float']))
        print(self.meas_df['mep_uit'])
        area_name = self.get_area_name(self.rootNumber)
        mp_name = str(mp_ids[self.mpNumber])
        plt.suptitle(area_name + " - " + mp_name)
        # (subplot location, title, y column)
        panels = (
            (221, "MEP-aan", 'mep_aan'),
            (222, "MEP-uit", 'mep_uit'),
            (223, "Diff", 'difference'),
            (224, "I-Flens", 'iflens'),
        )
        for loc, title, column in panels:
            self.sub_plot(loc, title, self.meas_df[[self.xval, column]],
                          True, True, False, True)
        plt.show()

    def sub_plot(self, loc, name, mdf, mean=False, std=False, sin=False,
                 lin=False):
        """Scatter one two-column frame with optional overlays.

        Rows whose y value lies outside mean +/- 2*std are dropped before
        plotting. The caller's frame is left untouched. A panel whose y
        column is entirely missing is skipped.

        :param loc: subplot location code passed to ``plt.subplot``
        :param name: panel title
        :param mdf: DataFrame whose first column is x and second column is y
        :param mean: draw a dashed line at the mean of y
        :param std: draw dotted lines at mean +/- one std of y
        :param sin: draw the curve returned by ``reg.fit_sin``
        :param lin: draw the prediction returned by ``reg.fit_lin``
        """
        # Work on a copy so renaming the columns never leaks to the caller.
        data = mdf.copy()
        data.columns = ['x', 'y']
        if data['y'].isnull().all():
            return
        m, s = stats.mean_std(data['y'])
        data = data[(data.y < m + 2 * s) & (data.y > m - 2 * s)]
        plt.subplot(loc)
        plt.title(name)
        if data.empty:
            # The strict 2-sigma filter removed every row (e.g. zero spread).
            return
        m, s = stats.mean_std(data['y'])
        plt.scatter(data['x'], data['y'])
        x_lo = np.min(data['x'])
        x_hi = np.max(data['x'])
        if mean:
            plt.plot([x_lo, x_hi], [m, m], 'g--')
        if std:
            plt.plot([x_lo, x_hi], [m + s, m + s], 'r:')
            plt.plot([x_lo, x_hi], [m - s, m - s], 'r:')
        if sin:
            ls = np.linspace(x_lo, x_hi, 100)
            sin_pred = reg.fit_sin(data['x'], data['y'])
            plt.plot(ls, sin_pred['fitfunc'](ls))
        if lin:
            # The linear overlay stays best-effort, but the failure is now
            # reported and KeyboardInterrupt/SystemExit are no longer eaten.
            try:
                pred = reg.fit_lin(data[['x']], data['y'])
            except Exception as err:
                print("Linear fit skipped for " + name + ": " + str(err))
            else:
                plt.plot(data['x'], pred)
class AreaModel(object):
    """Read access to areas and their measure points, with optional caching.

    :param save_areas: when True, every area DataFrame fetched through
                       :meth:`get_area_df` is kept in ``self.areas`` (keyed by
                       area id) and reused on later calls
    """

    def __init__(self, save_areas=True):
        self.save_areas = save_areas
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self.areas = {}

    def get_area_id(self, area_idx):
        """
        Gets the id of an area by its index in the roots DataFrame

        :param area_idx: integer position of the area
        :return: the value of the ``id`` column for that area
        """
        return self.roots_df.iloc[area_idx]['id']

    def get_area_df(self, area_idx):
        """
        Gets the area DataFrame by its index

        With ``save_areas`` enabled the frame is fetched once and served from
        ``self.areas`` afterwards; otherwise every call fetches it again.

        :param area_idx: index of the area
        :return: The area DataFrame
        :rtype: pandas.DataFrame
        """
        area_id = self.get_area_id(area_idx)
        if not self.save_areas:
            return self.stof.get_kb_oodi_dd(area_id)
        if area_id not in self.areas:
            self.areas[area_id] = self.stof.get_kb_oodi_dd(area_id)
        return self.areas[area_id]

    def get_mp_df(self, area_idx, mp_idx):
        """
        Gets the measure point DataFrame by its index

        :param area_idx: index of the area
        :param mp_idx: index of the measure point within the area's distinct
                       measure point ids
        :return: The rows of the area that belong to that measure point
        :rtype: pandas.DataFrame
        """
        area_df = self.get_area_df(area_idx)
        # Derive the ids from the frame already in hand; going through
        # get_mp_ids() would fetch the area a second time when caching is off.
        mp_id = area_df.measurepoint_id.unique()[mp_idx]
        return area_df[area_df.measurepoint_id == mp_id]

    def get_mp_ids(self, area_idx):
        """
        Gets the distinct measure point ids of the given area

        :param area_idx: index of the area
        :return: the unique values of the ``measurepoint_id`` column, in
                 order of first appearance
        """
        return self.get_area_df(area_idx).measurepoint_id.unique()

    def get_area_name(self, area_idx):
        """
        Gets the name of an area by its index

        :param area_idx: index of the area
        :return: the value of the ``area_name`` column for that area
        """
        return self.roots_df.iloc[area_idx]['area_name']

    def get_number_of_measurments(self, area_idx, mp_idx):
        """
        Gets the number of measurements in the measure point

        :param area_idx: index of the area
        :param mp_idx: index of the measure point
        :return: number of measurements
        :rtype: int
        """
        return self.get_mp_df(area_idx, mp_idx).shape[0]

    def get_number_of_mps(self, area_idx):
        """
        Gets the number of measure points in the area

        :param area_idx: index of the area
        :return: number of measure points
        :rtype: int
        """
        return self.get_mp_ids(area_idx).shape[0]

    def get_number_of_areas(self):
        """
        Gets the number of areas in the roots DataFrame

        :return: number of areas
        :rtype: int
        """
        return self.roots_df.shape[0]

    def prepare_meas_df(self, meas_df, xval='date_float', yval='mep_uit'):
        """
        Prepares a measure point DataFrame for fitting

        Keeps only the two requested columns (renamed to ``x`` and ``y``),
        drops rows with a missing value and then drops rows whose y lies
        outside mean +/- 2*std.

        :param meas_df: the measurement DataFrame
        :param xval: name of the x column
        :param yval: name of the y column
        :return: the cleaned DataFrame, or None when two or fewer rows remain
                 either before or after the outlier filter
        :rtype: pandas.DataFrame, None
        """
        frame = meas_df[[xval, yval]].dropna()
        frame.columns = ['x', 'y']
        if frame.shape[0] <= 2:
            return None
        m, s = Stats.mean_std(frame['y'])
        frame = frame[(frame.y < m + 2 * s) & (frame.y > m - 2 * s)]
        if frame.shape[0] <= 2:
            return None
        return frame