Exemplo n.º 1
0
class KB_plotter(object):
    """
    Browses the KB areas one at a time and plots four quantities of the
    selected area (``mep_aan``, ``mep_uit``, ``difference``, ``iflens``)
    against the ``xval`` column in a 2x2 grid of sub plots.
    """

    def __init__(self,xval='date_float'):
        """
        :param xval: name of the column used as x axis in every sub plot
        """
        self.xval = xval
        # Positional index (for ``iloc``) of the area currently shown.
        self.rootNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        # The handlers are assigned on the NavigationToolbar2 class itself,
        # so they replace forward/back for every toolbar instance.
        NavigationToolbar2.forward = self.next_button
        NavigationToolbar2.back = self.back_button

    def next_button(self, *args, **kwargs):
        """Toolbar callback: step to the next area (if any) and redraw."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
        self.show_plot()

    def back_button(self, *args, **kwargs):
        """Toolbar callback: step to the previous area (if any) and redraw."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
        self.show_plot()

    def get_root_id(self,index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``id`` column of that row
        """
        return self.roots_df.iloc[index]['id']

    def get_area_name(self,index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``area_name`` column of that row
        """
        return self.roots_df.iloc[index]['area_name']

    def show_plot(self):
        """Clear the current figure and draw the four sub plots of the current area."""
        plt.clf()
        root_id = self.get_root_id(self.rootNumber)
        area_name = self.get_area_name(self.rootNumber)
        mdf = self.stof.get_kb_oodi_dd(root_id)
        # Single-argument call form: identical output on Python 2, valid on Python 3.
        print(mdf)
        plt.suptitle(area_name)
        self.sub_plot(221, "MEP-aan",mdf[[self.xval,'mep_aan']],True,True,True,True)
        self.sub_plot(222, "MEP-uit", mdf[[self.xval,'mep_uit']],True,True,True,True)
        self.sub_plot(223, "Diff", mdf[[self.xval,'difference']],True,True,True,True)
        self.sub_plot(224, "I-Flens", mdf[[self.xval,'iflens']],True,True,False,True)
        plt.show()

    def sub_plot(self,loc, name, mdf,mean=False,std=False,sin=False,lin=False):
        """
        Scatter one two-column frame into a sub plot, with optional overlays.

        Rows whose y value is not strictly within ``m +/- 2 * s`` are dropped
        first, where ``(m, s)`` comes from ``stats.mean_std`` (presumably the
        mean and standard deviation); ``(m, s)`` is then recomputed on the
        remaining rows.

        :param loc: sub plot location passed to ``plt.subplot`` (e.g. 221)
        :param name: title of the sub plot
        :param mdf: frame with exactly two columns, x first and y second
        :param mean: draw a dashed green line at ``m``
        :param std: draw dotted red lines at ``m + s`` and ``m - s``
        :param sin: draw the curve returned by ``reg.fit_sin``
        :param lin: draw the values returned by ``reg.fit_lin``
        """
        # Work on a copy so the caller's frame keeps its own column names.
        mdf = mdf.copy()
        mdf.columns = ['x', 'y']
        (m,s) = stats.mean_std(mdf['y'])
        mdf = mdf[(mdf.y < m + 2 * s) & (mdf.y > m - 2 * s)]
        (m, s) = stats.mean_std(mdf['y'])
        plt.subplot(loc)
        plt.title(name)
        plt.scatter(mdf['x'],mdf['y'])
        if mdf.empty:
            # Nothing survived the trimming: np.min/np.max below would raise
            # on an empty column, so leave the (empty) sub plot as it is.
            return

        x_span = [np.min(mdf['x']), np.max(mdf['x'])]
        if mean:
            plt.plot(x_span,[m,m],'g--')
        if std:
            plt.plot(x_span,[m + s,m + s],'r:')
            plt.plot(x_span,[m - s, m - s],'r:')

        if sin:
            ls = np.linspace(x_span[0], x_span[1], 100)
            sin_pred = reg.fit_sin(mdf['x'], mdf['y'])
            plt.plot(ls,sin_pred['fitfunc'](ls))
        if lin:
            plt.plot(mdf['x'], reg.fit_lin(mdf[['x']], mdf['y']))
class CoefsPlotter(object):
    """
    Browses areas and, within an area, measure points; for the selected
    measure point it plots the measurements together with the curve obtained
    from ``PolyInterpolation``.

    The toolbar's forward/back buttons step through areas; two extra toolbar
    items ("Back mp" / "Forward mp") step through measure points.
    """

    def __init__(self, xval='date_float', yval='mep_uit'):
        """
        :param xval: name of the column used as x value
        :param yval: name of the column used as y value
        """
        self.xval = xval
        self.yval = yval
        # Positional indexes of the current area and measure point.
        self.rootNumber = 0
        self.mpNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self.polyInt = PolyInterpolation(precision=3)
        # One DB fetch for the first area (the frame used to be fetched twice).
        self._load_area()
        # Assigned on the NavigationToolbar2 class itself, so the handlers
        # apply to every toolbar instance.
        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.forward_mp = self.next_button_mp
        NavigationToolbar2.back_mp = self.back_button_mp
        extra_items = (
            ('Back mp', 'Back to  previous mp', 'back', 'back_mp'),
            ('Forward mp', 'Forward to next mp', 'forward', 'forward_mp'),
        )
        # toolitems lives on the class; only append what is not there yet so
        # a second CoefsPlotter does not duplicate the buttons.
        NavigationToolbar2.toolitems = NavigationToolbar2.toolitems + tuple(
            item for item in extra_items
            if item not in NavigationToolbar2.toolitems)
        plt.figure()
        plt.show()

    def _load_area(self):
        """Fetch the frame of the current area and select its first measure point."""
        root_id = self.get_root_id(self.rootNumber)
        self.area_df = self.stof.get_kb_oodi_dd(root_id)
        self.mpNumber = 0
        self._select_mp()

    def _select_mp(self):
        """Set ``meas_df`` to the prepared rows of the current measure point (or None)."""
        mp_ids = self.get_mp_ids()
        if len(mp_ids) == 0:
            # Area without measure points: show_plot reports it as not plottable.
            self.meas_df = None
            return
        rows = self.area_df[self.area_df.measurepoint_id == mp_ids[self.mpNumber]]
        self.meas_df = self.prepare_meas_df(rows)

    def next_button_area(self, *args, **kwargs):
        """Toolbar callback: go to the next area (if any) and redraw."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            self._load_area()
        self.show_plot()

    def back_button_area(self, *args, **kwargs):
        """Toolbar callback: go to the previous area (if any) and redraw."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self._load_area()
        self.show_plot()

    def next_button_mp(self, *args, **kwargs):
        """Toolbar callback: go to the next measure point (if any) and redraw."""
        if self.mpNumber < self.get_number_of_mps() - 1:
            self.mpNumber += 1
            self._select_mp()
        self.show_plot()

    def back_button_mp(self, *args, **kwargs):
        """Toolbar callback: go to the previous measure point (if any) and redraw."""
        if self.mpNumber > 0:
            self.mpNumber -= 1
            self._select_mp()
        self.show_plot()

    def get_root_id(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``id`` column of that row
        """
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """
        :return: the distinct ``measurepoint_id`` values of the current area frame
        """
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``area_name`` column of that row
        """
        return self.roots_df.iloc[index]['area_name']

    def get_number_of_mps(self):
        """
        :return: number of distinct measure points in the current area
        """
        return self.get_mp_ids().shape[0]

    def get_number_of_areas(self):
        """
        :return: number of rows in ``roots_df``
        """
        return self.roots_df.shape[0]

    def prepare_meas_df(self, meas_df):
        """
        Reduce a measure point frame to its x/y columns and trim outliers.

        Rows with a missing x or y are dropped, then rows whose y is not
        strictly within ``m +/- 2 * s`` (``m, s`` from ``stats.mean_std``).

        :param meas_df: frame containing the ``xval`` and ``yval`` columns
        :return: frame with columns ``x`` and ``y``, or None when 2 rows or
                 fewer remain before or after the trimming
        """
        meas_df = meas_df[[self.xval, self.yval]].dropna()
        meas_df.columns = ['x', 'y']
        if meas_df.shape[0] <= 2:
            return None
        (m, s) = stats.mean_std(meas_df['y'])
        meas_df = meas_df[(meas_df.y < m + 2 * s) & (meas_df.y > m - 2 * s)]
        if meas_df.shape[0] <= 2:
            return None
        return meas_df

    def show_plot(self):
        """Clear the figure and plot the current measure point with its fitted curve."""
        plt.clf()
        if self.meas_df is None:
            # Single-argument call form: same output on Python 2, valid on Python 3.
            print('Not enough points')
            return
        area_name = self.get_area_name(self.rootNumber)
        self.polyInt.set_t(np.array(self.meas_df['x']))
        self.polyInt.set_y(np.array(self.meas_df['y']))
        coefs = self.polyInt.find_coefs(10)
        # Evaluation grid for the fitted curve, built once and reused for plotting.
        tHat = np.arange(-1, 1, 0.04)
        yHat = self.polyInt.get_y_hat_for_range(coefs, tHat)
        # Plot the measurements as read back from the interpolator, ordered by t
        # so the line does not zig-zag.
        pairs = sorted(zip(self.polyInt.t, self.polyInt.y), key=lambda pair: pair[0])
        t_sorted, y_sorted = zip(*pairs)
        plt.plot(t_sorted, y_sorted)
        plt.plot(tHat, yHat)
        area_title = area_name + ": " + str(self.rootNumber + 1) + "/" + str(self.get_number_of_areas())
        mp_title = "mp: " + str(self.mpNumber + 1) + "/" + str(self.get_number_of_mps())
        plt.title(area_title + " " + mp_title)
        plt.draw()
class KB_plotter_avg_n(object):
    """
    Computes, per area, an error value for every measure point and every
    ``n`` passed to ``PolyInterpolation.find_coefs``; the errors are either
    written to ``<area_name>.csv`` or averaged over measure points and plotted.

    Toolbar forward/back only switch the current area; the home button
    triggers the plot.
    """

    def __init__(self, xval='date_float', yval='mep_uit'):
        """
        :param xval: name of the column used as x value
        :param yval: name of the column used as y value
        """
        self.xval = xval
        self.yval = yval
        # Positional index of the area currently selected.
        self.rootNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        root_id = self.get_root_id(self.rootNumber)
        self.area_df = self.stof.get_kb_oodi_dd(root_id)
        # Assigned on the NavigationToolbar2 class itself, so the handlers
        # apply to every toolbar instance.
        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.home = self.home_button

    def next_button_area(self, *args, **kwargs):
        """Toolbar callback: select the next area (if any); does not redraw."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            root_id = self.get_root_id(self.rootNumber)
            self.area_df = self.stof.get_kb_oodi_dd(root_id)

    def back_button_area(self, *args, **kwargs):
        """Toolbar callback: select the previous area (if any); does not redraw."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            root_id = self.get_root_id(self.rootNumber)
            self.area_df = self.stof.get_kb_oodi_dd(root_id)

    def home_button(self, *args, **kwargs):
        """Toolbar callback: plot the currently selected area."""
        self.show_plot()

    def get_root_id(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``id`` column of that row
        """
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """
        :return: the distinct ``measurepoint_id`` values of the current area frame
        """
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``area_name`` column of that row
        """
        return self.roots_df.iloc[index]['area_name']

    def _xy_frame(self, mpid):
        """Rows of one measure point reduced to columns ``x``/``y``, NaN rows dropped."""
        meas_df = self.area_df[self.area_df.measurepoint_id == mpid]
        # Uses the configured yval (default 'mep_uit') instead of a literal.
        meas_df = meas_df[[self.xval, self.yval]].dropna()
        meas_df.columns = ['x', 'y']
        return meas_df

    @staticmethod
    def _trim_outliers(meas_df):
        """Keep rows whose y is strictly within ``m +/- 2 * s`` (from ``stats.mean_std``)."""
        (m, s) = stats.mean_std(meas_df['y'])
        return meas_df[(meas_df.y < m + 2 * s) & (meas_df.y > m - 2 * s)]

    def save_errors(self, max_n=50):
        """
        For every area write ``<area_name>.csv`` with one row per measure
        point and one column per ``n`` in ``range(max_n)``, holding the value
        of ``avg_dist_reg`` for the coefficients found with that ``n``.

        Measure points with one row or fewer keep an all-zero row.

        :param max_n: number of ``n`` values to evaluate
        """
        n_areas = self.roots_df.shape[0]
        for rootIdx in range(n_areas):
            print("area: " + str(rootIdx) + "/" + str(n_areas))
            rootId = self.get_root_id(rootIdx)
            self.area_df = self.stof.get_kb_oodi_dd(rootId)
            area_name = self.get_area_name(rootIdx)
            mp_ids = self.get_mp_ids()
            e = np.zeros([len(mp_ids), max_n])
            polyInt = PolyInterpolation(precision=2)
            for idx, mpid in enumerate(mp_ids):
                meas_df = self._xy_frame(mpid)
                if meas_df.shape[0] <= 1:
                    continue
                meas_df = self._trim_outliers(meas_df)
                polyInt.set_t(meas_df['x'])
                polyInt.set_y(meas_df['y'])
                for n in range(max_n):
                    coefs = polyInt.find_coefs(n)
                    e[idx, n] = polyInt.avg_dist_reg(coefs[0:n], precision=2)
            np.savetxt(area_name + '.csv', e, delimiter=',')

    def save_errors_LOOCV(self, start_idx=20, max_n=50):
        """
        Leave-one-out variant of :meth:`save_errors`: for every measurement
        of a measure point the curve is fitted without that measurement and
        the absolute difference between the held-out y and the curve
        (linearly interpolated at the held-out t) is accumulated, then
        divided by the number of measurements.

        Writes ``<area_name>.csv`` per area. Measure points with 2 rows or
        fewer (before or after outlier trimming) are reported and keep an
        all-zero row.

        :param start_idx: positional index of the first area to process
                          (the default keeps the previous hard-coded start)
        :param max_n: number of ``n`` values to evaluate
        """
        polyInt = PolyInterpolation(precision=2)
        # Grid on which get_y_hat(precision=2) is taken to be sampled;
        # invariant, so built once instead of inside the innermost loop.
        p = float(10.**-2)
        grid = np.arange(-1.0, 1. + p, p)
        n_areas = self.roots_df.shape[0]
        for rootIdx in range(start_idx, n_areas):
            # Blank separator line (same output as a bare Python 2 ``print``).
            print("")
            rootId = self.get_root_id(rootIdx)
            self.area_df = self.stof.get_kb_oodi_dd(rootId)
            area_name = self.get_area_name(rootIdx)
            mp_ids = self.get_mp_ids()
            mpLen = len(mp_ids)
            e = np.zeros([mpLen, max_n])
            for mpointIdx, mpid in enumerate(mp_ids):
                progress = (area_name + ": " + str(rootIdx + 1) + "/" +
                            str(n_areas) + ", mp: " + str(mpointIdx + 1) +
                            "/" + str(mpLen))
                omitted = progress + " - ommited, has too little measurements"
                meas_df = self._xy_frame(mpid)
                if meas_df.shape[0] <= 2:
                    print(omitted)
                    continue
                meas_df = self._trim_outliers(meas_df)
                if meas_df.shape[0] <= 2:
                    print(omitted)
                    continue
                print(progress)
                for measIdx in range(meas_df.shape[0]):
                    # Reload the full series each round, then hold out one point.
                    polyInt.set_t(np.array(meas_df['x']))
                    polyInt.set_y(np.array(meas_df['y']))
                    testT = polyInt.t[measIdx]
                    testY = np.array(polyInt.y)[measIdx]
                    polyInt.t = np.delete(polyInt.t, measIdx)
                    polyInt.y = np.delete(polyInt.y, measIdx)
                    for n in range(max_n):
                        coefs = polyInt.find_coefs(n)
                        yHat = polyInt.get_y_hat(coefs, precision=2)
                        yFit = np.interp(testT, grid, yHat)
                        e[mpointIdx, n] += np.abs(testY - yFit)
                e[mpointIdx, :] = e[mpointIdx, :] / float(meas_df.shape[0])
            np.savetxt(area_name + '.csv', e, delimiter=',')

    def show_plot(self, max_n=25):
        """
        Plot, for the current area, the per-``n`` error averaged over all
        measure points, each measure point's errors being min-max scaled to
        ``[0, 1]`` first.

        :param max_n: number of coefficient counts to evaluate
        """
        area_name = self.get_area_name(self.rootNumber)
        mp_ids = self.get_mp_ids()
        mpLen = len(mp_ids)
        e = np.zeros([mpLen, max_n])
        polyInt = PolyInterpolation(precision=2)
        for idx, mpid in enumerate(mp_ids):
            print(str(idx) + "/" + str(mpLen))
            meas_df = self._xy_frame(mpid)
            if meas_df.shape[0] <= 1:
                # NOTE(review): the skipped row stays all-zero and still
                # enters the mean below - confirm this is intended.
                continue
            meas_df = self._trim_outliers(meas_df)
            polyInt.set_t(meas_df['x'])
            polyInt.set_y(meas_df['y'])

            coefs = polyInt.find_coefs(max_n)
            for n in range(max_n):
                e[idx, n] = polyInt.avg_dist_reg(coefs[0:n], precision=2)
            # NOTE(review): yields NaN when all errors of a row are equal
            # (max == min); left unchanged.
            row_min = np.min(e[idx, :])
            row_max = np.max(e[idx, :])
            e[idx, :] = (e[idx, :] - row_min) / (row_max - row_min)
        plt.plot(range(0, max_n), np.mean(e, axis=0))
        plt.title(area_name)
        plt.show()
Exemplo n.º 4
0
class KB_plotter(object):
    """
    Browses areas and, within an area, measure points; for the selected
    measure point four quantities (``mep_aan``, ``mep_uit``, ``difference``,
    ``iflens``) are plotted against ``xval`` in a 2x2 grid of sub plots.

    The toolbar's forward/back buttons step through areas; two extra toolbar
    items ("Back mp" / "Forward mp") step through measure points.
    """

    def __init__(self, xval='date_float'):
        """
        :param xval: name of the column used as x axis in every sub plot
        """
        self.xval = xval
        # Positional indexes of the current area and measure point.
        self.rootNumber = 0
        self.mpNumber = 0
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        self._load_area()
        # Assigned on the NavigationToolbar2 class itself, so the handlers
        # apply to every toolbar instance.
        NavigationToolbar2.forward = self.next_button_area
        NavigationToolbar2.back = self.back_button_area
        NavigationToolbar2.forward_mp = self.next_button_mp
        NavigationToolbar2.back_mp = self.back_button_mp
        NavigationToolbar2.toolitems = NavigationToolbar2.toolitems + (
            ('Back mp', 'Back to  previous mp', 'back', 'back_mp'),
            ('Forward mp', 'Forward to next mp', 'forward', 'forward_mp'))

    def _load_area(self):
        """Fetch the frame of the current area and select its first measure point."""
        root_id = self.get_root_id(self.rootNumber)
        self.area_df = self.stof.get_kb_oodi_dd(root_id)
        self.mpNumber = 0
        self._select_mp()

    def _select_mp(self):
        """Set ``meas_df`` to the rows of the current measure point."""
        mpid = self.get_mp_ids()[self.mpNumber]
        self.meas_df = self.area_df[self.area_df.measurepoint_id == mpid]

    def next_button_area(self, *args, **kwargs):
        """Toolbar callback: go to the next area (if any) and redraw."""
        if self.rootNumber < self.roots_df.shape[0] - 1:
            self.rootNumber += 1
            self._load_area()
        self.show_plot()

    def back_button_area(self, *args, **kwargs):
        """Toolbar callback: go to the previous area (if any) and redraw."""
        if self.rootNumber > 0:
            self.rootNumber -= 1
            self._load_area()
        self.show_plot()

    def next_button_mp(self, *args, **kwargs):
        """Toolbar callback: go to the next measure point (if any) and redraw."""
        if self.mpNumber < self.get_mp_ids().shape[0] - 1:
            self.mpNumber += 1
            self._select_mp()
        self.show_plot()

    def back_button_mp(self, *args, **kwargs):
        """Toolbar callback: go to the previous measure point (if any) and redraw."""
        if self.mpNumber > 0:
            self.mpNumber -= 1
            self._select_mp()
        self.show_plot()

    def get_root_id(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``id`` column of that row
        """
        return self.roots_df.iloc[index]['id']

    def get_mp_ids(self):
        """
        :return: the distinct ``measurepoint_id`` values of the current area frame
        """
        return self.area_df.measurepoint_id.unique()

    def get_area_name(self, index):
        """
        :param index: positional index of the area in ``roots_df``
        :return: value of the ``area_name`` column of that row
        """
        return self.roots_df.iloc[index]['area_name']

    def show_plot(self):
        """Clear the figure and draw the four sub plots of the current measure point."""
        plt.clf()
        # Debug output; single-argument call form prints the same on Python 2
        # and is valid on Python 3.
        print(self.meas_df['date_float'] - np.min(self.meas_df['date_float']))
        print(self.meas_df['mep_uit'])
        area_name = self.get_area_name(self.rootNumber)
        mp_name = str(self.get_mp_ids()[self.mpNumber])
        plt.suptitle(area_name + " - " + mp_name)
        self.sub_plot(221, "MEP-aan", self.meas_df[[self.xval, 'mep_aan']],
                      True, True, False, True)
        self.sub_plot(222, "MEP-uit", self.meas_df[[self.xval, 'mep_uit']],
                      True, True, False, True)
        self.sub_plot(223, "Diff", self.meas_df[[self.xval, 'difference']],
                      True, True, False, True)
        self.sub_plot(224, "I-Flens", self.meas_df[[self.xval, 'iflens']],
                      True, True, False, True)
        plt.show()

    def sub_plot(self,
                 loc,
                 name,
                 mdf,
                 mean=False,
                 std=False,
                 sin=False,
                 lin=False):
        """
        Scatter one two-column frame into a sub plot, with optional overlays.

        Nothing is drawn when every y value is missing. Otherwise rows whose
        y is not strictly within ``m +/- 2 * s`` are dropped, where
        ``(m, s)`` comes from ``stats.mean_std`` (presumably the mean and
        standard deviation); ``(m, s)`` is then recomputed on the rest.

        :param loc: sub plot location passed to ``plt.subplot`` (e.g. 221)
        :param name: title of the sub plot
        :param mdf: frame with exactly two columns, x first and y second
        :param mean: draw a dashed green line at ``m``
        :param std: draw dotted red lines at ``m + s`` and ``m - s``
        :param sin: draw the curve returned by ``reg.fit_sin``
        :param lin: draw the values returned by ``reg.fit_lin``; a failing
                    fit is skipped silently
        """
        # Work on a copy so the caller's frame keeps its own column names.
        mdf = mdf.copy()
        mdf.columns = ['x', 'y']
        if mdf['y'].isnull().all():
            return
        (m, s) = stats.mean_std(mdf['y'])
        mdf = mdf[(mdf.y < m + 2 * s) & (mdf.y > m - 2 * s)]
        (m, s) = stats.mean_std(mdf['y'])
        plt.subplot(loc)
        plt.title(name)
        plt.scatter(mdf['x'], mdf['y'])
        if mdf.empty:
            # Nothing survived the trimming: np.min/np.max below would raise
            # on an empty column, so leave the (empty) sub plot as it is.
            return

        x_span = [np.min(mdf['x']), np.max(mdf['x'])]
        if mean:
            plt.plot(x_span, [m, m], 'g--')
        if std:
            plt.plot(x_span, [m + s, m + s], 'r:')
            plt.plot(x_span, [m - s, m - s], 'r:')

        if sin:
            ls = np.linspace(x_span[0], x_span[1], 100)
            sin_pred = reg.fit_sin(mdf['x'], mdf['y'])
            plt.plot(ls, sin_pred['fitfunc'](ls))
        if lin:
            try:
                pred = reg.fit_lin(mdf[['x']], mdf['y'])
                plt.plot(mdf['x'], pred)
            except Exception:
                # Best effort: skip the linear overlay when the fit fails, but
                # let KeyboardInterrupt/SystemExit propagate.
                return
Exemplo n.º 5
0
class AreaModel(object):
    """
    Read access to areas and their measure points, addressed by positional
    index, with optional in-memory caching of the per-area DataFrames.
    """

    def __init__(self, save_areas=True):
        """
        :param save_areas: when True, every area DataFrame fetched from the
                           database is kept in ``self.areas`` and reused
        """
        self.save_areas = save_areas
        self.stof = DB()
        self.roots_df = self.stof.get_kb_roots()
        # Cache of area DataFrames keyed by area id (only used if save_areas).
        self.areas = dict()

    def get_area_id(self, area_idx):
        """
        Gets the id of an area by its index in the dataframe
        :param area_idx: integer of the index
        :return: the area id
        :rtype: int
        """
        return self.roots_df.iloc[area_idx]['id']

    def get_area_df(self, area_idx):
        """
        Gets the area DataFrame by its index, from the cache when caching is
        enabled and the area was fetched before
        :param area_idx: index of the area
        :return: The area DataFrame
        :rtype: pandas.DataFrame
        """
        area_id = self.get_area_id(area_idx)
        if not self.save_areas:
            return self.stof.get_kb_oodi_dd(area_id)
        if area_id not in self.areas:
            self.areas[area_id] = self.stof.get_kb_oodi_dd(area_id)
        return self.areas[area_id]

    def get_mp_df(self, area_idx, mp_idx):
        """
        Gets the measurepoint Dataframe by its index
        :param area_idx: index of the area
        :param mp_idx: index of the measure point
        :return: The measure point DataFrame
        :rtype: pandas.DataFrame
        """
        # Resolve the area frame once and derive the ids from it; going
        # through get_mp_ids() would fetch the area a second time when
        # caching is disabled.
        area_df = self.get_area_df(area_idx)
        mp_id = area_df.measurepoint_id.unique()[mp_idx]
        return area_df[area_df.measurepoint_id == mp_id]

    def get_mp_ids(self, area_idx):
        """
        Get the distinct measure point ids of the given area
        :param area_idx: index of the area
        :return: The unique values of the ``measurepoint_id`` column
        :rtype: array-like as returned by ``pandas.Series.unique``
        """
        return self.get_area_df(area_idx).measurepoint_id.unique()

    def get_area_name(self, area_idx):
        """
        gets the name of an area by its index
        :param area_idx: index of the area
        :return: The name as a string of the area
        :rtype: str
        """
        return self.roots_df.iloc[area_idx]['area_name']

    def get_number_of_measurments(self, area_idx, mp_idx):
        """
        Gets the number of measurements in the measure point
        :param area_idx: index of the area
        :param mp_idx: index of the measure point
        :return: number of measurements
        :rtype: int
        """
        return self.get_mp_df(area_idx, mp_idx).shape[0]

    def get_number_of_mps(self, area_idx):
        """
        Gets the number of measure points in the area
        :param area_idx: index of the area
        :return: number of measure points
        :rtype: int
        """
        return self.get_mp_ids(area_idx).shape[0]

    def get_number_of_areas(self):
        """
        Gets the number of areas in the database
        :return: number of areas
        :rtype: int
        """
        return self.roots_df.shape[0]

    def prepare_meas_df(self, meas_df, xval='date_float', yval='mep_uit'):
        """
        Prepares the measure point DataFrame: keeps only the x and y columns
        (renamed to ``x`` and ``y``), drops rows with missing values and
        removes rows whose y is not strictly within ``m +/- 2 * s``, where
        ``(m, s)`` is the result of ``Stats.mean_std``
        :param meas_df: the measurement DataFrame
        :param xval: name of the x value
        :param yval: name of the y value
        :return: the prepared DataFrame, or None if 2 rows or fewer remain
                 before or after the outlier removal
        :rtype: pandas.DataFrame, None
        """
        meas_df = meas_df[[xval, yval]].dropna()
        meas_df.columns = ['x', 'y']
        if meas_df.shape[0] <= 2:
            return None
        (m, s) = Stats.mean_std(meas_df['y'])
        meas_df = meas_df[(meas_df.y < m + 2 * s) & (meas_df.y > m - 2 * s)]
        if meas_df.shape[0] <= 2:
            return None
        return meas_df