示例#1
0
class DatabaseTest(unittest.TestCase):
    """Tests for the CSV loading, DB writing and model fitting of ``Data``.

    Every test runs against a scratch ``Data("unittest")`` object whose test
    table is added in ``setUp`` and dropped again in ``tearDown``.
    """

    # Class-level engine/metadata handles; the tests below never reference
    # them directly.
    engine = create_engine("sqlite:///python_models.db", echo=True)
    meta = MetaData()

    def setUp(self):
        """Create a 3x3 sample frame and a fresh ``Data`` object with its table."""
        self.df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                               columns=['a', 'b', 'c'])
        self._data = Data("unittest")
        self._data.add_test_table('unittest')

    def test_csv_to_df(self):
        """``csv_to_df`` must leave a pandas DataFrame on ``Data.df``."""
        self.df.to_csv('./tests/unittest.csv')
        self._data.csv_to_df()
        # isinstance check instead of comparing type objects.
        self.assertIsInstance(self._data.df, pd.DataFrame)

    def test_csv_to_db(self):
        """``csv_to_db`` must report success with a truthy return value."""
        self.assertTrue(self._data.csv_to_db())

    def test_fit_model(self):
        """``fit_model`` rejects bad input and returns a ``Model`` on success."""
        # NOTE(review): presumably raised because 'real' is not a supported
        # fit type -- confirm against Data.fit_model.
        with self.assertRaises(RegressionException):
            self._data.fit_model(1, Data('ideal'), 'real')

        # Swap the fixture for a two-column x/y1 frame and reload it.
        self.df = pd.DataFrame(
            [[1.5555555, 2.55555555], [4.5555555, 5.5555555]],
            columns=['x', 'y1'])
        self.df.to_csv('./tests/unittest.csv')
        self._data.csv_to_df()

        # Passing no ideal data set must fail.
        with self.assertRaises(Exception):
            self._data.fit_model(1, None, 'linear')

        # test print_table arg
        # The handle is never read; opening the file makes the test error out
        # early with FileNotFoundError when the ideal data set is missing.
        with open('./datasets/ideal.csv', 'r'):
            ideal = Data('ideal', _create=False)
            ideal.csv_to_df()
            self.assertFalse(ideal.is_empty(), 'df obj should be populated')
            model = self._data.fit_model(1,
                                         ideal,
                                         'linear',
                                         print_table=True,
                                         table_name='./tests/unittest')
            # Check the type directly rather than building a throwaway Model
            # just to compare type objects.
            self.assertIsInstance(model, Model)
            self.assertEqual(model.x[0], self.df['x'][0])

    def tearDown(self):
        """Drop the scratch table; best effort if ``setUp`` did not finish."""
        try:
            self._data.drop_test_table()
        except AttributeError:
            print('table not dropped')
示例#2
0
class Interface:
    """
    Run the whole workflow from one constructor call: load the training,
    ideal and test data sets, fit a model for each of the training columns
    y1..y4, and match the test data against the selected ideal functions.

    All work happens in ``__init__``; the outcome is left on the instance
    (``models``, ``models_master_1`` .. ``models_master_3``, ``result``).
    """
    def __init__(self, **kwargs):
        """
        :keyword map_train: run Model functions entirely and
        plot matched ideal vs test data (default True; forwarded to
        ``Model.match_ideal_functions``)

        :keyword to_db: create SQLite db table for created Data obj
        (default True; passed to the training and ideal Data objects)

        :keyword create_tables: accepted for backward compatibility but
        currently not used anywhere in this class

        :keyword _n: dict with the keys 'y1', 'y2' and 'y4', each a list of
        polynomial orders; one entry per list is consumed per fitting round.
        The dict is passed to ``check_n_size`` first and is not modified here.

        :keyword continue_matching: when True (default) run the fitting
        rounds and the test-data matching; when False stop after the
        optional plotting steps

        :keyword plt_type: which type of fit algorithm to run, 'linear' or 'best fit'
        (default 'best fit')

        :keyword with_rmse: include rmse values in graph (default True)

        :keyword print_table: save stats from model comparisons as a .pdf table
        (default True)

        :keyword plot: plot data and save (default True)

        :keyword plot_training_subplots: display and save training data as subplots;
        only the presence of the key matters, its value is ignored

        :keyword compare_models: shortcut to just plot comparison of fitted models,
        values is dict of fitted model dicts with the keys 'm1', 'm2', 'm3'

        :keyword run_complete: if the key is present, the first element of
        the matching result is written via ``Data.df_to_db``; otherwise the
        whole result is stored in ``self.result``
        """

        to_db = kwargs.get('to_db', True)

        self.train = Data('training_data', to_db=to_db)
        self.ideal = Data('ideal_functions', to_db=to_db)
        self.test = Data('test_data')

        # csv_to_df is deliberately called twice, as in the original code:
        # once for the master frame that _fit extends and once for the graph.
        self.train_master = self.train.csv_to_df()
        self.train_graph = Graph("Training Data", df=self.train.csv_to_df())
        self.ideal_fn_dict = {'x': self.train.df['x']}

        self._n = kwargs.get('_n', {})
        self.plt_type = kwargs.get('plt_type', 'best fit')
        self.with_rmse = kwargs.get('with_rmse', True)
        self.print_table = kwargs.get('print_table', True)
        self.plot = kwargs.get('plot', True)

        map_train = kwargs.get('map_train', True)
        continue_matching = kwargs.get('continue_matching', True)

        self.models = dict()
        self.models_master_1 = dict()
        self.models_master_2 = dict()
        self.models_master_3 = dict()
        self.result = tuple()

        if 'compare_models' in kwargs:
            models = kwargs.get('compare_models')
            self.train_graph.make_subplots('Model Comparison',
                                           models={
                                               'm1': models['m1'],
                                               'm2': models['m2'],
                                               'm3': models['m3']
                                           })

        if 'plot_training_subplots' in kwargs:
            self.train_graph.make_subplots(self.train_graph.title)

        if continue_matching:

            check_n_size(self._n)
            # One fitting round per entry of _n['y1']; rounds are numbered
            # from 1. The caller's lists are left untouched.
            for idx, n in enumerate(self._iter_orders(self._n), start=1):
                self._fit(n, idx)

            self.ideal_fn_df = pd.DataFrame(data=self.ideal_fn_dict)
            self.ideal_fn_df = self.ideal_fn_df.set_index('x')

            self.test_df = self.test.csv_to_df()
            test_model = Model(self.test_df['x'],
                               self.test_df['y'],
                               1,
                               df=self.test_df)

            finals = test_model.match_ideal_functions(self.ideal_fn_df,
                                                      self.train_master,
                                                      self.models,
                                                      map_train=map_train)

            if 'run_complete' in kwargs:
                self.test.df_to_db(finals[0])
            else:
                self.result = finals

    @staticmethod
    def _iter_orders(orders):
        """
        Yield one ``{'y1': .., 'y2': .., 'y4': ..}`` dict per fitting round.

        Works on copies of the lists so the dict passed in by the caller is
        not emptied. As with popping from the originals, iteration stops when
        the 'y1' list is exhausted and an ``IndexError`` is raised if 'y2' or
        'y4' runs out first.

        :param orders: dict with the keys 'y1', 'y2' and 'y4', each a list
        :raises KeyError: if one of the three keys is missing
        """
        queues = {col: list(orders[col]) for col in ('y1', 'y2', 'y4')}
        while queues['y1']:
            yield {col: queue.pop(0) for col, queue in queues.items()}

    def _fit(self, n, idx):
        """
        Fit the training columns y1..y4 for one round and record the results.

        y3 is fitted with 'linear'; the other columns use 'poly.fit' with the
        order taken from ``n``. Every model is stored in ``self.models``
        (overwriting the previous round) and its ideal column, max deviation
        and the model itself are written into ``self.train_master``.

        :param n: dict with the orders for 'y1', 'y2' and 'y4'
        :param idx: 1-based round number; rounds 1-3 are additionally kept in
        ``models_master_<idx>`` under the key ``'m<idx>'``, later rounds are not
        """

        _m = f'm{idx}'
        new_models = dict()

        for i in range(1, 5):

            col = f'y{i}'
            _if, _max, _bf = f'y{i}_if', f'y{i}_max_err', f'y{i}_best_fit'

            if i != 3:
                model = self.train.fit_model(i,
                                             self.ideal,
                                             'poly.fit',
                                             order=n[col],
                                             print_table=self.print_table)
            else:
                model = self.train.fit_model(i,
                                             self.ideal,
                                             'linear',
                                             print_table=self.print_table)

            new_models[col] = model
            self.models[col] = model

            self.ideal_fn_dict[model.ideal_col] = model.ideal_col_array
            self.train_master[_if] = model.ideal_col
            self.train_master[_max] = model.max_dev
            # NOTE(review): the model object itself is assigned to the
            # column -- confirm this is what downstream matching expects.
            self.train_master[_bf] = model

            if self.plot:
                self.train_graph.plot_model(model,
                                            plt_type=self.plt_type,
                                            with_rmse=self.with_rmse)

        # Only the first three rounds have a dedicated master dict.
        if idx == 1:
            self.models_master_1[_m] = new_models
        elif idx == 2:
            self.models_master_2[_m] = new_models
        elif idx == 3:
            self.models_master_3[_m] = new_models