def test_standby(self):
    """Check the result timezone of ``standby`` and its empty-input error.

    ``og.analysis.standby`` is run on the 'elec_power_min_1sensor' dataset
    with ``'D'`` as second argument; the index of the result must be
    localized to 'Europe/Brussels'. Calling it with an empty DataFrame must
    raise ``EmptyDataFrameError``.
    """
    df = datasets.get('elec_power_min_1sensor')
    res = og.analysis.standby(df, 'D')
    self.assertEqual(res.index.tz.zone, 'Europe/Brussels')
    # Hand over an empty DataFrame *instance*; the previous version passed
    # the ``pd.DataFrame`` class object itself, which is not an empty frame.
    with self.assertRaises(EmptyDataFrameError):
        og.analysis.standby(pd.DataFrame())
def test_strange_names(self):
    """Run the regression when a column name is not a valid identifier.

    Column 'd5a7' of the month-start sums is renamed to '3*tempête !'
    before the model is built; after ``do_analysis()`` the model must
    expose ``list_of_fits``.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum()
    monthly = monthly.rename(columns={'d5a7': '3*tempête !'})
    model = og.MultiVarLinReg(monthly, '313b', p_max=0.04)
    model.do_analysis()
    has_fits = hasattr(model, 'list_of_fits')
    self.assertTrue(has_fits)
def test_plot(self):
    """Call ``plot()`` while ``subplots`` on ``plt_mocked`` is patched.

    The patch makes ``subplots`` return ``(fig_mock, ax_mock)``; the test
    passes when ``plot()`` completes without raising.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum()
    model = og.MultiVarLinReg(monthly, '313b', p_max=0.04)
    subplots_patch = mock.patch.object(
        plt_mocked, 'subplots', return_value=(fig_mock, ax_mock)
    )
    with subplots_patch:
        model.plot()
def test_plot(self):
    """Plot an analysed model with ``plt_mocked.subplots`` patched out.

    ``do_analysis()`` is run first; ``subplots`` is then replaced so that
    it returns ``(fig_mock, ax_mock)`` for the duration of ``plot()``.
    The test passes when ``plot()`` returns without raising.
    """
    hourly = datasets.get('gas_2016_hour')
    month_sums = hourly.resample('MS').sum()

    regression = og.MultiVarLinReg(month_sums, '313b', p_max=0.04)
    regression.do_analysis()

    fake_axes = (fig_mock, ax_mock)
    with mock.patch.object(plt_mocked, 'subplots', return_value=fake_axes):
        regression.plot()
def test_cross_validation(self):
    """Build a model with ``cross_validation=True`` and look for its fits.

    The model is constructed on month-start sums of 'gas_2016_hour' and
    must expose a ``list_of_fits`` attribute.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum()
    model = og.MultiVarLinReg(
        monthly,
        '313b',
        p_max=0.04,
        cross_validation=True,
    )
    has_fits = hasattr(model, 'list_of_fits')
    self.assertTrue(has_fits)
def test_standby_with_time_window(self):
    """Compare ``standby`` output against stored JSON for two time windows.

    ``og.analysis.standby`` is run on 'elec_power_min_1sensor' with ``'D'``
    as second argument, once with the window 01:00-06:00 and once with
    22:00-06:00. For each run the index timezone must be 'Europe/Brussels'
    and the squeezed result, serialized with ``to_json()``, must equal the
    recorded string.
    """
    df = datasets.get('elec_power_min_1sensor')

    # (time_window, expected JSON) pairs, checked in this order.
    cases = [
        (
            ('01:00', '06:00'),
            '{"1507327200000":61.739999936,"1507413600000":214.9799999222,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":109.5599999931,"1508536800000":144.3600001093,"1508623200000":52.7999997279}',
        ),
        (
            ('22:00', '06:00'),
            '{"1507327200000":61.739999936,"1507413600000":119.2800000636,"1507500000000":53.0399997951,"1507586400000":55.7399999164,"1507672800000":59.94000006,"1507759200000":69.4800002407,"1507845600000":56.8200000236,"1507932000000":54.1799997864,"1508018400000":54.779999801,"1508104800000":54.7199997772,"1508191200000":98.5199999576,"1508277600000":55.6799999066,"1508364000000":53.9399997052,"1508450400000":96.3000000408,"1508536800000":133.9200000744,"1508623200000":52.7999997279}',
        ),
    ]

    for window, expected_json in cases:
        res = og.analysis.standby(df, 'D', time_window=window)
        self.assertEqual(res.index.tz.zone, 'Europe/Brussels')
        self.assertEqual(res.squeeze().to_json(), expected_json)
def test_alternative_metrics(self):
    """Check that the three selection criteria pick the same fit.

    ``find_best_rsquared``, ``find_best_akaike`` and ``find_best_bic`` are
    each applied to ``list_of_fits``; the r-squared choice must equal both
    of the other two.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum()
    model = og.MultiVarLinReg(monthly, '313b', p_max=0.04)

    by_rsquared = model.find_best_rsquared(model.list_of_fits)
    by_akaike = model.find_best_akaike(model.list_of_fits)
    by_bic = model.find_best_bic(model.list_of_fits)

    # r-squared is the reference the other criteria are compared against.
    for other in (by_akaike, by_bic):
        self.assertEqual(by_rsquared, other)
def test_predict(self):
    """Check the columns that ``add_prediction()`` leaves on ``mvlr.df``.

    Column 'd5a7' is renamed to '3*tempête !' first. After
    ``add_prediction()`` the model's frame must hold the input columns
    followed by 'predicted', 'interval_l' and 'interval_u'.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum()
    monthly.rename(columns={'d5a7': '3*tempête !'}, inplace=True)
    model = og.MultiVarLinReg(monthly, '313b', p_max=0.04)
    model.add_prediction()

    # Read the input columns only after add_prediction(), as the original does.
    added = ['predicted', 'interval_l', 'interval_u']
    actual_columns = model.df.columns.tolist()
    expected_columns = monthly.columns.tolist() + added
    self.assertListEqual(actual_columns, expected_columns)
def test_alternative_metrics(self):
    """Assert that r-squared, Akaike and BIC agree on the best fit.

    Each ``find_best_*`` method receives ``list_of_fits``; the result of
    ``find_best_rsquared`` is compared for equality with the other two.
    """
    hourly = datasets.get('gas_2016_hour')
    month_sums = hourly.resample('MS').sum()
    regression = og.MultiVarLinReg(month_sums, '313b', p_max=0.04)

    reference = regression.find_best_rsquared(regression.list_of_fits)
    akaike_pick = regression.find_best_akaike(regression.list_of_fits)
    bic_pick = regression.find_best_bic(regression.list_of_fits)

    self.assertEqual(reference, akaike_pick)
    self.assertEqual(reference, bic_pick)
def test_raises(self):
    """Check that results are unavailable before ``do_analysis()`` is run.

    The model is constructed but ``do_analysis()`` is deliberately not
    called. Both ``add_prediction()`` and reading ``list_of_fits`` are then
    expected to raise ``UnboundLocalError``.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum()
    mvlr = og.MultiVarLinReg(df_month, '313b', p_max=0.04)

    with self.assertRaises(UnboundLocalError):
        mvlr.add_prediction()

    # The attribute access itself must raise; the value is never used.
    with self.assertRaises(UnboundLocalError):
        _ = mvlr.list_of_fits
def test_predict(self):
    """Verify the prediction columns appended by ``add_prediction()``.

    The frame has 'd5a7' renamed to '3*tempête !'. Once ``do_analysis()``
    and ``add_prediction()`` have run, ``mvlr.df`` must list the input
    columns followed by 'predicted', 'interval_l' and 'interval_u'.
    """
    hourly = datasets.get('gas_2016_hour')
    month_sums = hourly.resample('MS').sum()
    month_sums.rename(columns={'d5a7': '3*tempête !'}, inplace=True)

    regression = og.MultiVarLinReg(month_sums, '313b', p_max=0.04)
    regression.do_analysis()
    regression.add_prediction()

    # Read the input columns only after add_prediction(), as the original does.
    prediction_columns = ['predicted', 'interval_l', 'interval_u']
    self.assertListEqual(
        regression.df.columns.tolist(),
        month_sums.columns.tolist() + prediction_columns,
    )
def test_prune(self):
    """Create an overfitted model and prune it.

    The model is built without an explicit ``p_max``. The term
    ``Q('d5a7')`` must be present in the fit, must survive pruning at
    0.05 and must be removed when pruning at 0.0001.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum()
    mvlr = og.MultiVarLinReg(df_month, '313b')
    # assertIn/assertNotIn report the actual exog_names on failure.
    self.assertIn("Q('d5a7')", mvlr.fit.model.exog_names)
    pruned = mvlr._prune(mvlr.fit, 0.05)
    self.assertIn("Q('d5a7')", pruned.model.exog_names)
    pruned = mvlr._prune(mvlr.fit, 0.0001)
    self.assertNotIn("Q('d5a7')", pruned.model.exog_names)
def test_prune(self):
    """Create an overfitted model and prune it.

    With no explicit ``p_max`` the fit must contain the term
    ``Q('d5a7')``. ``_prune`` at 0.05 must keep that term, while
    ``_prune`` at 0.0001 must drop it.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum()
    mvlr = og.MultiVarLinReg(df_month, '313b')
    term = "Q('d5a7')"

    # Membership assertions show the offending name list when they fail.
    self.assertIn(term, mvlr.fit.model.exog_names)

    pruned = mvlr._prune(mvlr.fit, 0.05)
    self.assertIn(term, pruned.model.exog_names)

    pruned = mvlr._prune(mvlr.fit, 0.0001)
    self.assertNotIn(term, pruned.model.exog_names)
def test_pickle_round_trip(self):
    """Pickle, unpickle and check results.

    A model is analysed on every 2016 month except the last. The last
    month is predicted with the original model and with a copy restored
    from ``pickle``; both 'predicted' values for '2016-12-01' must be
    almost equal, and the restored copy must still expose
    ``list_of_fits``.
    """
    monthly = datasets.get('gas_2016_hour').resample('MS').sum().loc['2016', :]
    training = monthly.iloc[:-1, :]
    last_month = monthly.iloc[[-1], :]

    model = og.MultiVarLinReg(training, '313b', p_max=0.04)
    model.do_analysis()
    prediction_before = model._predict(model.fit, df=last_month)

    restored = pickle.loads(pickle.dumps(model))
    self.assertTrue(hasattr(restored, 'list_of_fits'))
    prediction_after = restored._predict(restored.fit, df=last_month)

    value_before = prediction_before.loc['2016-12-01', 'predicted']
    value_after = prediction_after.loc['2016-12-01', 'predicted']
    self.assertAlmostEqual(value_before, value_after)
def test_prediction(self):
    """Compare prediction intervals at two confidence levels.

    The model is built on every 2016 month except the last, which is then
    predicted twice: once with the model's initial ``confint`` (presumably
    0.95, going by the local names) and once after setting
    ``confint = 0.98``. The point prediction must be almost equal in both
    runs, and the 0.98 interval must be strictly wider on both sides.
    Finally, with ``allow_negative_predictions = False``, the 'predicted'
    column written by ``add_prediction()`` must not go below zero.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum().loc['2016', :]
    df_training = df_month.iloc[:-1, :]
    df_pred = df_month.iloc[[-1], :]
    mvlr = og.MultiVarLinReg(df_training, '313b', p_max=0.04)
    df_pred_95 = mvlr._predict(mvlr.fit, df=df_pred)
    mvlr.confint = 0.98
    df_pred_98 = mvlr._predict(mvlr.fit, df=df_pred)
    self.assertAlmostEqual(df_pred_95.loc['2016-12-01', 'predicted'],
                           df_pred_98.loc['2016-12-01', 'predicted'])
    # Dedicated comparison assertions print both operands on failure.
    self.assertGreater(df_pred_98.loc['2016-12-01', 'interval_u'],
                       df_pred_95.loc['2016-12-01', 'interval_u'])
    self.assertLess(df_pred_98.loc['2016-12-01', 'interval_l'],
                    df_pred_95.loc['2016-12-01', 'interval_l'])

    # check limitation to zero
    mvlr.allow_negative_predictions = False
    mvlr.add_prediction()
    self.assertGreaterEqual(mvlr.df['predicted'].min(), 0)
def test_prune(self):
    """Create an overfitted model and prune it.

    After ``do_analysis()`` with no explicit ``p_max`` the fit must contain
    'ba14', and pruning at 0.05 must keep it. Pruning at 0.00009 must
    remove both 'ba14' and 'd5a7'. A second model built directly with
    ``p_max=0.00009`` must end up without either variable as well.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum()
    mvlr = og.MultiVarLinReg(df_month, '313b')
    mvlr.do_analysis()
    # assertIn/assertNotIn report the actual exog_names on failure.
    self.assertIn("ba14", mvlr.fit.model.exog_names)
    pruned = mvlr._prune(mvlr.fit, 0.05)
    self.assertIn("ba14", pruned.model.exog_names)
    pruned = mvlr._prune(mvlr.fit, 0.00009)
    # with this value, both x will be removed, which is a bit
    # counter-intuitive because initially only ba14 has a pvalue > p_max.
    self.assertNotIn("ba14", pruned.model.exog_names)
    self.assertNotIn("d5a7", pruned.model.exog_names)

    mvlr = og.MultiVarLinReg(df_month, '313b', p_max=0.00009)
    mvlr.do_analysis()
    self.assertNotIn("ba14", mvlr.fit.model.exog_names)
    self.assertNotIn("d5a7", mvlr.fit.model.exog_names)
def test_prediction(self):
    """Compare prediction intervals at two confidence levels.

    The model is analysed on every 2016 month except the last, which is
    then predicted twice: once with the model's initial ``confint``
    (presumably 0.95, going by the local names) and once after setting
    ``confint = 0.98``. The point prediction must be almost equal in both
    runs, and the 0.98 interval must be strictly wider on both sides.
    Finally, with ``allow_negative_predictions = False``, the 'predicted'
    column written by ``add_prediction()`` must not go below zero.
    """
    df = datasets.get('gas_2016_hour')
    df_month = df.resample('MS').sum().loc['2016', :]
    df_training = df_month.iloc[:-1, :]
    df_pred = df_month.iloc[[-1], :]
    mvlr = og.MultiVarLinReg(df_training, '313b', p_max=0.04)
    mvlr.do_analysis()
    df_pred_95 = mvlr._predict(mvlr.fit, df=df_pred)
    mvlr.confint = 0.98
    df_pred_98 = mvlr._predict(mvlr.fit, df=df_pred)
    self.assertAlmostEqual(df_pred_95.loc['2016-12-01', 'predicted'],
                           df_pred_98.loc['2016-12-01', 'predicted'])
    # Dedicated comparison assertions print both operands on failure.
    self.assertGreater(df_pred_98.loc['2016-12-01', 'interval_u'],
                       df_pred_95.loc['2016-12-01', 'interval_u'])
    self.assertLess(df_pred_98.loc['2016-12-01', 'interval_l'],
                    df_pred_95.loc['2016-12-01', 'interval_l'])

    # check limitation to zero
    mvlr.allow_negative_predictions = False
    mvlr.add_prediction()
    self.assertGreaterEqual(mvlr.df['predicted'].min(), 0)
def test_standby(self):
    """Check the result timezone of ``standby`` and its empty-input error.

    ``og.analysis.standby`` is run on the 'elec_power_min_1sensor' dataset
    with ``'D'`` as second argument; the index of the result must be
    localized to 'Europe/Brussels'. Calling it with an empty DataFrame must
    raise ``EmptyDataFrame``.
    """
    df = datasets.get('elec_power_min_1sensor')
    res = og.analysis.standby(df, 'D')
    self.assertEqual(res.index.tz.zone, 'Europe/Brussels')
    # Hand over an empty DataFrame *instance*; the previous version passed
    # the ``pd.DataFrame`` class object itself, which is not an empty frame.
    with self.assertRaises(EmptyDataFrame):
        og.analysis.standby(pd.DataFrame())
def test_count_peaks(self):
    """Count peaks in the first 100 rows of column '313b'.

    ``og.analysis.count_peaks`` must return 13 for that slice of the
    'gas_dec2016_min' dataset.
    """
    first_hundred = datasets.get('gas_dec2016_min')['313b'].head(100)
    self.assertEqual(og.analysis.count_peaks(first_hundred), 13)
def test_init(self):
    """Construct a ``MultiVarLinReg`` and check it exposes ``list_of_fits``.

    The model is built on month-start sums of 'gas_2016_hour' with '313b'
    as second argument and ``p_max=0.04``.
    """
    monthly_totals = datasets.get('gas_2016_hour').resample('MS').sum()
    regression = og.MultiVarLinReg(monthly_totals, '313b', p_max=0.04)
    has_fits = hasattr(regression, 'list_of_fits')
    self.assertTrue(has_fits)
def test_count_peaks(self):
    """Check ``count_peaks`` on a 100-row slice of 'gas_dec2016_min'.

    The first 100 values of column '313b' are passed to
    ``og.analysis.count_peaks``; the expected count is 13.
    """
    minute_data = datasets.get('gas_dec2016_min')
    sample = minute_data['313b'].head(100)

    expected_peaks = 13
    self.assertEqual(og.analysis.count_peaks(sample), expected_peaks)
def test_cross_validation(self):
    """Run the analysis with ``cross_validation=True``.

    After ``do_analysis()`` the model must expose ``list_of_fits``.
    """
    hourly = datasets.get('gas_2016_hour')
    month_sums = hourly.resample('MS').sum()

    options = {'p_max': 0.04, 'cross_validation': True}
    regression = og.MultiVarLinReg(month_sums, '313b', **options)
    regression.do_analysis()

    self.assertTrue(hasattr(regression, 'list_of_fits'))
def test_init(self):
    """Build and analyse a model, then check for ``list_of_fits``.

    The model is built on month-start sums of 'gas_2016_hour' with '313b'
    as second argument and ``p_max=0.04``; ``do_analysis()`` is run before
    the attribute check.
    """
    hourly = datasets.get('gas_2016_hour')
    month_sums = hourly.resample('MS').sum()

    model = og.MultiVarLinReg(month_sums, '313b', p_max=0.04)
    model.do_analysis()

    fits_available = hasattr(model, 'list_of_fits')
    self.assertTrue(fits_available)