示例#1
0
 def test_fit_changepoint_not_in_history(self):
     """Fit and predict with a user-specified changepoint whose date is absent from the training data."""
     # Keep only rows before 2013-01-01 or after 2014-01-01, so the
     # changepoint date '2013-06-06' is not among the training dates.
     train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')]
     # NOTE(review): masked in-place += on a filtered frame; presumably meant
     # to shift 'y' by 20 after the gap -- confirm the effect on 'ds'.
     train[(train['ds'] > '2014-01-01')] += 20
     # Predict over every date in DATA, including the excluded span.
     future = pd.DataFrame({'ds': DATA['ds']})
     forecaster = Prophet(changepoints=['2013-06-06'])
     forecaster.fit(train)
     forecaster.predict(future)
示例#2
0
def prophetForecast(rawData, startDate, modelDir, partitions):
	"""Fit Prophet on hourly data and return its cross-validated forecasts.

	The model input is written to "prophetin.csv" and the cross-validation
	output to "prophetout.csv", both inside modelDir.

	Returns a 3-tuple of lists: (yhat, yhat_lower, yhat_upper).
	"""
	from fbprophet import Prophet
	from fbprophet.diagnostics import cross_validation

	n_partitions = int(partitions)
	model = Prophet()

	# One hourly timestamp per row of rawData, starting at startDate.
	hourly = pd.date_range(start=startDate, periods=len(rawData), freq="H")
	frame = pd.DataFrame(rawData, columns=["y", "temp"])
	frame["ds"] = hourly.to_pydatetime()
	frame.to_csv(pJoin(modelDir, "prophetin.csv"))

	# Fit with stdout/stderr silenced.
	with suppress_stdout_stderr():
		model.fit(frame)

	# Horizon and period are the whole-hour partition length; the initial
	# window is the remainder of that division.
	# NOTE(review): the original comment hinted at
	# total_hours - hp * (partitions - 1) as an alternative initial window.
	n_hours = len(frame.ds)
	step_hours, initial_hours = divmod(n_hours, n_partitions)

	# cross_validation takes its durations as strings such as "24 hours".
	durations = {
		"initial": "%d hours" % initial_hours,
		"horizon": "%d hours" % step_hours,
		"period": "%d hours" % step_hours,
	}
	cv_frame = cross_validation(model, **durations)
	cv_frame.to_csv(pJoin(modelDir, "prophetout.csv"))
	return (
		list(cv_frame.yhat),
		list(cv_frame.yhat_lower),
		list(cv_frame.yhat_upper),
	)
示例#3
0
 def test_cross_validation(self):
     """Check cutoffs, horizons and actuals produced by diagnostics.cross_validation."""
     m = Prophet()
     m.fit(self.__df)
     # Calculate the number of cutoff points(k)
     horizon = pd.Timedelta('4 days')
     period = pd.Timedelta('10 days')
     initial = pd.Timedelta('115 days')
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='115 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
     # No forecast row lies further than one horizon past its cutoff.
     self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
     # The earliest cutoff leaves at least `initial` of training history.
     self.assertTrue(min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
     # Smallest positive gap between consecutive cutoffs is at least `period`.
     dc = df_cv['cutoff'].diff()
     dc = dc[dc > pd.Timedelta(0)].min()
     self.assertTrue(dc >= period)
     # Every forecast date is strictly after its cutoff.
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     # Each y in df_cv and self.__df with same ds should be equal
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
     # A longer initial window leaves a single cutoff.
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='135 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
     # This initial/horizon combination is expected to be rejected.
     with self.assertRaises(ValueError):
         diagnostics.cross_validation(
             m, horizon='10 days', period='10 days', initial='140 days')
def add_prophet_features(df_shop):
    """Attach Prophet forecast columns (prefixed ``prophet_``) to a shop frame.

    For each biweek id ``m`` from ``max - 1`` down to 1, a Prophet model is
    fitted on the rows with ``biweek_id >= m`` and asked for a 14-period
    forecast past that training window. All forecasts are concatenated,
    left-joined onto ``df_shop`` by day, and de-duplicated on
    ``days_from_beginning`` keeping the last match.

    Args:
        df_shop: DataFrame with at least the columns ``day``, ``pays_count``,
            ``biweek_id`` and ``days_from_beginning``.

    Returns:
        A DataFrame with the rows of ``df_shop`` plus the ``prophet_*`` columns.

    Raises:
        ValueError: from ``pd.concat`` when no window had enough data to fit.
        Exception: if the merged frame does not have ``len(df_shop)`` rows.
    """
    df = df_shop[['day', 'pays_count']].rename(columns={'day': 'ds', 'pays_count': 'y'})

    results = []
    biweek_max = df_shop.biweek_id.max()

    for m in range(biweek_max - 1, 0, -1):
        df_train = df[df_shop.biweek_id >= m]

        # Skip windows with fewer than 7 observed target values.
        if df_train.y.notnull().sum() < 7:
            continue

        p = Prophet().fit(df_train)
        future = p.make_future_dataframe(14, include_history=False)
        results.append(p.predict(future))

    df_res = pd.concat(results)
    # Prefix the concatenated frame's own columns rather than those of the
    # last loop iteration's forecast, which need not match the concatenation.
    df_res.columns = ['prophet_%s' % c for c in df_res.columns]

    df_res = df_shop.merge(df_res, how='left', left_on='day', right_on='prophet_ds')
    del df_res['prophet_t'], df_res['prophet_ds']

    # `inplace` must be a real bool: pandas validates the argument type.
    df_res.drop_duplicates('days_from_beginning', keep='last', inplace=True)

    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")

    return df_res
def get_predictions(validate, train):
    """Forecast unit sales for a fixed set of example items in store 47.

    For every example item present in ``validate`` for store 47, the item's
    training history is gap-filled with ``fill_missing_date``, the most recent
    ``TRAIN_SIZE`` rows are fitted with Prophet, and the forecast is joined to
    the item's validation rows by date. Predictions are clipped to
    ``[0, 999999]`` with missing values set to 0.

    Args:
        validate: DataFrame with ``id``, ``date``, ``store_nbr``, ``item_nbr``.
        train: DataFrame with ``date``, ``store_nbr``, ``item_nbr``,
            ``unit_sales``.

    Returns:
        ``(result, problem_pairs)`` where ``result`` has the columns ``id`` and
        ``unit_sales`` and ``problem_pairs`` lists the ``(item_nbr, store_nbr)``
        pairs whose fit raised ``ValueError``.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327, 1239746, 765520, 1463867, 1010755, 1473396]

    # Loop-invariant: the full set of training dates used for gap filling.
    total_dates = train['date'].unique()
    frames = []
    problem_pairs = []

    store47examples = validate.loc[(validate.store_nbr == store_nbr) & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    # Group by a scalar key (not a one-element list) so `name` is the item
    # number itself rather than a 1-tuple.
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-TRAIN_SIZE:][['date', 'unit_sales']]
        X.columns = ['ds', 'y']
        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue
        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        # The merge keeps only forecast dates that occur in the validation rows.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # Concatenate once at the end; DataFrame.append is gone from current pandas.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
示例#6
0
 def test_performance_metrics(self):
     """Check diagnostics.performance_metrics at several rolling_window settings and with a custom metric list."""
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     self.assertEqual(df_horizon.shape[0], 14)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     # With a single aggregated row, each metric equals the mean of the
     # unaggregated values.
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv, metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
示例#7
0
    def test_holidays(self):
        """Check the shape of the holiday feature matrix for two window sizes."""
        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [0],
        })
        model = Prophet(holidays=holidays)
        df = pd.DataFrame({
            'ds': pd.date_range('2016-12-20', '2016-12-31')
        })
        feats = model.make_holiday_features(df['ds'])
        # Window [-1, 0]: 2 feature columns are expected (presumably one per
        # window offset), and their column sums deviate from [1, 1] by 0 in total.
        self.assertEqual(feats.shape, (df.shape[0], 2))
        self.assertEqual((feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)

        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [10],
        })
        feats = Prophet(holidays=holidays).make_holiday_features(df['ds'])
        # 12 columns generated even though only 8 overlap
        self.assertEqual(feats.shape, (df.shape[0], 12))
示例#8
0
    def test_fit(self):
        """Smoke test: fit on two observations with mcmc_samples=1."""
        observations = pd.DataFrame({
            'ds': np.array(['2012-05-18', '2012-05-20']),
            'y': np.array([38.23, 21.25])
        })

        model = Prophet(mcmc_samples=1)
        model.fit(observations)
示例#9
0
    def test_fit_predict(self):
        """Fit on the first half of DATA, then predict on the last half."""
        half = DATA.shape[0] // 2
        first_half, last_half = DATA.head(half), DATA.tail(half)

        model = Prophet()
        model.fit(first_half)
        model.predict(last_half)
示例#10
0
    def test_fit_predict_no_seasons(self):
        """Fit/predict with weekly and yearly seasonality both switched off."""
        half = DATA.shape[0] // 2
        first_half, last_half = DATA.head(half), DATA.tail(half)

        model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
        model.fit(first_half)
        model.predict(last_half)
示例#11
0
    def test_fit_predict_no_changepoints(self):
        """Fit/predict with n_changepoints=0."""
        half = DATA.shape[0] // 2
        first_half, last_half = DATA.head(half), DATA.tail(half)

        model = Prophet(n_changepoints=0)
        model.fit(first_half)
        model.predict(last_half)
示例#12
0
 def test_fit_with_holidays(self):
     """Fit with a two-date holiday table and predict over the history."""
     n_dates = 2
     holiday_table = pd.DataFrame({
         'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
         'holiday': ['seans-bday'] * n_dates,
         'lower_window': [0] * n_dates,
         'upper_window': [1] * n_dates,
     })
     model = Prophet(holidays=holiday_table, uncertainty_samples=0)
     fitted = model.fit(DATA)
     fitted.predict()
示例#13
0
 def test_subdaily_holidays(self):
     """Fit DATA2 with one holiday and count rows whose 'special_day' component is 0."""
     special = pd.DataFrame({
         'ds': pd.to_datetime(['2017-01-02']),
         'holiday': ['special_day'],
     })
     model = Prophet(holidays=special)
     model.fit(DATA2)
     forecast = model.predict()
     zero_effect_rows = sum(forecast['special_day'] == 0)
     self.assertEqual(zero_effect_rows, 575)
示例#14
0
 def test_fit_predict_duplicates(self):
     """Fit on a history in which every date appears twice, then predict.

     The second copy of each row has 'y' shifted by 10, so the training
     frame holds two different observations per date.
     """
     N = DATA.shape[0]
     train1 = DATA.head(N // 2).copy()
     train2 = DATA.head(N // 2).copy()
     train2['y'] += 10
     # pd.concat replaces DataFrame.append, which current pandas no longer has.
     train = pd.concat([train1, train2])
     future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
     forecaster = Prophet()
     forecaster.fit(train)
     forecaster.predict(future)
示例#15
0
    def test_override_n_changepoints(self):
        """With a 20-row history, set_changepoints() should leave 15 changepoints."""
        model = Prophet()
        short_history = DATA.head(20).copy()

        model.history = model.setup_dataframe(
            short_history, initialize_scales=True)

        model.set_changepoints()
        self.assertEqual(model.n_changepoints, 15)
        changepoints = model.changepoints_t
        self.assertEqual(changepoints.shape[0], 15)
示例#16
0
    def test_get_zero_changepoints(self):
        """With n_changepoints=0, changepoints_t should be the single value 0."""
        model = Prophet(n_changepoints=0)
        half = DATA.shape[0] // 2
        first_half = DATA.head(half).copy()

        model.history = model.setup_dataframe(
            first_half, initialize_scales=True)

        model.set_changepoints()
        changepoints = model.changepoints_t
        self.assertEqual(changepoints.shape[0], 1)
        self.assertEqual(changepoints[0], 0)
示例#17
0
def hello():
    """Fit Prophet on the CSV at ``url`` and return a 365-period forecast as JSON.

    ``url`` is not a parameter; it is read from the enclosing scope and is
    expected to be defined elsewhere in the module. The ``y`` column is
    log-transformed before fitting.

    Returns:
        The forecast frame serialised with ``to_json(orient='table')``.
    """
    print('Hello, world!')
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])

    m = Prophet()
    m.fit(df)

    # History plus 365 further periods.
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    return forecast.to_json(orient='table')
示例#18
0
 def test_cross_validation_default_value_check(self):
     """Omitting `initial` should give the same result as initial='96 days' for a 32-day horizon."""
     model = Prophet()
     model.fit(self.__df)
     # Default value of initial should be equal to 3 * horizon
     cv_default = diagnostics.cross_validation(
         model, horizon='32 days', period='10 days')
     cv_explicit = diagnostics.cross_validation(
         model, horizon='32 days', period='10 days', initial='96 days')
     for column in ('y', 'yhat'):
         squared_diff = ((cv_default[column] - cv_explicit[column]) ** 2).sum()
         self.assertAlmostEqual(squared_diff, 0.0)
示例#19
0
    def test_setup_dataframe(self):
        """setup_dataframe should add 't' spanning [0, 1] and 'y_scaled' peaking at 1."""
        model = Prophet()
        half = DATA.shape[0] // 2
        prepared = model.setup_dataframe(
            DATA.head(half).copy(), initialize_scales=True)

        self.assertIn('t', prepared)
        self.assertEqual(prepared['t'].min(), 0.0)
        self.assertEqual(prepared['t'].max(), 1.0)

        self.assertIn('y_scaled', prepared)
        self.assertEqual(prepared['y_scaled'].max(), 1.0)
示例#20
0
    def test_logistic_floor(self):
        """Check logistic growth with a floor, and approximate invariance to shifting y, floor and cap by 10."""
        m = Prophet(growth='logistic')
        N = DATA.shape[0]
        history = DATA.head(N // 2).copy()
        history['floor'] = 10.
        history['cap'] = 80.
        future = DATA.tail(N // 2).copy()
        future['cap'] = 80.
        future['floor'] = 10.
        m.fit(history, algorithm='Newton')
        self.assertTrue(m.logistic_floor)
        self.assertTrue('floor' in m.history)
        self.assertAlmostEqual(m.history['y_scaled'][0], 1.)
        fcst1 = m.predict(future)

        # Second model: same data with y, floor and cap all shifted up by 10.
        m2 = Prophet(growth='logistic')
        history2 = history.copy()
        history2['y'] += 10.
        history2['floor'] += 10.
        history2['cap'] += 10.
        # `future` is mutated in place here; fcst1 was already computed above.
        future['cap'] += 10.
        future['floor'] += 10.
        m2.fit(history2, algorithm='Newton')
        self.assertAlmostEqual(m2.history['y_scaled'][0], 1.)
        fcst2 = m2.predict(future)
        # Undo the shift so the two forecasts are directly comparable.
        fcst2['yhat'] -= 10.
        # Check for approximate shift invariance
        self.assertTrue((np.abs(fcst1['yhat'] - fcst2['yhat']) < 1).all())
示例#21
0
def run():
    """Print per-date amounts from a ledger journal, then fit and plot a Prophet model.

    The ledger part reads "./secret/ledger.dat" and prints a running amount
    whenever the posting date changes. The forecasting part reads
    './testing.csv', multiplies its 'y' column by 100, fits Prophet and plots
    the forecast and its components.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0

    # NOTE(review): on a date change the printed amount is labelled with the
    # new post's date, the new post's own amount is not added, and the final
    # date's total is never printed -- confirm whether this is intended.
    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            # Only amounts whose commodity is "£" are accumulated.
            if str(post.amount.commodity) != "£":
                continue
            amount = amount + post.amount
        else:
            # Same output as the former print statement; valid on Python 2 and 3.
            print("%s , %s" % (post.date, amount))
            amount = 0
        last_post = post

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # NOTE(review): `future` is not defined in this function; it must exist in
    # an enclosing scope (e.g. built with m.make_future_dataframe) or this
    # call raises NameError -- confirm.
    forecast = m.predict(future)

    m.plot(forecast)
    m.plot_components(forecast)
示例#22
0
    def test_growth_init(self):
        """Check the initial growth parameters for linear and logistic trends."""
        model = Prophet(growth='logistic')
        history = DATA.iloc[:468].copy()
        # Use the series maximum as the capacity.
        history['cap'] = history['y'].max()
        history = model.setup_dataframe(history, initialize_scales=True)

        rate, offset = model.linear_growth_init(history)
        self.assertAlmostEqual(rate, 0.3055671)
        self.assertAlmostEqual(offset, 0.5307511)

        rate, offset = model.logistic_growth_init(history)
        self.assertAlmostEqual(rate, 1.507925, places=4)
        self.assertAlmostEqual(offset, -0.08167497, places=4)
示例#23
0
    def test_get_changepoints(self):
        """Check count, dimensionality and range of automatically placed changepoints."""
        model = Prophet()
        half = DATA.shape[0] // 2
        history = model.setup_dataframe(
            DATA.head(half).copy(), initialize_scales=True)
        model.history = history

        model.set_changepoints()

        changepoints = model.changepoints_t
        self.assertEqual(changepoints.shape[0], model.n_changepoints)
        self.assertEqual(len(changepoints.shape), 1)
        self.assertTrue(changepoints.min() > 0)
        # No changepoint may lie beyond the row at 80% of the history.
        boundary_row = int(np.ceil(0.8 * history.shape[0]))
        self.assertTrue(
            changepoints.max() <= history['t'].values[boundary_row])
示例#24
0
 def test_fourier_series_weekly(self):
     """First row of the period-7, order-3 Fourier features should match reference values."""
     features = Prophet.fourier_series(DATA['ds'], 7, 3)
     # These are from the R forecast package directly.
     expected_first_row = np.array([
         0.7818315, 0.6234898, 0.9749279, -0.2225209, 0.4338837, -0.9009689,
     ])
     squared_error = np.sum((features[0] - expected_first_row)**2)
     self.assertAlmostEqual(squared_error, 0.0)
示例#25
0
 def test_fourier_series_yearly(self):
     """First row of the period-365.25, order-3 Fourier features should match reference values."""
     features = Prophet.fourier_series(DATA['ds'], 365.25, 3)
     # These are from the R forecast package directly.
     expected_first_row = np.array([
         0.7006152, -0.7135393, -0.9998330, 0.01827656, 0.7262249, 0.6874572,
     ])
     squared_error = np.sum((features[0] - expected_first_row)**2)
     self.assertAlmostEqual(squared_error, 0.0)
示例#26
0
    def test_make_future_dataframe(self):
        """make_future_dataframe should yield the expected dates for daily and monthly frequencies."""
        model = Prophet()
        model.fit(DATA.head(468 // 2))

        # (frequency, the three dates expected right after the history)
        cases = [
            ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
            ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
        ]
        for freq, expected_dates in cases:
            future = model.make_future_dataframe(
                periods=3, freq=freq, include_history=False)
            expected = pd.DatetimeIndex(expected_dates)
            self.assertEqual(len(future), 3)
            for row, stamp in enumerate(expected):
                self.assertEqual(future.iloc[row]['ds'], stamp)
示例#27
0
    def test_piecewise_linear(self):
        """piecewise_linear should match hand-computed values on the full grid and on its tail."""
        model = Prophet()

        times = np.arange(11.)
        offset = 0
        rate = 1.0
        rate_changes = np.array([0.5])
        change_times = np.array([5])
        expected = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0,
                             6.5, 8.0, 9.5, 11.0, 12.5])

        # Evaluate on the whole grid, then on the slice starting at index 8.
        for start in (0, 8):
            result = model.piecewise_linear(
                times[start:], rate_changes, rate, offset, change_times)
            self.assertEqual((result - expected[start:]).sum(), 0.0)
示例#28
0
 def test_auto_yearly_seasonality(self):
     """'auto' yearly seasonality should resolve to True or False once the model is fitted."""
     # Should be True
     model = Prophet()
     self.assertEqual(model.yearly_seasonality, 'auto')
     model.fit(DATA)
     self.assertEqual(model.yearly_seasonality, True)

     short_history = DATA.head(240)
     cases = (
         # Should be False due to too short history
         ({}, False),
         # An explicit True is kept regardless of history length
         ({'yearly_seasonality': True}, True),
     )
     for kwargs, expected in cases:
         model = Prophet(**kwargs)
         model.fit(short_history)
         self.assertEqual(model.yearly_seasonality, expected)
示例#29
0
def train_prophet(df, modelDir, confidence=0.99):
	"""Fit Prophet on df, predict over its history and cache the result as CSV.

	Args:
		df: training frame passed to Prophet.fit; its ``y`` column is copied
			into the returned forecast.
		modelDir: directory that receives "forecasted_<confidence>.csv".
		confidence: passed to Prophet as ``interval_width``.

	Returns:
		The forecast DataFrame, with an added ``y`` column.
	"""
	m = Prophet(
		yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
	)
	with suppress_stdout_stderr():
		m.fit(df)

	# Parenthesised single-string form prints identically on Python 2 and 3.
	print("PREDICTING!")
	# periods=0: no rows beyond the history are requested.
	future = m.make_future_dataframe(periods=0)
	forecast = m.predict(future)
	# Merge in the historical data.
	# NOTE(review): this assignment aligns on index -- assumes df and forecast
	# share the same index; confirm.
	forecast["y"] = df.y.astype(float)
	# Backup the model.
	forecast.to_csv(
		pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
	)
	return forecast
示例#30
0
    def test_piecewise_logistic(self):
        """piecewise_logistic should match reference values on the full grid and on its tail."""
        model = Prophet()

        times = np.arange(11.)
        capacity = np.ones(11) * 10
        offset = 0
        rate = 1.0
        rate_changes = np.array([0.5])
        change_times = np.array([5])
        expected = np.array([5.000000, 7.310586, 8.807971, 9.525741, 9.820138,
                             9.933071, 9.984988, 9.996646, 9.999252, 9.999833,
                             9.999963])

        # Evaluate on the whole grid, then on the slice starting at index 8.
        for start in (0, 8):
            result = model.piecewise_logistic(
                times[start:], capacity[start:], rate_changes, rate, offset,
                change_times)
            self.assertAlmostEqual(
                (result - expected[start:]).sum(), 0.0, places=5)
示例#31
0
 def test_added_regressors(self):
     """Check extra regressors end to end.

     Covers registration, standardization parameters, the seasonal feature
     matrix and prior scales, forecast component columns, and a
     constant-valued regressor.
     """
     m = Prophet()
     m.add_regressor('binary_feature', prior_scale=0.2)
     m.add_regressor('numeric_feature', prior_scale=0.5)
     m.add_regressor('numeric_feature2',
                     prior_scale=0.5,
                     mode='multiplicative')
     m.add_regressor('binary_feature2', standardize=True)
     df = DATA.copy()
     df['binary_feature'] = ['0'] * 255 + ['1'] * 255
     df['numeric_feature'] = range(510)
     df['numeric_feature2'] = range(510)
     with self.assertRaises(ValueError):
         # Require all regressors in df
         m.fit(df)
     df['binary_feature2'] = [1] * 100 + [0] * 410
     m.fit(df)
     # Check that standardizations are correctly set
     self.assertEqual(
         m.extra_regressors['binary_feature'],
         {
             'prior_scale': 0.2,
             'mu': 0,
             'std': 1,
             'standardize': 'auto',
             'mode': 'additive',
         },
     )
     self.assertEqual(m.extra_regressors['numeric_feature']['prior_scale'],
                      0.5)
     self.assertEqual(m.extra_regressors['numeric_feature']['mu'], 254.5)
     self.assertAlmostEqual(m.extra_regressors['numeric_feature']['std'],
                            147.368585,
                            places=5)
     self.assertEqual(m.extra_regressors['numeric_feature2']['mode'],
                      'multiplicative')
     self.assertEqual(m.extra_regressors['binary_feature2']['prior_scale'],
                      10.)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['mu'],
                            0.1960784,
                            places=5)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['std'],
                            0.3974183,
                            places=5)
     # Check that standardization is done correctly
     df2 = m.setup_dataframe(df.copy())
     self.assertEqual(df2['binary_feature'][0], 0)
     self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
     self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
     # Check that feature matrix and prior scales are correctly constructed
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(df2))
     self.assertEqual(seasonal_features.shape[1], 30)
     names = ['binary_feature', 'numeric_feature', 'binary_feature2']
     true_priors = [0.2, 0.5, 10.]
     for i, name in enumerate(names):
         self.assertIn(name, seasonal_features)
         # Each regressor maps to exactly one feature column, and that
         # column carries the regressor's prior scale.
         self.assertEqual(sum(component_cols[name]), 1)
         self.assertEqual(
             sum(np.array(prior_scales) * component_cols[name]),
             true_priors[i],
         )
     # Check that forecast components are reasonable
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
         'numeric_feature2': [10],
     })
     # 'binary_feature2' is still missing, so predict must fail.
     with self.assertRaises(ValueError):
         m.predict(future)
     future['binary_feature2'] = 0
     fcst = m.predict(future)
     self.assertEqual(fcst.shape[1], 37)
     self.assertEqual(fcst['binary_feature'][0], 0)
     self.assertAlmostEqual(
         fcst['extra_regressors_additive'][0],
         fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['extra_regressors_multiplicative'][0],
         fcst['numeric_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['additive_terms'][0],
         fcst['yearly'][0] + fcst['weekly'][0] +
         fcst['extra_regressors_additive'][0],
     )
     self.assertAlmostEqual(
         fcst['multiplicative_terms'][0],
         fcst['extra_regressors_multiplicative'][0],
     )
     self.assertAlmostEqual(
         fcst['yhat'][0],
         fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0]) +
         fcst['additive_terms'][0],
     )
     # Check works if constant extra regressor at 0
     df['constant_feature'] = 0
     m = Prophet()
     m.add_regressor('constant_feature')
     m.fit(df)
     self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
示例#32
0
import pandas as pd

import plotly.offline as py
import plotly.io as pio

from fbprophet import Prophet
from fbprophet.plot import plot_plotly

# Use the "png" renderer as plotly's default.
pio.renderers.default = "png"

# Fit Prophet on the example CSV.
df = pd.read_csv("example_wp_log_peyton_manning.csv")
m = Prophet()
m.fit(df)

# Forecast over the history plus 365 further periods.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Prophet's own forecast and component plots.
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)

#py.init_notebook_mode()

fig = plot_plotly(m, forecast)  # This returns a plotly Figure
# NOTE(review): the figure is displayed twice (offline iplot, then
# fig.show()) -- confirm both calls are wanted.
py.iplot(fig)
fig.show()

"""
import matplotlib.pyplot as plt

x = 2
示例#33
0
df_humidity = dfall_humidity_1

# Pressure series: rename to Prophet's 'ds'/'y' column names.
dfall_pressure_1 = dfall_beijing_day_pressure.rename(columns={
    'date': 'ds',
    'pressure': 'y'
})
#dfall['y'] = np.log(dfall['y'])
# Min-max scale 'y' as (y - min) / (max - min).
dfall_pressure_1['y'] = (dfall_pressure_1['y'] - dfall_pressure_1['y'].min()
                         ) / (dfall_pressure_1['y'].max() -
                              dfall_pressure_1['y'].min())
dfall_pressure_1['ds'] = pd.to_datetime(dfall_pressure_1['ds'])
# NOTE(review): the result of set_index is discarded (no assignment, no
# inplace), so the frame keeps 'ds' as a regular column.
dfall_pressure_1.set_index('ds')
df_pressure = dfall_pressure_1

# One model per variable, all with the same settings: daily and weekly
# seasonality disabled, changepoint_prior_scale=0.01.
m_temperature = Prophet(daily_seasonality=False,
                        weekly_seasonality=False,
                        changepoint_prior_scale=0.01)
m_temperature.fit(df_temperature)

m_humidity = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_humidity.fit(df_humidity)

m_pressure = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_pressure.fit(df_pressure)

# History plus 180 further periods for the temperature model.
future_temperature = m_temperature.make_future_dataframe(periods=180)
# The tail() result is not used; presumably left over from interactive use.
future_temperature.tail()
示例#34
0
# Python
import pandas as pd
from fbprophet import Prophet
# Load the dataset
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv')
df.head()

# Fit the model
m = Prophet()
m.fit(df)

# Build the dataframe of dates to predict; periods=365 extends it 365 days
# beyond the dates in the historical data
future = m.make_future_dataframe(periods=365)
future.tail()

# Forecast over the future dataframe
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Plot the forecast
m.plot(forecast)

# Plot the forecast components: trend, weekly effect and yearly effect
m.plot_components(forecast)

print(forecast.columns)
Index([
    'ds', 'trend', 'trend_lower', 'trend_upper', 'yhat_lower', 'yhat_upper',
    'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
    'multiplicative_terms', 'multiplicative_terms_lower',
    'multiplicative_terms_upper', 'weekly', 'weekly_lower', 'weekly_upper',
示例#35
0
 def test_seasonality_modes(self):
     """Check how components are split between additive and multiplicative modes.

     The model default is multiplicative; the 'monthly' seasonality and
     'binary_feature' regressor are explicitly additive.
     """
     # Model with holidays, seasonalities, and extra regressors
     holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2016-12-25']),
         'holiday': ['xmas'],
         'lower_window': [-1],
         'upper_window': [0],
     })
     m = Prophet(seasonality_mode='multiplicative', holidays=holidays)
     m.add_seasonality('monthly',
                       period=30,
                       mode='additive',
                       fourier_order=3)
     m.add_regressor('binary_feature', mode='additive')
     m.add_regressor('numeric_feature')
     # Construct seasonal features
     df = DATA.copy()
     df['binary_feature'] = [0] * 255 + [1] * 255
     df['numeric_feature'] = range(510)
     df = m.setup_dataframe(df, initialize_scales=True)
     m.history = df.copy()
     m.set_auto_seasonalities()
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(df))
     # Number of feature columns assigned to each mode.
     self.assertEqual(sum(component_cols['additive_terms']), 7)
     self.assertEqual(sum(component_cols['multiplicative_terms']), 29)
     self.assertEqual(
         set(modes['additive']),
         {
             'monthly', 'binary_feature', 'additive_terms',
             'extra_regressors_additive'
         },
     )
     self.assertEqual(
         set(modes['multiplicative']),
         {
             'weekly',
             'yearly',
             'xmas',
             'numeric_feature',
             'multiplicative_terms',
             'extra_regressors_multiplicative',
             'holidays',
         },
     )
示例#36
0
finish_date = datetime.strptime("2011-12-31 23:59", "%Y-%m-%d %H:%M")
number_tower = "Data_1st"

# NOTE(review): the query is built by string interpolation; the values here
# are local constants, but bound parameters would be safer if that changes.
query = '''SELECT Date, %s FROM Data WHERE  Date >=  \'%s\' AND Date <= \'%s\';''' % (
    str(number_tower), start_date, finish_date)
conn = sqlite3.connect("mydatabase.db")
# NOTE(review): `cursor` is not used below and `conn` is never closed in the
# visible part of the script.
cursor = conn.cursor()
dataset = pd.read_sql_query(query, conn)
dataset.Date = dataset["Date"].apply(pd.to_datetime)

print(dataset)

predictions = 180
# Bring the dataframe into the required format
# (`df` is the same object as `dataset`, so the rename below affects both)
df = dataset
print(df.head())
df.columns = ['ds', 'y']
# Cut the last `predictions` points off the training set so quality can be
# measured on them
train_df = df[:-predictions]

m = Prophet()
m.fit(train_df)

# Forecast over the training history plus the held-out `predictions` periods.
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)

m.plot(forecast)
m.plot_components(forecast)

plt.show()
示例#37
0
def Table_generator():
    """Forecast 52 weeks of sales per shop and write the table to ``df.json``.

    The weekly sales CSV is read twice and the shops are split by how many
    rows with ``sales_status != 0`` they have:

    * fewer than 52 rows: per-week means of the shop's own history are
      computed, gaps are filled from rows of a k-means sales cluster (and
      then from the mean of those fill values), and a linear regression of
      sales on mean items / mean transactions supplies the forecast;
    * 52 rows or more: a Prophet model is fitted on ``log(sales_amount)``
      and its 52-week forecast is exponentiated back.

    Both partial tables are stacked into one frame with a leading
    ``shop_id`` column followed by columns 1..52, sorted by shop id, and
    written to ``df.json`` with ``orient='records'``.

    Takes no arguments and returns ``None``; the JSON file is the only output.

    NOTE(review): the ``week_no`` fix-ups use chained assignment
    (``frame.week_no.loc[mask] = 54``), which pandas may apply to a temporary
    copy. Left as in the original; confirm that the value actually sticks.
    """
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt
    from pylab import rcParams
    from sklearn import linear_model
    from sklearn.cluster import KMeans
    from fbprophet import Prophet
    from numpy import inf

    filepath = 'C:/weekly sales and labour cost for all shops 2013 to 20177.csv'
    week_columns = list(range(1, 53))
    cluster_cols = ['sales_amount', 'item_sold', 'transactions', 'week_no']

    # ------------------------------------------------------------------
    # Part 1: shops with fewer than 52 rows -> cluster-filled regression.
    # ------------------------------------------------------------------
    df = pd.read_csv(filepath)
    df2 = df[df.sales_status != 0]
    nulldetect = df2.week_no.isnull()
    # Missing week numbers become 54 and every week number is shifted by -2.
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2

    # Row count of each row's shop; de-duplicated to one (shop, count) pair.
    len_week1 = [len(df2.week_no[df2.shop_id == i]) for i in df2.shop_id]
    d1 = pd.DataFrame({'shop_id': df2.shop_id, 'len_of_weeks': len_week1})
    d2 = d1.drop_duplicates()

    short_shops = d2.shop_id[d2.len_of_weeks < 52].values
    # Rows are week numbers 1..52, one column per short-history shop.
    dtt = pd.DataFrame(index=week_columns, columns=short_shops)

    for uu in short_shops:
        df3 = df2[df2.shop_id == uu]
        dff4 = df3.set_index('start_date')
        a = df3[[
            'week_no', 'shop_id', 'sales_amount', 'transactions', 'total_tax',
            'item_sold'
        ]]
        # 52 weekly (Monday) dates starting from the shop's last start_date.
        # `date_range` has no `format` parameter, so none is passed.
        dates = pd.date_range(dff4.index[-1], periods=52, freq='W-MON')

        # Per forecast week: mean of the shop's own history for that week
        # number (NaN when the shop has no row for it).
        mean_week_item = [
            a.item_sold[a.week_no == i].mean() for i in dates.week
        ]
        trans_week_item = [
            a.transactions[a.week_no == i].mean() for i in dates.week
        ]
        sales_week = [
            a.sales_amount[a.week_no == i].mean() for i in dates.week
        ]
        dd1 = pd.DataFrame({
            'date': dates,
            'weeks_no': dates.week,
            'sales': sales_week,
            'mean_item': mean_week_item,
            'mean_trans': trans_week_item
        })

        # NOTE(review): everything from here to `group_low_sales2` does not
        # depend on `uu` and is recomputed for every shop.
        dff1 = df[df.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2

        # Row-level clustering on (shop_id, sales_amount).
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_
        # NOTE(review): the low/middle/high names are tied to fixed label
        # numbers; k-means labels carry no inherent ordering — confirm.
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]

        # Shop-level clustering on (shop_id, mean sales of the shop).
        fff = [
            X_Cluster.sales_amount[X_Cluster.shop_id == sid].mean()
            for sid in X_Cluster.shop_id
        ]
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f5 = pd.DataFrame(fff).drop_duplicates()
        # NOTE(review): lengths only match if no two shops share the same
        # mean sales value.
        f3['salle'] = f5.values

        Xx2 = f3[['shop_id', 'salle']]
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_
        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]

        # Forecast weeks for which the shop itself has no data.
        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]

        # Pick the row-level group matching the shop's shop-level cluster and
        # the half-width of the sales window used to select comparable rows.
        if (group_low_sales2.shop_id.values == uu).any():
            group, half_width = group_low_sales, 3000
        elif (group_middle_sales2.shop_id.values == uu).any():
            group, half_width = group_middle_sales, 3000
        elif (group_high_sales2.shop_id.values == uu).any():
            group, half_width = group_high_sales, 4000
        cx = int(group.sales_amount[group.shop_id == uu].values.mean())
        trt = group[group.sales_amount > cx - half_width]
        trt2 = trt[trt.sales_amount < cx + half_width]
        drr = dff1[cluster_cols].loc[trt2.index.values]

        # Means of the comparable rows for every week the shop is missing.
        itt = [drr.item_sold[drr.week_no == i3].mean() for i3 in nullweeks]
        trr = [drr.transactions[drr.week_no == i3].mean() for i3 in nullweeks]
        sale = [drr.sales_amount[drr.week_no == i3].mean() for i3 in nullweeks]
        df_insert1 = pd.DataFrame({
            'sales_amountt': sale,
            'ittem': itt,
            'trans': trr,
            'weeks_no': nullweeks
        })

        # First fill from the per-week cluster means, then fall back to the
        # overall mean of those fill values for anything still missing.
        forecastdf = dd1.fillna({
            'mean_item': df_insert1.ittem,
            'mean_trans': df_insert1.trans,
            'sales': df_insert1.sales_amountt
        })
        forecastdf1 = forecastdf.fillna({
            'mean_item': df_insert1.ittem.mean(),
            'mean_trans': df_insert1.trans.mean(),
            'sales': df_insert1.sales_amountt.mean()
        })

        # Regress sales on the two mean features and predict on the same rows.
        regr3 = linear_model.LinearRegression()
        X = forecastdf1[['mean_item', 'mean_trans']]
        Y = forecastdf1.sales
        regr3.fit(X, Y)
        y_predictionss1 = pd.DataFrame(regr3.predict(X))
        pred_y = round(y_predictionss1, 2)
        forecastdf1['forecasted_sales'] = pred_y.values
        forecastdf1.sort_values('weeks_no', inplace=True)
        f = forecastdf1.set_index('weeks_no')
        # Despite its name this column holds the week-number index values.
        dtt['shop_id'] = dtt.index.values
        dtt[[uu]] = f.forecasted_sales.values.reshape((52, 1))

    # Transpose into one row per shop, one column per week.
    dtt1 = pd.DataFrame(index=short_shops, columns=week_columns)
    for jj in dtt.index.values:
        dtt1.loc[:, jj] = dtt.loc[jj, :]

    # ------------------------------------------------------------------
    # Part 2: shops with at least 52 rows -> Prophet on log(sales).
    # ------------------------------------------------------------------
    data = pd.read_csv(filepath, index_col='start_date', parse_dates=True)

    df2 = data[data.sales_status != 0]
    nulldetect = df2.week_no.isnull()
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2

    len_week1 = [len(df2.week_no[df2.shop_id == i]) for i in df2.shop_id]
    d1 = pd.DataFrame({'shop_id': df2.shop_id, 'len_of_weeks': len_week1})
    d2 = d1.drop_duplicates()

    # The same `>= 52` selection is used for the table columns, the loop and
    # the transposed index, so shops with exactly 52 rows are not dropped.
    long_shops = d2.shop_id[d2.len_of_weeks >= 52].values
    dtt2 = pd.DataFrame(index=week_columns, columns=long_shops)

    for j in long_shops:
        data2 = data[[
            'sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold',
            'transactions', 'total_tax', 'sales_status'
        ]]
        df1 = data2[data2.shop_id == j]
        df2 = df1[df1.sales_status != 0]
        nulldetect = df1.week_no.isnull()
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        dff = df2[['sales_amount']]

        # Prophet input: 'ds' dates and log-sales 'y'; infinities and NaN
        # produced by the log are replaced with 0.
        data5 = dff.reset_index().rename(columns={
            'start_date': 'ds',
            'sales_amount': 'y'
        })
        data5['y'] = np.log(data5['y'])
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)

        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        forecast.set_index('ds', inplace=True)

        dff.index = pd.to_datetime(
            dff.index)  # make sure our index as a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        # Keep only forecast rows after the second-to-last observed date.
        predict_df = forecast.loc[forecast.index > connect_date]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
                          how='outer')
        # Undo the log transform.
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])

        ii = len(dff.sales_amount) - 1
        predicted_future_sales2 = pd.DataFrame(
            viz_df.yhat_scaled[ii:]).rename(columns={
                'yhat_scaled': 'future_sales'
            }).reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales1 = round(predicted_future_sales2['future_sales'], 2)
        start_date = predicted_future_sales2['index']
        predict_data1 = pd.DataFrame({
            'shop_id': int(df2.shop_id.mean()),
            'future_sales': future_sales1,
            'week_no': week_no,
            'start_date': start_date
        })
        # One forecast per week number, ordered by week.
        predict_data1 = predict_data1.drop_duplicates(subset=['week_no'])
        predict_data1.sort_values('week_no', inplace=True)
        f1 = predict_data1.set_index('week_no')
        dtt2[[j]] = f1.future_sales.values.reshape((52, 1))

    # Transpose into one row per shop, one column per week.
    dtt3 = pd.DataFrame(index=long_shops, columns=week_columns)
    for qq in dtt2.index.values:
        dtt3.loc[:, qq] = dtt2.loc[qq, :]

    # ------------------------------------------------------------------
    # Part 3: stack both tables, put shop_id first, write JSON.
    # ------------------------------------------------------------------
    tab = pd.concat([dtt1, dtt3])
    tab['shop_id'] = tab.index.values
    tab.sort_values('shop_id', inplace=True)
    tab_id = tab.shop_id
    tab = tab.drop('shop_id', axis=1)
    tab.insert(0, 'shop_id', tab_id)
    tab.to_json(path_or_buf='df.json', orient='records')
示例#38
0
    def test_fit_predict_no_changepoints(self):
        """Fit and predict with ``n_changepoints=0``, without and with MCMC.

        The first half of DATA is the training set and the last half is the
        prediction frame. The test passes if neither run raises.
        """
        half = DATA.shape[0] // 2
        history = DATA.head(half)
        horizon = DATA.tail(half)

        # First a plain fit, then the same configuration with 100 MCMC samples.
        for extra_kwargs in ({}, {'mcmc_samples': 100}):
            model = Prophet(n_changepoints=0, **extra_kwargs)
            model.fit(history)
            model.predict(horizon)
示例#39
0
 def test_fit_predict_with_country_holidays(self):
     """Fit and predict with US country holidays under three data splits.

     Covers custom holidays combined with country holidays, a split where
     training holidays are missing from the test set, and a split where test
     holidays are missing from the training set. Passes if nothing raises.
     """

     def fit_and_predict_us(train, future):
         # Fresh model with only the built-in US holidays.
         model = Prophet(uncertainty_samples=0)
         model.add_country_holidays(country_name='US')
         model.fit(train).predict(future)

     bday_dates = pd.to_datetime(['2012-06-06', '2013-06-06'])
     custom_holidays = pd.DataFrame({
         'ds': bday_dates,
         'holiday': ['seans-bday'] * 2,
         'lower_window': [0] * 2,
         'upper_window': [1] * 2,
     })

     # Custom holidays together with country holidays, predicting on history.
     combined = Prophet(holidays=custom_holidays, uncertainty_samples=0)
     combined.add_country_holidays(country_name='US')
     combined.fit(DATA).predict()

     # There are training holidays missing in the test set
     fit_and_predict_us(DATA.head(154), DATA.tail(355))

     # There are test holidays missing in the training set
     fit_and_predict_us(DATA.tail(355), DATA2)
示例#40
0
 def test_fit_changepoint_not_in_history(self):
     """Fit with an explicit changepoint that falls in a gap of the history.

     Rows dated from 2013-01-01 to 2014-01-01 are removed from the training
     data, while the changepoint is 2013-06-06. Prediction then runs over
     every date in DATA. Passes if nothing raises.
     """
     before_gap = DATA['ds'] < '2013-01-01'
     after_gap = DATA['ds'] > '2014-01-01'
     history = DATA[before_gap | after_gap]
     all_dates = pd.DataFrame({'ds': DATA['ds']})

     model = Prophet(changepoints=['2013-06-06'])
     model.fit(history)
     model.predict(all_dates)
示例#41
0
from fbprophet import Prophet

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')


# NOTE(review): `pd` is used below but not imported in this fragment;
# presumably pandas is imported earlier in the file — confirm.
df = pd.read_csv('oil_prices.csv')
# NOTE(review): this module-level name shadows the builtin `format`.
format = '%d/%m/%Y'
df['ts'] = pd.to_datetime(df['ts'], format=format)
# Index the frame by the parsed timestamps; the 'ts' column itself is kept.
df = df.set_index(pd.DatetimeIndex(df['ts']))

# Rename to the 'ds' / 'y' column names that Prophet is fitted on below.
df = df.rename(columns={'ts': 'ds',
                        'price': 'y'})

# set the uncertainty interval to 95% (the Prophet default is 80%)
my_model = Prophet(n_changepoints=10,interval_width=0.95,daily_seasonality=False,weekly_seasonality=True,yearly_seasonality=True,uncertainty_samples=1000)

forecast_period = 12 # Number of forecasted time steps

my_model.fit(df)

# In order to obtain forecasts of our time series, we must provide Prophet with a new DataFrame
# containing a ds column that holds the dates for which we want predictions:
# NOTE(review): freq='12MS' presumably spaces the 12 future rows twelve
# month-starts apart rather than one month apart — confirm this is intended.
future_dates = my_model.make_future_dataframe(periods=forecast_period, freq='12MS')

# The DataFrame of future dates is then used as input to the predict method of our fitted model:
forecast = my_model.predict(future_dates)

'''
Prophet returns a large DataFrame with many interesting columns, but we subset our output to the columns most relevant to forecasting, which are:
ds: the datestamp of the forecasted value
示例#42
0
#print(templst)
#dfp = pd.DataFrame(templst)

# NOTE(review): `data3` is not defined in this fragment; presumably it is
# built earlier in the script — confirm.
# Build the Prophet input: 'datep' becomes 'ds', 'pm2.5' becomes 'y', and four
# further columns are copied over from data3.
# NOTE(review): `temp` is a column selection of data3, so the column
# assignments below may trigger pandas' SettingWithCopyWarning.
temp = data3[['datep', 'pm2.5']]
temp.columns = ['ds', 'y']
temp['PRES'] = data3['PRES']
temp['DEWP'] = data3['DEWP']
temp['TEMP'] = data3['TEMP']
temp['Iws'] = data3['Iws']

#temp.y.plot()
plt.plot(temp.y)
print(temp.head())

# initializing the fbprophet model and fitting the data
# Only 'PRES' is registered as an extra regressor; the other copied columns
# are not added to the model.
model = Prophet()
#model.add_regressor('PRES', standardize = "auto", mode='additive')
model.add_regressor('PRES')
#model.add_regressor('DEWP')
#model.add_regressor('TEMP')
#model.add_regressor('IWS')
model.fit(temp)

# Second frame with 'PRES' itself as the target 'y'.
temp_pres = data3[['datep', 'PRES']]
temp_pres.columns = ['ds', 'y']

# initializing the fbprophet model and fitting the data
# NOTE(review): this rebinds `model`, so the regressor model fitted above is
# no longer reachable through that name.
model = Prophet()
model.fit(temp_pres)

#creating a separate dataframe for predicted values
示例#43
0
 def make_model(self):
     """Create a Prophet model with default arguments and store it on ``self.model``."""
     self.model = Prophet()
示例#44
0
# NOTE(review): `crv`, `future_dates_reported`, `predict_bayesian_gr`,
# `days_in_future`, `daily_world_cases` and `Dates_reported` are not defined
# in this fragment; presumably they come from earlier in the script.
crv.Show_prediction_by_day(future_dates_reported,' Bayesian Interpolation predictions for Greece',predict_bayesian_gr,days_in_future)

# Turn both sequences into (n, 1) column arrays so they can be joined
# side by side into a two-column table.
daily_world_cases = np.array(daily_world_cases)
daily_world_cases.shape = (len(daily_world_cases),1)
print(daily_world_cases.shape)
Dates_reported = np.array(Dates_reported)
Dates_reported.shape = (len(Dates_reported),1)
print(Dates_reported.shape)
World_cases_perday = np.concatenate((Dates_reported,daily_world_cases), axis=1)
print((World_cases_perday.shape))
# Wrap in a DataFrame with the 'ds' / 'y' column names Prophet is fitted on.
World_cases_perday = pd.DataFrame(data=World_cases_perday)
World_cases_perday.columns = ['ds', 'y']
print(World_cases_perday)

# NOTE(review): interval_width=1 requests a 100% uncertainty interval —
# confirm this is intended.
ph = Prophet(n_changepoints=41, changepoint_prior_scale=1 , interval_width=1, daily_seasonality=True, yearly_seasonality=True, seasonality_mode='additive',seasonality_prior_scale=10)
ph.fit(World_cases_perday)
# The result of this bare expression is discarded when run as a script.
World_cases_perday.tail()

# Extend the history by 10 periods and forecast over the extended frame.
future_prediction = ph.make_future_dataframe(periods=10)
future_prediction.tail(10)

forecast = ph.predict(future_prediction)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

# Last 10 rows of the forecast: point estimate, lower bound, upper bound.
y = np.array(forecast['yhat'])[-10:]
y_l = np.array(forecast['yhat_lower'])[-10:]
y_u = np.array(forecast['yhat_upper'])[-10:]
# Truncate the values toward zero in place.
# NOTE(review): `y_u` is not truncated in the visible part of this loop; the
# fragment may be cut off here.
for i in range(len(y)):
  y[i] = int (y[i])
  y_l[i] = int (y_l[i])
示例#45
0
 def test_conditional_custom_seasonality(self):
     """Check a custom seasonality that is gated by a condition column.

     Fitting must raise ValueError while the condition column is absent or
     holds non-boolean-compatible values. After a valid fit, the stored
     seasonality settings are checked, and the conditional seasonality's
     features must be non-zero exactly on rows where the condition is set.
     """
     model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
     model.add_seasonality(
         name='conditional_weekly',
         period=7,
         fourier_order=3,
         prior_scale=2.,
         condition_name='is_conditional_week',
     )
     model.add_seasonality(
         name='normal_monthly',
         period=30.5,
         fourier_order=5,
         prior_scale=2.,
     )
     frame = DATA.copy()

     # Require all conditions names in df
     with self.assertRaises(ValueError):
         model.fit(frame)

     # Require boolean compatible values
     frame['is_conditional_week'] = [0] * 255 + [2] * 255
     with self.assertRaises(ValueError):
         model.fit(frame)

     frame['is_conditional_week'] = [0] * 255 + [1] * 255
     model.fit(frame)

     expected_settings = {
         'period': 7,
         'fourier_order': 3,
         'prior_scale': 2.,
         'mode': 'additive',
         'condition_name': 'is_conditional_week'
     }
     self.assertEqual(model.seasonalities['conditional_weekly'],
                      expected_settings)
     self.assertIsNone(
         model.seasonalities['normal_monthly']['condition_name'])

     features, _, _, _ = model.make_all_seasonality_features(model.history)
     # Confirm that only values without is_conditional_week has non zero entries
     is_conditional = features.columns.str.startswith('conditional_weekly')
     conditional_features = features[features.columns[is_conditional]]
     row_is_active = (conditional_features != 0).any(axis=1).values
     self.assertTrue(
         np.array_equal(row_is_active, frame['is_conditional_week'].values))
示例#46
0
def multi_input():
    """Plot actual vs. forecast sales for the requested shops and render them.

    Reads a comma-separated list of shop ids from the ``shop_ids`` form field
    and loads the weekly sales CSV. For each shop:

    * more than 51 rows with ``sales_status != 0``: fits Prophet on
      ``log(sales_amount)``, forecasts 52 weeks, prints the forecast for week
      number 23, and plots actual and forecast sales;
    * otherwise: builds per-week means from the shop's own rows, fills the
      gaps from rows of a k-means sales cluster (then from the mean of those
      fill values), fits a linear regression of sales on mean items / mean
      transactions, and plots actual and predicted sales.

    All shops are drawn onto the current matplotlib figure, which is turned
    into HTML with mpld3.

    Returns:
        The rendered ``multinput.html`` template, given the figure HTML as
        ``graphh`` and the list of shop ids as ``value2``.

    NOTE(review): the ``week_no`` fix-ups use chained assignment
    (``frame.week_no.loc[mask] = 54``), which pandas may apply to a temporary
    copy. Left as in the original; confirm that the value actually sticks.
    """
    from sklearn.cluster import KMeans

    csv_path = 'C:/weekly sales and labour cost for all shops 2013 to 20177.csv'
    cluster_cols = ['sales_amount', 'item_sold', 'transactions', 'week_no']

    shop_Id = request.form['shop_ids']
    shopID = [int(x) for x in shop_Id.split(',')]
    data = pd.read_csv(csv_path, index_col='start_date', parse_dates=True)
    shopID1 = list(shopID)
    for j in shopID1:

        data2 = data[[
            'sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold',
            'transactions', 'total_tax', 'sales_status'
        ]]
        df1 = data2[data2.shop_id == j]
        df2 = df1[df1.sales_status != 0]
        nulldetect = df1.week_no.isnull()
        # Missing week numbers become 54 and every week number is shifted by -2.
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        if len(df2.week_no) > 51:
            # ---- Enough history: Prophet on log(sales). ----
            dff = df2[['sales_amount']]
            data5 = dff.reset_index().rename(columns={
                'start_date': 'ds',
                'sales_amount': 'y'
            })
            data5['y'] = np.log(data5['y'])
            # Infinities and NaN produced by the log are replaced with 0.
            data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
            model = Prophet()
            model.fit(data5)
            future = model.make_future_dataframe(periods=52, freq='w')
            forecast = model.predict(future)
            forecast.set_index('ds', inplace=True)

            dff.index = pd.to_datetime(
                dff.index)  # make sure our index as a datetime object
            connect_date = dff.index[-2]  # select the 2nd to last date
            # Keep only forecast rows after the second-to-last observed date.
            predict_df = forecast.loc[forecast.index > connect_date]
            viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
                              how='outer')
            # Undo the log transform.
            viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])

            ii = len(dff.sales_amount) - 1
            predicted_future_sales2 = pd.DataFrame(
                viz_df.yhat_scaled[ii:]).rename(columns={
                    'yhat_scaled': 'future_sales'
                }).reset_index()
            week_no = predicted_future_sales2['index'].dt.week
            future_sales1 = round(predicted_future_sales2['future_sales'], 2)
            start_date = predicted_future_sales2['index']
            predict_data1 = pd.DataFrame({
                'future_sales': future_sales1,
                'week_no': week_no,
                'start_date': start_date
            })

            # The reported week number is hard-coded to 23.
            print("Predicted sales amount for shop #" + str(j) + ": " + str(
                float(predict_data1.future_sales[predict_data1.week_no ==
                                                 23].values[0])))
            plt.plot(viz_df.sales_amount, label='Actual Sales shop %s' % j)
            plt.plot(viz_df.yhat_scaled, label='Forecasted Sales %s' % j)
            plt.title('Sales (Orange) vs Sales Forecast (Black) for shop ' +
                      str(j))
            plt.ylabel('Dollar Sales')
            plt.xlabel('Dates')
            plt.legend()
        else:
            # ---- Short history: cluster-filled linear regression. ----
            a = df2[[
                'week_no', 'shop_id', 'sales_amount', 'transactions',
                'total_tax', 'item_sold'
            ]]

            # 52 weekly (Monday) dates from the shop's last start_date, and
            # one weekly date per observed row for the "actual" curve.
            # `date_range` has no `format` parameter, so none is passed.
            dates = pd.date_range(df2.index[-1], periods=52, freq='W-MON')
            dates2 = pd.date_range(df2.index[0],
                                   periods=len(df2.index),
                                   freq='W-MON')

            # Per forecast week: mean of the shop's own history for that week
            # number (NaN when the shop has no row for it).
            mean_week_item = [
                a.item_sold[a.week_no == i].mean() for i in dates.week
            ]
            trans_week_item = [
                a.transactions[a.week_no == i].mean() for i in dates.week
            ]
            sales_week = [
                a.sales_amount[a.week_no == i].mean() for i in dates.week
            ]
            dd1 = pd.DataFrame({
                'date': dates,
                'weeks_no': dates.week,
                'sales': sales_week,
                'mean_item': mean_week_item,
                'mean_trans': trans_week_item
            })

            # NOTE(review): the CSV is re-read and both clusterings are
            # recomputed for every short-history shop; none of it depends on
            # the current shop.
            data1 = pd.read_csv(csv_path)
            dff1 = data1[data1.sales_status != 0]
            nulldetect = dff1.week_no.isnull()
            dff1.week_no.loc[nulldetect == True] = 54
            dff1['week_no'] = dff1.week_no - 2

            # Row-level clustering on (shop_id, sales_amount).
            X_Cluster = dff1[['shop_id', 'sales_amount']]
            kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
            y_hat = kmeans_model.labels_
            # NOTE(review): the low/middle/high names are tied to fixed label
            # numbers; k-means labels carry no inherent ordering — confirm.
            group_low_sales = X_Cluster[y_hat == 0]
            group_middle_sales = X_Cluster[y_hat == 2]
            group_high_sales = X_Cluster[y_hat == 1]

            # Shop-level clustering on (shop_id, mean sales of the shop).
            # The comprehension variable no longer reuses the outer loop's `j`.
            fff = [
                X_Cluster.sales_amount[X_Cluster.shop_id == sid].mean()
                for sid in X_Cluster.shop_id
            ]
            f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
            f5 = pd.DataFrame(fff).drop_duplicates()
            # NOTE(review): lengths only match if no two shops share the same
            # mean sales value.
            f3['salle'] = f5.values

            Xx2 = f3[['shop_id', 'salle']]
            kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
            y_hat2 = kmeans_model2.labels_
            group_middle_sales2 = Xx2[y_hat2 == 0]
            group_high_sales2 = Xx2[y_hat2 == 2]
            group_low_sales2 = Xx2[y_hat2 == 1]

            # Forecast weeks for which the shop itself has no data.
            nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]
            q = int(a.shop_id.mean())

            # Pick the row-level group matching the shop's shop-level cluster
            # and the half-width of the sales window for comparable rows.
            if (group_low_sales2.shop_id.values == q).any():
                group, half_width = group_low_sales, 3000
            elif (group_middle_sales2.shop_id.values == q).any():
                group, half_width = group_middle_sales, 3000
            elif (group_high_sales2.shop_id.values == q).any():
                group, half_width = group_high_sales, 4000
            cx = int(group.sales_amount[group.shop_id == q].values.mean())
            trt = group[group.sales_amount > cx - half_width]
            trt2 = trt[trt.sales_amount < cx + half_width]
            drr = dff1[cluster_cols].loc[trt2.index.values]

            # Means of the comparable rows for every week the shop is missing.
            itt = [drr.item_sold[drr.week_no == i3].mean() for i3 in nullweeks]
            trr = [
                drr.transactions[drr.week_no == i3].mean() for i3 in nullweeks
            ]
            sale = [
                drr.sales_amount[drr.week_no == i3].mean() for i3 in nullweeks
            ]
            df_insert1 = pd.DataFrame({
                'sales_amountt': sale,
                'ittem': itt,
                'trans': trr,
                'weeks_no': nullweeks
            })

            # First fill from the per-week cluster means, then fall back to
            # the overall mean of those fill values (as Table_generator does)
            # so that remaining NaN values do not reach LinearRegression.
            forecastdf = dd1.fillna({
                'mean_item': df_insert1.ittem,
                'mean_trans': df_insert1.trans,
                'sales': df_insert1.sales_amountt
            }).fillna({
                'mean_item': df_insert1.ittem.mean(),
                'mean_trans': df_insert1.trans.mean(),
                'sales': df_insert1.sales_amountt.mean()
            })

            # Regress sales on the two mean features and predict on the same
            # rows.
            regr3 = linear_model.LinearRegression()
            X = forecastdf[['mean_item', 'mean_trans']]
            Y = forecastdf.sales
            regr3.fit(X, Y)
            y_predictionss1 = pd.DataFrame(regr3.predict(X))

            plt.plot(dates2,
                     df2.sales_amount,
                     label="actual sales shop %s" % q)
            plt.plot(dates,
                     y_predictionss1,
                     label="predicted sales shop %s" % q)
            plt.title(
                'Comparison actual and predicted sales for whole period of shops %s'
                % shopID1)
            plt.xlabel('Weeks')
            plt.ylabel('Sales amount')
            plt.legend()
            plt.gcf().set_size_inches(13, 7)

    # Fetch the figure after the loop so it is bound whichever branch ran.
    figg = plt.gcf()
    graphh = mpld3.fig_to_html(figg)
    return render_template('multinput.html', graphh=graphh, value2=shopID1)
示例#47
0
                          '2019-01-02', '2019-01-03', '2019-01-13',
                          '2019-04-29', '2019-04-30', '2019-05-01',
                          '2019-05-03', '2019-05-04', '2019-06-22',
                          '2020-01-02', '2020-01-03', '2020-01-04',
                          '2020-01-11']),
    'lower_window':0,
   'upper_window':1,
})
# Combine the two holiday frames into the single table passed to Prophet.
# NOTE(review): `events`, `df_train` and `df` are not defined in this
# fragment; presumably they are built earlier in the script.
holidays = pd.concat((events, superholidays))

#Hyperparameters
model = Prophet(holidays=holidays,

            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False,

            seasonality_prior_scale=0.1,
            interval_width=0.95,
            holidays_prior_scale=10,
            changepoint_prior_scale=0.15)

# Add the built-in Japanese holidays on top of the custom table, then fit on
# the training frame.
model.add_country_holidays(country_name='JP')
model.fit(df_train)

# Predict over `df` rather than over a frame from make_future_dataframe.
forecast = model.predict(df)
forecast[['ds','yhat']].head()

model.plot_components(forecast)

# Start a new 15x5 figure and draw the training series as grey dots joined
# by a line.
fig, ax = plt.subplots(figsize=(15,5))
ax.plot(df_train['ds'], df_train['y'], c='grey', marker='o', ms=3, linestyle='-', label='Train')
示例#48
0
scores = {}

# Keep only the columns used for modelling and rename them to the
# ds (date) / y (target) names used when fitting Prophet below.
train = train[['region', 'date', 'mortality_rate']]
train.columns = ['region', 'ds', 'y']
#train.y = np.log( train.y )

test = test[['Id', 'region', 'date']]
test.columns = ['Id', 'region', 'ds']

# Split both frames per region; the containers (train_regions, test_regions,
# prophets, predictions) are created outside this excerpt.
for r in sorted(train.region.unique()):
    train_regions[r] = train[train.region == r].copy()
    test_regions[r] = test[test.region == r].reset_index(drop=True)
    # One pre-formatted argument: prints the same space-separated text on
    # Python 2 and Python 3 (the bare `print a, b` statement is a
    # SyntaxError on Python 3).
    print("%s %s %s" % (r, len(train_regions[r]), len(test_regions[r])))

# One independent model per region.
for r in train_regions:
    prophets[r] = Prophet()
    prophets[r].fit(train_regions[r])
    predictions[r] = prophets[r].predict(test_regions[r])
    predictions[r]['mortality_rate'] = predictions[r].yhat

for r in train_regions:
    print(predictions[r].head())
    prophets[r].plot(predictions[r])
    #prophets[r].plot_components( predictions[r] )
plt.show()

# Gather the per-region submission columns.
# NOTE(review): 'Id' is selected from predictions[r] but never assigned to it
# in this excerpt -- confirm the column is really present.
submissions = []
for r in predictions:
    tmp = predictions[r][['Id', 'mortality_rate']]
    submissions.append(tmp)
示例#49
0
文件: fbp_helpers.py 项目: omok12/Wow
class ProphetProfit:
    def __init__(self, engine, query, item):
        self.engine = engine
        self.query = query
        self.item = item
        self.df = None
        self.data = None
        self.m = None
        self.lmbda = None
        self.forecast = None
        self.ma = None
        self.item_list = None
        self.positive_trend = []
        self.negative_trend = []
        self.profit = pd.DataFrame()

    def sql_call(self):
        self.df = pd.read_sql(self.query, self.engine)
        self.df = self.df.sort_values(by='when')

    def prophet_fit(self, periods=31):
        """Fit Prophet to the price history of ``self.item`` and forecast ahead.

        Takes the rows of ``self.df`` whose ``name_enus`` equals ``self.item``,
        keeps prices strictly within 1.5 standard deviations of the mean, fits
        a Prophet model with an extra 'monthly' seasonality and predicts over
        the frame returned by ``make_future_dataframe(periods)``. Results are
        stored on ``self.data``, ``self.m``, ``self.forecast`` and ``self.ma``.

        Args:
            periods: number of future periods passed to
                ``make_future_dataframe``.
        """
        item_rows = self.df[self.df['name_enus'] == self.item]
        price_columns = item_rows[['when', 'priceavg']]
        self.data = price_columns.rename(columns={'when': 'ds',
                                                  'priceavg': 'y'})
        self.data['ds'] = pd.to_datetime(self.data['ds'])

        # Outlier filter: keep prices strictly inside mean +/- 1.5 * std.
        half_width = self.data['y'].std() * 1.5
        centre = self.data['y'].mean()
        below_ceiling = self.data['y'] < centre + half_width
        above_floor = self.data['y'] > centre - half_width
        self.data = self.data[below_ceiling & above_floor]

        # NOTE: a Box-Cox transform of y (and its inverse on the forecast)
        # previously lived here as commented-out code; it is not applied.
        self.m = Prophet(n_changepoints=20)
        self.m.add_seasonality(period=30.4, fourier_order=5, name='monthly')
        self.m.fit(self.data)
        horizon = self.m.make_future_dataframe(periods)
        self.forecast = self.m.predict(horizon)

        # Observed prices side by side with the forecast columns, plus a
        # 7-row rolling mean of the trend.
        observed = self.data['y'].reset_index(drop=True)
        predicted = self.forecast[['ds', 'yhat', 'trend']]
        self.ma = pd.concat([observed, predicted], axis=1)
        self.ma['7day'] = self.ma['trend'].rolling(7).mean()

        # trend_pos: +1 where the trend is above its rolling mean, -1 where it
        # is below; rows matching neither comparison keep NaN.
        rising = self.ma['trend'] > self.ma['7day']
        falling = self.ma['trend'] < self.ma['7day']
        self.ma.loc[rising, 'trend_pos'] = 1
        self.ma.loc[falling, 'trend_pos'] = -1

    def plot(self):
        """Reload the data, refit the model and show the forecast and component plots."""
        self.sql_call()
        self.prophet_fit()
        # The figure handles returned by Prophet are not needed here.
        self.m.plot(self.forecast)
        self.m.plot_components(self.forecast)
        plt.show()

    def make_lists(self, buy_date):
        """Collect forecast profit curves for upward-trending items.

        Items are visited in descending order of their mean ``quantityavg``.
        Each one is fitted with ``prophet_fit``; when the summed ``trend_pos``
        of the inspected rows is above 1, a column of
        ``forecast yhat - price paid on buy_date`` is appended to
        ``self.profit``. When the sum is below -1 the item is recorded in
        ``self.negative_trend``. As soon as ``self.profit`` already holds 10
        columns, it is pickled to ``../data/profit_df.pkl`` and the loop stops.

        NOTE(review): if fewer than 10 items qualify, the loop ends without
        writing the pickle, and ``self.positive_trend`` is never filled --
        confirm both are intended.

        Args:
            buy_date: purchase date as a ``'%Y-%m-%d'`` string.

        Raises:
            IndexError: if an item has no price on ``buy_date`` nor on the
                previous day.
        """
        self.sql_call()
        # numeric_only=True: columns that cannot be averaged are dropped
        # instead of raising on recent pandas versions.
        self.item_list = self.df.groupby('name_enus').mean(
            numeric_only=True).sort_values('quantityavg')[::-1]
        buy_date = np.datetime64(
            datetime.datetime.strptime(buy_date, '%Y-%m-%d').date())
        for item in self.item_list.index:
            self.item = item
            if self.profit.shape[1] >= 10:
                # Enough columns collected: persist them and stop. The
                # context manager guarantees the file is flushed and closed.
                with open('../data/profit_df.pkl', 'wb') as handle:
                    pickle.dump(self.profit, handle)
                break
            self.prophet_fit()
            # Eight rows of self.ma, from position -31 back to -38.
            trend_score = self.ma['trend_pos'].iloc[-31:-39:-1].sum()
            if trend_score > 1:
                prices_on_day = self.data[self.data['ds'] ==
                                          buy_date]['y'].values
                try:
                    buy = prices_on_day[0]
                except IndexError:
                    # No observation on buy_date: fall back to the day before.
                    previous_day = buy_date - np.timedelta64(1, 'D')
                    buy = self.data[self.data['ds'] ==
                                    previous_day]['y'].values[0]
                profit_temp = self.forecast[
                    self.forecast['ds'] > buy_date][[
                        'ds', 'yhat'
                    ]].reset_index(drop=True)
                profit_temp['buy'] = buy
                profit_temp[item] = profit_temp['yhat'] - profit_temp['buy']
                self.profit = pd.concat([self.profit, profit_temp[item]],
                                        axis=1)
            elif trend_score < -1:
                self.negative_trend.append(self.item)

    def cross_val(self):
        """Cross-validate ``self.m`` and print the latest rows and the performance metrics."""
        windows = {'initial': '62 days', 'period': '1 days', 'horizon': '7 days'}
        cv_frame = cross_validation(self.m, **windows)
        # NOTE: the inverse Box-Cox step that used to follow is not applied.
        latest_rows = cv_frame.sort_values('ds').tail()
        print(latest_rows)
        metrics = performance_metrics(cv_frame)
        print(metrics)

    def mabp_random(self):
        """Run a uniformly random selection baseline over the saved profit table.

        Loads ``../data/profit_df.pkl``, replaces missing values with the
        column means, robust-scales the table and, for every row, picks one
        column at random.

        Returns:
            pandas.Series: fraction of rounds in which each column position
            was picked (``value_counts(normalize=True)``).
        """
        # NOTE: read_pickle executes arbitrary code from the file; only load
        # pickles this project wrote itself.
        df = pd.read_pickle('../data/profit_df.pkl')
        # fillna returns a new frame, so the result has to be kept.
        df = df.fillna(df.mean())
        scaler = RobustScaler().fit(df)
        scaled = scaler.transform(df)
        # Undo the scaling once up front instead of once per round.
        profits = scaler.inverse_transform(scaled)
        n_rounds, n_items = scaled.shape
        selected = []
        # Running totals kept for inspection; they are not part of the result.
        total_reward = 0
        total_profit = 0
        for n in range(n_rounds):
            item = random.randrange(n_items)
            selected.append(item)
            total_reward += scaled[n, item]
            total_profit += profits[n, item]
        return pd.Series(selected).value_counts(normalize=True)

    def mapb_ucb(self):
        """Run an upper-confidence-bound selection over the saved profit table.

        Loads ``../data/profit_df.pkl``, replaces missing values with the
        column means and robust-scales the table. For every row it picks the
        column with the largest ``average reward + sqrt(2 * ln(n + 1) / times
        picked)``; columns never picked so far get an infinite bound, and ties
        go to the lowest column position.

        Returns:
            pandas.Series: fraction of rounds in which each column position
            was picked (``value_counts(normalize=True)``).
        """
        # NOTE: read_pickle executes arbitrary code from the file; only load
        # pickles this project wrote itself.
        df = pd.read_pickle('../data/profit_df.pkl')
        # fillna returns a new frame, so the result has to be kept.
        df = df.fillna(df.mean())
        scaler = RobustScaler().fit(df)
        scaled = scaler.transform(df)
        # Undo the scaling once up front instead of once per round.
        profits = scaler.inverse_transform(scaled)
        n_rounds, n_items = scaled.shape
        selected = []
        numbers_of_selections = [0] * n_items
        sums_of_reward = [0] * n_items
        # Running totals kept for inspection; they are not part of the result.
        total_reward = 0
        total_profit = 0
        infinity = float('inf')

        for n in range(n_rounds):
            item = 0
            # Start below every possible bound: scaled rewards can be
            # negative, so a floor of 0 would silently force column 0.
            max_upper_bound = -infinity
            for i in range(n_items):
                if numbers_of_selections[i] > 0:
                    average_reward = (sums_of_reward[i] /
                                      numbers_of_selections[i])
                    delta_i = math.sqrt(2 * math.log(n + 1) /
                                        numbers_of_selections[i])
                    upper_bound = average_reward + delta_i
                else:
                    # Unseen columns are always tried first.
                    upper_bound = infinity
                if upper_bound > max_upper_bound:
                    max_upper_bound = upper_bound
                    item = i
            selected.append(item)
            numbers_of_selections[item] += 1
            reward = scaled[n, item]
            sums_of_reward[item] += reward
            total_reward += reward
            total_profit += profits[n, item]
        return pd.Series(selected).value_counts(normalize=True)
# plot the avocado prices vs. regions for organic avocados
# x and y are passed by keyword: recent seaborn releases make them
# keyword-only, and older releases accept the keywords as well.
organic = sns.catplot(x='AveragePrice',
                      y='region',
                      data=df[df['type'] == 'organic'],
                      hue='year',
                      height=20)

# # TASK 4: PREPARE THE DATA BEFORE APPLYING FACEBOOK PROPHET TOOL
# Keep the date and price columns and rename them to ds / y for Prophet.
# (The bare `df_sample` lines only display the frame in a notebook.)
df_sample = df[['Date', 'AveragePrice']]
df_sample
df_sample = df_sample.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_sample

# # TASK 5: DEVELOP MODEL AND MAKE PREDICTIONS - PART A

m = Prophet()
m.fit(df_sample)

# Forecasting into the future: extend the history by 365 periods
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast

figure = m.plot(forecast, xlabel='Date', ylabel='Price')
figure2 = m.plot_components(forecast)

# # TASK 6: DEVELOP MODEL AND MAKE PREDICTIONS (REGION SPECIFIC) - PART B

# Select specific region
df_r1 = df[df['region'] == 'West']
df_r2 = df[df['region'] == 'Chicago']
示例#51
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import DataReader
import datetime as dt
from fbprophet import Prophet

ticker = 'AAPL'
num_of_years = 20
# Look-back window of int(365.25 * num_of_years) days before the current time.
start = dt.datetime.now() - dt.timedelta(days=int(365.25 * num_of_years))
now = dt.datetime.now()

# Download the quotes, then keep only the date and closing price under the
# ds / y column names used for fitting.
data = (
    DataReader(ticker, 'yahoo', start, now)
    .reset_index()[["Date", "Close"]]
    .rename(columns={"Date": "ds", "Close": "y"})
)

m = Prophet(daily_seasonality=True)
m.fit(data)

# Extend the history by 30 periods and predict over the whole frame.
future = m.make_future_dataframe(periods=30)
prediction = m.predict(future)

m.plot(prediction)
plt.title(f"Prediction of the {ticker}'s Stock Price using the Prophet")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.show()
示例#52
0
    def test_holidays(self):
        """Check the shape, prior scales and names returned by make_holiday_features."""
        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [0],
        })
        model = Prophet(holidays=holidays)
        df = pd.DataFrame({'ds': pd.date_range('2016-12-20', '2016-12-31')})
        feats, priors, names = model.make_holiday_features(
            df['ds'], model.holidays)
        # 2 feature columns are expected for the [-1, 0] window
        self.assertEqual(feats.shape, (df.shape[0], 2))
        # the column sums must add up to 2 in total
        self.assertEqual((feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)
        self.assertEqual(priors, [10., 10.])  # Default prior
        self.assertEqual(names, ['xmas'])

        # Same holiday with a wider window: [-1, 10]
        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [10],
        })
        m = Prophet(holidays=holidays)
        feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
        # 12 columns generated even though only 8 of the window days
        # (2016-12-24 .. 2016-12-31) fall inside df's date range
        self.assertEqual(feats.shape, (df.shape[0], 12))
        self.assertEqual(priors, list(10. * np.ones(12)))
        self.assertEqual(names, ['xmas'])
        # Check prior specifications: an explicit prior_scale column
        # replaces the default of 10
        holidays = pd.DataFrame({
            'ds':
            pd.to_datetime(['2016-12-25', '2017-12-25']),
            'holiday': ['xmas', 'xmas'],
            'lower_window': [-1, -1],
            'upper_window': [0, 0],
            'prior_scale': [5., 5.],
        })
        m = Prophet(holidays=holidays)
        feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
        self.assertEqual(priors, [5., 5.])
        self.assertEqual(names, ['xmas'])
        # 2 different priors: each feature column must carry the prior of
        # the holiday named in its column prefix
        holidays2 = pd.DataFrame({
            'ds':
            pd.to_datetime(['2012-06-06', '2013-06-06']),
            'holiday': ['seans-bday'] * 2,
            'lower_window': [0] * 2,
            'upper_window': [1] * 2,
            'prior_scale': [8] * 2,
        })
        holidays2 = pd.concat((holidays, holidays2), sort=True)
        m = Prophet(holidays=holidays2)
        feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
        pn = zip(priors, [s.split('_delim_')[0] for s in feats.columns])
        for t in pn:
            self.assertIn(t, [(8., 'seans-bday'), (5., 'xmas')])
        # Holidays without a prior_scale (seans-bday here) are expected to
        # pick up holidays_prior_scale=4, while xmas keeps its explicit 5
        holidays2 = pd.DataFrame({
            'ds':
            pd.to_datetime(['2012-06-06', '2013-06-06']),
            'holiday': ['seans-bday'] * 2,
            'lower_window': [0] * 2,
            'upper_window': [1] * 2,
        })
        holidays2 = pd.concat((holidays, holidays2), sort=True)
        feats, priors, names = Prophet(
            holidays=holidays2,
            holidays_prior_scale=4).make_holiday_features(df['ds'], holidays2)
        self.assertEqual(set(priors), {4., 5.})
        # Check incompatible priors: one holiday name with two different
        # prior_scale values must raise
        holidays = pd.DataFrame({
            'ds':
            pd.to_datetime(['2016-12-25', '2016-12-27']),
            'holiday': ['xmasish', 'xmasish'],
            'lower_window': [-1, -1],
            'upper_window': [0, 0],
            'prior_scale': [5., 6.],
        })
        with self.assertRaises(ValueError):
            Prophet(holidays=holidays).make_holiday_features(
                df['ds'], holidays)
示例#53
0
 def test_custom_seasonality(self):
     """Check add_seasonality bookkeeping, name validation and prior scales."""
     holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2017-01-02']),
         'holiday': ['special_day'],
         'prior_scale': [4.],
     })
     m = Prophet(holidays=holidays)
     m.add_seasonality(name='monthly',
                       period=30,
                       fourier_order=5,
                       prior_scale=2.)
     # The stored settings echo the arguments; mode and condition_name are
     # expected to come back as 'additive' and None when not given
     self.assertEqual(
         m.seasonalities['monthly'],
         {
             'period': 30,
             'fourier_order': 5,
             'prior_scale': 2.,
             'mode': 'additive',
             'condition_name': None
         },
     )
     # A seasonality may not reuse a holiday name or the name 'trend' ...
     with self.assertRaises(ValueError):
         m.add_seasonality(name='special_day', period=30, fourier_order=5)
     with self.assertRaises(ValueError):
         m.add_seasonality(name='trend', period=30, fourier_order=5)
     # ... but adding one named 'weekly' is expected to succeed
     m.add_seasonality(name='weekly', period=30, fourier_order=5)
     # Test fourier order <= 0
     m = Prophet()
     with self.assertRaises(ValueError):
         m.add_seasonality(name='weekly', period=7, fourier_order=0)
     with self.assertRaises(ValueError):
         m.add_seasonality(name='weekly', period=7, fourier_order=-1)
     # Test priors
     m = Prophet(
         holidays=holidays,
         yearly_seasonality=False,
         seasonality_mode='multiplicative',
     )
     m.add_seasonality(name='monthly',
                       period=30,
                       fourier_order=5,
                       prior_scale=2.,
                       mode='additive')
     m.fit(DATA.copy())
     # The explicit mode wins for 'monthly'; 'weekly' follows the model-wide
     # seasonality_mode
     self.assertEqual(m.seasonalities['monthly']['mode'], 'additive')
     self.assertEqual(m.seasonalities['weekly']['mode'], 'multiplicative')
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(m.history))
     # Number of feature columns attributed to each component
     self.assertEqual(sum(component_cols['monthly']), 10)
     self.assertEqual(sum(component_cols['special_day']), 1)
     self.assertEqual(sum(component_cols['weekly']), 6)
     self.assertEqual(sum(component_cols['additive_terms']), 10)
     self.assertEqual(sum(component_cols['multiplicative_terms']), 7)
     # The expected prior scales depend on whether the monthly or the weekly
     # columns come first; the holiday column is last in both layouts
     if seasonal_features.columns[0] == 'monthly_delim_1':
         true = [2.] * 10 + [10.] * 6 + [4.]
         self.assertEqual(sum(component_cols['monthly'][:10]), 10)
         self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
     else:
         true = [10.] * 6 + [2.] * 10 + [4.]
         self.assertEqual(sum(component_cols['weekly'][:6]), 6)
         self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
     self.assertEqual(prior_scales, true)
示例#54
0
@author: batesc

Takes a .csv of hourly feeder loading data and projects its max and min
values out over a year. Intended to be run as part of a time-series load flow analysis.

"""

import pandas as pd
from fbprophet import Prophet

# instantiate the model and set parameters
model = Prophet(changepoint_prior_scale=0.01,
                interval_width=0.95,
                growth='linear',
                daily_seasonality=True,
                weekly_seasonality=False,
                yearly_seasonality=True,
                seasonality_mode='additive')

# load the historical readings
# NOTE(review): Prophet fits on 'ds' and 'y' columns -- confirm load.csv has them
history_pd = pd.read_csv("load.csv")

# fit the model to historical data
model.fit(history_pd)

# projects over 8760 hours - 1 year (8760 = 24 * 365 hourly steps);
# include_history=True keeps the historical timestamps in the frame as well
future_pd = model.make_future_dataframe(periods=8760,
                                        freq='H',
                                        include_history=True)

# predict over the dataset
示例#55
0
 def test_auto_daily_seasonality(self):
     """Daily seasonality under 'auto', forced on, and with a custom Fourier order."""

     def expected_daily(fourier_order, prior_scale):
         # Settings expected under seasonalities['daily'].
         return {
             'period': 1,
             'fourier_order': fourier_order,
             'prior_scale': prior_scale,
             'mode': 'additive',
             'condition_name': None
         }

     # Should be enabled: the setting starts as 'auto' and fitting DATA2
     # turns the component on
     auto_model = Prophet()
     self.assertEqual(auto_model.daily_seasonality, 'auto')
     auto_model.fit(DATA2)
     self.assertIn('daily', auto_model.seasonalities)
     self.assertEqual(auto_model.seasonalities['daily'],
                      expected_daily(4, 10.))

     # Should be disabled due to too short history
     short_history = DATA2.head(430)
     short_auto = Prophet()
     short_auto.fit(short_history)
     self.assertNotIn('daily', short_auto.seasonalities)

     # An explicit True enables it on the same short history
     forced = Prophet(daily_seasonality=True)
     forced.fit(short_history)
     self.assertIn('daily', forced.seasonalities)

     # An integer is used as the Fourier order; the prior follows
     # seasonality_prior_scale
     custom = Prophet(daily_seasonality=7, seasonality_prior_scale=3.)
     custom.fit(DATA2)
     self.assertEqual(custom.seasonalities['daily'], expected_daily(7, 3.))

     # Fitting DATA under 'auto' must leave daily seasonality off
     other = Prophet()
     other.fit(DATA)
     self.assertNotIn('daily', other.seasonalities)
示例#56
0
        '2016-02-07', '2016-02-08', '2016-02-09', '2016-02-10', '2016-02-11',
        '2016-02-12', '2016-02-13', '2017-01-27', '2017-01-28', '2017-01-29',
        '2017-01-30', '2017-01-31', '2017-02-01', '2017-02-02'
    ]),
    'lower_window':
    0,
    'upper_window':
    0,
})
# NOTE: `holidays` is built here but not passed to Prophet() below (the
# argument only appears in a trailing comment).
holidays = pd.concat((playoffs, superbowls))
for i in user:
    df2 = generatedata(df, i)
    # plt.figure(1)
    # plt.plot(df2['ds'], df2['y'])
    # plt.grid(True)
    prophet = Prophet()  #,yearly_seasonality=True holidays=holidays
    prophet.fit(df2)
    future = prophet.make_future_dataframe(
        periods=90)  #, include_history=False
    # Second positional argument is passed as '90 days', alongside
    # initial='270 days' and period='90 days'.
    df_cv = cross_validation(prophet,
                             '90 days',
                             initial='270 days',
                             period='90 days')
    print(df_cv)
    # Plot the y and yhat columns of the cross-validation result against ds.
    plt.figure(1)
    plt.plot(df_cv['ds'], df_cv['y'])
    plt.plot(df_cv['ds'], df_cv['yhat'])
    plt.grid(True)
    plt.show()
    # NOTE(review): this break stops after the first user, so the predict
    # call below is never reached and `future` goes unused -- confirm intent.
    break
    forecast = prophet.predict(future)
示例#57
0
 def test_auto_weekly_seasonality(self):
     """Weekly seasonality under 'auto', forced on, and with a custom Fourier order."""

     def expected_weekly(fourier_order, prior_scale):
         # Settings expected under seasonalities['weekly'].
         return {
             'period': 7,
             'fourier_order': fourier_order,
             'prior_scale': prior_scale,
             'mode': 'additive',
             'condition_name': None
         }

     # Should be enabled: 15 rows of DATA are enough under 'auto'
     fifteen_rows = DATA.head(15)
     auto_model = Prophet()
     self.assertEqual(auto_model.weekly_seasonality, 'auto')
     auto_model.fit(fifteen_rows)
     self.assertIn('weekly', auto_model.seasonalities)
     self.assertEqual(auto_model.seasonalities['weekly'],
                      expected_weekly(3, 10.))

     # Should be disabled due to too short history
     nine_rows = DATA.head(9)
     short_auto = Prophet()
     short_auto.fit(nine_rows)
     self.assertNotIn('weekly', short_auto.seasonalities)

     # An explicit True enables it on the same short history
     forced = Prophet(weekly_seasonality=True)
     forced.fit(nine_rows)
     self.assertIn('weekly', forced.seasonalities)

     # Should be False due to weekly spacing (every 7th row only)
     every_seventh = DATA.iloc[::7, :]
     spaced = Prophet()
     spaced.fit(every_seventh)
     self.assertNotIn('weekly', spaced.seasonalities)

     # An integer is used as the Fourier order; the prior follows
     # seasonality_prior_scale
     custom = Prophet(weekly_seasonality=2, seasonality_prior_scale=3.)
     custom.fit(DATA)
     self.assertEqual(custom.seasonalities['weekly'],
                      expected_weekly(2, 3.))
示例#58
0
# Prophet expects the date column to be named 'ds' and the value column 'y'
volume = volume.rename(columns={'date': 'ds', 'volume': 'y'})
volume.head()

# In[ ]:

# plot the daily volume
ax = volume.set_index('ds').plot(figsize=(12, 4), color=c)
ax.set_ylabel('Daily volume of A.N')
ax.set_xlabel('Date')
plt.show()

# In[ ]:

# set the uncertainty interval to 95% (the Prophet default is 80%)
my_model = Prophet(interval_width=0.95)
my_model.fit(volume)

# dataframe that extends one period past the end of the history
future_dates = my_model.make_future_dataframe(periods=1)

print("First day to forecast.")
future_dates

# predictions
forecast = my_model.predict(future_dates)

# predictions together with their lower and upper uncertainty bounds
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# In[ ]:
示例#59
0
      'lower_window': 0,
      'upper_window': 1,
    })
else:
    extra_holiday = False

#changepoints=['2020-03-17', '2020-05-11', '2020-07-28', '2008-09-14']
# Time the model construction and fit.
start_time = time.time()
pro = Prophet(growth = param['growth'],
              #default = 'linear'
              yearly_seasonality = param['yearly_seasonality'],
              #default = 'auto'
              weekly_seasonality = param['weekly_seasonality'],
              #default = 'auto'
              daily_seasonality = param['daily_seasonality'],
              #default = 'auto'
              # Prophet accepts only a DataFrame or None here; the
              # "no extra holidays" branch above stores False, which
              # Prophet's input validation rejects, so map it to None.
              holidays = None if extra_holiday is False else extra_holiday,
              #default = None
              changepoint_range = param['changepoint_range'],
              #default = 0.8
              changepoint_prior_scale = param['changepoint_prior_scale']
              #default = 0.05
              )
pro.add_country_holidays(country_name = param['country_name'])
pro.fit(Data)
end_time = time.time()
print("Fitting time:", end_time - start_time)

# Extend the history by days2predict periods and forecast over the whole frame.
future = pro.make_future_dataframe(periods = days2predict)
forecast = pro.predict(future)
fig1 = pro.plot(forecast)
示例#60
0
def my_form_post():
    shop_Id = request.form['shop_id']
    week_No = request.form['week_no']
    ShopID = int(shop_Id)
    WeekNo = int(week_No)

    # %matplotlib inline

    #plt.rcParams['figure.figsize'] = (20, 10)
    plt.style.use('ggplot')

    data_f = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv')
    data = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv',
        index_col='start_date',
        parse_dates=True)
    # shopID = input("Enter your shop id")
    shopID1 = ShopID
    # if shopID1<min(data.shop_id) or shopID1>max(data.shop_id):
    # print("Enter correct shop id number")
    # return select_model()
    WeekNo1 = WeekNo

    data2 = data[[
        'sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold',
        'transactions', 'total_tax', 'sales_status'
    ]]
    df1 = data2[data2.shop_id == shopID1]  # input №1
    df2 = df1[df1.sales_status != 0]
    df2.week_no.isnull().values.any()
    nulldetect = df1.week_no.isnull()
    nulldetect[nulldetect == True].index
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2
    if len(df2.week_no) > 51:

        dff = df2[['sales_amount']]
        data3 = dff.reset_index()
        data4 = data3

        data5 = data4.rename(columns={'start_date': 'ds', 'sales_amount': 'y'})
        data5.set_index('ds')
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
        # y.plot()
        data5['y'] = np.log(data5['y'])
        data5.set_index('ds')
        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        data5.set_index('ds', inplace=True)
        forecast.set_index('ds', inplace=True)
        viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']],
                          how='outer')
        viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
        dff.index = pd.to_datetime(
            dff.index)  # make sure our index as a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        mask = (forecast.index > connect_date)
        predict_df = forecast.loc[mask]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
                          how='outer')
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
        ii = len(dff.sales_amount) - 1
        viz_df.yhat_scaled[ii:]
        predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
        predicted_future_sales1 = predicted_future_sales.rename(
            columns={'yhat_scaled': 'future_sales'})
        predicted_future_sales2 = predicted_future_sales1.reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales = predicted_future_sales2['future_sales']
        future_sales1 = round(future_sales, 2)
        start_date = predicted_future_sales2['index']
        predict_data = {
            'future_sales': future_sales1,
            'week_no': week_no,
            'start_date': start_date
        }
        predict_data1 = pd.DataFrame(predict_data)

        predict_data2 = predict_data1.set_index('start_date')
        frames = [df2.sales_amount, predict_data2.future_sales]
        join = pd.concat(frames)
        detrend_sdata = signal.detrend(join)
        trend = join - detrend_sdata

        p2 = predict_data1.set_index('start_date')
        r = []
        for jj in pd.DataFrame(
                df2.index.year.values).drop_duplicates().index.values:
            sale_year = df2.sales_amount[str(
                int(pd.DataFrame(
                    df2.index.year).drop_duplicates().loc[jj]))].mean()
            r.append(sale_year)
        years = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values
        holday = []
        for t in years[0:len(years) - 1]:
            h = df2.sales_amount[df2.week_no >= 50][str(t)].mean(
            ) + df2.sales_amount[df2.week_no <= 3][str(int(t) + 1)].mean()
            holday.append(h / 2)
        year_last = p2.future_sales[p2.week_no >= 50][str(years[-1])].mean(
        ) + p2.future_sales[p2.week_no <= 3].mean()  # 2018
        holday.append(year_last / 2)
        N = len(r)
        Holiday_Means = holday
        All_Year_Means = r
        ind = np.arange(N)

        avg_sale = round(df2.sales_amount.mean(), 2)
        maxSale = round(max(df2.sales_amount), 2)
        minSale = round(min(df2.sales_amount), 2)
        itemTrans = round((df2.item_sold / df2.transactions).mean(), 2)

        fig, ax1 = plt.subplots(figsize=(7, 4))
        ax1.plot(viz_df.sales_amount)
        ax1.plot(viz_df.yhat_scaled, color='green')
        ax1.plot(join.index, trend, color='blue', alpha=0.5, label='Trend')
        #ax1.plot(join.index, trend, color='blue', alpha=0.5, label='Trend')
        #ax1.fill_between(viz_df.index, np.exp(viz_df['yhat_upper']), np.exp(viz_df['yhat_lower']), alpha=0.5,
        #color='darkgray')
        ax1.set_title('Sales (Orange) vs Sales Forecast (Green) for shop ' +
                      str(shopID1))
        ax1.set_ylabel('Sales amount')
        ax1.set_xlabel('Dates')

        L = ax1.legend()  # get the legend
        L.get_texts()[0].set_text(
            'Actual Sales')  # change the legend text for 1st plot
        L.get_texts()[1].set_text(
            'Forecasted Sales')  # change the legend text for 2nd plot
        graph = mpld3.fig_to_html(fig)

        fig, ax2 = plt.subplots(figsize=(7, 4))
        bar_width = 0.4
        opacity = 0.8
        bar1 = ax2.bar(ind,
                       Holiday_Means,
                       bar_width,
                       opacity,
                       label='Holidays')
        bar2 = ax2.bar(ind + bar_width,
                       All_Year_Means,
                       bar_width,
                       opacity,
                       label='Avg sales per year')
        ticks = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values

        ax2.set_ylabel('Sales_amount')
        ax2.set_title(
            'Holiday sales (Xmas & NY) vs Average sales per year (shop #%s)' %
            shopID1)
        plt.xticks(ind + 0.25, ticks)
        ax2.legend()
        graph1 = mpld3.fig_to_html(fig)

        f_sale = str(
            float(predict_data1.future_sales[predict_data1.week_no ==
                                             WeekNo1].values[0]))
        n_week = WeekNo1
        id_shop = shopID1
        sale_mean = avg_sale
        max_sale = maxSale
        min_sale = minSale
        item_trans = itemTrans

    else:

        a = df2[[
            'sales_amount', 'shop_id', 'week_no', 'transactions', 'item_sold'
        ]]
        y = a.iloc[:, 0]
        x = a.iloc[:, 3:5]
        # print (df2)
        from sklearn import linear_model
        regr2 = linear_model.LinearRegression()
        X1 = x
        y1 = y
        regr2.fit(X1, y1)
        y_predictions = regr2.predict(X1)
        y_predictions1 = pd.DataFrame(y_predictions)
        d = {'actual sales': y, 'predicted sales': y_predictions1}
        d1 = np.array(d)

        dates = pd.date_range(y.index[-1],
                              periods=52,
                              freq='W-MON',
                              format='%Y-%m-%d')
        dates1 = pd.DataFrame(dates)

        mean_week_item = []
        for i in dates.week:
            mean_item_sold = a.item_sold[a.week_no == i].mean()
            mean_week_item.append(mean_item_sold)
        mean_week_item1 = pd.DataFrame(mean_week_item)

        trans_week_item = []
        for i1 in dates.week:
            mean_trans_sold = a.transactions[a.week_no == i1].mean()
            trans_week_item.append(mean_trans_sold)

        sales_week = []
        for ii1 in dates.week:
            mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
            sales_week.append(mean_sales_sold)

        dd = {
            'date': dates,
            'weeks_no': dates.week,
            'sales': sales_week,
            'mean_item': mean_week_item,
            'mean_trans': trans_week_item
        }
        dd1 = pd.DataFrame(dd)

        dff1 = data_f[data_f.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        from sklearn.cluster import KMeans
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_  # clusters
        cen = kmeans_model.cluster_centers_
        y_hat1 = pd.DataFrame(y_hat)
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]

        fff = []
        for j in X_Cluster.shop_id:
            dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == j].mean()
            fff.append(dfdf)
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f4 = pd.DataFrame(fff)
        f5 = f4.drop_duplicates()
        f3['salle'] = f5.values

        # from sklearn.cluster import KMeans
        Xx2 = f3[['shop_id', 'salle']]
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_  # clusters
        cen2 = kmeans_model2.cluster_centers_

        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]

        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]

        if (group_low_sales2.shop_id.values == shopID1).any() == True:
            cx = int(group_low_sales.sales_amount[group_low_sales.shop_id ==
                                                  shopID1].values.mean())
            trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[[
                'sales_amount', 'item_sold', 'transactions', 'week_no'
            ]].loc[trt2.index.values]
            #print("Cluster of shop %s is low sales" % shopID1)
        elif (group_middle_sales2.shop_id.values == shopID1).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_middle_sales.shop_id.index.values]
            cx = int(group_middle_sales.sales_amount[group_middle_sales.shop_id
                                                     == shopID1].values.mean())
            trt = group_middle_sales[group_middle_sales.sales_amount > cx -
                                     3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[[
                'sales_amount', 'item_sold', 'transactions', 'week_no'
            ]].loc[trt2.index.values]
            #print("Cluster of shop %s is average sales" % shopID1)
        elif (group_high_sales2.shop_id.values == shopID1).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_high_sales.shop_id.index.values]
            cx = int(group_high_sales.sales_amount[group_high_sales.shop_id ==
                                                   shopID1].values.mean())
            trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
            trt2 = trt[trt.sales_amount < cx + 4000]
            valid_cls = dff1[[
                'sales_amount', 'item_sold', 'transactions', 'week_no'
            ]].loc[trt2.index.values]
            #print("Cluster of shop %s is high sales" % shopID1)
        drr = valid_cls
        #print('Avg sales per week for whole period ',
        avg_sale = round(df2.sales_amount.mean(),
                         2)  # avg sales per week for whole period
        # avg_items_week=round(df2.item_sold[df2.week_no==17].mean(),2)# avg items for input week
        #print('Avg items sold per week for whole period ',
        #round(df2.item_sold.mean(), 2) # avg items per week for whole period
        # avg_trans_week=round(df2.transactions[df2.week_no==17].mean(),2)# avg trans for input week
        #print('Avg trans per week for whole period ',
        #round(df2.transactions.mean(), 2) # avg trans per week for whole period
        # avg_item_per_trans=round((df2.item_sold[df2.week_no==17]/df2.transactions[df2.week_no==17]).mean(),2)#items per transactions w
        itemTrans = round((df2.item_sold / df2.transactions).mean(), 2)
        # max_w=round(max(df2.sales_amount[df2.week_no==17]),2)
        # min_w=round(min(df2.sales_amount[df2.week_no==17]),2)
        maxSale = round(max(df2.sales_amount), 2)
        minSale = round(min(df2.sales_amount), 2)
        # worst=df2.week_no[df2.sales_amount>min(df2.sales_amount)]
        #df2[['week_no', 'sales_amount']][(df2.sales_amount >= min(df2.sales_amount)) & (df2.sales_amount <= min(df2.sales_amount) + 1500)])
        #df2[['week_no', 'sales_amount']][(df2.sales_amount <= max(df2.sales_amount)) & (df2.sales_amount >= max(df2.sales_amount) - 3000)])
        #print('Price of trans ', round((df2.sales_amount / df2.transactions).mean(), 2))
        #print('Price of item ', round((df2.sales_amount / df2.item_sold).mean(), 2))
        itt = []
        trr = []
        sale = []
        for i3 in nullweeks:
            item = drr.item_sold[drr.week_no == i3].mean()
            trans = drr.transactions[drr.week_no == i3].mean()
            salee = drr.sales_amount[drr.week_no == i3].mean()
            itt.append(item)
            trr.append(trans)
            sale.append(salee)
        df_insert = {
            'sales_amountt': sale,
            'ittem': itt,
            'trans': trr,
            'weeks_no': nullweeks
        }
        df_insert1 = pd.DataFrame(df_insert)
        forecastdf = dd1.fillna({
            'mean_item': df_insert1.ittem,
            'mean_trans': df_insert1.trans,
            'sales': df_insert1.sales_amountt
        })
        regr3 = linear_model.LinearRegression()
        X = forecastdf[['mean_item', 'mean_trans']]
        Y = forecastdf.sales
        regr3.fit(X, Y)
        y_predictionss = regr3.predict(X)
        y_predictionss1 = pd.DataFrame(y_predictionss)
        forecastdf['future_sales1'] = y_predictionss1.values
        f1 = forecastdf.set_index('date')
        frames1 = [df2.sales_amount, f1.future_sales1]
        join1 = pd.concat(frames1)
        detrend_sdata1 = signal.detrend(join1)
        trend1 = join1 - detrend_sdata1

        r1 = []
        for jj1 in pd.DataFrame(
                df2.index.year.values).drop_duplicates().index.values:
            sale_year1 = df2.sales_amount[str(
                int(pd.DataFrame(
                    df2.index.year).drop_duplicates().loc[jj1]))].mean()
            r1.append(sale_year1)
        years1 = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values
        holday1 = []
        for t1 in years1[0:len(years1) - 1]:
            h1 = df2.sales_amount[df2.week_no >= 50][str(t1)].mean(
            ) + df2.sales_amount[df2.week_no <= 3][str(int(t1) + 1)].mean()
            holday1.append(h1 / 2)
        year_last1 = f1.future_sales1[f1.weeks_no >= 50][str(years1[-1])].mean(
        ) + f1.future_sales1[f1.weeks_no <= 3].mean()  # 2018
        holday1.append(year_last1 / 2)
        N1 = len(r1)
        Holiday_Means1 = holday1
        All_Year_Means1 = r1
        ind1 = np.arange(N1)

        f_sale = int(
            forecastdf.future_sales1[forecastdf.weeks_no == WeekNo1].values)

        n_week = WeekNo1
        id_shop = shopID1
        sale_mean = avg_sale
        max_sale = maxSale
        min_sale = minSale
        item_trans = itemTrans
        # print(y.index)
        fig3, ax3 = plt.subplots(figsize=(7, 4))
        # dates = pd.date_range(y.index[0], periods=104, freq='W-MON',format='%Y-%m-%d')
        # plt.plot(y.index,y,color='blue',label="actual sales")
        ax3.plot(y.index, a.sales_amount, color='red', label="actual sales")
        ax3.plot(dates,
                 y_predictionss1,
                 color='green',
                 label="forecasted sales")
        ax3.plot(join1.index, trend1, color='blue', alpha=0.5, label='Trend')
        ax3.set_title(
            'Comparison actual and predicted sales for whole period of shop ' +
            str(shopID1) + '\n')
        ax3.set_xlabel('Weeks')
        ax3.set_ylabel('Sales amount')
        ax3.legend()
        graph = mpld3.fig_to_html(fig3)

        fig4, ax4 = plt.subplots(figsize=(7, 4))
        bar_width1 = 0.4
        opacity1 = 0.8
        ax4.bar(ind1, Holiday_Means1, bar_width1, opacity1, label='Holidays')
        ax4.bar(ind1 + bar_width1,
                All_Year_Means1,
                bar_width1,
                opacity1,
                label='Avg sales per year')

        ax4.set_ylabel('Sales_amount')
        ax4.set_title(
            'Holiday sales (Xmas & NY) vs Average sales per year (shop #%s)' %
            shopID1)
        plt.xticks(
            ind1 + 0.25,
            (pd.DataFrame(df2.index.year).drop_duplicates().start_date.values))
        ax4.legend()
        graph1 = mpld3.fig_to_html(fig4)

    return render_template('index.html',
                           graph1=graph1,
                           graph=graph,
                           value6=itemTrans,
                           value5=min_sale,
                           value4=max_sale,
                           value3=sale_mean,
                           value2=id_shop,
                           value1=n_week,
                           value=f_sale)