Exemplo n.º 1
0
 def test_cross_validation(self):
     """Check cutoff count, spacing and horizon of cross_validation output."""
     model = Prophet()
     model.fit(self.__df)
     # Durations as Timedeltas, for comparing against the returned frame.
     horizon = pd.Timedelta('4 days')
     period = pd.Timedelta('10 days')
     initial = pd.Timedelta('115 days')
     cv_frame = diagnostics.cross_validation(
         model, horizon='4 days', period='10 days', initial='115 days')
     cutoffs = cv_frame['cutoff']
     self.assertEqual(len(np.unique(cutoffs)), 3)
     self.assertEqual(max(cv_frame['ds'] - cutoffs), horizon)
     self.assertTrue(min(cutoffs) >= min(self.__df['ds']) + initial)
     # Smallest positive step between consecutive cutoff values.
     steps = cutoffs.diff()
     smallest_step = steps[steps > pd.Timedelta(0)].min()
     self.assertTrue(smallest_step >= period)
     self.assertTrue((cutoffs < cv_frame['ds']).all())
     # Rows of the output and of the fitted frame that share a ds must
     # carry the same y.
     merged = pd.merge(cv_frame, self.__df, 'left', on='ds')
     squared_error = np.sum((merged['y_x'] - merged['y_y']) ** 2)
     self.assertAlmostEqual(squared_error, 0.0)
     # With initial='135 days' exactly one cutoff is expected.
     cv_frame = diagnostics.cross_validation(
         model, horizon='4 days', period='10 days', initial='135 days')
     self.assertEqual(len(np.unique(cv_frame['cutoff'])), 1)
     # This combination is expected to be rejected.
     with self.assertRaises(ValueError):
         diagnostics.cross_validation(
             model, horizon='10 days', period='10 days', initial='140 days')
Exemplo n.º 2
0
 def test_performance_metrics(self):
     """Exercise performance_metrics across rolling_window settings."""
     model = Prophet()
     model.fit(self.__df)
     cv_frame = diagnostics.cross_validation(
         model, horizon='4 days', period='10 days', initial='90 days')
     # rolling_window=0 (aggregation level none)
     unaggregated = diagnostics.performance_metrics(
         cv_frame, rolling_window=0)
     all_columns = {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'}
     self.assertEqual(set(unaggregated.columns), all_columns)
     self.assertEqual(unaggregated.shape[0], 16)
     # rolling_window=0.2
     windowed = diagnostics.performance_metrics(cv_frame, rolling_window=0.2)
     self.assertEqual(len(windowed['horizon'].unique()), 4)
     self.assertEqual(windowed.shape[0], 14)
     # rolling_window=1 (aggregation level all): a single row whose metrics
     # equal the mean of the unaggregated ones
     overall = diagnostics.performance_metrics(cv_frame, rolling_window=1)
     self.assertEqual(overall.shape[0], 1)
     for metric in ('mse', 'mape', 'mae', 'coverage'):
         self.assertEqual(
             overall[metric].values[0], unaggregated[metric].mean())
     # Custom list of metrics
     subset = diagnostics.performance_metrics(
         cv_frame, metrics=['coverage', 'mse'])
     self.assertEqual(set(subset.columns), {'coverage', 'mse', 'horizon'})
def get_predictions(validate, train):
    """Forecast unit sales with Prophet for a fixed sample of store-47 items.

    Only the hard-coded example items in store 47 are predicted. For each
    item the last ``TRAIN_SIZE`` rows of its date-filled history are fitted
    and the forecast is joined to the validation rows by date.

    Args:
        validate: DataFrame of rows to predict. The code reads its
            ``store_nbr``, ``item_nbr``, ``date`` and ``id`` columns.
        train: DataFrame of history. The code reads its ``item_nbr``,
            ``store_nbr``, ``date`` and ``unit_sales`` columns.

    Returns:
        tuple: ``(result, problem_pairs)``. ``result`` is a DataFrame with
        ``id`` and ``unit_sales`` columns; ``problem_pairs`` lists the
        ``(item_nbr, store_nbr)`` pairs whose fit raised ``ValueError``.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327, 1239746, 765520, 1463867, 1010755, 1473396]

    # Loop-invariant: the full set of training dates is the same for every item.
    total_dates = train['date'].unique()
    problem_pairs = []
    frames = []

    store47examples = validate.loc[
        (validate.store_nbr == store_nbr)
        & (validate.item_nbr.isin(example_items))
    ]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    # Group by the bare column name: a one-element list makes newer pandas
    # yield 1-tuples as group keys, which int() cannot convert.
    for item_key, y in store47examples.groupby('item_nbr'):
        item_nbr = int(item_key)
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-TRAIN_SIZE:][['date', 'unit_sales']].rename(
            columns={'date': 'ds', 'unit_sales': 'y'})

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        # Inner join on date keeps only the rows present in the validation set.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # DataFrame.append was removed in pandas 2.0; concatenate once at the end.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
Exemplo n.º 4
0
    def test_logistic_floor(self):
        """Logistic growth with a floor should be roughly shift invariant."""
        base_model = Prophet(growth='logistic')
        half = DATA.shape[0] // 2
        history = DATA.head(half).copy()
        history['floor'] = 10.
        history['cap'] = 80.
        future = DATA.tail(half).copy()
        future['cap'] = 80.
        future['floor'] = 10.
        base_model.fit(history, algorithm='Newton')
        self.assertTrue(base_model.logistic_floor)
        self.assertIn('floor', base_model.history)
        self.assertAlmostEqual(base_model.history['y_scaled'][0], 1.)
        base_forecast = base_model.predict(future)

        # Second model: same data with y, floor and cap all raised by 10.
        shifted_model = Prophet(growth='logistic')
        shifted_history = history.copy()
        for column in ('y', 'floor', 'cap'):
            shifted_history[column] += 10.
        for column in ('cap', 'floor'):
            future[column] += 10.
        shifted_model.fit(shifted_history, algorithm='Newton')
        self.assertAlmostEqual(shifted_model.history['y_scaled'][0], 1.)
        shifted_forecast = shifted_model.predict(future)
        shifted_forecast['yhat'] -= 10.
        # Check for approximate shift invariance
        difference = np.abs(base_forecast['yhat'] - shifted_forecast['yhat'])
        self.assertTrue((difference < 1).all())
Exemplo n.º 5
0
def run(forecast_periods=365):
    """Print daily pound totals from the ledger, then fit and plot a forecast.

    The first part walks the postings of ``./secret/ledger.dat`` and prints
    one ``date , total`` line per day, counting only postings whose commodity
    is "£". The second part fits Prophet on ``./testing.csv`` (with ``y``
    multiplied by 100) and plots the forecast and its components.

    Args:
        forecast_periods: number of periods passed to
            ``make_future_dataframe``. The original code used an undefined
            ``future``, so this default is a reviewer's choice.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    current_date = None
    amount = 0

    for post in journal.query(""):
        # Postings in other commodities never contribute to the totals.
        if str(post.amount.commodity) != "£":
            continue
        if current_date is not None and post.date != current_date:
            # The day changed: emit the finished day's total under its own
            # date before starting the next one. A single formatted argument
            # prints the same text on Python 2 and Python 3.
            print("%s , %s" % (current_date, amount))
            amount = 0
        current_date = post.date
        amount = amount + post.amount

    # Flush the final day, which has no following posting to trigger it.
    if current_date is not None:
        print("%s , %s" % (current_date, amount))

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # Build the frame to predict on; it was previously an undefined name.
    future = m.make_future_dataframe(periods=forecast_periods)
    forecast = m.predict(future)

    m.plot(forecast)
    m.plot_components(forecast)
Exemplo n.º 6
0
 def test_fit_changepoint_not_in_history(self):
     """Fit and predict when the requested changepoint lies in a data gap."""
     # Drop 2013 from the history; copy so the shift below edits an
     # independent frame rather than a slice of DATA.
     train = DATA[
         (DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')
     ].copy()
     # Shift only the observations after the gap. The earlier masked
     # augmented assignment ran over every column, 'ds' included.
     train.loc[train['ds'] > '2014-01-01', 'y'] += 20
     future = pd.DataFrame({'ds': DATA['ds']})
     forecaster = Prophet(changepoints=['2013-06-06'])
     forecaster.fit(train)
     forecaster.predict(future)
Exemplo n.º 7
0
def prophetForecast(rawData, startDate, modelDir, partitions):
	"""Fit Prophet on hourly data and return its cross-validated forecasts.

	The input frame and the cross-validation output are also written to
	``prophetin.csv`` and ``prophetout.csv`` inside ``modelDir``.

	Returns a tuple of three lists taken from the cross-validation output:
	``yhat``, ``yhat_lower`` and ``yhat_upper``.
	"""
	from fbprophet import Prophet
	from fbprophet.diagnostics import cross_validation

	partitions = int(partitions)
	model = Prophet()

	# Assemble the model input: the two value columns plus hourly timestamps.
	stamps = pd.date_range(start=startDate, periods=len(rawData), freq="H")
	frame = pd.DataFrame(rawData, columns=["y", "temp"])
	frame["ds"] = stamps.to_pydatetime()
	frame.to_csv(pJoin(modelDir, "prophetin.csv"))

	with suppress_stdout_stderr():
		model.fit(frame)

	# Horizon and period share one length (hours per partition); the
	# remainder of the division is used as the initial window.
	span, remainder = divmod(len(frame.ds), partitions)

	# cross_validation takes its durations as strings.
	durations = {
		"initial": "%d hours" % remainder,
		"horizon": "%d hours" % span,
		"period": "%d hours" % span,
	}
	cv_frame = cross_validation(model, **durations)
	cv_frame.to_csv(pJoin(modelDir, "prophetout.csv"))
	return (
		list(cv_frame.yhat),
		list(cv_frame.yhat_lower),
		list(cv_frame.yhat_upper),
	)
Exemplo n.º 8
0
    def test_fit_predict(self):
        """Fit on the first half of DATA and predict on the second half."""
        half = DATA.shape[0] // 2
        train, future = DATA.head(half), DATA.tail(half)

        model = Prophet()
        model.fit(train)
        model.predict(future)
Exemplo n.º 9
0
    def test_fit_predict_no_seasons(self):
        """Fit and predict with weekly and yearly seasonality switched off."""
        half = DATA.shape[0] // 2
        train, future = DATA.head(half), DATA.tail(half)

        model = Prophet(yearly_seasonality=False, weekly_seasonality=False)
        model.fit(train)
        model.predict(future)
Exemplo n.º 10
0
    def test_fit_predict_no_changepoints(self):
        """Fit and predict with n_changepoints set to zero."""
        half = DATA.shape[0] // 2
        train, future = DATA.head(half), DATA.tail(half)

        model = Prophet(n_changepoints=0)
        model.fit(train)
        model.predict(future)
Exemplo n.º 11
0
    def test_fit(self):
        """Fit with a single MCMC sample on a two-row history."""
        dates = np.array(['2012-05-18', '2012-05-20'])
        values = np.array([38.23, 21.25])
        train = pd.DataFrame({'ds': dates, 'y': values})

        model = Prophet(mcmc_samples=1)
        model.fit(train)
Exemplo n.º 12
0
 def test_fit_with_holidays(self):
     """Fit and predict with a custom two-date holiday and no uncertainty."""
     holiday_dates = pd.to_datetime(['2012-06-06', '2013-06-06'])
     holidays = pd.DataFrame({
         'ds': holiday_dates,
         'holiday': ['seans-bday', 'seans-bday'],
         'lower_window': [0, 0],
         'upper_window': [1, 1],
     })
     model = Prophet(holidays=holidays, uncertainty_samples=0)
     # predict() is called on the object returned by fit().
     fitted = model.fit(DATA)
     fitted.predict()
Exemplo n.º 13
0
 def test_subdaily_holidays(self):
     """Count the forecast rows on which the holiday has zero effect."""
     holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2017-01-02']),
         'holiday': ['special_day'],
     })
     model = Prophet(holidays=holidays)
     model.fit(DATA2)
     forecast = model.predict()
     zero_effect = forecast['special_day'] == 0
     self.assertEqual(sum(zero_effect), 575)
Exemplo n.º 14
0
 def test_fit_predict_duplicates(self):
     """Fit on a history in which every ds appears twice with different y."""
     N = DATA.shape[0]
     train1 = DATA.head(N // 2).copy()
     train2 = DATA.head(N // 2).copy()
     train2['y'] += 10
     # DataFrame.append was removed in pandas 2.0; concat stacks the same
     # rows in the same order.
     train = pd.concat([train1, train2])
     future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
     forecaster = Prophet()
     forecaster.fit(train)
     forecaster.predict(future)
Exemplo n.º 15
0
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """Build a forecast for publishing.

    Args:
        data (:obj:`pandas.DataFrame`): history to fit on; its ``date`` and
            ``avgPrice`` columns are read. Note that ``data['date']`` is
            converted to datetimes in place.
        forecast_range (int): number of future periods requested from
            ``make_future_dataframe``
        truncate_range (int, optional): when positive, keep only rows dated
            later than ``truncate_range`` days before the last date in
            ``data``

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']

    """
    data['date'] = pd.to_datetime(data['date'])
    # Last historical date: everything after it is a prediction.
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict once and reuse the result; the earlier code ran predict twice
    # and discarded the first forecast.
    forecast = model.predict(future)

    # Right join keeps the future rows, for which 'y' has no value.
    predict_df = pd.merge(
        predict_df, forecast,
        on='ds',
        how='right'
    )

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
Exemplo n.º 16
0
 def test_cross_validation_default_value_check(self):
     """Omitting ``initial`` should match passing three times the horizon."""
     model = Prophet()
     model.fit(self.__df)
     shared = dict(horizon='32 days', period='10 days')
     # Default value of initial should be equal to 3 * horizon
     implicit = diagnostics.cross_validation(model, **shared)
     explicit = diagnostics.cross_validation(
         model, initial='96 days', **shared)
     for column in ('y', 'yhat'):
         squared_error = ((implicit[column] - explicit[column]) ** 2).sum()
         self.assertAlmostEqual(squared_error, 0.0)
Exemplo n.º 17
0
def hello():
    """Print a greeting, forecast log-transformed data and return it as JSON.

    Reads a CSV from ``url`` (a name taken from the enclosing scope),
    replaces ``y`` with its natural log, fits Prophet and predicts 365
    periods past the history.

    Returns:
        The forecast serialised with ``to_json(orient='table')``.
    """
    print('Hello, world!')
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])
    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    return forecast.to_json(orient='table')
Exemplo n.º 18
0
 def test_custom_seasonality(self):
     """Cover add_seasonality validation, modes and resulting prior scales."""
     holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2017-01-02']),
         'holiday': ['special_day'],
         'prior_scale': [4.],
     })
     model = Prophet(holidays=holidays)
     model.add_seasonality(
         name='monthly', period=30, fourier_order=5, prior_scale=2.)
     expected_monthly = {
         'period': 30,
         'fourier_order': 5,
         'prior_scale': 2.,
         'mode': 'additive',
     }
     self.assertEqual(model.seasonalities['monthly'], expected_monthly)
     # Both the holiday's name and 'trend' must be refused as names.
     for refused_name in ('special_day', 'trend'):
         with self.assertRaises(ValueError):
             model.add_seasonality(
                 name=refused_name, period=30, fourier_order=5)
     model.add_seasonality(name='weekly', period=30, fourier_order=5)
     # Test priors
     model = Prophet(
         holidays=holidays,
         yearly_seasonality=False,
         seasonality_mode='multiplicative',
     )
     model.add_seasonality(
         name='monthly', period=30, fourier_order=5,
         prior_scale=2., mode='additive')
     model.fit(DATA.copy())
     self.assertEqual(model.seasonalities['monthly']['mode'], 'additive')
     self.assertEqual(
         model.seasonalities['weekly']['mode'], 'multiplicative')
     features, prior_scales, component_cols, _modes = (
         model.make_all_seasonality_features(model.history)
     )
     expected_totals = (
         ('monthly', 10),
         ('special_day', 1),
         ('weekly', 6),
         ('additive_terms', 10),
         ('multiplicative_terms', 7),
     )
     for component, total in expected_totals:
         self.assertEqual(sum(component_cols[component]), total)
     # The feature columns may come out in either order; accept both.
     if features.columns[0] == 'monthly_delim_1':
         true = [2.] * 10 + [10.] * 6 + [4.]
         self.assertEqual(sum(component_cols['monthly'][:10]), 10)
         self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
     else:
         true = [10.] * 6 + [2.] * 10 + [4.]
         self.assertEqual(sum(component_cols['weekly'][:6]), 6)
         self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
     self.assertEqual(prior_scales, true)
Exemplo n.º 19
0
 def test_auto_yearly_seasonality(self):
     """Check when 'yearly' is registered in seasonalities and its settings."""
     # Should be enabled
     model = Prophet()
     self.assertEqual(model.yearly_seasonality, 'auto')
     model.fit(DATA)
     self.assertIn('yearly', model.seasonalities)
     default_yearly = {
         'period': 365.25,
         'fourier_order': 10,
         'prior_scale': 10.,
         'mode': 'additive',
     }
     self.assertEqual(model.seasonalities['yearly'], default_yearly)
     # Should be disabled due to too short history
     short_history = DATA.head(240)
     model = Prophet()
     model.fit(short_history)
     self.assertNotIn('yearly', model.seasonalities)
     # Requested explicitly, it is present even on the short history.
     model = Prophet(yearly_seasonality=True)
     model.fit(short_history)
     self.assertIn('yearly', model.seasonalities)
     # An integer argument is expected to end up as the fourier order.
     model = Prophet(yearly_seasonality=7, seasonality_prior_scale=3.)
     model.fit(DATA)
     custom_yearly = {
         'period': 365.25,
         'fourier_order': 7,
         'prior_scale': 3.,
         'mode': 'additive',
     }
     self.assertEqual(model.seasonalities['yearly'], custom_yearly)
Exemplo n.º 20
0
 def test_fit_predict_constant_history(self):
     """A constant history should be forecast as that same constant."""
     half = DATA.shape[0] // 2
     train = DATA.head(half).copy()
     # Same check for a non-zero and a zero constant.
     for constant in (20, 0):
         train['y'] = constant
         future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
         model = Prophet()
         model.fit(train)
         forecast = model.predict(future)
         self.assertEqual(forecast['yhat'].values[-1], constant)
Exemplo n.º 21
0
 def test_cross_validation_extra_regressors(self):
     """Cross-validate a model with a custom seasonality and a regressor."""
     frame = self.__df.copy()
     frame['extra'] = range(frame.shape[0])
     model = Prophet()
     model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     model.add_regressor('extra')
     model.fit(frame)
     cv_frame = diagnostics.cross_validation(
         model, horizon='4 days', period='4 days', initial='135 days')
     cutoffs = cv_frame['cutoff']
     self.assertEqual(len(np.unique(cutoffs)), 2)
     period = pd.Timedelta('4 days')
     # Smallest positive step between consecutive cutoff values.
     steps = cutoffs.diff()
     smallest_step = steps[steps > pd.Timedelta(0)].min()
     self.assertTrue(smallest_step >= period)
     self.assertTrue((cutoffs < cv_frame['ds']).all())
     # y in the output must match y in the fitted frame for the same ds.
     merged = pd.merge(cv_frame, self.__df, 'left', on='ds')
     squared_error = np.sum((merged['y_x'] - merged['y_y']) ** 2)
     self.assertAlmostEqual(squared_error, 0.0)
Exemplo n.º 22
0
    def test_make_future_dataframe(self):
        """Future frames without history should hold the expected dates."""
        N = 468
        model = Prophet()
        model.fit(DATA.head(N // 2))
        # (freq, the three dates expected for that freq)
        cases = (
            ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
            ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
        )
        for freq, expected_dates in cases:
            future = model.make_future_dataframe(
                periods=3, freq=freq, include_history=False)
            correct = pd.DatetimeIndex(expected_dates)
            self.assertEqual(len(future), 3)
            for position, expected in enumerate(correct):
                self.assertEqual(future.iloc[position]['ds'], expected)
Exemplo n.º 23
0
def train_prophet(df, modelDir, confidence=0.99):
	"""Fit Prophet on df, forecast over its history and cache the result.

	Args:
		df: training frame passed to ``Prophet.fit``; its ``y`` column is
			also copied into the returned forecast.
		modelDir: directory in which the forecast CSV is written.
		confidence: passed to Prophet as ``interval_width`` and embedded in
			the output file name.

	Returns:
		The forecast frame, with a float ``y`` column added.
	"""
	m = Prophet(
		yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
	)
	with suppress_stdout_stderr():
		m.fit(df)

	# Parenthesised so the line is valid on Python 3 and prints the same
	# text on Python 2.
	print("PREDICTING!")
	# periods=0: no future periods are added to the frame.
	future = m.make_future_dataframe(periods=0)
	forecast = m.predict(future)
	# Merge in the historical data.
	forecast["y"] = df.y.astype(float)
	# Cache the forecast next to the model files.
	forecast.to_csv(
		pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
	)
	return forecast
Exemplo n.º 24
0
 def test_auto_weekly_seasonality(self):
     """Check when 'weekly' is registered in seasonalities and its settings."""
     # Should be enabled
     model = Prophet()
     self.assertEqual(model.weekly_seasonality, 'auto')
     model.fit(DATA.head(15))
     self.assertIn('weekly', model.seasonalities)
     default_weekly = {
         'period': 7,
         'fourier_order': 3,
         'prior_scale': 10.,
         'mode': 'additive',
     }
     self.assertEqual(model.seasonalities['weekly'], default_weekly)
     # Should be disabled due to too short history
     short_history = DATA.head(9)
     model = Prophet()
     model.fit(short_history)
     self.assertNotIn('weekly', model.seasonalities)
     # Requested explicitly, it is present even on the short history.
     model = Prophet(weekly_seasonality=True)
     model.fit(short_history)
     self.assertIn('weekly', model.seasonalities)
     # Should be False due to weekly spacing
     every_seventh_row = DATA.iloc[::7, :]
     model = Prophet()
     model.fit(every_seventh_row)
     self.assertNotIn('weekly', model.seasonalities)
     # An integer argument is expected to end up as the fourier order.
     model = Prophet(weekly_seasonality=2, seasonality_prior_scale=3.)
     model.fit(DATA)
     custom_weekly = {
         'period': 7,
         'fourier_order': 2,
         'prior_scale': 3.,
         'mode': 'additive',
     }
     self.assertEqual(model.seasonalities['weekly'], custom_weekly)
Exemplo n.º 25
0
 def test_auto_weekly_seasonality(self):
     """Check how fit() resolves the weekly_seasonality attribute."""
     # Should be True
     model = Prophet()
     self.assertEqual(model.weekly_seasonality, 'auto')
     model.fit(DATA.head(15))
     self.assertEqual(model.weekly_seasonality, True)
     # Should be False due to too short history
     short_history = DATA.head(9)
     model = Prophet()
     model.fit(short_history)
     self.assertEqual(model.weekly_seasonality, False)
     # An explicit True is kept even on the short history.
     model = Prophet(weekly_seasonality=True)
     model.fit(short_history)
     self.assertEqual(model.weekly_seasonality, True)
     # Should be False due to weekly spacing
     every_seventh_row = DATA.iloc[::7, :]
     model = Prophet()
     model.fit(every_seventh_row)
     self.assertEqual(model.weekly_seasonality, False)
Exemplo n.º 26
0
 def test_auto_yearly_seasonality(self):
     """Check how fit() resolves the yearly_seasonality attribute."""
     # Should be True
     model = Prophet()
     self.assertEqual(model.yearly_seasonality, 'auto')
     model.fit(DATA)
     self.assertEqual(model.yearly_seasonality, True)
     # Should be False due to too short history
     short_history = DATA.head(240)
     model = Prophet()
     model.fit(short_history)
     self.assertEqual(model.yearly_seasonality, False)
     # An explicit True is kept even on the short history.
     model = Prophet(yearly_seasonality=True)
     model.fit(short_history)
     self.assertEqual(model.yearly_seasonality, True)
Exemplo n.º 27
0
 def test_fit_predict_with_append_holidays(self):
     """Fit and predict with append_holidays, alone and with custom holidays."""
     holidays = pd.DataFrame({
         'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
         'holiday': ['seans-bday', 'seans-bday'],
         'lower_window': [0, 0],
         'upper_window': [1, 1],
     })
     append_holidays = 'US'
     # Test with holidays and append_holidays
     model = Prophet(
         holidays=holidays,
         append_holidays=append_holidays,
         uncertainty_samples=0,
     )
     model.fit(DATA).predict()
     scenarios = (
         # There are training holidays missing in the test set
         (DATA.head(154), DATA.tail(355)),
         # There are test holidays missing in the training set
         (DATA.tail(355), DATA2),
     )
     for train, future in scenarios:
         model = Prophet(
             append_holidays=append_holidays, uncertainty_samples=0)
         model.fit(train).predict(future)
Exemplo n.º 28
0
    def test_copy(self):
        """Verify that copy() carries settings over, with and without cutoff."""
        df = DATA.copy()
        df['cap'] = 200.
        df['binary_feature'] = [0] * 255 + [1] * 255
        # Candidate values for each positional constructor argument; per the
        # original note these are picked to differ from the defaults.
        holiday = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['x']
        })
        option_grid = [
            ['linear', 'logistic'],  # growth
            [None, pd.to_datetime(['2016-12-25'])],  # changepoints
            [3],  # n_changepoints
            [True, False],  # yearly_seasonality
            [True, False],  # weekly_seasonality
            [True, False],  # daily_seasonality
            [None, holiday],  # holidays
            [1.1],  # seasonality_prior_scale
            [1.1],  # holidays_prior_scale
            [0.1],  # changepoint_prior_scale
            [100],  # mcmc_samples
            [0.9],  # interval_width
            [200],  # uncertainty_samples
        ]
        # Values should be copied correctly
        for args in itertools.product(*option_grid):
            source = Prophet(*args)
            source.history = source.setup_dataframe(
                df.copy(), initialize_scales=True)
            source.set_auto_seasonalities()
            clone = source.copy()
            self.assertEqual(source.growth, clone.growth)
            self.assertEqual(source.n_changepoints, clone.n_changepoints)
            self.assertEqual(source.changepoints, clone.changepoints)
            # The copy reports the three flags as False and carries the
            # information in its seasonalities mapping instead.
            self.assertEqual(False, clone.yearly_seasonality)
            self.assertEqual(False, clone.weekly_seasonality)
            self.assertEqual(False, clone.daily_seasonality)
            self.assertEqual(
                source.yearly_seasonality, 'yearly' in clone.seasonalities)
            self.assertEqual(
                source.weekly_seasonality, 'weekly' in clone.seasonalities)
            self.assertEqual(
                source.daily_seasonality, 'daily' in clone.seasonalities)
            if source.holidays is None:
                self.assertEqual(source.holidays, clone.holidays)
            else:
                same_cells = source.holidays == clone.holidays
                self.assertTrue(same_cells.values.all())
            self.assertEqual(
                source.seasonality_prior_scale, clone.seasonality_prior_scale)
            self.assertEqual(
                source.changepoint_prior_scale, clone.changepoint_prior_scale)
            self.assertEqual(
                source.holidays_prior_scale, clone.holidays_prior_scale)
            self.assertEqual(source.mcmc_samples, clone.mcmc_samples)
            self.assertEqual(source.interval_width, clone.interval_width)
            self.assertEqual(
                source.uncertainty_samples, clone.uncertainty_samples)

        # Check for cutoff and custom seasonality and extra regressors
        changepoints = pd.date_range('2012-06-15', '2012-09-15')
        cutoff = pd.Timestamp('2012-07-25')
        source = Prophet(changepoints=changepoints)
        source.add_seasonality('custom', 10, 5)
        source.add_regressor('binary_feature')
        source.fit(df)
        clone = source.copy(cutoff=cutoff)
        # Only changepoints up to the cutoff are expected on the copy.
        kept = changepoints[changepoints <= cutoff]
        self.assertTrue((kept == clone.changepoints).all())
        self.assertIn('custom', clone.seasonalities)
        self.assertIn('binary_feature', clone.extra_regressors)
Exemplo n.º 29
0
# One frame per holiday type, each tagging its dates with the holiday name.
state = pd.DataFrame(dict(
    holiday='state_holiday',
    ds=pd.to_datetime(state_dates),
))
school = pd.DataFrame(dict(
    holiday='school_holiday',
    ds=pd.to_datetime(school_dates),
))

# Stack both holiday tables into the single frame handed to Prophet.
holidays = pd.concat([state, school])
holidays.head()

# Use a 95% uncertainty interval (the Prophet default is 80%).
my_model = Prophet(
    holidays=holidays,
    daily_seasonality=True,
    interval_width=0.95,
)
my_model.fit(sales)

# Frame extending 6 weeks (6 * 7 periods) past the history.
future_dates = my_model.make_future_dataframe(periods=6 * 7)

# Predictions for every row of that frame.
forecast = my_model.predict(future_dates)

# Predictions for the last week.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)

# Visualize the predictions and their components.
my_model.plot(forecast)
my_model.plot_components(forecast)
Exemplo n.º 30
0
from fbprophet import Prophet

# Rename the JSON columns to the names Prophet expects and sort the rows
# chronologically.
df = pd.read_json(enc).rename(columns={'date': 'ds', 'rate': 'y'})
df['ds'] = pd.to_datetime(df['ds'])
df = df.sort_values('ds')

# Specify the forecasting model.
model = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=True,
)

# Load df into the model and forecast 30 periods past the history.
model.fit(df)
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)

# Output the predicted value 30 days ahead and today's price.
# Date 30 days ahead (the last forecast row).
ds_f = pd.to_datetime(forecast['ds'].values[-1]).strftime('%Y/%m/%d')
# Converted to unicode for saving to the DB.
# NOTE(review): str.decode only exists on Python 2 - confirm the target runtime.
ds_f_unicode = ds_f.decode('unicode-escape')

# Predicted price 30 days ahead (no label).
f = forecast['yhat'].values[-1]
# Converted to str with one decimal place.
f_str = '{0:.1f}'.format(f)
# Converted to unicode for saving to the DB.
f_unicode = str(f_str).decode('unicode-escape')

# Price on the day of the calculation (no label).
today = df['y'].values[-1]
Exemplo n.º 31
0
def predict(index):
    """Plot linear-regression and Prophet forecasts for one row of final_data.

    The row is fitted twice: with ``LinearRegression`` against
    ``float_index`` and with Prophet against ``date_index``. Each forecast
    is drawn starting from the last observed value so the lines connect to
    the history.

    Args:
        index: integer position of the row in ``final_data`` (used with
            ``iloc``).

    Returns:
        Whatever ``fig.show()`` returns.
    """
    timeseries = pd.DataFrame(final_data.iloc[index])
    last_observed = timeseries.tail(1)

    # Linear Regression
    reg = LinearRegression().fit(float_index.reshape(-1, 1), timeseries.to_numpy())
    linear_pred = pd.DataFrame(
        data=reg.predict(predict_time), columns=timeseries.columns)
    linear_pred.index = future_years

    # Prophet
    model = Prophet()
    new_timeseries = pd.DataFrame(columns=['ds', 'y'])
    new_timeseries['y'] = timeseries.iloc[:, 0]
    new_timeseries['ds'] = date_index
    model.fit(new_timeseries)
    future_timeseries = pd.DataFrame(columns=['ds', 'y'])
    future_timeseries['ds'] = list(map(quarter_to_date, future_years))
    out_sample_forecast = model.predict(future_timeseries)

    def forecast_frame(column):
        """Return one Prophet output column as a frame indexed by future_years."""
        frame = pd.DataFrame(
            out_sample_forecast[column].to_numpy().flatten(),
            columns=timeseries.columns)
        frame.index = future_years
        return frame

    prophet_pred = forecast_frame('yhat')
    prophet_pred_lower = forecast_frame('yhat_lower')
    prophet_pred_upper = forecast_frame('yhat_upper')

    # Visualize
    fig = go.Figure()
    fig.update_layout(plot_bgcolor='rgb(255,255,255)')
    present_time = timeseries.index.values
    # The leading label stands for the last observed point on the x axis.
    future_time = np.append(['2020Q2*'], future_years)
    fig.add_trace(
        go.Scatter(
            x=present_time,
            y=timeseries.values.flatten(),
            name=timeseries.columns[0][0],
            line=dict(color='black', width=4)
            ))

    band_style = dict(color='gray', width=2, dash='dash')
    forecast_traces = [
        (linear_pred, 'Linear', dict(color='blue', width=4)),
        (prophet_pred, 'Prophet', dict(color='red', width=4)),
        (prophet_pred_lower, 'Prophet_lower', band_style),
        (prophet_pred_upper, 'Prophet_upper', band_style),
    ]
    for frame, label, line_style in forecast_traces:
        # DataFrame.append was removed in pandas 2.0; concat stacks the
        # last observation on top of the forecast in the same way.
        joined = pd.concat([last_observed, frame])
        fig.add_trace(
            go.Scatter(
                x=future_time,
                y=joined.values.flatten(),
                name=label,
                line=line_style,
                ))
    return fig.show()
Exemplo n.º 32
0
# Plot stock_return and the log-differences of graph, then fit Prophet on
# the log of the closing price and save the forecast chart.
stock_return.head()
stock_return.plot(grid=True).axhline(y=1, color="black", lw=2)
stock_change = graph.apply(
    lambda x: np.log(x) - np.log(x.shift(1)))  # shift moves dates back by 1.
stock_change.head()
stock_change.plot(grid=True).axhline(y=0, color="black", lw=2)
# 20-row rolling mean of the close, rounded to 2 decimals.
graph["20d"] = np.round(graph["Close"].rolling(window=20, center=False).mean(),
                        2)

# Prophet input: dates from stock_return's index, y = log of the close.
df = pd.DataFrame()
df['ds'] = stock_return.index
df['y'] = graph['Close'].apply(lambda x: np.log(x)).values
df.tail()

m0 = Prophet(yearly_seasonality=True)
m0.fit(df)
# how many days in the future to show predictions for
n_add = 100
print("adding {n} days.".format(n=n_add))
future = m0.make_future_dataframe(periods=n_add)
future.tail()

forecast = m0.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# Raw string: '\l' is not a valid escape sequence, so the plain literal
# triggers a warning on newer Pythons. The resulting text is unchanged.
forcast = m0.plot(forecast, ylabel=r'$\ln($stock_return$)$')
forcast.savefig('/home/ubuntu/Desktop/TelegramBot/charts/RUBforcast.jpeg',
                dpi=400,
                bbox_inches='tight')

trend = m0.plot_components(forecast)
trend.savefig('/home/ubuntu/Desktop/TelegramBot/charts/RUBtrend.jpeg',
Exemplo n.º 33
0
def Table_generator():
    """Build a 52-week sales forecast table for every shop and write it to df.json.

    The function reads 'weekly sales and labour cost for all shops 2013 to
    20177.csv' twice and processes shops in two groups, keyed on the number
    of rows each shop has with ``sales_status != 0``:

    * fewer than 52 rows: weekly means of item_sold / transactions /
      sales_amount are computed per week number, gaps are filled from rows
      of shops in the same KMeans sales cluster, and a linear regression of
      sales on (mean_item, mean_trans) produces the forecast;
    * 52 rows or more: a Prophet model is fitted on log(sales_amount) and
      the exponentiated 52-week forecast is used.

    Both result tables are stacked, a 'last_date' and 'shop_id' column are
    inserted, and the result is written to 'df.json' (orient='records').

    Returns None; the final table is only bound to the local ``memval2``.
    """
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt
    from pylab import rcParams
    from sklearn import linear_model
    from fbprophet import Prophet
    from numpy import inf
    filepath = ('weekly sales and labour cost for all shops 2013 to 20177.csv')
    df = pd.read_csv(filepath)
    # Keep only rows whose sales_status is non-zero.
    d_f2 = df[df.sales_status != 0]
    # df2.week_no.isnull().values.any()
    nulldetect = d_f2.week_no.isnull()
    # nulldetect[nulldetect==True].index
    # Missing week numbers become 54, then every week number is shifted by -2.
    # NOTE(review): this is chained assignment on a filtered frame; pandas may
    # warn and the write may not reach d_f2 -- confirm on the pandas version in use.
    d_f2.week_no.loc[nulldetect == True] = 54
    d_f2['week_no'] = d_f2.week_no - 2
    # For every row, count how many rows its shop has (one entry per row).
    len_week1 = []
    for i in d_f2.shop_id:
        len_week = len(d_f2.week_no[d_f2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    #len_week2
    # One (shop_id, len_of_weeks) row per shop after de-duplication.
    d = {'shop_id': d_f2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()

    # Result holder for the short-history shops: rows are weeks 1..52,
    # columns are shop ids with fewer than 52 rows.
    dtt = pd.DataFrame(index=list(range(1, 53)), columns=d2.shop_id[d2.len_of_weeks < 52].values)
    # table = pd.DataFrame(columns=['shop_id','week_no','dates','forecasted_sales'])

    for uu in d2.shop_id[d2.len_of_weeks < 52].values:
        df3 = d_f2[d_f2.shop_id == uu]
        # df1 = df[(= -1) & (df.b != -1)]
        dff4 = df3.set_index('start_date')
        # df4=df3.week_no-2
        a = df3[['week_no', 'shop_id', 'sales_amount', 'transactions', 'total_tax', 'item_sold']]
        # print(a)
        # 52 weekly (Monday-anchored) dates starting from the shop's last start_date.
        # NOTE(review): `format=` is passed to pd.date_range here; verify the
        # installed pandas accepts this keyword.
        dates = pd.date_range(dff4.index[-1], periods=52, freq='W-MON', format='%Y-%m-%d')
        dates1 = pd.DataFrame(dates)
        dates2 = pd.date_range(dff4.index[0], periods=len(dff4.index), freq='W-MON', format='%Y-%m-%d')
        # Historical mean of item_sold for each forecast week number.
        mean_week_item = []
        for i in dates.week:
            mean_item_sold = a.item_sold[a.week_no == i].mean()
            mean_week_item.append(mean_item_sold)
        mean_week_item1 = pd.DataFrame(mean_week_item)

        # Historical mean of transactions for each forecast week number.
        trans_week_item = []
        for i1 in dates.week:
            mean_trans_sold = a.transactions[a.week_no == i1].mean()
            trans_week_item.append(mean_trans_sold)

        # Historical mean of sales_amount for each forecast week number.
        sales_week = []
        for ii1 in dates.week:
            mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
            sales_week.append(mean_sales_sold)
        dd = {'date': dates, 'weeks_no': dates.week, 'sales': sales_week, 'mean_item': mean_week_item,
              'mean_trans': trans_week_item}
        dd1 = pd.DataFrame(dd)
        # The clustering below is rebuilt from the full `df` on every loop
        # iteration; none of its inputs depend on the current shop `uu`.
        dff1 = df[df.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2
        # Row-level clustering on (shop_id, sales_amount).
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        from sklearn.cluster import KMeans
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_  # clusters
        cen = kmeans_model.cluster_centers_
        y_hat1 = pd.DataFrame(y_hat)
        # NOTE(review): the low/middle/high names are tied to fixed label
        # numbers (0/2/1); KMeans label numbering is arbitrary -- confirm this
        # mapping holds for the data.
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]
        # Per-row mean sales of the row's shop, later de-duplicated to one
        # value per shop.
        fff = []
        for j in X_Cluster.shop_id:
            dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == j].mean()
            fff.append(dfdf)
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f4 = pd.DataFrame(fff)
        f5 = f4.drop_duplicates()
        # NOTE(review): drop_duplicates on the means assumes no two shops share
        # the same mean; otherwise the lengths of f3 and f5 differ -- verify.
        f3['salle'] = f5.values

        # Shop-level clustering on (shop_id, mean sales).
        Xx2 = f3[['shop_id', 'salle']]
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_  # clusters
        cen2 = kmeans_model2.cluster_centers_

        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]
        # Forecast weeks for which this shop has no historical transactions mean.
        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]

        # Pick reference rows: rows from the row-level cluster whose
        # sales_amount lies within +/-3000 (low, middle) or +/-4000 (high) of
        # this shop's mean sales inside that cluster.
        if (group_low_sales2.shop_id.values == uu).any() == True:
            cx = int(group_low_sales.sales_amount[group_low_sales.shop_id == uu].values.mean())
            trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is low sales" %uu)
            # print("Average sales per week of shop %s is" %uu,cx)
        elif (group_middle_sales2.shop_id.values == uu).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_middle_sales.shop_id.index.values]
            cx = int(group_middle_sales.sales_amount[group_middle_sales.shop_id == uu].values.mean())
            trt = group_middle_sales[group_middle_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is average sales" %uu)
            # print("Average sales per week of shop %s is " %uu,cx)
        elif (group_high_sales2.shop_id.values == uu).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_high_sales.shop_id.index.values]
            cx = int(group_high_sales.sales_amount[group_high_sales.shop_id == uu].values.mean())
            trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
            trt2 = trt[trt.sales_amount < cx + 4000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is high sales" %uu)
            # print("Average sales per week of shop %s is" %uu,cx)
        # If none of the three branches matched, valid_cls keeps the value from
        # a previous iteration (or is undefined on the first one).
        drr = valid_cls
        drr = valid_cls  # dff1[['sales_amount','item_sold','transactions','week_no']].loc[trt2.index.values]
        # Means over the reference rows for each missing week.
        itt = []
        trr = []
        sale = []
        for i3 in nullweeks:
            item = drr.item_sold[drr.week_no == i3].mean()
            trans = drr.transactions[drr.week_no == i3].mean()
            salee = drr.sales_amount[drr.week_no == i3].mean()
            itt.append(item)
            trr.append(trans)
            sale.append(salee)
        df_insert = {'sales_amountt': sale, 'ittem': itt, 'trans': trr, 'weeks_no': nullweeks}
        df_insert1 = pd.DataFrame(df_insert)
        # First fill gaps with the per-week reference means, then fill whatever
        # is still missing with the overall mean of those reference values.
        forecastdf = dd1.fillna(
            {'mean_item': df_insert1.ittem, 'mean_trans': df_insert1.trans, 'sales': df_insert1.sales_amountt})
        forecastdf1 = forecastdf.fillna({'mean_item': df_insert1.ittem.mean(),
                                         'mean_trans': df_insert1.trans.mean(),
                                         'sales': df_insert1.sales_amountt.mean()})
        # Regress sales on (mean_item, mean_trans) and predict on the same rows.
        regr3 = linear_model.LinearRegression()
        X = forecastdf1[['mean_item', 'mean_trans']]
        Y = forecastdf1.sales
        regr3.fit(X, Y)
        y_predictionss = regr3.predict(X)
        y_predictionss1 = pd.DataFrame(y_predictionss)
        pred_y = round(y_predictionss1, 2)
        # print(pred_y.values)
        forecastdf1['forecasted_sales'] = pred_y.values
        # ddt.fillna()
        forecastdf1.sort_values('weeks_no', inplace=True)
        # forecastdf1
        # forecastdf1.forecasted_sales.reset_index()
        f = forecastdf1.set_index('weeks_no')
        # dtt = pd.DataFrame(index=list(range(1,53)), columns=d2.shop_id[d2.len_of_weeks<52].values)
        dtt['shop_id'] = dtt.index.values
        # dtt[dtt.shop_id==uu].fillna()
        # Store this shop's 52 forecasts (ordered by week number) in its column.
        dtt[[uu]] = f.forecasted_sales.values.reshape((52, 1))

    # Transposed copy of dtt: one row per short-history shop, one column per week.
    dtt1 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks < 52].values, columns=list(range(1, 53)))
    for jj in dtt.index.values:
        dtt1.loc[:, jj] = dtt.loc[jj, :]

    # Second pass over the same CSV, this time indexed by parsed start_date.
    data = pd.read_csv('weekly sales and labour cost for all shops 2013 to 20177.csv',index_col='start_date',parse_dates=True)
    # shopID = input("Enter your shop id")

    df2 = data[data.sales_status != 0]
    # df2.week_no.isnull().values.any()
    nulldetect = df2.week_no.isnull()
    # nulldetect[nulldetect==True].index
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2

    len_week1 = []
    for i in df2.shop_id:
        len_week = len(df2.week_no[df2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    #len_week2
    d = {'shop_id': df2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()

    # NOTE(review): the columns here are selected with `> 52` while the loop
    # below iterates shops with `>= 52`; a shop with exactly 52 rows would be
    # added as a new column by the loop -- confirm which bound is intended.
    dtt2 = pd.DataFrame(index=list(range(1, 53)), columns=d2.shop_id[d2.len_of_weeks > 52].values)

    for j in d2.shop_id[d2.len_of_weeks >= 52].values:
        data2 = data[['sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold', 'transactions', 'total_tax',
                      'sales_status']]
        df1 = data2[data2.shop_id == j]  # input no. 1
        df2 = df1[df1.sales_status != 0]
        df2.week_no.isnull().values.any()
        nulldetect = df1.week_no.isnull()
        nulldetect[nulldetect == True].index
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        dff = df2[['sales_amount']]
        data3 = dff.reset_index()
        data4 = data3

        # Prophet input: 'ds' (from start_date) and 'y' (log of sales_amount).
        data5 = data4.rename(columns={'start_date': 'ds', 'sales_amount': 'y'})
        # The result of this set_index call is discarded (not in place).
        data5.set_index('ds')
        # y.plot()
        data5['y'] = np.log(data5['y'])
        # log(0) gives -inf; infinities and NaNs are replaced with 0.
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
        data5.set_index('ds')
        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        data5.set_index('ds', inplace=True)
        forecast.set_index('ds', inplace=True)
        viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
        dff.index = pd.to_datetime(dff.index)  # make sure our index as a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        # Keep only forecast rows after the second-to-last observed date.
        mask = (forecast.index > connect_date)
        predict_df = forecast.loc[mask]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        # Undo the log transform.
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
        ii = len(dff.sales_amount) - 1
        viz_df.yhat_scaled[ii:]
        predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
        predicted_future_sales1 = predicted_future_sales.rename(columns={'yhat_scaled': 'future_sales'})
        predicted_future_sales2 = predicted_future_sales1.reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales = predicted_future_sales2['future_sales']
        future_sales1 = round(future_sales, 2)
        start_date = predicted_future_sales2['index']
        predict_data = {'shop_id': int(df2.shop_id.mean()), 'future_sales': future_sales1, 'week_no': week_no,
                        'start_date': start_date}
        predict_data1 = pd.DataFrame(predict_data)
        # One forecast per week number, ordered by week number.
        predict_data1 = predict_data1.drop_duplicates(subset=['week_no'])
        predict_data1.sort_values('week_no', inplace=True)
        f1 = predict_data1.set_index('week_no')
        # The reshape requires exactly 52 distinct week numbers to remain.
        dtt2[[j]] = f1.future_sales.values.reshape((52, 1))

    # Transposed copy of dtt2. The loop iterates dtt's index (weeks 1..52),
    # which is the same range dtt2 was built with.
    dtt3 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks > 52].values, columns=list(range(1, 53)))
    for qq in dtt.index.values:
        dtt3.loc[:, qq] = dtt2.loc[qq, :]

    # Last start_date per shop, in order of first appearance in d_f2.
    X5 = d_f2[['shop_id', 'start_date']]
    act_date = pd.DataFrame({'shop_id': X5.shop_id.drop_duplicates().values, 'last_date': np.nan})
    act_dates = act_date[['shop_id', 'last_date']]
    lastdate = []
    for ji in X5.shop_id.drop_duplicates().values:
        l_date = X5.start_date[X5.shop_id == ji].iloc[-1]
        lastdate.append(l_date)
    # lastdate
    # act_dates['last_date'] = lastdate

    # Stack short-history and long-history results.
    # NOTE(review): `lastdate` is ordered by first appearance in d_f2 while the
    # rows of `tab` are ordered dtt1-then-dtt3; verify the dates line up with
    # the right shops (and that the lengths match).
    tab = dtt1.append(dtt3)
    tab.insert(0, 'last_date', lastdate)

    # Move shop_id from the index into the first column, sorted by shop_id.
    tab['shop_id'] = tab.index.values
    tab.sort_values('shop_id', inplace=True)
    tab_id = tab.shop_id
    tab = tab.drop('shop_id', axis=1)
    tab.insert(0, 'shop_id', tab_id)
    #writer = pd.ExcelWriter('output.xlsx')
    #tab.to_excel(writer, 'Sheet1')
    #writer.save()
    tab.to_json(path_or_buf='df.json', orient='records')
    # Bound locally only; nothing is returned.
    memval2 =tab
Exemplo n.º 34
0
    def _tune(self,
              y,
              period,
              start_date,
              x=None,
              metric="smape",
              val_size=None,
              verbose=False):
        """
        Tune hyperparameters of the model.

        Every combination of the grid below is fitted on the training part of
        the series and scored on the held-out tail; the best combination is
        merged into ``self.params`` and ``self.params["tuned"]`` is set to True.
        A combination whose fit or prediction raises is scored as ``np.inf``.

        :param y: pd.Series or 1-D np.array, time series to predict.
        :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
        for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
        data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
        "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
        :param start_date: forwarded to ``data_utils.create_dates`` together with ``period`` to build the date index.
        :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
        :param metric: Str, the metric used for model selection. One of: "mse", "mae", "mape", "smape", "rmse".
        :param val_size: Int, the number of most recent observations to use as validation set for tuning.
        Defaults to 10% of ``len(y)`` (rounded down).
        :param verbose: Boolean, True for printing additional info while tuning. Currently not used by this method.
        :return: None
        """
        # isinstance also accepts str subclasses, unlike an exact type() check.
        self.period = (data_utils.period_to_int(period)
                       if isinstance(period, str) else period)
        dates = data_utils.create_dates(start_date, period, length=len(y))
        val_size = int(len(y) * .1) if val_size is None else val_size
        y_train, y_val = model_utils.train_val_split(y, val_size=val_size)
        dates_train, dates_val = model_utils.train_val_split(dates,
                                                             val_size=val_size)
        input_df = pd.DataFrame({"ds": dates_train, "y": y_train})
        future_df = pd.DataFrame({"ds": dates_val})
        if x is not None:
            x_train, x_val = model_utils.train_val_split(x, val_size=val_size)
            # Each predictor becomes a column named by its position.
            for variable_id, x_variable in enumerate(x_train.T):
                input_df[variable_id] = x_variable
            for variable_id, x_variable in enumerate(x_val.T):
                future_df[variable_id] = x_variable
        metric_fun = get_metric(metric)

        # Regressor column names do not change between grid points, so they
        # are computed once instead of once per permutation.
        variable_ids = []
        if x is not None:
            variable_ids = sorted(
                set(input_df.columns).difference({"ds", "y"}))

        params_grid = {
            "seasonality": ["additive", "multiplicative"],
            "growth": ["linear", "logistic"],
            "changepoint_prior_scale": [0.005, 0.05, 0.5],
        }
        params_keys, params_values = zip(*params_grid.items())
        params_permutations = [
            dict(zip(params_keys, v))
            for v in itertools.product(*params_values)
        ]

        scores = []
        for permutation in params_permutations:
            try:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    model = Prophet(
                        seasonality_mode=permutation["seasonality"],
                        growth=permutation["growth"],
                        changepoint_prior_scale=permutation[
                            "changepoint_prior_scale"],
                    )
                    for variable_id in variable_ids:
                        model.add_regressor(variable_id)
                    with SuppressStdoutStderr():
                        model.fit(input_df)
                    y_pred = model.predict(future_df)["yhat"].values
                    score = metric_fun(y_val, y_pred)
                    scores.append(score)
            except Exception:
                # Best-effort grid search: a failing combination is ranked
                # last. Catching Exception (not a bare except) lets
                # KeyboardInterrupt / SystemExit stop the search.
                scores.append(np.inf)

        # nanargmin ignores NaN scores when picking the lowest one.
        best_params = params_permutations[np.nanargmin(scores)]
        self.params.update(best_params)
        self.params["tuned"] = True
Exemplo n.º 35
0
plt.rcParams['font.family'] = 'Hiragino Sans'


url = ('https://www.data.jma.go.jp/obd/stats/etrn/view/'
       'monthly_s3.php?prec_no=44&block_no=47662')

# Extract the data: take the first HTML table on the page and drop
# incomplete rows.
dfs = pd.read_html(url)
df = dfs[0].dropna()


# --- Time-series forecasting ---
# Training data: the '1月' column is the target, and the '年' column is
# turned into a 'YYYY-01-01' date string.
data = pd.DataFrame()
data['y'] = df['1月']
# .iloc[0] reads the single cell of each row by position explicitly; plain
# row[0] on a label-indexed row relies on a deprecated positional fallback.
data['ds'] = df[['年']].apply(
    lambda row: '{}'.format(row.iloc[0]), axis=1) + '-01-01'

# Build the model
model = Prophet(daily_seasonality=True, weekly_seasonality=True,
                yearly_seasonality=True)
model.fit(data)

# Forecast 100 additional periods at yearly frequency
future_data = model.make_future_dataframe(periods=100, freq='y')
forecast_data = model.predict(future_data)

# Plot and visualise
model.plot(forecast_data)
model.plot_components(forecast_data)
plt.show()
Exemplo n.º 36
0
def create_prophet_m(app_name, z1, cpu_perc_list, delay=24):
    """Fit Prophet on ``z1.user_count`` for a live forecast and a back-test.

    Two models with the same settings are fitted: one on the whole series
    (its forecast of the next ``delay`` periods is returned as ``pred_r``)
    and one on everything but the last ``delay`` observations, which is
    scored against that held-out tail.

    Args:
        app_name: label printed with the metrics and used as the index of
            the metrics frame.
        z1: frame with a ``user_count`` column; its index supplies the
            timestamps.
        cpu_perc_list: list that receives a ``py.cpu_percent()`` sample.
        delay: number of trailing observations held out / periods forecast.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame (empty when no test row could be matched to a
        prediction).
    """
    n_obs = len(z1)
    prophet_settings = dict(yearly_seasonality=False,
                            changepoint_prior_scale=.03,
                            seasonality_prior_scale=0.2)

    # ---- live forecast on the full history ----
    full_df = z1.user_count.iloc[0:n_obs].reset_index()
    full_df.columns = ['ds', 'y']

    model_r = Prophet(**prophet_settings)
    model_r.fit(full_df)
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='D'))
    forecast_r.index = forecast_r['ds']
    # Rows past the observed history are the live prediction.
    pred_r = pd.DataFrame(
        forecast_r['yhat'][n_obs:(n_obs + delay)]).reset_index()

    # ---- back-test: hold out the last `delay` observations ----
    train_end_index = len(z1.user_count) - delay

    train_df = z1.user_count.iloc[0:train_end_index].reset_index()
    train_df.columns = ['ds', 'y']

    test_df = z1.user_count.iloc[train_end_index:n_obs].reset_index()
    test_df.columns = ['ds', 'y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    model = Prophet(**prophet_settings)
    model.fit(train_df)

    # The append reaches the caller's list; the rebinding on the next line
    # only changes the local name.
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='D'))
    forecast.index = forecast['ds']
    held_out_pred = pd.DataFrame(
        forecast['yhat'][train_end_index:n_obs]).reset_index()
    pred_df = pd.merge(test_df, held_out_pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    # From here on this is the local list created above.
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    if len(pred_df) == 0:
        # Nothing to score: hand back an empty metrics frame.
        return (pd.DataFrame(), model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat
    RMSE = math.sqrt(mse(pred_df.y, pred_df.yhat))

    # Absolute percentage error per row, in percent.
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    ape = pred_df['APE']
    MAPE = ape.mean()

    print("App name:", app_name)
    print("RMSE :", RMSE)
    print("MAPE :", MAPE)

    metrics = {
        'length': n_obs,
        'test_rmse': RMSE,
        'test_mape': MAPE,
        # standard deviation of the APE values around MAPE
        'std_mape': math.sqrt(((ape - MAPE)**2).mean()),
        'min_error_rate': ape.quantile(0) / 100,
        'max_error_rate': ape.quantile(1) / 100,
        'median_error_rate': ape.quantile(.50) / 100,
        # mean APE after dropping values at or above the 98th percentile
        'test_mape_98': ape[ape < ape.quantile(0.98)].mean(),
    }
    df = pd.DataFrame(metrics, index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
Exemplo n.º 37
0
two_years_data = length - (6 * 24 * 30 * 24)
# Materialise the tail as its own frame (instead of resetting the index of a
# slice in place) so the column assignments below do not write into a slice
# of gridwatch_df.
df2 = gridwatch_df[two_years_data:].reset_index()
df2.tail()

# In[11]:

# Fit the data (only 1/7th of it).
# From looking at the data, we can set the high cap to 55,000 and the low
# floor to 15,000; this gives us a workable range for logistic growth.
CAP = 55000
FLOOR = 15000
df2['cap'] = CAP
df2['floor'] = FLOOR

df2_prophet = Prophet(changepoint_prior_scale=0.10, growth='logistic')
df2_prophet.add_country_holidays(country_name='UK')
df2_prophet.fit(df2)

# In[12]:

# Future dataset: hourly predictions ahead of time.
# `periods` will be a variable.
future = df2_prophet.make_future_dataframe(periods=24, freq='H')
# Logistic growth needs the same cap/floor columns at prediction time.
future['cap'] = CAP
future['floor'] = FLOOR
future.tail()

# In[13]:

# Forecast (with uncertainty bounds) for the history plus the future points.
forecast = df2_prophet.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()
Exemplo n.º 38
0
def dept_model():
    """Fit one Prophet model on department-level sales and print a 28-day forecast.

    Reads the CSV file 'df_dept_pred' (also stored in the module-level
    ``df_dept``), encodes ``store_id`` / ``dept_id`` as integers and uses
    them as extra regressors, then predicts 28 days for every
    (department, store) pair and prints the resulting frame with the
    original string ids restored. Returns None.
    """

    print('entering dept_agg')
    global df_dept, df_hobbies_1, df_hobbies_2, df_household_1, df_household_2, df_foods_1, df_foods_2, df_foods_3, df_foods_4
    #df_dept=df_final.groupby(['dept_id','store_id','Date','dayofyear','weekofyear','monthstart','quarterstart','yearstart'])['Sales'].sum()
    #df_dept=df_dept.reset_index()

    df_dept = pd.read_csv('df_dept_pred')
    df_dept = df_dept.rename(columns={'Sales': 'Dept Sales'})
    # .copy(): the columns are renamed and overwritten below, which must not
    # happen on a slice of df_dept (chained-assignment warning).
    df_dept_pred = df_dept[['dept_id', 'store_id', 'Date', 'Dept Sales']].copy()
    df_dept_pred.columns = ['dept_id', 'store_id', 'ds', 'y']

    # String id -> integer code, so the ids can be used as numeric regressors.
    stores = {
        'CA_1': 1,
        'CA_2': 2,
        'CA_3': 3,
        'CA_4': 4,
        'TX_1': 5,
        'TX_2': 6,
        'TX_3': 7,
        'WI_1': 8,
        'WI_2': 9,
        'WI_3': 10
    }
    dept = {
        'FOODS_1': 1,
        'FOODS_2': 2,
        'FOODS_3': 3,
        'HOUSEHOLD_1': 4,
        'HOUSEHOLD_2': 5,
        'HOBBIES_1': 6,
        'HOBBIES_2': 7
    }
    df_dept_pred['store_id'] = df_dept_pred['store_id'].map(stores)
    df_dept_pred['dept_id'] = df_dept_pred['dept_id'].map(dept)

    fb = Prophet(interval_width=0.95,
                 daily_seasonality=True,
                 weekly_seasonality=True,
                 yearly_seasonality=True)
    fb.add_country_holidays(country_name='US')
    fb.add_regressor('store_id')
    fb.add_regressor('dept_id')
    fb.fit(df_dept_pred)

    horizon = 28  # days to forecast
    future = fb.make_future_dataframe(freq='D',
                                      periods=horizon,
                                      include_history=False)

    n_stores = len(stores)
    n_depts = len(dept)

    # One block of `horizon` dates per (department, store) pair, laid out
    # department-major: each store code is repeated `horizon` times, that
    # store cycle is repeated once per department, and each department code
    # spans a full store cycle.
    store_codes = pd.Series(list(stores.values()))
    dept_codes = pd.Series(list(dept.values()))
    store_column = pd.concat([store_codes.repeat(horizon)] * n_depts,
                             ignore_index=True)
    dept_column = dept_codes.repeat(horizon * n_stores).reset_index(drop=True)

    final_df = pd.concat([future] * (n_stores * n_depts), ignore_index=True)
    final_df['store_id'] = store_column
    final_df['dept_id'] = dept_column

    predict = fb.predict(final_df)
    y_pred_df = predict[['yhat']]

    # NOTE(review): the predictions are attached by row position; confirm
    # that Prophet.predict returns rows in the same order as final_df
    # (it may re-sort by 'ds'), otherwise yhat is paired with the wrong ids.
    final = pd.concat([final_df, y_pred_df], axis=1)

    # Integer code -> string id, derived from the forward maps so the two
    # directions cannot drift apart.
    stores2 = {code: name for name, code in stores.items()}
    dept2 = {code: name for name, code in dept.items()}
    final['store_id'] = final['store_id'].map(stores2)
    final['dept_id'] = final['dept_id'].map(dept2)
    final = final.rename(columns={'ds': 'Date', 'yhat': 'Forecasted Sales'})
    final = final[['dept_id', 'store_id', 'Date', 'Forecasted Sales']]
    print(final)
Exemplo n.º 39
0
'''
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)
'''

# The data set covers 51 different stock symbols; a few of them are:
# ['AAPL', 'CLX', 'ETR', 'MCK', 'WMT', 'HCN', 'CTSH', 'NVDA', 'AIV', 'EFX']
# Set `stock` to the symbol you want to forecast.
stock = 'AAPL'
# Price column to forecast; one of ['open', 'high', 'low', 'close'].
price = 'close'

# Rows of the chosen symbol, reduced to Prophet's two-column input:
# 'ds' (date, ascending) and 'y' (the chosen price).
df_prophet = (
    df[df['symbol'] == stock][['date', price]]
    .sort_values('date')
    .rename(columns={'date': 'ds', price: 'y'})
)

# Fit with default settings and forecast 365 periods past the history.
m = Prophet()
m.fit(df_prophet)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

figure = m.plot(forecast, xlabel='Date', ylabel=f'{price}_Price')
plt.show()
Exemplo n.º 40
0
def _null_upper_outliers(frame):
    """Blank out, in place, ``y`` values at or above twice the median when the maximum lies at least two medians above it."""
    median = frame.y.median()
    if (frame.y.quantile(1) - median) >= (2 * median):
        frame.loc[frame.y >= (2 * median), 'y'] = None


def create_prophet_m(app_name,z1,cpu_perc_list,delay=24):
    """Fit Prophet on ``z1.bw`` for a live forecast and a back-test.

    Two models with the same settings are fitted after upper outliers have
    been blanked out of their training data: one on the whole series (its
    forecast of the next ``delay`` periods is returned as ``pred_r``) and one
    on everything but the last ``delay`` observations, which is scored
    against that held-out tail.

    Args:
        app_name: label printed with the metrics and used as the index of
            the metrics frame.
        z1: frame with a ``bw`` column; its index supplies the timestamps.
        cpu_perc_list: list that receives a ``py.cpu_percent()`` sample.
        delay: number of trailing observations held out / periods forecast.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame (empty when no test row could be matched to a
        prediction).
    """

    ### --- For realtime pred ---###

    full_df = z1.bw.iloc[0:len(z1)]
    full_df = full_df.reset_index()
    full_df.columns = ['ds','y']

    # removing outliers
    _null_upper_outliers(full_df)

    #-- Realtime prediction --##
    # model
    model_r = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model_r.fit(full_df)

    # NOTE(review): the append reaches the caller's list, but the rebinding
    # below only changes the local name, so the second sample further down
    # never reaches the caller -- confirm whether in-place update was intended.
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    future_r = model_r.make_future_dataframe(periods=delay,freq='D')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # forecast: rows past the observed history are the live prediction
    pred_r = pd.DataFrame(forecast_r['yhat'][len(z1):(len(z1)+delay)])
    pred_r = pred_r.reset_index()
    #--- completes realtime pred ---#

    # Hold out the last `delay` observations for the back-test.
    train_end_index = len(z1.bw)-delay
    train_df = z1.bw.iloc[0:train_end_index]

    test_df = z1.bw.iloc[train_end_index:len(z1)]

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()

    train_df.columns = ['ds','y']

    #--- removing outliers in trainset  ---#
    _null_upper_outliers(train_df)

    test_df.columns = ['ds','y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    # model
    model = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model.fit(train_df)

    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    future = model.make_future_dataframe(periods=len(test_df),freq='D')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    # forecast for the held-out window
    pred = pd.DataFrame(forecast['yhat'][train_end_index:len(z1)])

    print('length forecasted non realtime=',len(pred))
    pred = pred.reset_index()
    pred_df = pd.merge(test_df,pred,on='ds',how='left')

    # Keep only test rows that have both an observation and a prediction.
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:

        pred_df['error_test'] = pred_df.y-pred_df.yhat

        MSE = mse(pred_df.y,pred_df.yhat)
        RMSE = math.sqrt(MSE)
        # Absolute percentage error per row, in percent.
        pred_df['APE'] = abs(pred_df.error_test*100/pred_df.y)
        MAPE = pred_df.APE.mean()
        min_error_rate = pred_df['APE'].quantile(0)/100
        max_error_rate = pred_df['APE'].quantile(1)/100
        median_error_rate = pred_df['APE'].quantile(.50)/100
        print("App name:",app_name)
        #print("MSE  :",MSE)
        print("RMSE :",RMSE)
        print("MAPE :",MAPE)

        # Mean APE after dropping values at or above the 98th percentile.
        mape_q98 = pred_df['APE'][pred_df.APE<pred_df['APE'].quantile(0.98)].mean()
        std_MAPE = math.sqrt(((pred_df.APE-MAPE)**2).mean())

        df = pd.DataFrame({'length':len(z1),
                           'test_rmse':RMSE,
                           'test_mape':MAPE,
                           'std_mape':std_MAPE,  # standard deviation of the APE values
                           'min_error_rate':min_error_rate,
                           'max_error_rate':max_error_rate,
                           'median_error_rate':median_error_rate,
                           'test_mape_98':mape_q98},
                          index=[app_name])

    return (df,model,forecast,pred_df,pred_r)
Exemplo n.º 41
0
 def predict(self):
     """Fit Prophet on ``self.raw_df`` and store the forecast and merged frames.

     Settings are read from ``params`` (drilldown column, seasonality mode,
     changepoint prior scale and range; the two numeric ones fall back to
     0.05 / 0.80 when the supplied text is not a valid float). The method
     adds ``ds``, ``y_orig`` and ``y`` columns to ``self.raw_df``, then sets
     ``self.forecast_df`` and ``self.merged_df`` (history joined with the
     forecast columns, without ``y``). After the call ``self.raw_df`` is
     indexed by ``ds``.
     """

     def _float_or(raw, fallback):
         # Same fallback rule for both numeric settings.
         try:
             return float(raw)
         except ValueError:
             return fallback

     # NOTE(review): these reads use a bare `params`, while the measure
     # lookup below uses `self.params` -- confirm both are intended.
     time_drilldown = params.get("drilldowns", "Year")
     seasonality_mode = params.get("seasonality_mode", "multiplicative")
     changepoint_prior_scale = _float_or(
         params.get("changepoint_prior_scale", "0.05"), 0.05)
     changepoint_range = _float_or(
         params.get("changepoint_range", "0.80"), 0.80)

     # Turn the drilldown column into date strings pandas can parse.
     raw_times = self.raw_df[time_drilldown]
     if time_drilldown == "Year":
         stamps = [f'{year}-01-31' for year in raw_times]
     elif time_drilldown == "Time":
         # First four characters are the year, the rest is the month.
         stamps = [
             f'{str(date)[:4]}-{str(date)[4:]}-01' for date in raw_times
         ]
     else:
         stamps = list(raw_times)
     self.raw_df["ds"] = pd.to_datetime(stamps)

     measure = self.params["measures"]
     self.raw_df["y_orig"] = self.raw_df[measure].astype(float)
     self.raw_df["y"] = self.raw_df["y_orig"].round(2)
     if DEBUG:
         print("\nRaw DataFrame (head):\n________________\n")
         print(self.raw_df.head())

     model = Prophet(seasonality_mode=seasonality_mode,
                     changepoint_prior_scale=changepoint_prior_scale,
                     changepoint_range=changepoint_range)
     with suppress_stdout_stderr():
         model.fit(self.raw_df)

     # 10 extra periods for yearly data, 50 at monthly frequency otherwise.
     if time_drilldown == "Year":
         extra_periods, step = 10, 'A-JAN'
     else:
         extra_periods, step = 10 * 5, 'm'
     future = model.make_future_dataframe(periods=extra_periods,
                                          freq=step,
                                          include_history=True)
     if DEBUG:
         print("\nFuture DataFrame (unpopulated tail):\n________________\n")
         print(future.tail())

     self.forecast_df = model.predict(future)

     # Merge the forecast with the original data on the date index.
     self.raw_df = self.raw_df.set_index('ds')
     self.forecast_df = self.forecast_df.set_index('ds')
     forecast_columns = [
         'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower',
         'trend_upper'
     ]
     self.merged_df = self.raw_df.join(self.forecast_df[forecast_columns],
                                       how='outer')
     # The rounded target is dropped; 'y_orig' stays in the merged frame.
     del self.merged_df['y']

     if DEBUG:
         print("\nFuture DataFrame (unpopulated tail):\n________________\n")
         print(self.forecast_df.tail(10))
         print("\nFuture DataFrame (last row):\n________________\n")
         print(self.forecast_df.iloc[-1])
Exemplo n.º 42
0
 def test_fit_changepoint_not_in_history(self):
     """Fit and predict must not raise when the given changepoint is absent from the training rows."""
     # Drop the rows from 2013-01-01 through 2014-01-01, which leaves the
     # 2013-06-06 changepoint without any surrounding history.
     before_gap = DATA['ds'] < '2013-01-01'
     after_gap = DATA['ds'] > '2014-01-01'
     history = DATA[before_gap | after_gap]
     all_dates = pd.DataFrame({'ds': DATA['ds']})

     model = Prophet(changepoints=['2013-06-06'])
     model.fit(history)
     model.predict(all_dates)
Exemplo n.º 43
0
 def test_added_regressors(self):
     """Extra regressors: validation, standardisation, feature matrix and forecast components."""
     model = Prophet()
     model.add_regressor('binary_feature', prior_scale=0.2)
     model.add_regressor('numeric_feature', prior_scale=0.5)
     model.add_regressor('binary_feature2', standardize=True)

     history = DATA.copy()
     history['binary_feature'] = [0] * 255 + [1] * 255
     history['numeric_feature'] = list(range(510))
     # Fitting must fail while one registered regressor is missing.
     with self.assertRaises(ValueError):
         model.fit(history)
     history['binary_feature2'] = [1] * 100 + [0] * 410
     model.fit(history)

     # Standardisation parameters recorded for each regressor.
     regressors = model.extra_regressors
     self.assertEqual(
         regressors['binary_feature'],
         {
             'prior_scale': 0.2,
             'mu': 0,
             'std': 1,
             'standardize': 'auto'
         },
     )
     numeric_info = regressors['numeric_feature']
     self.assertEqual(numeric_info['prior_scale'], 0.5)
     self.assertEqual(numeric_info['mu'], 254.5)
     self.assertAlmostEqual(numeric_info['std'], 147.368585, places=5)
     binary2_info = regressors['binary_feature2']
     self.assertEqual(binary2_info['prior_scale'], 10.)
     self.assertAlmostEqual(binary2_info['mu'], 0.1960784, places=5)
     self.assertAlmostEqual(binary2_info['std'], 0.3974183, places=5)

     # Standardised values in the prepared frame.
     prepared = model.setup_dataframe(history.copy())
     self.assertEqual(prepared['binary_feature'][0], 0)
     self.assertAlmostEqual(prepared['numeric_feature'][0],
                            -1.726962,
                            places=4)
     self.assertAlmostEqual(prepared['binary_feature2'][0],
                            2.022859,
                            places=4)

     # Feature matrix and prior scales include the three regressors.
     seasonal_features, prior_scales = model.make_all_seasonality_features(
         prepared)
     for regressor_name in ('binary_feature', 'numeric_feature',
                            'binary_feature2'):
         self.assertIn(regressor_name, seasonal_features)
     self.assertEqual(seasonal_features.shape[1], 29)
     self.assertEqual(set(prior_scales[26:]), {0.2, 0.5, 10.})

     # Prediction also requires every regressor column.
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
     })
     with self.assertRaises(ValueError):
         model.predict(future)
     future['binary_feature2'] = 0
     forecast = model.predict(future)
     self.assertEqual(forecast.shape[1], 31)
     self.assertEqual(forecast['binary_feature'][0], 0)

     # Component columns add up as expected.
     self.assertAlmostEqual(
         forecast['extra_regressors'][0],
         forecast['numeric_feature'][0] + forecast['binary_feature2'][0],
     )
     self.assertAlmostEqual(
         forecast['seasonalities'][0],
         forecast['yearly'][0] + forecast['weekly'][0],
     )
     self.assertAlmostEqual(
         forecast['seasonal'][0],
         forecast['seasonalities'][0] + forecast['extra_regressors'][0],
     )
     self.assertAlmostEqual(
         forecast['yhat'][0],
         forecast['trend'][0] + forecast['seasonal'][0],
     )

     # A constant regressor must be rejected at fit time.
     history['constant_feature'] = 5
     constant_model = Prophet()
     constant_model.add_regressor('constant_feature')
     with self.assertRaises(ValueError):
         constant_model.fit(history.copy())
def make_tableau_dataset(
        inpath=os.path.join("data", "processed", "data_ready.csv"),
        outpath=os.path.join("data", "processed", "data_ready_tableau.csv"),
):
    """Build the long-format csv file consumed by the Tableau dashboard.

    Keeps only the sub-meter columns of the input, forecasts each of them
    with Prophet at hourly frequency up to hour 23 of the month end that
    follows the last reading, flags every row as measured or predicted and
    as belonging to the current / previous month, and writes the melted
    result to ``outpath``.

    Keyword Arguments:
        inpath {string} -- Path to the last iteration of data (default: {os.path.join("data", "processed", "data_ready.csv")})
        outpath {string} -- Path to output file (default: {os.path.join("data", "processed", "data_ready_tableau.csv")})
    """
    # Source column -> dashboard label. Every other column (total energy
    # use, weather information) is dropped.
    labels = {
        "Sub_metering_1": "Kitchen",
        "Sub_metering_2": "Laundry Room",
        "Sub_metering_3": "Heating and Air Conditioning",
        "unmeasured": "Other",
    }
    columns = list(labels.values())

    readings = pd.read_csv(inpath,
                           index_col=["Date_Time"],
                           parse_dates=["Date_Time"])
    df = readings[list(labels)].rename(columns=labels)

    # Number of hourly steps between the last reading and the forecast end.
    last_day = df.index[-1]
    month_end = (last_day + MonthEnd(1)).replace(hour=23)
    hours_to_predict = int((month_end - last_day).total_seconds() / 3600)

    # Empty frame indexed by the future timestamps; one column per sub-meter
    # is filled in below.
    future_index = pd.date_range(last_day + timedelta(hours=1),
                                 periods=hours_to_predict,
                                 freq="H")
    predictions_df = pd.DataFrame(index=future_index)
    predictions_df.index.name = "Date_Time"

    for column in columns:
        # Prophet expects the timestamps in "ds" and the values in "y".
        history = df[column].reset_index(level=0)
        history.columns = ["ds", "y"]

        m = Prophet()
        m.fit(history)
        forecast = m.predict(
            m.make_future_dataframe(periods=hours_to_predict, freq="H"))
        # The forecast also covers the history; keep only the future tail.
        predictions_df[column] = (
            forecast["yhat"].iloc[-hours_to_predict:].to_numpy())

    df = pd.concat([
        df.assign(prediction=False),
        predictions_df.assign(prediction=True),
    ])

    # Label rows of the month of the last reading and of the month before it
    # (December of the previous year when the last reading is in January).
    stamps = df.index
    same_year = stamps.year == last_day.year
    is_current = same_year & (stamps.month == last_day.month)
    if last_day.month == 1:
        is_previous = (stamps.year == last_day.year - 1) & (stamps.month == 12)
    else:
        is_previous = same_year & (stamps.month == last_day.month - 1)
    df["month"] = np.where(
        is_previous,
        "Last Month",
        np.where(is_current, "Current Month", ""),
    )

    # One row per (timestamp, measure) pair.
    long_df = df.reset_index(level=0).melt(
        id_vars=["Date_Time", "prediction", "month"],
        var_name="measure",
        value_name="Value",
    )

    long_df.to_csv(outpath, index=False)
Exemplo n.º 45
0
# Prophet expects the timestamp column to be called 'ds' and the value 'y'.
dfall_pressure_1 = dfall_chongqing_day_pressure.rename(columns={
    'date': 'ds',
    'pressure': 'y'
})
#dfall['y'] = np.log(dfall['y'])
# Min-max scale the pressure to [0, 1]; the extremes are computed once.
pressure_min = dfall_pressure_1['y'].min()
pressure_max = dfall_pressure_1['y'].max()
dfall_pressure_1['y'] = (dfall_pressure_1['y'] - pressure_min) / (
    pressure_max - pressure_min)
dfall_pressure_1['ds'] = pd.to_datetime(dfall_pressure_1['ds'])
# NOTE: the former `dfall_pressure_1.set_index('ds')` call was dropped: its
# result was discarded, so it never changed the frame, and 'ds' has to stay a
# regular column for Prophet.fit below.
df_pressure = dfall_pressure_1

# One Prophet model per weather variable, all with the same settings: no
# daily/weekly seasonality and a small changepoint prior scale.
m_temperature = Prophet(daily_seasonality=False,
                        weekly_seasonality=False,
                        changepoint_prior_scale=0.01)
m_temperature.fit(df_temperature)

m_humidity = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_humidity.fit(df_humidity)

m_pressure = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_pressure.fit(df_pressure)

# Extend each model's history by 180 periods (Prophet's default frequency).
future_temperature = m_temperature.make_future_dataframe(periods=180)
future_temperature.tail()

future_humidity = m_humidity.make_future_dataframe(periods=180)
Exemplo n.º 46
0
# Target values of `data1` as a single-column 2-D array.
y = data1.values.reshape(-1, 1)

from sklearn.neural_network import MLPRegressor
# MLP with three hidden layers (32, 32, 10); fixed random_state for
# reproducibility. `x` is defined outside this snippet.
model = MLPRegressor(hidden_layer_sizes=[32, 32, 10], max_iter=50000, alpha=0.0005, random_state=26)
_=model.fit(x, y.ravel())

# Inputs 0 .. len(data1)+6, i.e. 7 values past the length of data1
# (presumably x is a positional index -- TODO confirm against its definition).
test = np.arange(len(data1)+7).reshape(-1, 1)
pred = model.predict(test)
# Round the predictions to whole numbers.
prediction = pred.round().astype(int)

prediction = pd.DataFrame(prediction)

prediction.plot()

# Prophet fit on `data` (defined outside this snippet) and a forecast that
# extends the history by 30 periods.
m=Prophet()
m.fit(data)
future=m.make_future_dataframe(periods=30)
forecast_cm=m.predict(future)
forecast_cm

# Keep the date and trend columns, drop rows with a non-positive trend, and
# retain the last 30 remaining rows under the names Date / Confirm.
cnfrm = forecast_cm.loc[:,['ds','trend']]
cnfrm = cnfrm[cnfrm['trend']>0]
cnfrm.head()
cnfrm=cnfrm.tail(30)
cnfrm.columns = ['Date','Confirm']
cnfrm.head()

# Interactive plotly rendering of the forecast.
fig_cm = plot_plotly(m, forecast_cm)
py.iplot(fig_cm) 

# Static matplotlib rendering; rebinds fig_cm.
fig_cm = m.plot(forecast_cm,xlabel='Date',ylabel='Confirmed Count')
Exemplo n.º 47
0
class BuildProphet(BuildBase):
    """Class to build a Prophet Model
    """
    def __init__(self, forecast_period, time_interval, seasonal_period,
                 scoring, verbose, conf_int, holidays, growth, seasonality,
                 **kwargs):
        """
        Automatically build a Prophet Model

        :param forecast_period Number of periods to forecast (forwarded to the base class)
        :param time_interval Time interval / frequency string of the series
        :param seasonal_period Seasonal period, used when seasonality is added in fit()
        :param scoring Scoring setting (forwarded to the base class)
        :param verbose Verbosity level (forwarded to the base class)
        :param conf_int Confidence interval width; stored and reported by fit()
        :param holidays Holidays setting; stored only
        :param growth Prophet growth mode used for the initial model
        :param seasonality Whether fit() should add a custom seasonality
        :param kwargs Extra options. Only the presence of 'seasonality_mode'
        is acted upon: it switches the seasonality flag on.
        """
        super().__init__(scoring=scoring,
                         forecast_period=forecast_period,
                         verbose=verbose)
        self.time_interval = time_interval
        self.seasonal_period = seasonal_period
        self.conf_int = conf_int
        self.holidays = holidays
        self.growth = growth
        self.seasonality = seasonality
        # NOTE: only `growth` is handed to Prophet here; conf_int, holidays and
        # the seasonality flag are stored on the instance but not passed on.
        self.model = Prophet(growth=self.growth)
        self.univariate = None
        self.list_of_valid_time_ints = [
            'B', 'C', 'D', 'W', 'M', 'SM', 'BM', 'CBM', 'MS', 'SMS', 'BMS',
            'CBMS', 'Q', 'BQ', 'QS', 'BQS', 'A,Y', 'BA,BY', 'AS,YS', 'BAS,BYS',
            'BH', 'H', 'T,min', 'S', 'L,ms', 'U,us', 'N'
        ]
        # NOTE(review): appending the caller's value makes every time_interval
        # pass the `in self.list_of_valid_time_ints` checks in fit()/predict(),
        # so it is always used verbatim as the frequency -- confirm intended.
        self.list_of_valid_time_ints.append(time_interval)

        # The previous loop over kwargs only rebound its own loop variable; its
        # single effect was to enable seasonality when a seasonality_mode
        # option is supplied, which is kept here.
        if 'seasonality_mode' in kwargs:
            self.seasonality = True

    def fit(self, ts_df: pd.DataFrame, target_col: str, cv: Optional[int],
            time_col: str):
        """
        Fits the model to the data

        Unless cv == 0, the data is first scored with an expanding-window
        cross validation (one fresh Prophet model per fold); afterwards a
        fresh Prophet model is fitted on the whole data set and stored in
        self.model.

        :param ts_df The time series data to be used for fitting the model
        :type ts_df pd.DataFrame

        :param target_col The column name of the target time series that needs to be modeled.
        All other columns (except the time column) will be considered as
        exogenous variables (if applicable to method)
        :type target_col str

        :param cv: Number of folds to use for cross validation.
        Number of observations in the Validation set for each fold = forecast period
        If None, a single fold is used. If 0, cross validation is skipped.
        :type cv Optional[int]

        :param time_col: Name of the time column in the dataset (needed by Prophet)
        Time column can also be the index, in which case, this would be the name of the index
        :type time_col str

        :return (fitted model, cross validation predictions, RMSE per fold,
        normalized RMSE per fold); the last three are empty lists when cross
        validation is skipped
        :rtype tuple
        """
        self.time_col = time_col
        self.original_target_col = target_col
        # The time column is renamed to 'ds' below, so it must not be treated
        # as an exogenous predictor when it is a regular column of ts_df
        # (selecting it by its old name afterwards would raise a KeyError).
        non_predictors = (self.original_target_col, self.time_col)
        self.original_preds = [
            x for x in list(ts_df) if x not in non_predictors
        ]

        self.univariate = len(self.original_preds) == 0

        ts_df = copy.deepcopy(ts_df)

        ##### if you are going to use matplotlib with prophet data, it gives an error unless you do this.
        pd.plotting.register_matplotlib_converters()

        actual = 'y'
        timecol = 'ds'

        data = self.prep_col_names_for_prophet(ts_df=ts_df, test=False)

        if self.univariate:
            dft = data[[timecol, actual]]
        else:
            dft = data[[timecol, actual] + self.original_preds]

        ##### For most Financial time series data, 80 percent conf interval is enough...
        if self.verbose >= 1:
            print(
                '    Fit-Predict data (shape=%s) with Confidence Interval = %0.2f...'
                % (dft.shape, self.conf_int))

        # NOTE(review): regressors and the custom seasonality below are
        # registered on the current self.model, but every fold and the final
        # fit use a fresh Prophet(growth="linear") instance that replaces
        # self.model, so this configuration is not carried over -- confirm
        # whether that is intended.
        if self.univariate is False:
            for name in self.original_preds:
                self.model.add_regressor(name)

        print("  Starting Prophet Fit")

        if self.seasonality:
            prophet_seasonality, prophet_period, fourier_order, prior_scale = get_prophet_seasonality(
                self.time_interval, self.seasonal_period)
            self.model.add_seasonality(name=prophet_seasonality,
                                       period=prophet_period,
                                       fourier_order=fourier_order,
                                       prior_scale=prior_scale)
            print(
                '       Adding %s seasonality to Prophet with period=%d, fourier_order=%d and prior_scale=%0.2f'
                % (prophet_seasonality, prophet_period, fourier_order,
                   prior_scale))
        else:
            print(
                '      No seasonality assumed since seasonality flag is set to False'
            )

        is_dask = isinstance(dft, dask.dataframe.core.DataFrame)
        num_obs = dft.shape[0].compute() if is_dask else dft.shape[0]

        ### cv == 0 is the way to skip cross validation when running auto-ts multiple times. ###
        cv_in = 0 if cv == 0 else copy.deepcopy(cv)
        NFOLDS = self.get_num_folds_from_cv(cv)

        #########################################################################################
        # NOTE: This change to the FB recommendation will cause the cv folds from facebook to
        # be incompatible with the folds from the other models (in terms of periods of evaluation
        # as well as number of observations in each period). Hence the final comparison will
        # be biased since it will not compare the same folds.

        # The original implementation was giving issues under certain conditions, hence this change
        # to FB recommendation has been made as a temporary (short term) fix.
        # The root cause issue will need to be fixed eventually at a later point.
        #########################################################################################

        ### Prophet's Time Interval translates into frequency based on the following pandas date_range alias:
        #  Link: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
        if self.time_interval in self.list_of_valid_time_ints:
            time_int = copy.deepcopy(self.time_interval)
        else:
            time_int = self.get_prophet_time_interval(for_cv=False)

        print("  Starting Prophet Cross Validation")
        ################################################################################
        if self.forecast_period <= 5:
            #### Set a minimum of 5 for the number of rows in test!
            self.forecast_period = 5
        ### In case the number of forecast_period is too high, just reduce it so it can fit into num_obs
        if NFOLDS * self.forecast_period > num_obs:
            self.forecast_period = int(num_obs / (NFOLDS + 1))
            print('Lowering forecast period to %d to enable cross_validation' %
                  self.forecast_period)
        ###########################################################################################
        max_trainsize = len(dft) - self.forecast_period
        try:
            # `test_size` is only accepted by newer scikit-learn releases.
            cv = TimeSeriesSplit(n_splits=NFOLDS,
                                 test_size=self.forecast_period)
        except TypeError:
            # Older releases reject the unknown keyword; fall back to capping
            # the training window instead.
            cv = TimeSeriesSplit(n_splits=NFOLDS, max_train_size=max_trainsize)
        print('Max. iterations using expanding window cross validation = %d' %
              NFOLDS)
        start_time = time.time()
        rmse_folds = []
        norm_rmse_folds = []
        forecast_df_folds = []

        extra_concatenated = pd.DataFrame()

        if is_dask:
            dft = dft.head(
                len(dft))  ### this converts dask into a pandas dataframe

        ## this limits iterations and hence speeds up prophet
        fit_kwargs = {'iter': 1e2}

        if cv_in == 0:
            print(
                'Skipping cross validation steps since cross_validation = %s' %
                cv_in)
        else:
            for fold_number, (train_index,
                              test_index) in enumerate(cv.split(dft)):
                # Expanding window: the fold always starts at the first row;
                # train is the leading segment and test follows right after.
                dftx = dft.head(len(train_index) + len(test_index))
                train_fold = dftx.head(len(train_index))
                test_fold = dftx.tail(len(test_index))

                horizon = len(test_fold)
                print(
                    f"\nFold Number: {fold_number+1} --> Train Shape: {train_fold.shape[0]} Test Shape: {test_fold.shape[0]}"
                )

                #### Define and fit a fresh model on the train fold ####
                model = Prophet(growth="linear")
                model.fit(train_fold, **fit_kwargs)

                #### Predict the horizon covered by the test fold ####
                future_period = model.make_future_dataframe(freq=time_int,
                                                            periods=horizon)
                forecast_df = model.predict(future_period)

                ### Now compare the actuals with predictions ######
                y_pred = forecast_df['yhat'].iloc[-horizon:]

                concatenated = pd.DataFrame(np.c_[test_fold[actual].values,
                                                  y_pred.values],
                                            columns=['original', 'predicted'],
                                            index=test_fold.index)

                if fold_number == 0:
                    extra_concatenated = copy.deepcopy(concatenated)
                else:
                    # DataFrame.append was removed in pandas 2.0; pd.concat is
                    # the equivalent that works on old and new versions.
                    extra_concatenated = pd.concat(
                        [extra_concatenated, concatenated])

                rmse_fold, rmse_norm = print_dynamic_rmse(
                    concatenated['original'].values,
                    concatenated['predicted'].values,
                    concatenated['original'].values)

                print('Cross Validation window: %d completed' %
                      (fold_number + 1, ))
                rmse_folds.append(rmse_fold)
                norm_rmse_folds.append(rmse_norm)

            ######################################################
            ### This is where you consolidate the CV results #####
            ######################################################
            # Plot the forecast of the last fold.
            model.plot(forecast_df)

            try:
                print_ts_model_stats(extra_concatenated['original'],
                                     extra_concatenated['predicted'],
                                     "Prophet")
            except Exception:
                # Reporting is best effort; do not fail the fit because of it.
                print('Error: Not able to plot Prophet CV results')

            forecast_df_folds = extra_concatenated['predicted'].values
            print('Time Taken = %0.0f seconds' % ((time.time() - start_time)))

        #### Now you need to fit Prophet on the whole train data set ##########
        dftx = dft.head(len(dft))
        self.model = Prophet(growth="linear")
        self.model.fit(dftx, **fit_kwargs)
        print("  End of Prophet Fit")

        return self.model, forecast_df_folds, rmse_folds, norm_rmse_folds

    def refit(self, ts_df: pd.DataFrame) -> object:
        """
        Refits an already trained model using a new dataset
        Useful when fitting to the full data after testing with cross validation

        NOTE: this is currently a stub. The method body consists of this
        docstring only, so calling it does nothing and returns None.

        :param ts_df The time series data to be used for fitting the model
        :type ts_df pd.DataFrame
        :rtype object
        """

    def predict(self,
                testdata: Optional[pd.DataFrame] = None,
                forecast_period: Optional[int] = None,
                simple: bool = False,
                return_train_preds: bool = False) -> Optional[NDFrame]:
        """
        Return the predictions

        Prophet forecasts in two steps: first a dataframe of the dates to
        predict is built (for univariate models this contains the past dates
        plus the requested future periods), then the model predicts every date
        in that dataframe.

        :param testdata The test dataframe containing the exogenous variables to be used for prediction.
        For univariate models an int is also accepted and is taken as the number of periods.
        Multivariate models require a dataframe; otherwise an error is printed and None is returned.
        :type testdata Optional[pd.DataFrame]
        :param forecast_period The number of periods to make a prediction for.
        Only used for univariate models when testdata is not an int or a dataframe;
        if it is None as well, the stored forecast period is used.
        :type forecast_period Optional[int]
        :param simple If True, this method just returns the predictions ('yhat').
        If False, it returns all columns of the Prophet forecast
        :type simple bool
        :param return_train_preds If True, this method returns every row of the forecast
        (for univariate models this includes the train predictions).
        If False, it will return only the rows of the forecast period
        :type return_train_preds bool
        :rtype NDFrame

        # TODO: What about future exogenous variables?
        # https://towardsdatascience.com/forecast-model-tuning-with-additional-regressors-in-prophet-ffcbf1777dda
        """
        if self.time_interval in self.list_of_valid_time_ints:
            time_int = copy.deepcopy(self.time_interval)
        else:
            time_int = self.get_prophet_time_interval(for_cv=False)

        if self.univariate:
            if isinstance(testdata, int):
                forecast_period = testdata
            elif isinstance(testdata, pd.DataFrame):
                forecast_period = testdata.shape[0]
            elif forecast_period is None:
                # Previously an explicitly passed forecast_period was always
                # overwritten here; the stored value is now only the default.
                forecast_period = self.forecast_period
            self.forecast_period = forecast_period
            future = self.model.make_future_dataframe(
                periods=self.forecast_period, freq=time_int)
        else:
            if not isinstance(testdata, pd.DataFrame):
                # Covers None, ints and any other unsupported type (the latter
                # used to fall through and fail on an unbound `future`).
                print(
                    "(Error): Model is Multivariate, hence test dataframe must be provided for prediction."
                )
                return None
            forecast_period = testdata.shape[0]
            self.forecast_period = forecast_period
            future = self.prep_col_names_for_prophet(ts_df=testdata,
                                                     test=True)
        print('Building Forecast dataframe. Forecast Period = %d' %
              self.forecast_period)
        ### This will work in both univariate and multi-variate cases now ######

        forecast = self.model.predict(future)

        if return_train_preds:
            return forecast['yhat'] if simple else forecast

        # Return values for the forecast period only
        horizon_rows = forecast.iloc[-forecast_period:]
        return horizon_rows['yhat'] if simple else horizon_rows

    # TODO: Update: This method will not be used in CV since it is in D always.
    # Hence Remove the 'for_cv' argument
    def get_prophet_time_interval(self, for_cv: bool = False) -> str:
        """
        Returns the time interval in Prophet compatible format

        Maps self.time_interval onto a pandas frequency alias. Values that
        match none of the known names fall back to weekly ('W').

        :param for_cv Currently unused; the same alias is returned either way.
        :type for_cv bool
        :rtype str
        """
        alias_groups = (
            (('months', 'month', 'm'), 'M'),
            (('days', 'daily', 'd'), 'D'),
            (('weeks', 'weekly', 'w'), 'W'),
            # TODO: Add time_int for other options if they are different for CV and for future forecasts
            (('qtr', 'quarter', 'q'), 'Q'),
            (('years', 'year', 'annual', 'y', 'a'), 'Y'),
            (('hours', 'hourly', 'h'), 'H'),
            # Minutes used to map to 'M', which pandas reads as month end;
            # 'min' is the minute alias.
            (('minutes', 'minute', 'min', 'n'), 'min'),
            (('seconds', 'second', 'sec', 's'), 'S'),
        )
        for names, alias in alias_groups:
            if self.time_interval in names:
                return alias
        return 'W'

    def prep_col_names_for_prophet(self,
                                   ts_df: pd.DataFrame,
                                   test: bool = False) -> pd.DataFrame:
        """
        Return a copy of the input dataframe with the column names Prophet expects.

        The time column is renamed to 'ds'. Unless ``test`` is True, the
        target column is additionally renamed to 'y'. When the time column is
        not among the columns, the index is reset first so that a time index
        becomes a regular column. The input dataframe is left untouched.

        :param ts_df The dataframe whose columns should be renamed
        :type ts_df pd.DataFrame
        :param test If False, both time and target columns are renamed;
        otherwise only the time column is renamed
        :type test bool
        :rtype pd.DataFrame
        """
        if self.time_col in ts_df.columns:
            frame = ts_df.copy(deep=True)
        else:
            # The time column is expected to be the index in this case.
            frame = ts_df.reset_index()

        if self.time_col not in frame.columns:
            print(
                "(Error): You have not provided the time_column values. This will result in an error"
            )

        renames = {self.time_col: 'ds'}
        if test is False:
            renames[self.original_target_col] = 'y'

        return frame.rename(columns=renames)
Exemplo n.º 48
0
# Download the workbook from blob storage into memory and parse it. The
# context manager closes the buffer even if the download or parsing fails.
with BytesIO() as byte_stream:
    blobservice.get_blob_to_stream(container_name='htflaskcontainer', blob_name='asgdu.xlsx', stream=byte_stream)
    byte_stream.seek(0)
    ser=pd.read_excel(byte_stream,index_col=0)
#ser.head()
#ser = pd.read_excel('Copy of DB-O.xlsx',sheet_name='ASGDB1-O',index_col=0)
# Scale the first 21 rows of the first four columns by 1000 (vectorized
# instead of a per-element lambda).
for i in range(0,4):
    ser.iloc[:21,i]=ser.iloc[:21,i] * 1000
    print(i)
########################FBPROPHET####################
# NOTE: revdf aliases ser until the rename below, so the 'ds' column is added
# to ser as well.
revdf = ser
revdf['ds']= revdf.index
revdf=revdf.rename(columns={"Total Sum of Revenue":'y'})
my_model = Prophet(interval_width=0.95,changepoint_prior_scale=4)
my_model.fit(revdf[['ds','y']])
# Forecast 6 further periods at month-start frequency.
future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
forecast = my_model.predict(future_dates)
forecast[['ds', 'yhat','yhat_lower', 'yhat_upper']]
from sklearn.metrics import mean_squared_error
# In-sample RMSE: compare the actuals with the forecast rows covering history.
rms = np.sqrt(mean_squared_error(revdf['y'],forecast['yhat'][:len(revdf['y'])]))
#print(rms)
# The six out-of-sample predictions.
adrf = forecast['yhat'].tail(6)
my_model.plot(forecast,uncertainty=True)
########################### predicting FTE ##################################
A=ser['Total Sum of BFTE']
#from plotly.plotly import plot_mpl
from statsmodels.tsa.seasonal import seasonal_decompose
# NOTE(review): newer statsmodels releases name this argument `period`
# instead of `freq` -- confirm against the installed version.
result = seasonal_decompose(A, model='additive',freq=12)
fig = result.plot()
from statsmodels.tsa.stattools import adfuller
Exemplo n.º 49
0
# rename columns header
furniture.columns =["Order Date", "Sales"]
# print(furniture)

# plot data - descriptive
furniture["Sales"].plot( figsize=(15, 6))
plt.show()
# furniture.plot()

furniture.to_csv(r'C:\Users\Gebruiker\Desktop\Data\Output.csv', sep=',', index=False)

# Start Prophet section
# rename header to the column names Prophet expects
furniture = furniture.rename(columns={'Order Date': 'ds', 'Sales': 'y'})
furniture_model = Prophet(interval_width=0.95)
furniture_model.fit(furniture)

# history plus 12 further month-end periods, then the forecast for all of them
furniture_forecast = furniture_model.make_future_dataframe(periods=12, freq='M')
furniture_forecast = furniture_model.predict(furniture_forecast)


# Prophet.plot opens its own default-sized figure unless it is given axes,
# which used to leave the 18x6 figure created here empty. Drawing on that
# figure's axes applies the requested size to the forecast plot.
plt.figure(figsize=(18, 6))
furniture_model.plot(furniture_forecast, ax=plt.gca(), xlabel = 'Date', ylabel = 'Sales')
plt.title('Furniture Sales')


# forecast row for one specific date
furniture_forecast_1 = furniture_forecast.loc[furniture_forecast['ds'] == "2018/1/31"]
print(furniture_forecast_1)

furniture_forecast.to_csv(r'C:\Users\Gebruiker\Desktop\Data\Output_f.csv', sep=',', index=False)
Exemplo n.º 50
0
    def train_test_prophet(self):
        '''
        Train one Prophet model per (device, node) pair and predict on its test set.

        1. Training the model on the Train set, and predicting on the Test set
        2. Setting growth = 'linear'. To use Logistic growth (appropriate parameter for this problem),
            it requires domain inputs such as cap and floor of the Application Available,
            which should provide better results for this problem
        3. For every pair, dates of future weeks (after the last test date) are prepared for prediction
        4. self.original_data[device][node] holds a 'train' and a 'test' part, each exposing
            the attributes memory, date and total; Prophet is fed two columns ['ds','y'],
            where ds=dates, y=memory

        A model is only fitted when the pair has more than 2 training points.
        NOTE(review): the visible part of this method ends without a return statement.
        '''
        pred_prophet = {}
        for device in self.original_data:
            pred_prophet[device] = {}
            for node in self.original_data[device]:
                node_data = self.original_data[device][node]
                app_data_train = list(node_data['train'].memory)
                app_data_test = list(node_data['test'].memory)
                date_data_train = list(node_data['train'].date)
                date_data_test = list(node_data['test'].date)
                total_data_test = list(node_data['test'].total)
                # Per-pair result record, seeded with the training history.
                pred_prophet[device][node] = {}
                pred_prophet[device][node]['expected'] = []
                pred_prophet[device][node]['lower'] = []
                pred_prophet[device][node]['upper'] = []
                pred_prophet[device][node]['error'] = []
                pred_prophet[device][node]['history'] = list(app_data_train)
                pred_prophet[device][node]['history_date'] = list(
                    date_data_train)
                pred_prophet[device][node]['forecast'] = list(app_data_train)
                pred_prophet[device][node]['total'] = list(
                    node_data['train'].total)
                # NOTE: a DataFrame identical to `train` used to be assigned to
                # `pred_prophet` here. That clobbered the result dictionary and
                # made the next node's `pred_prophet[device][node] = {}` raise
                # a KeyError, so the assignment was removed.
                prediction = ()
                train = pd.DataFrame({
                    'ds': date_data_train,
                    'y': app_data_train
                })
                test = pd.DataFrame({'ds': date_data_test, 'y': app_data_test})
                if len(app_data_train) > 2:
                    model = Prophet(growth='linear',
                                    changepoints=None,
                                    n_changepoints=25,
                                    changepoint_range=0.8,
                                    yearly_seasonality=False,
                                    weekly_seasonality=True,
                                    daily_seasonality=False,
                                    holidays=None,
                                    seasonality_mode='multiplicative',
                                    seasonality_prior_scale=0.1,
                                    holidays_prior_scale=0.1,
                                    changepoint_prior_scale=0.05,
                                    mcmc_samples=0,
                                    interval_width=0.95,
                                    uncertainty_samples=10)
                    trained_model = model.fit(train)

                    forecast = trained_model.predict(test)
                    print("forecasting", forecast)
                    # Weekly dates after the training history; keep those that
                    # lie beyond the test set and cap them at 54 rows.
                    future_weeks = model.make_future_dataframe(
                        periods=100, freq='W', include_history=False)
                    future_weeks = future_weeks.loc[
                        future_weeks.ds > test.ds.max()]
                    future_weeks = future_weeks.head(54).reset_index().drop(
                        ['index'], axis=1)
Exemplo n.º 51
0
def stock():
    """Serve the stock-forecast form (GET) or run a Prophet forecast (POST).

    GET renders ``regression/stock.html`` together with the four market code
    tables (``kospi_dict``, ``kosdaq_dict``, ``nyse_dict``, ``nasdaq_dict``).

    POST reads ``market``, the matching ``<market>_code`` field, ``learn`` and
    ``pred`` from the submitted form, downloads closing prices with
    ``pdr.DataReader`` (``data_source='yahoo'``) for the last
    ``learn * 365`` days up to yesterday, fits a Prophet model, saves the
    forecast plot to ``static/img/stock.png`` under the application root and
    renders ``regression/stock_res.html``.

    Raises:
        KeyError: if a required form field is missing or the submitted code is
            not present in the selected market table.
        ValueError: if ``learn`` or ``pred`` is not an integer string.
    """
    menu = {
        'ho': 0,
        'da': 0,
        'ml': 10,
        'se': 0,
        'co': 0,
        'cg': 0,
        'cr': 0,
        'wc': 0,
        'cf': 0,
        'ac': 0,
        're': 1,
        'cu': 0
    }
    if request.method == 'GET':
        return render_template('regression/stock.html',
                               menu=menu,
                               weather=get_weather(),
                               kospi=kospi_dict,
                               kosdaq=kosdaq_dict,
                               nyse=nyse_dict,
                               nasdaq=nasdaq_dict)

    # --- POST: resolve the ticker symbol and company name -----------------
    market = request.form['market']
    if market == 'KS':
        code = request.form['kospi_code']
        company = kospi_dict[code]
        code += '.KS'   # market suffix appended to the KOSPI code
    elif market == 'KQ':
        code = request.form['kosdaq_code']
        company = kosdaq_dict[code]
        code += '.KQ'   # market suffix appended to the KOSDAQ code
    elif market == 'NY':
        code = request.form['nyse_code']
        company = nyse_dict[code]
    else:
        # Any other market value is treated as NASDAQ.
        code = request.form['nasdaq_code']
        company = nasdaq_dict[code]
    learn_period = int(request.form['learn'])
    pred_period = int(request.form['pred'])
    current_app.logger.debug(
        f'{market}, {code}, {learn_period}, {pred_period}')

    # Training window: `learn_period` blocks of 365 days, ending yesterday.
    today = datetime.now()
    start_learn = today - timedelta(days=learn_period * 365)
    end_learn = today - timedelta(days=1)

    stock_data = pdr.DataReader(code,
                                data_source='yahoo',
                                start=start_learn,
                                end=end_learn)
    current_app.logger.info(f"get stock data: {company}({code})")

    # Prophet expects the columns 'ds' (timestamp) and 'y' (value).
    df = pd.DataFrame({'ds': stock_data.index, 'y': stock_data.Close})
    df.reset_index(inplace=True)
    try:
        # reset_index() moves the former index into a column; drop it when it
        # is called 'Date'.
        del df['Date']
    except KeyError:
        # Only a missing column is expected here; anything else should
        # propagate instead of being silently swallowed.
        current_app.logger.error('Date error')

    model = Prophet(daily_seasonality=True)
    model.fit(df)
    future = model.make_future_dataframe(periods=pred_period)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    img_file = os.path.join(current_app.root_path, 'static/img/stock.png')
    fig.savefig(img_file)
    # File modification time is handed to the template
    # (presumably as a cache-busting token -- confirm in the template).
    mtime = int(os.stat(img_file).st_mtime)

    # NOTE(review): the GET branch calls get_weather() while this branch
    # calls get_weather_main() -- confirm which helper is intended.
    return render_template('regression/stock_res.html',
                           menu=menu,
                           weather=get_weather_main(),
                           mtime=mtime,
                           company=company,
                           code=code)
Exemplo n.º 52
0
def ts_outliers(y_df, t_col, y_col, coef=3.0, verbose=False, replace=False, ignore_dates=None, lbl_dict=None, r_val=1.0):
    """
    Find outliers in y_col, which is a time series, from the residuals of an in-sample Prophet fit.
    A point is an outlier when it lies outside [yhat + lwr, yhat + upr], where (upr, lwr) are
    returned by iqr_filter applied to the residuals y - yhat.
    Assumes y_col >= 0
    :param y_df: DF with y_col (data) and t_col
    :param t_col: time column name.
    :param y_col: data column
    :param coef: IQR coefficient
    :param verbose: verbose
    :param lbl_dict: into dict (context), only used in log messages and file names
    :param r_val: r_val = 1 replaces by the yhat_upr/yhat_lwr value, r_val=0 replaces by yhat. In between, a weighted avg
    :param replace: if True replace the outlier value(s) by the Prophet in-sample forecast. If false, set outlier to nan
    :param ignore_dates: do not replace outliers for dates in this list
    :return: tuple (DF, err, off)
             DF: copy of y_df (index reset) with either nan in outliers or fit outliers;
                 the original y_df when it has 10 rows or fewer; None when the Prophet fit raises ValueError
             err: percentage of rows flagged as outliers (rounded), or nan when no detection was run
             off: DF with the adjusted outlier rows when verbose is True and outliers exist, else None
    """
    if len(y_df) <= 10:
        su.my_print(str(os.getpid()) + ' WARNING: not enough points for outlier detection: ' + str(len(y_df)))
        return y_df, np.nan, None

    # look for outliers: work on a copy with Prophet's column names
    _y_df = y_df.copy()
    _y_df.rename(columns={t_col: 'ds', y_col: 'y'}, inplace=True)
    _y_df.reset_index(inplace=True, drop=True)
    try:
        if verbose:
            m = Prophet(changepoint_range=0.9)
            m.fit(_y_df[['ds', 'y']])
        else:
            with su.suppress_stdout_stderr():
                m = Prophet(changepoint_range=0.9)
                m.fit(_y_df[['ds', 'y']])
    except ValueError:
        # NOTE(review): the message announces "returning original DF" and a '_prophet_df.par' file,
        # but the code returns None and saves under '~/my_tmp/_y_df' -- confirm which is intended.
        su.my_print(str(os.getpid()) + ' ERROR: prophet err: returning original DF. Data len: ' + str(len(_y_df)) + ' Saving to ' + '~/my_tmp/_prophet_df.par')
        _y_df.rename(columns={'ds': t_col, 'y': y_col}, inplace=True)
        save_df(_y_df, '~/my_tmp/_y_df')
        return None, np.nan, None

    # in-sample forecast (periods=0 => no future rows)
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)
    y_vals = _y_df['y'].copy()  # they will be filtered later
    _y_df['yhat'] = forecast['yhat']
    _y_df['resi'] = _y_df['y'] - _y_df['yhat']

    # use iqr or median filter
    # using Prophet's interval_width does not work as it is a quantile,
    # and about the same number of outliers is always found on avg ~ len * (1 - interval_width)
    upr, lwr = iqr_filter(_y_df['resi'], coef=coef, q_lwr=0.25, q_upr=0.75)  # iqr
    # upr, lwr = median_filter(_y_df['resi'], coef=coef)                     # median filter

    _y_df['yhat_upr'] = forecast['yhat'] + upr
    _y_df['yhat_lwr'] = forecast['yhat'] + lwr
    _y_df.rename(columns={'ds': t_col, 'y': y_col}, inplace=True)

    # no outlier if yhat_lwr <= y <= yhat_upr
    _y_df['is_outlier'] = (y_vals > _y_df['yhat_upr']) | (y_vals < _y_df['yhat_lwr'])
    n_outliers = _y_df['is_outlier'].sum()
    err = np.round(100 * n_outliers / len(_y_df), 0)
    if ignore_dates is None:
        ignore_dates = list()

    off = None
    if n_outliers > 0:
        if verbose is True:
            save_df(_y_df, '~/my_tmp/outliers_DF_' + str(y_col) + '_' + str(lbl_dict))   # no outlier processing yet
        su.my_print(str(os.getpid()) + ' WARNING::column ' + y_col + ' has ' + str(len(_y_df)) +
                    ' rows and ' + str(n_outliers) + ' outliers (' + str(err) + '%) for context ' + str(lbl_dict))
        b_dates = ~_y_df[t_col].isin(ignore_dates)                          # boolean dates adjuster: when true, an outlier on that date can be adjusted
        b_adj = _y_df['is_outlier'] & b_dates                               # boolean outlier adjuster: if true it is an outlier we can adjust
        if replace is False:
            # Blank only the adjustable outliers. The previous arithmetic form
            # (y * (1 - b_adj) + np.nan * b_adj) turned EVERY row into NaN because nan * 0 is nan.
            _y_df[y_col] = y_vals.where(~b_adj)
        else:
            # keep non-outliers, pull high outliers towards yhat_upr and low outliers towards yhat_lwr
            _y_df[y_col] = y_vals * (1 - b_adj) + \
                           (r_val * _y_df['yhat_upr'] + (1.0 - r_val) * _y_df['yhat']) * ((y_vals > _y_df['yhat_upr']) & b_dates) + \
                           (r_val * _y_df['yhat_lwr'] + (1.0 - r_val) * _y_df['yhat']) * ((y_vals < _y_df['yhat_lwr']) & b_dates)

        if verbose is True:    # print outlier info: note that actuals are already filtered at this point
            off = _y_df[b_adj].copy()
            su.my_print('*************** outlier detail ************')
            print(off)
    # drop the working columns before handing the frame back
    _y_df.drop(['resi', 'yhat', 'yhat_upr', 'yhat_lwr', 'is_outlier'], axis=1, inplace=True)
    return _y_df, err, off
    # NOTE(review): this fragment has no visible function header; playoff_dates,
    # data_backup, step and n_test come from code outside this view.

    # Holiday table for Prophet: every date in playoff_dates is labelled
    # 'superbowl' with lower_window=0 and upper_window=1.
    holidays = pd.DataFrame({
        'holiday': 'superbowl',
        'ds': pd.to_datetime(playoff_dates),
        'lower_window': 0,
        'upper_window': 1,
    })

    # Positional split: the first `step` rows train, the next `n_test` rows test.
    train = data_backup.iloc[0:step]
    test = data_backup.iloc[step:step + n_test]
    train.columns = ["ds", "y"]
    test.columns = ["ds", "y"]

    # From here on `test` is just the array of observed 'y' values.
    test = test[["y"]].values
    m = Prophet(changepoint_range=1, interval_width=0.7, holidays=holidays)
    # m.add_regressor('regressor', mode='additive')
    m.fit(train)

    # Daily dates starting the day after the last training date, one per test
    # row. The last 'ds' value is parsed as a '%Y-%m-%d' string.
    future = pd.date_range(
        datetime.datetime.strptime(train.iloc[-1, 0], '%Y-%m-%d') +
        datetime.timedelta(days=1),
        periods=len(test),
        freq='D')
    future = pd.DataFrame({'ds': future})

    preds = m.predict(future)

    # Clamp negative predictions to zero.
    yhats = preds[['yhat']].values
    for i in range(len(yhats)):
        if yhats[i] < 0:
            yhats[i] = 0
import pandas as pd
from fbprophet import Prophet

# Load the history; the 'ds' column holds epoch seconds and is converted to
# datetimes before fitting.
df = pd.read_csv('data/shop.csv')
df['ds'] = pd.to_datetime(df['ds'], unit='s')

# fit() returns the fitted model, so construction and fitting can be chained.
m = Prophet().fit(df)

# 1440 one-minute steps after the end of the history, without the history rows.
future = m.make_future_dataframe(1440, freq='1min', include_history=False)
future.to_csv('container/local_test/payload.csv', header=True)

# NOTE(review): the payload is written under container/local_test/ but read
# back from 'payload.csv' in the working directory -- confirm this is intended.
t_future = pd.read_csv('payload.csv')
forecast = m.predict(t_future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

# Optional plots, left disabled:
# fig1 = m.plot(forecast)
# fig2 = m.plot_components(forecast)
Exemplo n.º 55
0
    # NOTE(review): this fragment has no visible function header and is cut
    # off at the end; m, time_series, prices, snap, weekends, cum7, cum_max,
    # cum_zero, i and start_date come from code outside this view.

    # Optional price regressor, controlled by the price_regressor flag.
    if price_regressor == True:
        m.add_regressor('price')

    m.add_regressor('weekends')
    m.add_regressor('snap')

    # The cumulative regressors are registered with standardize=False.
    m.add_regressor('cum7', standardize=False)
    # m.add_regressor('cum14')
    # m.add_regressor('cum28')
    # m.add_regressor('cum56')
    m.add_regressor('cum_max', standardize=False)
    m.add_regressor('cum_zero', standardize=False)

    # Two extra seasonalities on top of the model's own configuration.
    m.add_seasonality(name='monthly', period=30.5, fourier_order=4)
    m.add_seasonality(name='quarterly', period=91, fourier_order=4)  # new
    m.fit(time_series)
    future = m.make_future_dataframe(periods=28)

    # Regressor values for the prediction frame, taken from row i of the
    # source arrays starting at start_date (or start_date - 1).
    if price_regressor == True:
        future['price'] = prices.iloc[i, start_date:].values

    future['snap'] = snap[i, (start_date) - 1:]
    future['weekends'] = weekends[start_date - 1:]

    # NOTE(review): the cum* columns are written to time_series (after the fit
    # above), not to `future` -- confirm against the truncated remainder.
    time_series['cum7'] = cum7[i, (start_date - 1):]
    # time_series['cum14'] = cum14[i, (start_date-1):]
    # time_series['cum28'] = cum28[i, (start_date-1):]
    # time_series['cum56'] = cum56[i, (start_date-1):]
    time_series['cum_max'] = cum_max[i, (start_date - 1):]
    time_series['cum_zero'] = cum_zero[i, (start_date - 1):]
Exemplo n.º 56
0
                             name="stock_open"))
    fig.add_trace(
        go.Scatter(x=data["Date"], y=data["Close"], name="stock_close"))
    fig.layout.update(title_text="Time Series Data",
                      xaxis_rangeslider_visible=True)
    st.plotly_chart(fig)


plot_raw_data()

# Forecasting with Prophet: it expects the columns 'ds' (date) and 'y' (value),
# so select Date/Close and rename them in one step.
df_train = data[['Date', 'Close']].rename(columns={"Date": "ds", "Close": "y"})

# fit() returns the fitted model.
m = Prophet().fit(df_train)
future = m.make_future_dataframe(periods=period)
forecast = m.predict(future)

# Tail of the forecast table.
st.subheader("Forecast Data")
st.write(forecast.tail())

# Interactive forecast chart.
st.write("Forecast Data")
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)

# Component breakdown produced by Prophet's plot_components.
st.write("Forecast Components")
fig2 = m.plot_components(forecast)
st.write(fig2)
Exemplo n.º 57
0
def create_prophet_m(source_name,z1,delay):
    """Fit a real-time and a back-test Prophet model on ``z1.bw``.

    Two models with identical settings (no yearly seasonality,
    ``changepoint_prior_scale=.2``) are trained:

    * a "real-time" model on the whole series, forecasting ``delay`` hourly
      steps past the end of the data;
    * a back-test model on everything but the last ``delay`` rows, which is
      scored against those held-out rows.

    :param source_name: label used as the index of the metrics frame and in
        the printed report
    :param z1: frame with a ``bw`` column; the index of ``z1.bw`` becomes
        Prophet's ``ds`` column
    :param delay: number of trailing rows held out for testing, and number of
        hourly steps forecast by the real-time model
    :return: tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame (empty when no test row has a prediction),
        ``model``/``forecast`` belong to the back-test fit, ``pred_df`` holds
        test rows joined with their predictions and error columns, and
        ``pred_r`` holds the real-time ``yhat`` values
    """
    import math

    n_rows = len(z1)
    train_end_index = len(z1.bw) - delay

    full_df = z1.bw.iloc[0:n_rows].reset_index()
    full_df.columns = ['ds','y']

    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    train_df.columns = ['ds','y']

    test_df = z1.bw.iloc[train_end_index:n_rows].reset_index()
    test_df.columns = ['ds','y']

    #--- removing outliers in trainset  ---#
    # When the maximum sits at least 2 * median above the median, values at or
    # above 2 * median are blanked (Prophet accepts missing y values).
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    ##-- Realtime prediction --##
    model_r = Prophet(yearly_seasonality=False,changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay,freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Rows after the history are the future steps (positional slice).
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_rows:(n_rows + delay)])
    pred_r = pred_r.reset_index()

    ##-- Back-test model --##
    model = Prophet(yearly_seasonality=False,changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df),freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    # Predictions for the held-out positions, joined to the actuals on 'ds'.
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_rows])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df,pred,on='ds',how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:

        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y,pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test*100/pred_df.y)

        MAPE = pred_df.APE.mean()
        # Error rates are quantiles of the APE column expressed as fractions.
        # Taking the quantile of the whole frame (as before) produced a Series
        # that did not align with index=[source_name] and ended up as NaN.
        min_error_rate = pred_df.APE.quantile(0)/100
        max_error_rate = pred_df.APE.quantile(1)/100
        median_error_rate = pred_df.APE.quantile(.50)/100

        std_MAPE = math.sqrt(((pred_df.APE-MAPE)**2).mean())
        print("App name:",source_name)
        print("MSE  :",MSE)
        print("RMSE :",RMSE)
        print("MAPE :",MAPE)

        # MAPE restricted to errors strictly below the 98th APE percentile.
        q98 = pred_df['APE'].quantile(0.98)
        mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()

        df = pd.DataFrame({'length':n_rows,#'predicted_t':[forcast_lag],
                           'test_rmse':RMSE,
                           'test_mape':MAPE,
                           'std_mape':std_MAPE, #standard deviation of mape
                           'min_error_rate':min_error_rate,
                           'max_error_rate':max_error_rate,
                           'median_error_rate':median_error_rate,
                           'test_mape_98':mape_q98},
                          index=[source_name])

    return(df,model,forecast,pred_df,pred_r)
Exemplo n.º 58
0
from datetime import datetime

# Load the per-location table with the 'date' column parsed and used as index.
us_loc = pd.read_csv(file_source, parse_dates=['date'], index_col=['date'])
us_loc.tail()

# Keep only the rows whose county is "New York City".
new_york = us_loc.loc[us_loc["county"] == "New York City"]
new_york.tail()

# Move the dates out of the index, keep rows after 2020-03-16 and switch to
# the column names Prophet expects.
df = new_york.reset_index()
mask = df['date'] > '2020-03-16'
df = df.loc[mask].rename(columns={'date': 'ds', 'cases': 'y'})

# Fit with MCMC sampling; fit() returns the fitted model.
m = Prophet(mcmc_samples=300).fit(df)
future = m.make_future_dataframe(periods=36, freq='D')
# NOTE(review): the forecast comes from a second, separately fitted model
# (interval_width=0.95), yet it is plotted through `m` -- confirm intent.
forecast = Prophet(interval_width=0.95).fit(df).predict(future)
fig = m.plot_components(forecast)


# Row-to-row change in 'cases' (current value minus the previous row).
new_york_new_cases = new_york['cases'] - new_york['cases'].shift(1)

# Same preparation as above, applied to the differenced series.
df1 = new_york_new_cases.reset_index()
mask = df1['date'] > '2020-03-16'
df1 = df1.loc[mask].rename(columns={'date': 'ds', 'cases': 'y'})

# The prediction step for df1 follows (not visible in this excerpt).
Exemplo n.º 59
0
    def create_prophet_m(self,app_name,z1,delay=24):
        """Fit a real-time and a back-test Prophet model on ``z1.bw``.

        Two models with identical settings (no yearly seasonality,
        ``changepoint_prior_scale=.2``) are trained: one on the whole series,
        forecasting ``delay`` hourly steps past the end of the data, and one on
        everything but the last ``delay`` rows, scored against those rows.
        Before each fit, spikes are blanked by ``_blank_spikes`` below.

        :param app_name: label used as the index of the metrics frame
        :param z1: frame with a ``bw`` column; the index of ``z1.bw`` becomes
            Prophet's ``ds`` column
        :param delay: number of trailing rows held out for testing, and number
            of hourly steps forecast by the real-time model
        :return: tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df``
            is a one-row metrics frame (empty when no test row has a
            prediction), ``model``/``forecast`` belong to the back-test fit,
            ``pred_df`` holds test rows joined with predictions and error
            columns, and ``pred_r`` holds the real-time ``yhat`` values
        """
        import math
        import warnings
        # NOTE: this silences warnings process-wide, not only inside this call.
        warnings.filterwarnings("ignore")
        import pandas as pd
        from fbprophet import Prophet
        from sklearn.metrics import mean_squared_error as mse

        def _blank_spikes(frame):
            """Set y to None, in place, where y >= 2 * q75, if max - median >= 2 * q75."""
            q50 = frame.y.median()
            q100 = frame.y.quantile(1)
            q75 = frame.y.quantile(.75)
            if (q100 - q50) >= (2 * q75):
                frame.loc[frame.y >= (2 * q75), 'y'] = None

        n_rows = len(z1)

        ### --- For realtime pred ---###
        full_df = z1.bw.iloc[0:n_rows].reset_index()
        full_df.columns = ['ds','y']
        _blank_spikes(full_df)

        model_r = Prophet(yearly_seasonality=False,changepoint_prior_scale=.2)
        model_r.fit(full_df)
        future_r = model_r.make_future_dataframe(periods=delay,freq='H')
        forecast_r = model_r.predict(future_r)
        forecast_r.index = forecast_r['ds']
        # Rows after the history are the future steps (positional slice).
        pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_rows:(n_rows + delay)])
        pred_r = pred_r.reset_index()
        #--- completes realtime pred ---#

        ### --- Back-test on the last `delay` rows ---###
        train_end_index = len(z1.bw) - delay
        train_df = z1.bw.iloc[0:train_end_index].reset_index()
        test_df = z1.bw.iloc[train_end_index:n_rows].reset_index()
        train_df.columns = ['ds','y']
        _blank_spikes(train_df)
        test_df.columns = ['ds','y']

        model = Prophet(yearly_seasonality=False,changepoint_prior_scale=.2)
        model.fit(train_df)
        future = model.make_future_dataframe(periods=len(test_df),freq='H')
        forecast = model.predict(future)
        forecast.index = forecast['ds']
        # Predictions for the held-out positions, joined to the actuals on 'ds'.
        pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_rows])
        pred = pred.reset_index()
        pred_df = pd.merge(test_df,pred,on='ds',how='left')
        pred_df.dropna(inplace=True)

        df = pd.DataFrame()

        if len(pred_df) > 0:

            pred_df['error_test'] = pred_df.y - pred_df.yhat

            MSE = mse(pred_df.y,pred_df.yhat)
            RMSE = math.sqrt(MSE)
            pred_df['APE'] = abs(pred_df.error_test*100/pred_df.y)
            MAPE = pred_df.APE.mean()

            # MAPE restricted to errors strictly below the 98th APE percentile.
            mape_q98 = pred_df['APE'][pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

            df = pd.DataFrame({'length':n_rows,#'predicted_t':[forcast_lag],
                               'test_rmse':RMSE,
                               'test_mape':MAPE,
                               'test_mape_98':mape_q98},
                              index=[app_name])

        return(df,model,forecast,pred_df,pred_r)
Exemplo n.º 60
0
# Total item_cnt_day per date_block_num.
ts = sales.groupby("date_block_num")["item_cnt_day"].sum()
print("ts")
print(ts.head())

# Prophet accepts a frame with a date column (ds) and a value column (y).
# Replace the block numbers with month-start dates, then turn the series into
# a two-column frame carrying those names.
ts.index = pd.date_range(start='2013-01-01', end='2015-10-01', freq='MS')
ts = ts.rename_axis('ds').reset_index(name='y')
print("before modeling")
print(ts.head())

# Define the time-series model: linear growth with yearly seasonality enabled.
model = Prophet(growth='linear', yearly_seasonality=True)
model.fit(ts)

# Forecast one month-start step beyond the history (2015-11 given the date
# range above).
future = model.make_future_dataframe(periods=1, freq='MS')
forecast = model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

print("forecast")
print(forecast.head())

# The last forecast row is the predicted period; scale each row's
# "percentage" by that total.
forecast_value = forecast['yhat'].iloc[-1]
sales_201411["result"] = sales_201411["percentage"] * forecast_value
print("calculation")
print(sales_201411.head())