def test_cross_validation(self):
    # Checks the cutoffs, horizon, spacing and y values that
    # diagnostics.cross_validation returns for a fitted model.
    model = Prophet()
    model.fit(self.__df)
    # Calculate the number of cutoff points(k)
    horizon, period, initial = (
        pd.Timedelta(span) for span in ('4 days', '10 days', '115 days')
    )
    cv_115 = diagnostics.cross_validation(
        model, horizon='4 days', period='10 days', initial='115 days')
    cutoffs = cv_115['cutoff']
    self.assertEqual(len(np.unique(cutoffs)), 3)
    self.assertEqual(max(cv_115['ds'] - cutoffs), horizon)
    self.assertTrue(min(cutoffs) >= min(self.__df['ds']) + initial)
    # Smallest strictly positive step between consecutive cutoff rows.
    gaps = cutoffs.diff()
    smallest_gap = gaps[gaps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_gap >= period)
    self.assertTrue((cutoffs < cv_115['ds']).all())
    # Each y in df_cv and self.__df with same ds should be equal
    joined = pd.merge(cv_115, self.__df, 'left', on='ds')
    squared_diff = (joined['y_x'] - joined['y_y']) ** 2
    self.assertAlmostEqual(np.sum(squared_diff), 0.0)
    cv_135 = diagnostics.cross_validation(
        model, horizon='4 days', period='10 days', initial='135 days')
    self.assertEqual(len(np.unique(cv_135['cutoff'])), 1)
    with self.assertRaises(ValueError):
        diagnostics.cross_validation(
            model, horizon='10 days', period='10 days', initial='140 days')
def test_performance_metrics(self):
    # Checks row counts and column sets of performance_metrics for
    # several rolling_window settings and for a custom metric list.
    model = Prophet()
    model.fit(self.__df)
    cv_result = diagnostics.cross_validation(
        model, horizon='4 days', period='10 days', initial='90 days')

    # Aggregation level none
    unaggregated = diagnostics.performance_metrics(cv_result, rolling_window=0)
    all_columns = {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'}
    self.assertEqual(set(unaggregated.columns), all_columns)
    self.assertEqual(unaggregated.shape[0], 16)

    # Aggregation level 0.2
    windowed = diagnostics.performance_metrics(cv_result, rolling_window=0.2)
    self.assertEqual(len(windowed['horizon'].unique()), 4)
    self.assertEqual(windowed.shape[0], 14)

    # Aggregation level all
    aggregated = diagnostics.performance_metrics(cv_result, rolling_window=1)
    self.assertEqual(aggregated.shape[0], 1)
    for metric in ['mse', 'mape', 'mae', 'coverage']:
        self.assertEqual(
            aggregated[metric].values[0], unaggregated[metric].mean())

    # Custom list of metrics
    custom = diagnostics.performance_metrics(
        cv_result, metrics=['coverage', 'mse'])
    self.assertEqual(set(custom.columns), {'coverage', 'mse', 'horizon'})
def get_predictions(validate, train):
    """Forecast unit sales with Prophet for a fixed list of items in store 47.

    For each example item present in ``validate`` for store 47, the last
    ``TRAIN_SIZE`` rows of that item's training history (after
    ``fill_missing_date`` and sorting by date) are fitted with Prophet, the
    forecast is extended by ``CV_SIZE`` periods, and predictions are joined
    to the item's validation rows on date.

    Args:
        validate: DataFrame; the code reads its ``store_nbr``, ``item_nbr``,
            ``date`` and ``id`` columns.
        train: DataFrame; the code reads its ``item_nbr``, ``store_nbr``,
            ``date`` and ``unit_sales`` columns.

    Returns:
        tuple: ``(result, problem_pairs)`` where ``result`` is a DataFrame
        with columns ``id`` and ``unit_sales`` (predictions clipped to
        ``[0, 999999]``, NaN replaced by 0) and ``problem_pairs`` is a list of
        ``(item_nbr, store_nbr)`` tuples for which ``Prophet.fit`` raised
        ``ValueError``.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327,
                     1239746, 765520, 1463867, 1010755, 1473396]

    # Loop-invariant: the full set of training dates is the same for every item.
    total_dates = train['date'].unique()

    store47examples = validate.loc[
        (validate.store_nbr == store_nbr)
        & (validate.item_nbr.isin(example_items))
    ]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    frames = []
    problem_pairs = []
    # Group by a scalar key so `name` is a scalar on every pandas version
    # (a one-element list key yields tuple group names on newer pandas).
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])

        X = df[-TRAIN_SIZE:][['date', 'unit_sales']]
        X.columns = ['ds', 'y']

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        # The inner merge keeps only the dates present in the validation rows.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # Concatenate once instead of growing a DataFrame inside the loop.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
def test_logistic_floor(self):
    # Fits two logistic-growth models, the second on data shifted up by 10
    # (y, floor and cap), and checks the forecasts agree after un-shifting.
    base_model = Prophet(growth='logistic')
    half = DATA.shape[0] // 2
    history = DATA.head(half).copy()
    history['floor'] = 10.
    history['cap'] = 80.
    future = DATA.tail(half).copy()
    future['cap'] = 80.
    future['floor'] = 10.
    base_model.fit(history, algorithm='Newton')
    self.assertTrue(base_model.logistic_floor)
    self.assertTrue('floor' in base_model.history)
    self.assertAlmostEqual(base_model.history['y_scaled'][0], 1.)
    base_forecast = base_model.predict(future)

    shifted_model = Prophet(growth='logistic')
    shifted_history = history.copy()
    for column in ('y', 'floor', 'cap'):
        shifted_history[column] += 10.
    # `future` is shifted in place only after the first prediction was made.
    for column in ('cap', 'floor'):
        future[column] += 10.
    shifted_model.fit(shifted_history, algorithm='Newton')
    self.assertAlmostEqual(shifted_model.history['y_scaled'][0], 1.)
    shifted_forecast = shifted_model.predict(future)
    shifted_forecast['yhat'] -= 10.
    # Check for approximate shift invariance
    deviation = np.abs(base_forecast['yhat'] - shifted_forecast['yhat'])
    self.assertTrue((deviation < 1).all())
def run():
    """Print running totals from the ledger journal, then fit and plot a Prophet model.

    The first part iterates over every posting of ``./secret/ledger.dat``.
    While consecutive postings share a date, postings whose commodity is "£"
    are accumulated into ``amount``; when the date changes, the new posting's
    date and the accumulated amount are printed and the accumulator is reset.

    The second part reads ``./testing.csv``, multiplies ``y`` by 100, fits
    Prophet and plots the forecast and its components.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0
    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            if str(post.amount.commodity) != "£":
                # Skipped postings do not update `last_post`.
                continue
            amount = amount + post.amount
        else:
            # Same text as the Python 2 statement `print post.date, ",", amount`,
            # but valid on both Python 2 and Python 3.
            print("%s , %s" % (post.date, amount))
            # NOTE(review): the posting that triggers the print is not added
            # to the new total, and the final date's total is never printed —
            # confirm whether that is intended.
            amount = 0
        last_post = post

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])
    m = Prophet()
    m.fit(df)
    # NOTE(review): `future` is not defined in this function; presumably a
    # module-level name (e.g. from make_future_dataframe) — verify.
    forecast = m.predict(future)
    m.plot(forecast)
    m.plot_components(forecast)
def test_fit_changepoint_not_in_history(self):
    # Fit/predict smoke test where the user-specified changepoint
    # ('2013-06-06') lies inside a gap removed from the training history.
    outside_gap = (DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')
    # Work on a copy so the shared DATA fixture is never modified.
    train = DATA[outside_gap].copy()
    # Shift only the observations after the gap; the previous
    # `train[mask] += 20` added 20 to every column, including the dates.
    train.loc[train['ds'] > '2014-01-01', 'y'] += 20
    future = pd.DataFrame({'ds': DATA['ds']})
    forecaster = Prophet(changepoints=['2013-06-06'])
    forecaster.fit(train)
    forecaster.predict(future)
def prophetForecast(rawData, startDate, modelDir, partitions):
    """Fit Prophet on hourly data and return its cross-validated forecast.

    ``rawData`` is loaded into a frame with columns "y" and "temp" and given
    hourly timestamps starting at ``startDate``. The input frame and the
    cross-validation output are written to ``prophetin.csv`` and
    ``prophetout.csv`` inside ``modelDir``.

    Returns a tuple of three lists: yhat, yhat_lower, yhat_upper.
    """
    from fbprophet import Prophet
    from fbprophet.diagnostics import cross_validation

    n_partitions = int(partitions)
    model = Prophet()

    # Build the Prophet input frame with one timestamp per raw row.
    hourly_index = pd.date_range(start=startDate, periods=len(rawData), freq="H")
    input_df = pd.DataFrame(rawData, columns=["y", "temp"])
    input_df["ds"] = hourly_index.to_pydatetime()
    input_df.to_csv(pJoin(modelDir, "prophetin.csv"))

    with suppress_stdout_stderr():
        model.fit(input_df)

    # Horizon and period are one partition long; the initial window is the
    # remainder left after dividing the series into whole partitions.
    total_hours = len(input_df.ds)
    partition_hours, leftover_hours = divmod(total_hours, n_partitions)

    # cross_validation takes its durations as strings such as "24 hours".
    span = "%d hours" % partition_hours
    out_df = cross_validation(
        model,
        initial="%d hours" % leftover_hours,
        horizon=span,
        period=span,
    )
    out_df.to_csv(pJoin(modelDir, "prophetout.csv"))

    yhat = list(out_df.yhat)
    lower = list(out_df.yhat_lower)
    upper = list(out_df.yhat_upper)
    return (yhat, lower, upper)
def test_fit_predict(self):
    # Smoke test: fit on the first half of DATA, predict on the last half.
    half = DATA.shape[0] // 2
    history, holdout = DATA.head(half), DATA.tail(half)
    model = Prophet()
    model.fit(history)
    model.predict(holdout)
def test_fit_predict_no_seasons(self):
    # Smoke test: fit/predict with weekly and yearly seasonality disabled.
    half = DATA.shape[0] // 2
    history, holdout = DATA.head(half), DATA.tail(half)
    model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
    model.fit(history)
    model.predict(holdout)
def test_fit_predict_no_changepoints(self):
    # Smoke test: fit/predict with n_changepoints=0.
    half = DATA.shape[0] // 2
    history, holdout = DATA.head(half), DATA.tail(half)
    model = Prophet(n_changepoints=0)
    model.fit(history)
    model.predict(holdout)
def test_fit(self):
    # Smoke test: fitting a two-row history with mcmc_samples=1 must not raise.
    dates = np.array(['2012-05-18', '2012-05-20'])
    values = np.array([38.23, 21.25])
    two_points = pd.DataFrame({'ds': dates, 'y': values})
    model = Prophet(mcmc_samples=1)
    model.fit(two_points)
def test_fit_with_holidays(self):
    # Smoke test: fit and predict with a two-date holiday table
    # (window 0..+1 day) and uncertainty sampling switched off.
    holiday_dates = pd.to_datetime(['2012-06-06', '2013-06-06'])
    holidays = pd.DataFrame({
        'ds': holiday_dates,
        'holiday': ['seans-bday', 'seans-bday'],
        'lower_window': [0, 0],
        'upper_window': [1, 1],
    })
    model = Prophet(holidays=holidays, uncertainty_samples=0)
    fitted = model.fit(DATA)
    fitted.predict()
def test_subdaily_holidays(self):
    # Fits DATA2 with a single holiday and counts the forecast rows whose
    # 'special_day' component is exactly zero.
    special = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
    })
    model = Prophet(holidays=special)
    model.fit(DATA2)
    forecast = model.predict()
    rows_without_effect = (forecast['special_day'] == 0).sum()
    self.assertEqual(rows_without_effect, 575)
def test_fit_predict_duplicates(self):
    # Smoke test: the training frame contains every date twice (the second
    # copy with y shifted by 10); fit and predict must still run.
    half = DATA.shape[0] // 2
    train1 = DATA.head(half).copy()
    train2 = DATA.head(half).copy()
    train2['y'] += 10
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
    # indices are kept as-is, exactly like append's default.
    train = pd.concat([train1, train2])
    future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
    forecaster = Prophet()
    forecaster.fit(train)
    forecaster.predict(future)
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """build a forecast for publishing

    Note:
        ``data['date']`` is converted to datetime in place, so the caller's
        frame is modified.

    Args:
        data (:obj:`pandas.data_frame`): data to build prediction; the code
            reads its ``date`` and ``avgPrice`` columns
        forecast_range (int): how much time into the future to forecast
            (passed as ``periods`` to ``make_future_dataframe``)
        truncate_range (int, optional): when > 0, keep only rows newer than
            ``truncate_range`` days before the last observed date

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']

    """
    data['date'] = pd.to_datetime(data['date'])
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict once and reuse the result; the earlier code ran the prediction
    # twice and discarded the first result.
    forecast = model.predict(future)
    predict_df = pd.merge(
        predict_df, forecast,
        on='ds',
        how='right'
    )

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    # Rows after the last observed date are forecasts, not history.
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
def test_cross_validation_default_value_check(self):
    # Compares cross_validation without `initial` against an explicit
    # initial='96 days' for a 32-day horizon.
    model = Prophet()
    model.fit(self.__df)
    # Default value of initial should be equal to 3 * horizon
    implicit = diagnostics.cross_validation(
        model, horizon='32 days', period='10 days')
    explicit = diagnostics.cross_validation(
        model, horizon='32 days', period='10 days', initial='96 days')
    for column in ('y', 'yhat'):
        squared_gap = (implicit[column] - explicit[column]) ** 2
        self.assertAlmostEqual(squared_gap.sum(), 0.0)
def hello():
    """Fit Prophet on log-transformed data from ``url`` and return the forecast as JSON.

    Prints a greeting, reads a CSV, replaces ``y`` by its natural log, fits a
    default Prophet model and forecasts 365 periods past the history.

    Returns:
        str: the forecast frame serialised with ``to_json(orient='table')``.
    """
    print('Hello, world!')
    # NOTE(review): `url` is not defined in this function; presumably a
    # module-level name — verify.
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])
    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    return forecast.to_json(orient='table')
def test_custom_seasonality(self):
    # Covers add_seasonality: the stored spec, rejected names, and how
    # prior scales and modes show up in make_all_seasonality_features.
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
        'prior_scale': [4.],
    })
    model = Prophet(holidays=holidays)
    model.add_seasonality(name='monthly', period=30, fourier_order=5,
                          prior_scale=2.)
    expected_monthly = {
        'period': 30,
        'fourier_order': 5,
        'prior_scale': 2.,
        'mode': 'additive',
    }
    self.assertEqual(model.seasonalities['monthly'], expected_monthly)
    # These names must be rejected with ValueError.
    for taken_name in ('special_day', 'trend'):
        with self.assertRaises(ValueError):
            model.add_seasonality(name=taken_name, period=30, fourier_order=5)
    model.add_seasonality(name='weekly', period=30, fourier_order=5)

    # Test priors
    model = Prophet(
        holidays=holidays, yearly_seasonality=False,
        seasonality_mode='multiplicative',
    )
    model.add_seasonality(name='monthly', period=30, fourier_order=5,
                          prior_scale=2., mode='additive')
    model.fit(DATA.copy())
    self.assertEqual(model.seasonalities['monthly']['mode'], 'additive')
    self.assertEqual(model.seasonalities['weekly']['mode'], 'multiplicative')
    features = model.make_all_seasonality_features(model.history)
    seasonal_features, prior_scales, component_cols, modes = features
    expected_counts = [
        ('monthly', 10),
        ('special_day', 1),
        ('weekly', 6),
        ('additive_terms', 10),
        ('multiplicative_terms', 7),
    ]
    for component, count in expected_counts:
        self.assertEqual(sum(component_cols[component]), count)

    # The column order decides which block of prior scales comes first.
    monthly_first = seasonal_features.columns[0] == 'monthly_delim_1'
    if monthly_first:
        expected_scales = [2.] * 10 + [10.] * 6 + [4.]
        self.assertEqual(sum(component_cols['monthly'][:10]), 10)
        self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
    else:
        expected_scales = [10.] * 6 + [2.] * 10 + [4.]
        self.assertEqual(sum(component_cols['weekly'][:6]), 6)
        self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
    self.assertEqual(prior_scales, expected_scales)
def test_auto_yearly_seasonality(self):
    # Checks when a 'yearly' entry appears in m.seasonalities after fit.

    def yearly_spec(fourier_order, prior_scale):
        # Expected seasonalities['yearly'] entry for the given settings.
        return {
            'period': 365.25,
            'fourier_order': fourier_order,
            'prior_scale': prior_scale,
            'mode': 'additive',
        }

    # Should be enabled
    model = Prophet()
    self.assertEqual(model.yearly_seasonality, 'auto')
    model.fit(DATA)
    self.assertIn('yearly', model.seasonalities)
    self.assertEqual(model.seasonalities['yearly'], yearly_spec(10, 10.))

    # Should be disabled due to too short history
    short_history = DATA.head(240)
    model = Prophet()
    model.fit(short_history)
    self.assertNotIn('yearly', model.seasonalities)

    model = Prophet(yearly_seasonality=True)
    model.fit(short_history)
    self.assertIn('yearly', model.seasonalities)

    model = Prophet(yearly_seasonality=7, seasonality_prior_scale=3.)
    model.fit(DATA)
    self.assertEqual(model.seasonalities['yearly'], yearly_spec(7, 3.))
def test_fit_predict_constant_history(self):
    # With a constant history (20, then 0) the last forecast value must
    # equal that constant.
    half = DATA.shape[0] // 2
    train = DATA.head(half).copy()
    for constant in (20, 0):
        train['y'] = constant
        future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
        model = Prophet()
        model.fit(train)
        forecast = model.predict(future)
        self.assertEqual(forecast['yhat'].values[-1], constant)
def test_cross_validation_extra_regressors(self):
    # Runs cross_validation on a model with a custom seasonality and an
    # extra regressor, then checks cutoffs and the returned y values.
    history = self.__df.copy()
    history['extra'] = range(history.shape[0])
    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_regressor('extra')
    model.fit(history)
    cv_result = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    cutoffs = cv_result['cutoff']
    self.assertEqual(len(np.unique(cutoffs)), 2)
    period = pd.Timedelta('4 days')
    # Smallest strictly positive step between consecutive cutoff rows.
    gaps = cutoffs.diff()
    smallest_gap = gaps[gaps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_gap >= period)
    self.assertTrue((cutoffs < cv_result['ds']).all())
    # y in the CV output must match the original history for the same ds.
    joined = pd.merge(cv_result, self.__df, 'left', on='ds')
    squared_diff = (joined['y_x'] - joined['y_y']) ** 2
    self.assertAlmostEqual(np.sum(squared_diff), 0.0)
def test_make_future_dataframe(self):
    # make_future_dataframe(include_history=False) must return exactly the
    # three expected dates for daily ('D') and month-end ('M') frequency.
    train = DATA.head(468 // 2)
    forecaster = Prophet()
    forecaster.fit(train)
    cases = [
        ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
        ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
    ]
    for freq, expected_dates in cases:
        future = forecaster.make_future_dataframe(
            periods=3, freq=freq, include_history=False)
        correct = pd.DatetimeIndex(expected_dates)
        self.assertEqual(len(future), 3)
        for position, expected in enumerate(correct):
            self.assertEqual(future.iloc[position]['ds'], expected)
def train_prophet(df, modelDir, confidence=0.99):
    """Fit Prophet on ``df``, predict over the history and cache the result as CSV.

    Args:
        df: training frame passed to ``Prophet.fit``; its ``y`` column is
            also copied into the returned forecast.
        modelDir: directory that receives ``forecasted_<confidence>.csv``.
        confidence: value passed to Prophet as ``interval_width``.

    Returns:
        The forecast DataFrame from ``Prophet.predict`` with an added ``y``
        column.
    """
    # train and cache into modelDir
    m = Prophet(
        yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
    )
    with suppress_stdout_stderr():
        m.fit(df)

    # Predict the future.
    # Function-call form prints the same text on Python 2 and Python 3.
    print("PREDICTING!")
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)

    # Merge in the historical data.
    # NOTE(review): this assignment aligns on index labels; if `df` does not
    # carry the same index as `forecast`, rows may be filled with NaN — confirm.
    forecast["y"] = df.y.astype(float)

    # Backup the model.
    forecast.to_csv(
        pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
    )
    return forecast
def test_auto_weekly_seasonality(self):
    # Checks when a 'weekly' entry appears in m.seasonalities after fit.

    def weekly_spec(fourier_order, prior_scale):
        # Expected seasonalities['weekly'] entry for the given settings.
        return {
            'period': 7,
            'fourier_order': fourier_order,
            'prior_scale': prior_scale,
            'mode': 'additive',
        }

    # Should be enabled
    fifteen_rows = DATA.head(15)
    model = Prophet()
    self.assertEqual(model.weekly_seasonality, 'auto')
    model.fit(fifteen_rows)
    self.assertIn('weekly', model.seasonalities)
    self.assertEqual(model.seasonalities['weekly'], weekly_spec(3, 10.))

    # Should be disabled due to too short history
    nine_rows = DATA.head(9)
    model = Prophet()
    model.fit(nine_rows)
    self.assertNotIn('weekly', model.seasonalities)

    model = Prophet(weekly_seasonality=True)
    model.fit(nine_rows)
    self.assertIn('weekly', model.seasonalities)

    # Should be False due to weekly spacing
    every_seventh_row = DATA.iloc[::7, :]
    model = Prophet()
    model.fit(every_seventh_row)
    self.assertNotIn('weekly', model.seasonalities)

    model = Prophet(weekly_seasonality=2, seasonality_prior_scale=3.)
    model.fit(DATA)
    self.assertEqual(model.seasonalities['weekly'], weekly_spec(2, 3.))
def test_auto_weekly_seasonality(self):
    # Checks the value of m.weekly_seasonality after fit for several
    # histories.

    # Should be True
    fifteen_rows = DATA.head(15)
    model = Prophet()
    self.assertEqual(model.weekly_seasonality, 'auto')
    model.fit(fifteen_rows)
    self.assertEqual(model.weekly_seasonality, True)

    # Should be False due to too short history
    nine_rows = DATA.head(9)
    model = Prophet()
    model.fit(nine_rows)
    self.assertEqual(model.weekly_seasonality, False)

    model = Prophet(weekly_seasonality=True)
    model.fit(nine_rows)
    self.assertEqual(model.weekly_seasonality, True)

    # Should be False due to weekly spacing
    every_seventh_row = DATA.iloc[::7, :]
    model = Prophet()
    model.fit(every_seventh_row)
    self.assertEqual(model.weekly_seasonality, False)
def test_auto_yearly_seasonality(self):
    # Checks the value of m.yearly_seasonality after fit for the full
    # history and for the first 240 rows.

    # Should be True
    model = Prophet()
    self.assertEqual(model.yearly_seasonality, 'auto')
    model.fit(DATA)
    self.assertEqual(model.yearly_seasonality, True)

    # Should be False due to too short history
    short_history = DATA.head(240)
    model = Prophet()
    model.fit(short_history)
    self.assertEqual(model.yearly_seasonality, False)

    model = Prophet(yearly_seasonality=True)
    model.fit(short_history)
    self.assertEqual(model.yearly_seasonality, True)
def test_fit_predict_with_append_holidays(self):
    # Smoke tests for the append_holidays option ('US'), with and without
    # an explicit holidays table.
    custom_holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday', 'seans-bday'],
        'lower_window': [0, 0],
        'upper_window': [1, 1],
    })
    append_holidays = 'US'

    # Test with holidays and append_holidays
    model = Prophet(holidays=custom_holidays,
                    append_holidays=append_holidays,
                    uncertainty_samples=0)
    model.fit(DATA).predict()

    scenarios = [
        # There are training holidays missing in the test set
        (DATA.head(154), DATA.tail(355)),
        # There are test holidays missing in the training set
        (DATA.tail(355), DATA2),
    ]
    for train, future in scenarios:
        model = Prophet(append_holidays=append_holidays,
                        uncertainty_samples=0)
        model.fit(train).predict(future)
def test_copy(self):
    # Verifies that Prophet.copy() carries constructor settings over, and
    # that copy(cutoff=...) truncates changepoints while keeping custom
    # seasonalities and extra regressors.
    df = DATA.copy()
    df['cap'] = 200.
    df['binary_feature'] = [0] * 255 + [1] * 255
    # These values are created except for its default values
    holiday = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['x'],
    })
    argument_grid = itertools.product(
        ['linear', 'logistic'],  # growth
        [None, pd.to_datetime(['2016-12-25'])],  # changepoints
        [3],  # n_changepoints
        [True, False],  # yearly_seasonality
        [True, False],  # weekly_seasonality
        [True, False],  # daily_seasonality
        [None, holiday],  # holidays
        [1.1],  # seasonality_prior_scale
        [1.1],  # holidays_prior_scale
        [0.1],  # changepoint_prior_scale
        [100],  # mcmc_samples
        [0.9],  # interval_width
        [200]  # uncertainty_samples
    )
    seasons = ('yearly', 'weekly', 'daily')
    # Values should be copied correctly
    for positional_args in argument_grid:
        original = Prophet(*positional_args)
        original.history = original.setup_dataframe(
            df.copy(), initialize_scales=True)
        original.set_auto_seasonalities()
        clone = original.copy()
        for attribute in ('growth', 'n_changepoints', 'changepoints'):
            self.assertEqual(getattr(original, attribute),
                             getattr(clone, attribute))
        # The copy's seasonality flags are expected to be False; the
        # original's flags must match the copy's seasonalities entries.
        for season in seasons:
            self.assertEqual(False, getattr(clone, season + '_seasonality'))
        for season in seasons:
            self.assertEqual(getattr(original, season + '_seasonality'),
                             season in clone.seasonalities)
        if original.holidays is None:
            self.assertEqual(original.holidays, clone.holidays)
        else:
            self.assertTrue(
                (original.holidays == clone.holidays).values.all())
        for attribute in ('seasonality_prior_scale',
                          'changepoint_prior_scale',
                          'holidays_prior_scale',
                          'mcmc_samples',
                          'interval_width',
                          'uncertainty_samples'):
            self.assertEqual(getattr(original, attribute),
                             getattr(clone, attribute))

    # Check for cutoff and custom seasonality and extra regressors
    changepoints = pd.date_range('2012-06-15', '2012-09-15')
    cutoff = pd.Timestamp('2012-07-25')
    original = Prophet(changepoints=changepoints)
    original.add_seasonality('custom', 10, 5)
    original.add_regressor('binary_feature')
    original.fit(df)
    clone = original.copy(cutoff=cutoff)
    kept_changepoints = changepoints[changepoints <= cutoff]
    self.assertTrue((kept_changepoints == clone.changepoints).all())
    self.assertTrue('custom' in clone.seasonalities)
    self.assertTrue('binary_feature' in clone.extra_regressors)
# Build the holiday table from the state and school holiday date lists.
state = pd.DataFrame({
    'holiday': 'state_holiday',
    'ds': pd.to_datetime(state_dates),
})
school = pd.DataFrame({
    'holiday': 'school_holiday',
    'ds': pd.to_datetime(school_dates),
})
holidays = pd.concat([state, school])
holidays.head()

# Use a 95% uncertainty interval (the Prophet default is 80%).
my_model = Prophet(
    interval_width=0.95,
    holidays=holidays,
    daily_seasonality=True,
)
my_model.fit(sales)

# Frame of dates extending six weeks (42 periods) past the history.
future_dates = my_model.make_future_dataframe(periods=42)

# Predictions
forecast = my_model.predict(future_dates)

# Predictions for the last week
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(7)

# Visualizing predictions
my_model.plot(forecast)
my_model.plot_components(forecast)
df = pd.read_json(enc)
# Rename the JSON columns to the names Prophet expects and sort chronologically
df = df.rename(columns={'date': 'ds', 'rate': 'y'})
df['ds'] = pd.to_datetime(df['ds'])
df = df.sort_values('ds')

# Choose the forecasting model
from fbprophet import Prophet
model = Prophet(yearly_seasonality=True, weekly_seasonality=True,
                daily_seasonality=True)

# Load df into the forecasting model
model.fit(df)
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)

# Output the forecast for 30 days ahead and today's price
# Date 30 days ahead
ds_f = pd.to_datetime(forecast['ds'].tail(1).values[0]).strftime('%Y/%m/%d')
# Convert to unicode for DB storage. Only byte strings (Python 2 `str`) have
# .decode; on Python 3 the value is already text and is used as-is.
ds_f_unicode = ds_f.decode('unicode-escape') if isinstance(ds_f, bytes) else ds_f

# Forecast price 30 days ahead (no label)
f = forecast['yhat'].tail(1).values[0]
f_str = '{0:.1f}'.format(f)  # format as a string with one decimal place
# Convert to unicode for DB storage (same guard as above)
f_unicode = f_str.decode('unicode-escape') if isinstance(f_str, bytes) else f_str

# Price on the day of the calculation (no label)
today = df['y'].tail(1).values[0]
def predict(index):
    """Plot one series with its linear-regression and Prophet forecasts.

    Row ``index`` of the module-level ``final_data`` is forecast twice: with
    a ``LinearRegression`` fitted on ``float_index`` and with Prophet fitted
    on ``date_index``. The observed series, both forecasts and Prophet's
    lower/upper bounds are drawn in a Plotly figure.

    Args:
        index: positional row index into ``final_data``.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    timeseries = pd.DataFrame(final_data.iloc[index])

    # Linear Regression
    reg = LinearRegression().fit(float_index.reshape(-1, 1),
                                 timeseries.to_numpy())
    linear_pred = pd.DataFrame(data=reg.predict(predict_time),
                               columns=timeseries.columns)
    linear_pred.index = future_years

    # Prophet
    model = Prophet()
    new_timeseries = pd.DataFrame(columns=['ds', 'y'])
    new_timeseries['y'] = timeseries.iloc[:, 0]
    new_timeseries['ds'] = date_index
    model.fit(new_timeseries)
    future_timeseries = pd.DataFrame(columns=['ds', 'y'])
    future_timeseries['ds'] = list(map(quarter_to_date, future_years))
    out_sample_forecast = model.predict(future_timeseries)

    def forecast_frame(column):
        # One forecast column as a frame shaped like `timeseries`.
        frame = pd.DataFrame(
            out_sample_forecast[column].to_numpy().flatten(),
            columns=timeseries.columns)
        frame.index = future_years
        return frame

    prophet_pred = forecast_frame('yhat')
    prophet_pred_lower = forecast_frame('yhat_lower')
    prophet_pred_upper = forecast_frame('yhat_upper')

    # Visualize
    fig = go.Figure()
    fig.update_layout(plot_bgcolor='rgb(255,255,255)')
    present_time = timeseries.index.values
    future_time = np.append(['2020Q2*'], future_years)
    fig.add_trace(
        go.Scatter(
            x=present_time,
            y=timeseries.values.flatten(),
            name=timeseries.columns[0][0],
            line=dict(color='black', width=4)
        ))

    # Every forecast line starts at the last observed point so it connects
    # to the historical curve.
    last_observed = timeseries.tail(1)
    forecast_traces = [
        ('Linear', linear_pred, dict(color='blue', width=4)),
        ('Prophet', prophet_pred, dict(color='red', width=4)),
        ('Prophet_lower', prophet_pred_lower,
         dict(color='gray', width=2, dash='dash')),
        ('Prophet_upper', prophet_pred_upper,
         dict(color='gray', width=2, dash='dash')),
    ]
    for trace_name, prediction, line_style in forecast_traces:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        joined = pd.concat([last_observed, prediction])
        fig.add_trace(
            go.Scatter(
                x=future_time,
                y=joined.values.flatten(),
                name=trace_name,
                line=line_style,
            ))
    return fig.show()
stock_return.head() stock_return.plot(grid=True).axhline(y=1, color="black", lw=2) stock_change = graph.apply( lambda x: np.log(x) - np.log(x.shift(1))) # shift moves dates back by 1. stock_change.head() stock_change.plot(grid=True).axhline(y=0, color="black", lw=2) graph["20d"] = np.round(graph["Close"].rolling(window=20, center=False).mean(), 2) df = pd.DataFrame() df['ds'] = stock_return.index df['y'] = graph['Close'].apply(lambda x: np.log(x)).values df.tail() m0 = Prophet(yearly_seasonality=True) m0.fit(df) #how many days in the future to show predictions for n_add = 100 print("adding {n} days.".format(n=n_add)) future = m0.make_future_dataframe(periods=n_add) future.tail() forecast = m0.predict(future) forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() forcast = m0.plot(forecast, ylabel='$\ln($stock_return$)$') forcast.savefig('/home/ubuntu/Desktop/TelegramBot/charts/RUBforcast.jpeg', dpi=400, bbox_inches='tight') trend = m0.plot_components(forecast) trend.savefig('/home/ubuntu/Desktop/TelegramBot/charts/RUBtrend.jpeg',
def Table_generator():
    """Build a per-shop table of 52 weekly sales forecasts and write it to df.json.

    The function reads the weekly sales CSV twice and handles shops in two
    groups, split on the number of rows each shop has with
    ``sales_status != 0``:

    * fewer than 52 rows: weekly means of item_sold / transactions / sales
      are computed per week number, gaps are filled from rows selected via
      k-means clustering on sales, and a linear regression on the two mean
      columns produces ``forecasted_sales``;
    * 52 rows or more: Prophet is fitted on log(sales_amount) and the
      forecast is transformed back with ``exp``.

    Both result tables are stacked, a ``last_date`` and ``shop_id`` column
    are inserted, and the table is written to ``df.json``.

    The function takes no arguments and has no ``return`` statement; the
    final table is only bound to the local name ``memval2``.

    NOTE(review): the block structure below was reconstructed from
    whitespace-collapsed source; confirm the loop boundaries against the
    original file.
    """
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt
    from pylab import rcParams
    from sklearn import linear_model
    from fbprophet import Prophet
    from numpy import inf
    filepath = ('weekly sales and labour cost for all shops 2013 to 20177.csv')
    df = pd.read_csv(filepath)
    # Keep only rows whose sales_status is non-zero.
    d_f2 = df[df.sales_status != 0]
    # df2.week_no.isnull().values.any()
    nulldetect = d_f2.week_no.isnull()
    # nulldetect[nulldetect==True].index
    # Missing week numbers become 54, i.e. 52 after the shift by 2 below.
    # NOTE(review): chained assignment on a filtered frame; pandas may apply
    # it to a copy — confirm it takes effect.
    d_f2.week_no.loc[nulldetect == True] = 54
    d_f2['week_no'] = d_f2.week_no - 2
    # For every row, the number of rows belonging to that row's shop.
    len_week1 = []
    for i in d_f2.shop_id:
        len_week = len(d_f2.week_no[d_f2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    #len_week2
    d = {'shop_id': d_f2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()
    # Result table for the short-history shops: rows are weeks 1..52,
    # columns are shop ids.
    dtt = pd.DataFrame(index=list(range(1, 53)), columns=d2.shop_id[d2.len_of_weeks < 52].values)
    # table = pd.DataFrame(columns=['shop_id','week_no','dates','forecasted_sales'])
    for uu in d2.shop_id[d2.len_of_weeks < 52].values:
        df3 = d_f2[d_f2.shop_id == uu]
        # df1 = df[(= -1) & (df.b != -1)]
        dff4 = df3.set_index('start_date')
        # df4=df3.week_no-2
        a = df3[['week_no', 'shop_id', 'sales_amount', 'transactions', 'total_tax', 'item_sold']]
        # print(a)
        # 52 weekly (Monday) dates starting from the shop's last start_date.
        # NOTE(review): `format=` is passed to pd.date_range — confirm the
        # installed pandas accepts it.
        dates = pd.date_range(dff4.index[-1], periods=52, freq='W-MON', format='%Y-%m-%d')
        dates1 = pd.DataFrame(dates)
        dates2 = pd.date_range(dff4.index[0], periods=len(dff4.index), freq='W-MON', format='%Y-%m-%d')
        # Per forecast week: mean of the shop's history rows with the same
        # week number (NaN when the shop has no row for that week).
        mean_week_item = []
        for i in dates.week:
            mean_item_sold = a.item_sold[a.week_no == i].mean()
            mean_week_item.append(mean_item_sold)
        mean_week_item1 = pd.DataFrame(mean_week_item)
        trans_week_item = []
        for i1 in dates.week:
            mean_trans_sold = a.transactions[a.week_no == i1].mean()
            trans_week_item.append(mean_trans_sold)
        sales_week = []
        for ii1 in dates.week:
            mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
            sales_week.append(mean_sales_sold)
        dd = {'date': dates, 'weeks_no': dates.week, 'sales': sales_week, 'mean_item': mean_week_item,
              'mean_trans': trans_week_item}
        dd1 = pd.DataFrame(dd)
        # The clustering below works on all shops and does not depend on `uu`,
        # yet it is recomputed on every iteration of this loop.
        dff1 = df[df.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        from sklearn.cluster import KMeans
        # First clustering: individual (shop_id, sales_amount) rows.
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_  # clusters
        cen = kmeans_model.cluster_centers_
        y_hat1 = pd.DataFrame(y_hat)
        # NOTE(review): the label -> low/middle/high mapping is hard-coded;
        # verify it against the cluster centres for this data.
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]
        # Mean sales of the row's shop, computed once per row.
        fff = []
        for j in X_Cluster.shop_id:
            dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == j].mean()
            fff.append(dfdf)
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f4 = pd.DataFrame(fff)
        # NOTE(review): de-duplicating the means assumes no two shops share
        # the same mean; otherwise the lengths of f3 and f5 differ — confirm.
        f5 = f4.drop_duplicates()
        f3['salle'] = f5.values
        Xx2 = f3[['shop_id', 'salle']]
        # Second clustering: one (shop_id, mean sales) row per shop.
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_  # clusters
        cen2 = kmeans_model2.cluster_centers_
        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]
        # Forecast weeks for which this shop has no history of its own.
        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]
        # Select rows from the shop's sales group whose sales lie within a
        # fixed band around the shop's own mean (+-3000, or +-4000 for high).
        if (group_low_sales2.shop_id.values == uu).any() == True:
            cx = int(group_low_sales.sales_amount[group_low_sales.shop_id == uu].values.mean())
            trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is low sales" %uu)
            # print("Average sales per week of shop %s is" %uu,cx)
        elif (group_middle_sales2.shop_id.values == uu).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_middle_sales.shop_id.index.values]
            cx = int(group_middle_sales.sales_amount[group_middle_sales.shop_id == uu].values.mean())
            trt = group_middle_sales[group_middle_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is average sales" %uu)
            # print("Average sales per week of shop %s is " %uu,cx)
        elif (group_high_sales2.shop_id.values == uu).any() == True:
            # valid_cls=dff1[['sales_amount','item_sold','transactions','week_no']].loc[group_high_sales.shop_id.index.values]
            cx = int(group_high_sales.sales_amount[group_high_sales.shop_id == uu].values.mean())
            trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
            trt2 = trt[trt.sales_amount < cx + 4000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions', 'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is high sales" %uu)
            # print("Average sales per week of shop %s is" %uu,cx)
        # NOTE(review): if none of the three branches matches, `valid_cls`
        # keeps the value from a previous iteration (or is undefined).
        drr = valid_cls
        drr = valid_cls
        # dff1[['sales_amount','item_sold','transactions','week_no']].loc[trt2.index.values]
        # Means of the selected peer rows for every week the shop is missing.
        itt = []
        trr = []
        sale = []
        for i3 in nullweeks:
            item = drr.item_sold[drr.week_no == i3].mean()
            trans = drr.transactions[drr.week_no == i3].mean()
            salee = drr.sales_amount[drr.week_no == i3].mean()
            itt.append(item)
            trr.append(trans)
            sale.append(salee)
        df_insert = {'sales_amountt': sale, 'ittem': itt, 'trans': trr, 'weeks_no': nullweeks}
        df_insert1 = pd.DataFrame(df_insert)
        # First fill gaps from the peer means, then any remaining gaps with
        # the overall mean of those peer values.
        forecastdf = dd1.fillna(
            {'mean_item': df_insert1.ittem, 'mean_trans': df_insert1.trans, 'sales': df_insert1.sales_amountt})
        forecastdf1 = forecastdf.fillna({'mean_item': df_insert1.ittem.mean(), 'mean_trans': df_insert1.trans.mean(),
                                         'sales': df_insert1.sales_amountt.mean()})
        # The regression is fitted and evaluated on the same 52 rows.
        regr3 = linear_model.LinearRegression()
        X = forecastdf1[['mean_item', 'mean_trans']]
        Y = forecastdf1.sales
        regr3.fit(X, Y)
        y_predictionss = regr3.predict(X)
        y_predictionss1 = pd.DataFrame(y_predictionss)
        pred_y = round(y_predictionss1, 2)
        # print(pred_y.values)
        forecastdf1['forecasted_sales'] = pred_y.values
        # ddt.fillna()
        forecastdf1.sort_values('weeks_no', inplace=True)
        # forecastdf1
        # forecastdf1.forecasted_sales.reset_index()
        f = forecastdf1.set_index('weeks_no')
        # dtt = pd.DataFrame(index=list(range(1,53)), columns=d2.shop_id[d2.len_of_weeks<52].values)
        dtt['shop_id'] = dtt.index.values
        # dtt[dtt.shop_id==uu].fillna()
        # Store this shop's 52 forecasts as one column of the result table.
        dtt[[uu]] = f.forecasted_sales.values.reshape((52, 1))
    # Transpose by hand: rows become shops, columns become weeks 1..52.
    dtt1 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks < 52].values, columns=list(range(1, 53)))
    for jj in dtt.index.values:
        dtt1.loc[:, jj] = dtt.loc[jj, :]
    # Second pass: same CSV, this time indexed by the parsed start_date.
    data = pd.read_csv('weekly sales and labour cost for all shops 2013 to 20177.csv',index_col='start_date',parse_dates=True)
    # shopID = input("Enter your shop id")
    df2 = data[data.sales_status != 0]
    # df2.week_no.isnull().values.any()
    nulldetect = df2.week_no.isnull()
    # nulldetect[nulldetect==True].index
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2
    len_week1 = []
    for i in df2.shop_id:
        len_week = len(df2.week_no[df2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    #len_week2
    d = {'shop_id': df2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()
    # NOTE(review): the columns use `> 52` while the loop below iterates over
    # `>= 52`; shops with exactly 52 rows are not pre-declared as columns.
    dtt2 = pd.DataFrame(index=list(range(1, 53)), columns=d2.shop_id[d2.len_of_weeks > 52].values)
    for j in d2.shop_id[d2.len_of_weeks >= 52].values:
        data2 = data[['sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold', 'transactions', 'total_tax',
                      'sales_status']]
        df1 = data2[data2.shop_id == j]  # input №1
        df2 = df1[df1.sales_status != 0]
        df2.week_no.isnull().values.any()
        nulldetect = df1.week_no.isnull()
        nulldetect[nulldetect == True].index
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        dff = df2[['sales_amount']]
        data3 = dff.reset_index()
        data4 = data3
        # Prophet expects the columns to be named 'ds' and 'y'.
        data5 = data4.rename(columns={'start_date': 'ds', 'sales_amount': 'y'})
        # Result is discarded (no inplace=True, no assignment).
        data5.set_index('ds')
        # y.plot()
        # Model log sales; log(0) yields -inf, which is mapped to 0 below.
        data5['y'] = np.log(data5['y'])
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
        # Result is discarded here as well.
        data5.set_index('ds')
        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        data5.set_index('ds', inplace=True)
        forecast.set_index('ds', inplace=True)
        viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
        dff.index = pd.to_datetime(dff.index)  # make sure our index as a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        # Keep only forecast rows after the second-to-last observed date.
        mask = (forecast.index > connect_date)
        predict_df = forecast.loc[mask]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        # Undo the log transform.
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
        ii = len(dff.sales_amount) - 1
        viz_df.yhat_scaled[ii:]
        predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
        predicted_future_sales1 = predicted_future_sales.rename(columns={'yhat_scaled': 'future_sales'})
        predicted_future_sales2 = predicted_future_sales1.reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales = predicted_future_sales2['future_sales']
        future_sales1 = round(future_sales, 2)
        start_date = predicted_future_sales2['index']
        predict_data = {'shop_id': int(df2.shop_id.mean()), 'future_sales': future_sales1, 'week_no': week_no,
                        'start_date': start_date}
        predict_data1 = pd.DataFrame(predict_data)
        # One row per week number, ordered by week.
        predict_data1 = predict_data1.drop_duplicates(subset=['week_no'])
        predict_data1.sort_values('week_no', inplace=True)
        f1 = predict_data1.set_index('week_no')
        # NOTE(review): the reshape requires exactly 52 distinct week numbers
        # after de-duplication — confirm this always holds.
        dtt2[[j]] = f1.future_sales.values.reshape((52, 1))
    dtt3 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks > 52].values, columns=list(range(1, 53)))
    # Iterates over dtt's index (weeks 1..52) to transpose dtt2 into dtt3.
    for qq in dtt.index.values:
        dtt3.loc[:, qq] = dtt2.loc[qq, :]
    X5 = d_f2[['shop_id', 'start_date']]
    act_date = pd.DataFrame({'shop_id': X5.shop_id.drop_duplicates().values, 'last_date': np.nan})
    act_dates = act_date[['shop_id', 'last_date']]
    # Last start_date per shop, in order of first appearance of the shop.
    lastdate = []
    for ji in X5.shop_id.drop_duplicates().values:
        l_date = X5.start_date[X5.shop_id == ji].iloc[-1]
        lastdate.append(l_date)
    # lastdate
    # act_dates['last_date'] = lastdate
    # Stack short-history and long-history shops into one table.
    # NOTE(review): `lastdate` is in first-appearance order, while `tab` rows
    # are ordered short-history shops first — confirm the rows line up.
    tab = dtt1.append(dtt3)
    tab.insert(0, 'last_date', lastdate)
    tab['shop_id'] = tab.index.values
    tab.sort_values('shop_id', inplace=True)
    # Move shop_id to the first column.
    tab_id = tab.shop_id
    tab = tab.drop('shop_id', axis=1)
    tab.insert(0, 'shop_id', tab_id)
    #writer = pd.ExcelWriter('output.xlsx')
    #tab.to_excel(writer, 'Sheet1')
    #writer.save()
    tab.to_json(path_or_buf='df.json', orient='records')
    memval2 =tab
def _tune(self,
          y,
          period,
          start_date,
          x=None,
          metric="smape",
          val_size=None,
          verbose=False):
    """
    Tune hyperparameters of the model.

    Every combination of the parameter grid below is fitted on the training
    part of the series and scored on the held-out tail. The best-scoring
    combination is merged into ``self.params`` and ``self.params["tuned"]``
    is set to True.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or
        "annual" for yearly data, 4 or "quarterly" for quarterly data, 7 or
        "daily" for daily data, 12 or "monthly" for monthly data, 24 or
        "hourly" for hourly data, 52 or "weekly" for weekly data.
        First-letter abbreviations of strings work as well ("a", "q", "d",
        "m", "h" and "w", respectively). Additional reference:
        https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param start_date: forwarded, together with ``period``, to
        ``data_utils.create_dates`` to generate one date per observation.
    :param x: pd.DataFrame or 2-D np.array, exogeneous predictors, optional
    :param metric: Str, the metric used for model selection. One of: "mse",
        "mae", "mape", "smape", "rmse".
    :param val_size: Int, the number of most recent observations to use as
        validation set for tuning. Defaults to 10% of ``len(y)``.
    :param verbose: Boolean, True for printing additional info while tuning.
        NOTE(review): this flag is not read anywhere in the method body.
    :return: None
    """
    self.period = (data_utils.period_to_int(period)
                   if isinstance(period, str) else period)
    dates = data_utils.create_dates(start_date, period, length=len(y))
    val_size = int(len(y) * .1) if val_size is None else val_size

    y_train, y_val = model_utils.train_val_split(y, val_size=val_size)
    dates_train, dates_val = model_utils.train_val_split(dates,
                                                         val_size=val_size)
    input_df = pd.DataFrame({"ds": dates_train, "y": y_train})
    future_df = pd.DataFrame({"ds": dates_val})

    if x is not None:
        # Each predictor becomes a column named after its position in x.
        x_train, x_val = model_utils.train_val_split(x, val_size=val_size)
        for variable_id, x_variable in enumerate(x_train.T):
            input_df[variable_id] = x_variable
        for variable_id, x_variable in enumerate(x_val.T):
            future_df[variable_id] = x_variable

    # The regressor columns do not change between permutations, so the
    # list is built once instead of once per fitted model.
    variable_ids = sorted(set(input_df.columns).difference({"ds", "y"}))

    metric_fun = get_metric(metric)

    params_grid = {
        "seasonality": ["additive", "multiplicative"],
        "growth": ["linear", "logistic"],
        "changepoint_prior_scale": [0.005, 0.05, 0.5],
    }
    params_keys, params_values = zip(*params_grid.items())
    params_permutations = [
        dict(zip(params_keys, v))
        for v in itertools.product(*params_values)
    ]

    scores = []
    for permutation in params_permutations:
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                model = Prophet(
                    seasonality_mode=permutation["seasonality"],
                    growth=permutation["growth"],
                    changepoint_prior_scale=permutation[
                        "changepoint_prior_scale"],
                )
                if x is not None:
                    for variable_id in variable_ids:
                        model.add_regressor(variable_id)
                with SuppressStdoutStderr():
                    model.fit(input_df)
                    y_pred = model.predict(future_df)["yhat"].values
                score = metric_fun(y_val, y_pred)
                scores.append(score)
        except Exception:
            # A combination that cannot be fitted or scored is ranked last.
            # KeyboardInterrupt / SystemExit are deliberately not caught so
            # the grid search can still be aborted.
            scores.append(np.inf)

    best_params = params_permutations[np.nanargmin(scores)]
    self.params.update(best_params)
    self.params["tuned"] = True
# Use a font that can render the Japanese column labels in the plots.
plt.rcParams['font.family'] = 'Hiragino Sans'

url = ('https://www.data.jma.go.jp/obd/stats/etrn/view/'
       'monthly_s3.php?prec_no=44&block_no=47662')

# Extract the data: first table on the page, rows with missing values dropped.
dfs = pd.read_html(url)
df = dfs[0].dropna()

# --- Time-series forecast ---

# Training data: the '1月' column is the target; every row is dated
# 1 January of the year given in the '年' column.
data = pd.DataFrame()
data['y'] = df['1月']
# Take the first selected column by position explicitly. The former
# `x[0]` lookup on a label-indexed row relied on the positional fallback
# of Series.__getitem__, which pandas has deprecated.
data['ds'] = df[['年']].iloc[:, 0].astype(str) + '-01-01'

# Build and fit the model.
model = Prophet(daily_seasonality=True,
                weekly_seasonality=True,
                yearly_seasonality=True)
model.fit(data)

# Forecast 100 yearly periods beyond the history.
future_data = model.make_future_dataframe(periods=100, freq='y')
forecast_data = model.predict(future_data)

# Plot the forecast and its components.
model.plot(forecast_data)
model.plot_components(forecast_data)
plt.show()
def create_prophet_m(app_name, z1, cpu_perc_list, delay=24):
    """Fit two Prophet models on ``z1.user_count`` and evaluate one of them.

    A "real-time" model is fitted on the whole series and forecasts
    ``delay`` daily periods ahead. A second model is fitted on everything
    except the last ``delay`` observations and scored against that tail.

    :param app_name: label used in the printed report and as the index of
        the returned metrics frame.
    :param z1: object exposing a ``user_count`` series; its index becomes
        the ``ds`` column.
    :param cpu_perc_list: list that receives one ``py.cpu_percent()`` sample.
    :param delay: number of trailing observations held out for testing and
        number of periods forecast by the real-time model.
    :return: tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df``
        is a one-row metrics frame (empty when no test row received a
        prediction).
    """
    n_obs = len(z1)

    def _as_prophet_frame(series):
        # Prophet expects the columns to be called 'ds' and 'y'.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    # ---- real-time forecast on the complete series ----
    full_df = _as_prophet_frame(z1.user_count.iloc[0:n_obs])
    model_r = Prophet(yearly_seasonality=False,
                      changepoint_prior_scale=.03,
                      seasonality_prior_scale=0.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='D')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(
        forecast_r['yhat'][n_obs:(n_obs + delay)]).reset_index()

    # ---- hold-out evaluation ----
    train_end_index = len(z1.user_count) - delay
    train_df = _as_prophet_frame(z1.user_count.iloc[0:train_end_index])
    test_df = _as_prophet_frame(z1.user_count.iloc[train_end_index:n_obs])
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    model = Prophet(yearly_seasonality=False,
                    changepoint_prior_scale=.03,
                    seasonality_prior_scale=0.2)
    model.fit(train_df)

    # NOTE(review): only this first sample reaches the caller's list; the
    # reduced "peak" list below is a new local object.
    cpu_perc_list.append(py.cpu_percent())
    peak_cpu = [max(cpu_perc_list)]

    future = model.make_future_dataframe(periods=len(test_df), freq='D')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(
        forecast['yhat'][train_end_index:n_obs]).reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    peak_cpu.append(py.cpu_percent())
    peak_cpu = [max(peak_cpu)]

    if len(pred_df) <= 0:
        # Nothing to score: hand back an empty metrics frame.
        return (pd.DataFrame(), model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat
    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    pred_df['APE'] = (pred_df.error_test * 100 / pred_df.y).abs()
    ape = pred_df['APE']
    MAPE = ape.mean()

    min_error_rate = ape.quantile(0) / 100
    max_error_rate = ape.quantile(1) / 100
    median_error_rate = ape.quantile(.50) / 100

    print("App name:", app_name)
    print("RMSE :", RMSE)
    print("MAPE :", MAPE)

    # Mean APE over the rows below the 98th percentile.
    mape_q98 = ape[ape < ape.quantile(0.98)].mean()
    # Standard deviation of the absolute percentage errors around MAPE.
    std_MAPE = math.sqrt(((ape - MAPE)**2).mean())

    metrics = {
        'length': n_obs,
        'test_rmse': RMSE,
        'test_mape': MAPE,
        'std_mape': std_MAPE,
        'min_error_rate': min_error_rate,
        'max_error_rate': max_error_rate,
        'median_error_rate': median_error_rate,
        'test_mape_98': mape_q98,
    }
    df = pd.DataFrame(metrics, index=[app_name])
    return (df, model, forecast, pred_df, pred_r)
# Offset of the first row to keep.
# NOTE(review): 6 * 24 * 30 * 24 presumably means 6 readings/hour x 24 h x
# 30 days x 24 months -- confirm against the sampling rate of the data.
two_years_data = length - (6 * 24 * 30 * 24)

# reset_index() returns an independent frame, so the column assignments
# below no longer write into a slice of gridwatch_df.
df2 = gridwatch_df[two_years_data:].reset_index()
df2.tail()

# In[11]:

# fit the data (only 1/7th of it)
# From looking at the data, a cap of 55,000 and a floor of 15,000 give a
# workable range for the logistic growth model.
CAP = 55000
FLOOR = 15000
df2['cap'] = CAP
df2['floor'] = FLOOR

df2_prophet = Prophet(changepoint_prior_scale=0.10, growth='logistic')
df2_prophet.add_country_holidays(country_name='UK')
df2_prophet.fit(df2)

# In[12]:

# Future dataset for the predicted values: 24 hourly periods ahead.
# `periods` is meant to become a variable.
future = df2_prophet.make_future_dataframe(periods=24, freq='H')
# Logistic growth needs the same bounds on the frame that is predicted.
future['cap'] = CAP
future['floor'] = FLOOR
future.tail()

# In[13]:

# Forecast with uncertainty bounds for the future points.
forecast = df2_prophet.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head()
def dept_model():
    """Forecast 28 days of department sales for every store/department pair.

    Reads ``df_dept_pred`` (CSV), fits one Prophet model with the encoded
    ``store_id`` and ``dept_id`` as extra regressors, predicts the next 28
    days for each of the 10 x 7 store/department combinations and prints
    the resulting table. Returns None; ``df_dept`` is stored as a module
    global.
    """
    print('entering dept_agg')
    global df_dept, df_hobbies_1, df_hobbies_2, df_household_1, df_household_2, df_foods_1, df_foods_2, df_foods_3, df_foods_4
    #df_dept=df_final.groupby(['dept_id','store_id','Date','dayofyear','weekofyear','monthstart','quarterstart','yearstart'])['Sales'].sum()
    #df_dept=df_dept.reset_index()
    df_dept = pd.read_csv('df_dept_pred')
    df_dept = df_dept.rename(columns={'Sales': 'Dept Sales'})

    # Numeric codes used as regressor values.
    stores = {
        'CA_1': 1, 'CA_2': 2, 'CA_3': 3, 'CA_4': 4,
        'TX_1': 5, 'TX_2': 6, 'TX_3': 7,
        'WI_1': 8, 'WI_2': 9, 'WI_3': 10,
    }
    dept = {
        'FOODS_1': 1, 'FOODS_2': 2, 'FOODS_3': 3,
        'HOUSEHOLD_1': 4, 'HOUSEHOLD_2': 5,
        'HOBBIES_1': 6, 'HOBBIES_2': 7,
    }

    # .copy() gives an independent frame, so the assignments below do not
    # write into a slice of df_dept.
    df_dept_pred = df_dept[['dept_id', 'store_id', 'Date',
                            'Dept Sales']].copy()
    df_dept_pred.columns = ['dept_id', 'store_id', 'ds', 'y']
    df_dept_pred['store_id'] = df_dept_pred['store_id'].map(stores)
    df_dept_pred['dept_id'] = df_dept_pred['dept_id'].map(dept)

    fb = Prophet(interval_width=0.95,
                 daily_seasonality=True,
                 weekly_seasonality=True,
                 yearly_seasonality=True)
    fb.add_country_holidays(country_name='US')
    fb.add_regressor('store_id')
    fb.add_regressor('dept_id')
    fb.fit(df_dept_pred)

    future = fb.make_future_dataframe(freq='D',
                                      periods=28,
                                      include_history=False)

    # Predict one store/department pair at a time. Prophet sorts the frame
    # it predicts on by 'ds', so a single stacked frame with 70 copies of
    # every date comes back in a different row order and the forecasts
    # would be attached to the wrong pair. Within one pair the dates are
    # unique and already ascending, so the order is preserved.
    frames = []
    for dept_code in sorted(dept.values()):
        for store_code in sorted(stores.values()):
            combo = future.copy()
            combo['store_id'] = store_code
            combo['dept_id'] = dept_code
            combo['yhat'] = fb.predict(combo)['yhat'].to_numpy()
            frames.append(combo)
    final = pd.concat(frames, ignore_index=True)

    # Translate the numeric codes back to their labels.
    stores2 = {code: label for label, code in stores.items()}
    dept2 = {code: label for label, code in dept.items()}
    final['store_id'] = final['store_id'].map(stores2)
    final['dept_id'] = final['dept_id'].map(dept2)

    final = final.rename(columns={'ds': 'Date', 'yhat': 'Forecasted Sales'})
    final = final[['dept_id', 'store_id', 'Date', 'Forecasted Sales']]
    print(final)
# Debug output of the split shapes (disabled):
# print('x_train.shape = ',x_train.shape)
# print('y_train.shape = ', y_train.shape)
# print('x_valid.shape = ',x_valid.shape)
# print('y_valid.shape = ', y_valid.shape)
# print('x_test.shape = ', x_test.shape)
# print('y_test.shape = ',y_test.shape)

# The data set covers 51 different companies; a few of their symbols:
# ['AAPL', 'CLX', 'ETR', 'MCK', 'WMT', 'HCN', 'CTSH', 'NVDA', 'AIV', 'EFX']
# Set `stock` to the symbol to forecast.
stock = 'AAPL'

# Price column to forecast; one of ['open', 'high', 'low', 'close'].
price = 'close'

# Rows of the chosen symbol, ordered by date, in Prophet's ds/y layout.
df_prophet = (
    df.loc[df['symbol'] == stock, ['date', price]]
    .sort_values('date')
    .rename(columns={'date': 'ds', price: 'y'})
)

m = Prophet()
m.fit(df_prophet)

# Forecast 365 periods beyond the history and plot the result.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
figure = m.plot(forecast,
                xlabel='Date',
                ylabel='{}_Price'.format(price))
plt.show()
def _mask_outliers(frame):
    """Replace extreme ``y`` values of *frame* with missing values, in place.

    Only acts when the maximum lies at least two medians above the median;
    then every value of at least twice the median is blanked out.
    """
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    if (q100 - q50) >= (2 * q50):
        frame.loc[frame.y >= (2 * q50), 'y'] = None


def create_prophet_m(app_name, z1, cpu_perc_list, delay=24):
    """Fit two Prophet models on ``z1.bw`` and evaluate one of them.

    A "real-time" model is fitted on the whole (outlier-masked) series and
    forecasts ``delay`` daily periods ahead. A second model is fitted on
    everything except the last ``delay`` observations and scored against
    that tail.

    :param app_name: label used in the printed report and as the index of
        the returned metrics frame.
    :param z1: object exposing a ``bw`` series; its index becomes the
        ``ds`` column.
    :param cpu_perc_list: list that receives one ``py.cpu_percent()`` sample.
    :param delay: number of trailing observations held out for testing and
        number of periods forecast by the real-time model.
    :return: tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df``
        is a one-row metrics frame (empty when no test row received a
        prediction).
    """
    # --- real-time prediction on the complete series ---
    full_df = z1.bw.iloc[0:len(z1)]
    full_df = full_df.reset_index()
    full_df.columns = ['ds', 'y']
    _mask_outliers(full_df)

    model_r = Prophet(yearly_seasonality=False,
                      changepoint_prior_scale=.1,
                      seasonality_prior_scale=0.05)
    model_r.fit(full_df)
    cpu_perc_list.append(py.cpu_percent())
    # NOTE(review): this rebinds the local name, so from here on the
    # caller's list is no longer updated -- confirm that is intended.
    cpu_perc_list = [max(cpu_perc_list)]
    future_r = model_r.make_future_dataframe(periods=delay, freq='D')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(forecast_r['yhat'][len(z1):(len(z1) + delay)])
    pred_r = pred_r.reset_index()

    # --- hold-out evaluation ---
    train_end_index = len(z1.bw) - delay
    train_df = z1.bw.iloc[0:train_end_index]
    test_df = z1.bw.iloc[train_end_index:len(z1)]

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ['ds', 'y']
    # Outliers are masked in the training set only.
    _mask_outliers(train_df)

    test_df.columns = ['ds', 'y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    model = Prophet(yearly_seasonality=False,
                    changepoint_prior_scale=.1,
                    seasonality_prior_scale=0.05)
    model.fit(train_df)
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]
    future = model.make_future_dataframe(periods=len(test_df), freq='D')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'][train_end_index:len(z1)])
    print('length forecasted non realtime=', len(pred))
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        min_error_rate = pred_df['APE'].quantile(0) / 100
        max_error_rate = pred_df['APE'].quantile(1) / 100
        median_error_rate = pred_df['APE'].quantile(.50) / 100
        print("App name:", app_name)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # Mean APE over the rows below the 98th percentile.
        mape_q98 = pred_df['APE'][
            pred_df.APE < pred_df['APE'].quantile(0.98)].mean()
        # Standard deviation of the absolute percentage errors around MAPE.
        std_MAPE = math.sqrt(((pred_df.APE - MAPE)**2).mean())

        df = pd.DataFrame(
            {
                'length': len(z1),
                'test_rmse': RMSE,
                'test_mape': MAPE,
                'std_mape': std_MAPE,
                'min_error_rate': min_error_rate,
                'max_error_rate': max_error_rate,
                'median_error_rate': median_error_rate,
                'test_mape_98': mape_q98,
            },
            index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
def predict(self):
    """Fit Prophet on ``self.raw_df`` and store forecast and merged frames.

    Reads its settings from ``self.params``, derives the ``ds`` / ``y``
    columns on ``self.raw_df``, fits a model and writes the results to
    ``self.forecast_df`` and ``self.merged_df``. As a side effect
    ``self.raw_df`` ends up indexed by ``ds``. Returns None.
    """

    def _float_param(key, default):
        # Fall back to the default when the value is missing, None or not
        # parseable as a number.
        try:
            return float(self.params.get(key, default))
        except (TypeError, ValueError):
            return default

    # The settings are read from self.params, like "measures" further
    # down, instead of relying on a module-level name `params`.
    time_drilldown = self.params.get("drilldowns", "Year")
    seasonality_mode = self.params.get("seasonality_mode", "multiplicative")
    changepoint_prior_scale = _float_param("changepoint_prior_scale", 0.05)
    changepoint_range = _float_param("changepoint_range", 0.80)

    time_values = self.raw_df[time_drilldown]
    if time_drilldown == "Year":
        # One observation per year, dated 31 January.
        date_index = pd.to_datetime(
            [f'{year}-01-31' for year in time_values])
    elif time_drilldown == "Time":
        # First four characters are the year, the remainder the month.
        date_index = pd.to_datetime([
            f'{str(date)[:4]}-{str(date)[4:]}-01' for date in time_values
        ])
    else:
        date_index = pd.to_datetime(list(time_values))

    self.raw_df["ds"] = date_index
    self.raw_df["y_orig"] = self.raw_df[self.params["measures"]].astype(
        float)
    self.raw_df["y"] = self.raw_df["y_orig"].round(2)

    if DEBUG:
        print("\nRaw DataFrame (head):\n________________\n")
        print(self.raw_df.head())

    model = Prophet(seasonality_mode=seasonality_mode,
                    changepoint_prior_scale=changepoint_prior_scale,
                    changepoint_range=changepoint_range)

    with suppress_stdout_stderr():
        model.fit(self.raw_df)

    if time_drilldown == "Year":
        future = model.make_future_dataframe(periods=10,
                                             freq='A-JAN',
                                             include_history=True)
    else:
        future = model.make_future_dataframe(periods=10 * 5,
                                             freq='m',
                                             include_history=True)

    if DEBUG:
        print("\nFuture DataFrame (unpopulated tail):\n________________\n")
        print(future.tail())

    self.forecast_df = model.predict(future)

    # Merge the forecast with the original data on the date index.
    self.raw_df = self.raw_df.set_index('ds')
    self.forecast_df = self.forecast_df.set_index('ds')
    self.merged_df = self.raw_df.join(self.forecast_df[[
        'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower',
        'trend_upper'
    ]], how='outer')

    # Only the unrounded 'y_orig' is kept in the merged frame.
    del self.merged_df['y']

    if DEBUG:
        print("\nFuture DataFrame (unpopulated tail):\n________________\n")
        print(self.forecast_df.tail(10))

    if DEBUG:
        print("\nFuture DataFrame (last row):\n________________\n")
        print(self.forecast_df.iloc[-1])
def test_fit_changepoint_not_in_history(self):
    """Fit and predict must not raise when a changepoint lies in a data gap."""
    # Training history: everything before 2013-01-01 or after 2014-01-01.
    before_gap = DATA['ds'] < '2013-01-01'
    after_gap = DATA['ds'] > '2014-01-01'
    history = DATA[before_gap | after_gap]

    all_dates = pd.DataFrame({'ds': DATA['ds']})

    # The only changepoint is a date that was removed from the history.
    model = Prophet(changepoints=['2013-06-06'])
    model.fit(history)
    model.predict(all_dates)
def test_added_regressors(self):
    """Check registration, standardization and forecasting of regressors."""
    model = Prophet()
    model.add_regressor('binary_feature', prior_scale=0.2)
    model.add_regressor('numeric_feature', prior_scale=0.5)
    model.add_regressor('binary_feature2', standardize=True)

    history = DATA.copy()
    history['binary_feature'] = [0] * 255 + [1] * 255
    history['numeric_feature'] = list(range(510))
    # Fitting requires every registered regressor to be present.
    with self.assertRaises(ValueError):
        model.fit(history)
    history['binary_feature2'] = [1] * 100 + [0] * 410
    model.fit(history)

    # Standardization settings recorded for each regressor.
    regressors = model.extra_regressors
    expected_binary = {
        'prior_scale': 0.2,
        'mu': 0,
        'std': 1,
        'standardize': 'auto',
    }
    self.assertEqual(regressors['binary_feature'], expected_binary)

    numeric = regressors['numeric_feature']
    self.assertEqual(numeric['prior_scale'], 0.5)
    self.assertEqual(numeric['mu'], 254.5)
    self.assertAlmostEqual(numeric['std'], 147.368585, places=5)

    binary2 = regressors['binary_feature2']
    self.assertEqual(binary2['prior_scale'], 10.)
    self.assertAlmostEqual(binary2['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(binary2['std'], 0.3974183, places=5)

    # Standardized values in the prepared frame.
    prepared = model.setup_dataframe(history.copy())
    self.assertEqual(prepared['binary_feature'][0], 0)
    self.assertAlmostEqual(prepared['numeric_feature'][0],
                           -1.726962,
                           places=4)
    self.assertAlmostEqual(prepared['binary_feature2'][0],
                           2.022859,
                           places=4)

    # Feature matrix and prior scales.
    seasonal_features, prior_scales = model.make_all_seasonality_features(
        prepared)
    for regressor_name in ('binary_feature', 'numeric_feature',
                           'binary_feature2'):
        self.assertIn(regressor_name, seasonal_features)
    self.assertEqual(seasonal_features.shape[1], 29)
    self.assertEqual(set(prior_scales[26:]), {0.2, 0.5, 10.})

    # Forecast components: prediction needs all regressors as well.
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
    })
    with self.assertRaises(ValueError):
        model.predict(future)
    future['binary_feature2'] = 0
    fcst = model.predict(future)

    self.assertEqual(fcst.shape[1], 31)
    self.assertEqual(fcst['binary_feature'][0], 0)
    self.assertAlmostEqual(
        fcst['extra_regressors'][0],
        fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['seasonalities'][0],
        fcst['yearly'][0] + fcst['weekly'][0],
    )
    self.assertAlmostEqual(
        fcst['seasonal'][0],
        fcst['seasonalities'][0] + fcst['extra_regressors'][0],
    )
    self.assertAlmostEqual(
        fcst['yhat'][0],
        fcst['trend'][0] + fcst['seasonal'][0],
    )

    # A constant extra regressor must be rejected at fit time.
    history['constant_feature'] = 5
    constant_model = Prophet()
    constant_model.add_regressor('constant_feature')
    with self.assertRaises(ValueError):
        constant_model.fit(history.copy())
def make_tableau_dataset(
        inpath=os.path.join("data", "processed", "data_ready.csv"),
        outpath=os.path.join("data", "processed", "data_ready_tableau.csv"),
):
    """Write the long-format csv file consumed by the Tableau dashboard.

    The four sub-meter columns are kept, extended with hourly Prophet
    forecasts, tagged with "Current Month" / "Last Month" and melted into
    one row per timestamp and measure.

    Keyword Arguments:
        inpath {string} -- Path to the last iteration of data
            (default: {os.path.join("data", "processed", "data_ready.csv")})
        outpath {string} -- Path to output file
            (default: {os.path.join("data", "processed",
            "data_ready_tableau.csv")})
    """
    source_columns = [
        "Sub_metering_1", "Sub_metering_2", "Sub_metering_3", "unmeasured"
    ]
    columns = [
        "Kitchen", "Laundry Room", "Heating and Air Conditioning", "Other"
    ]

    # Keep the sub-meter readings only (no total energy use, no weather).
    df = pd.read_csv(inpath,
                     index_col=["Date_Time"],
                     parse_dates=["Date_Time"])
    df = df[source_columns]
    df.columns = columns

    # Number of hours between the last reading and 23:00 on the month end
    # produced by MonthEnd(1).
    last_day = df.index[-1]
    month_end = (last_day + MonthEnd(1)).replace(hour=23)
    hours_to_predict = int((month_end - last_day).total_seconds() / 3600)

    # One forecast column per sub-meter on an hourly index that starts
    # right after the last reading.
    forecast_index = pd.date_range(last_day + timedelta(hours=1),
                                   periods=hours_to_predict,
                                   freq="H")
    predictions_df = pd.DataFrame(index=forecast_index)
    predictions_df.index.name = "Date_Time"

    for column in columns:
        data_prophet = df[column].reset_index(level=0)
        data_prophet.columns = ["ds", "y"]
        m = Prophet()
        m.fit(data_prophet)
        future = m.make_future_dataframe(periods=hours_to_predict,
                                         freq="H")
        forecast = m.predict(future)
        # Only the rows beyond the history are forecasts.
        predictions_df[column] = (
            forecast["yhat"].iloc[-hours_to_predict:].to_numpy())

    df["prediction"] = False
    predictions_df["prediction"] = True
    df = pd.concat([df, predictions_df])

    # Label the rows of the current month and of the month before it.
    if last_day.month == 1:
        prev_year, prev_month = last_day.year - 1, 12
    else:
        prev_year, prev_month = last_day.year, last_day.month - 1
    in_current_month = ((df.index.year == last_day.year)
                        & (df.index.month == last_day.month))
    in_last_month = ((df.index.year == prev_year)
                     & (df.index.month == prev_month))
    df["month"] = np.where(in_current_month, "Current Month", "")
    df["month"] = np.where(in_last_month, "Last Month", df["month"])

    df.reset_index(level=0, inplace=True)
    df = df.melt(
        id_vars=["Date_Time", "prediction", "month"],
        var_name="measure",
        value_name="Value",
    )
    df.to_csv(outpath, index=False)
# Pressure series in Prophet's ds/y layout.
dfall_pressure_1 = dfall_chongqing_day_pressure.rename(columns={
    'date': 'ds',
    'pressure': 'y'
})
# Min-max scale the target to the range [0, 1].
pressure_min = dfall_pressure_1['y'].min()
pressure_max = dfall_pressure_1['y'].max()
dfall_pressure_1['y'] = ((dfall_pressure_1['y'] - pressure_min) /
                         (pressure_max - pressure_min))
dfall_pressure_1['ds'] = pd.to_datetime(dfall_pressure_1['ds'])
# 'ds' stays a regular column. The former set_index('ds') call discarded
# its result and therefore never changed the frame.
df_pressure = dfall_pressure_1

# The three models share the same settings.
prophet_settings = dict(daily_seasonality=False,
                        weekly_seasonality=False,
                        changepoint_prior_scale=0.01)

m_temperature = Prophet(**prophet_settings)
m_temperature.fit(df_temperature)

m_humidity = Prophet(**prophet_settings)
m_humidity.fit(df_humidity)

m_pressure = Prophet(**prophet_settings)
m_pressure.fit(df_pressure)

# Future frames reaching 180 periods beyond the history.
future_temperature = m_temperature.make_future_dataframe(periods=180)
future_temperature.tail()

future_humidity = m_humidity.make_future_dataframe(periods=180)
# --- Neural-network regression on the observed series ---
y = data1.values.reshape(-1, 1)

from sklearn.neural_network import MLPRegressor

model = MLPRegressor(
    hidden_layer_sizes=[32, 32, 10],
    max_iter=50000,
    alpha=0.0005,
    random_state=26,
)
_ = model.fit(x, y.ravel())

# Predict for every observed position plus 7 further steps.
test = np.arange(len(data1) + 7).reshape(-1, 1)
pred = model.predict(test)
prediction = pd.DataFrame(pred.round().astype(int))
prediction.plot()

# --- Prophet forecast, 30 periods ahead ---
m = Prophet()
m.fit(data)
future = m.make_future_dataframe(periods=30)
forecast_cm = m.predict(future)
forecast_cm

# Trend component: positive values only, last 30 rows.
cnfrm = forecast_cm[['ds', 'trend']]
cnfrm = cnfrm[cnfrm['trend'] > 0]
cnfrm.head()

cnfrm = cnfrm.tail(30).rename(columns={'ds': 'Date', 'trend': 'Confirm'})
cnfrm.head()

# Interactive plot first, then the static one with axis labels.
fig_cm = plot_plotly(m, forecast_cm)
py.iplot(fig_cm)

fig_cm = m.plot(forecast_cm, xlabel='Date', ylabel='Confirmed Count')
class BuildProphet(BuildBase): """Class to build a Prophet Model """ def __init__(self, forecast_period, time_interval, seasonal_period, scoring, verbose, conf_int, holidays, growth, seasonality, **kwargs): """ Automatically build a Prophet Model """ super().__init__(scoring=scoring, forecast_period=forecast_period, verbose=verbose) self.time_interval = time_interval self.seasonal_period = seasonal_period self.conf_int = conf_int self.holidays = holidays self.growth = growth self.seasonality = seasonality yearly_seasonality = False daily_seasonality = False weekly_seasonality = False if self.time_interval == 'weeks': weekly_seasonality = seasonality elif self.time_interval == 'years': yearly_seasonality = seasonality elif self.time_interval == 'days': daily_seasonality = seasonality #self.model = Prophet( # yearly_seasonality=yearly_seasonality, # weekly_seasonality=weekly_seasonality, # daily_seasonality=daily_seasonality, # interval_width=self.conf_int, # holidays = self.holidays, # growth = self.growth) self.model = Prophet(growth=self.growth) self.univariate = None self.list_of_valid_time_ints = [ 'B', 'C', 'D', 'W', 'M', 'SM', 'BM', 'CBM', 'MS', 'SMS', 'BMS', 'CBMS', 'Q', 'BQ', 'QS', 'BQS', 'A,Y', 'BA,BY', 'AS,YS', 'BAS,BYS', 'BH', 'H', 'T,min', 'S', 'L,ms', 'U,us', 'N' ] self.list_of_valid_time_ints.append(time_interval) if kwargs: for key, value in zip(kwargs.keys(), kwargs.values()): if key == 'seasonality_mode': self.seasonality = True key = value else: key = value def fit(self, ts_df: pd.DataFrame, target_col: str, cv: Optional[int], time_col: str): """ Fits the model to the data :param ts_df The time series data to be used for fitting the model :type ts_df pd.DataFrame :param target_col The column name of the target time series that needs to be modeled. All other columns will be considered as exogenous variables (if applicable to method) :type target_col str :param cv: Number of folds to use for cross validation. 
Number of observations in the Validation set for each fold = forecast period If None, a single fold is used :type cv Optional[int] :param time_col: Name of the time column in the dataset (needed by Prophet) Time column can also be the index, in which case, this would be the name of the index :type time_col str :rtype object """ # use all available threads/cores self.time_col = time_col self.original_target_col = target_col self.original_preds = [ x for x in list(ts_df) if x not in [self.original_target_col] ] if len(self.original_preds) == 0: self.univariate = True else: self.univariate = False # print(f"Prophet Is Univariate: {self.univariate}") ts_df = copy.deepcopy(ts_df) ##### if you are going to use matplotlib with prophet data, it gives an error unless you do this. pd.plotting.register_matplotlib_converters() #### You have to import Prophet if you are going to build a Prophet model ############# actual = 'y' timecol = 'ds' data = self.prep_col_names_for_prophet(ts_df=ts_df, test=False) if self.univariate: dft = data[[timecol, actual]] else: dft = data[[timecol, actual] + self.original_preds] ##### For most Financial time series data, 80 percent conf interval is enough... if self.verbose >= 1: print( ' Fit-Predict data (shape=%s) with Confidence Interval = %0.2f...' 
% (dft.shape, self.conf_int)) ### Make Sure you lower your desired interval width from the normal 95% to a more realistic 80% start_time = time.time() if self.univariate is False: for name in self.original_preds: self.model.add_regressor(name) print(" Starting Prophet Fit") if self.seasonality: prophet_seasonality, prophet_period, fourier_order, prior_scale = get_prophet_seasonality( self.time_interval, self.seasonal_period) self.model.add_seasonality(name=prophet_seasonality, period=prophet_period, fourier_order=fourier_order, prior_scale=prior_scale) print( ' Adding %s seasonality to Prophet with period=%d, fourier_order=%d and prior_scale=%0.2f' % (prophet_seasonality, prophet_period, fourier_order, prior_scale)) else: print( ' No seasonality assumed since seasonality flag is set to False' ) if type(dft) == dask.dataframe.core.DataFrame: num_obs = dft.shape[0].compute() else: num_obs = dft.shape[0] ### Creating a new way to skip cross validation when trying to run auto-ts multiple times. ### if cv == 0: cv_in = 0 else: cv_in = copy.deepcopy(cv) NFOLDS = self.get_num_folds_from_cv(cv) ######################################################################################### # NOTE: This change to the FB recommendation will cause the cv folds from facebook to # be incompatible with the folds from the other models (in terms of periods of evaluation # as well as number of observations in each period). Hence the final comparison will # be biased since it will not compare the same folds. # The original implementation was giving issues under certain conditions, hence this change # to FB recommendation has been made as a temporary (short term) fix. # The root cause issue will need to be fixed eventually at a later point. 
######################################################################################### ### Prophet's Time Interval translates into frequency based on the following pandas date_range alias: # Link: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases ## This is done using the get_prophet_time_interval() function later. if self.time_interval in self.list_of_valid_time_ints: time_int = copy.deepcopy(self.time_interval) else: time_int = self.get_prophet_time_interval(for_cv=False) # First Fold --> # Train Set: 0:initial # Test Set: initial:(initial+horizon) # Second Fold --> # Train Set: (period):(initial+period) # Test Set: (initial+period):(initial+horizon+ period) # Format: '850 D' print(" Starting Prophet Cross Validation") ################################################################################ if self.forecast_period <= 5: #### Set a minimum of 5 for the number of rows in test! self.forecast_period = 5 ### In case the number of forecast_period is too high, just reduce it so it can fit into num_obs if NFOLDS * self.forecast_period > num_obs: self.forecast_period = int(num_obs / (NFOLDS + 1)) print('Lowering forecast period to %d to enable cross_validation' % self.forecast_period) ########################################################################################### #cv = GapWalkForward(n_splits=NFOLDS, gap_size=0, test_size=self.forecast_period) max_trainsize = len(dft) - self.forecast_period try: cv = TimeSeriesSplit(n_splits=NFOLDS, test_size=self.forecast_period ) ### this works only sklearn v 0.0.24] except: cv = TimeSeriesSplit(n_splits=NFOLDS, max_train_size=max_trainsize) y_preds = pd.DataFrame() print('Max. 
iterations using expanding window cross validation = %d' % NFOLDS) start_time = time.time() rmse_folds = [] norm_rmse_folds = [] forecast_df_folds = [] concatenated = pd.DataFrame() extra_concatenated = pd.DataFrame() if type(dft) == dask.dataframe.core.DataFrame: dft = dft.head( len(dft)) ### this converts dask into a pandas dataframe if cv_in == 0: print( 'Skipping cross validation steps since cross_validation = %s' % cv_in) model = Prophet(growth="linear") kwargs = { 'iter': 1e2 } ## this limits iterations and hence speeds up prophet else: for fold_number, (train_index, test_index) in enumerate(cv.split(dft)): dftx = dft.head(len(train_index) + len(test_index)) train_fold = dftx.head( len(train_index )) ## now train will be the first segment of dftx test_fold = dftx.tail( len(test_index )) ### now test will be right after train in dftx horizon = len(test_fold) print( f"\nFold Number: {fold_number+1} --> Train Shape: {train_fold.shape[0]} Test Shape: {test_fold.shape[0]}" ) ######################################### #### Define the model with fold data #### ######################################### model = Prophet(growth="linear") ############################################ #### Fit the model with train_fold data #### ############################################ kwargs = { 'iter': 1e2 } ## this limits iterations and hence speeds up prophet model.fit(train_fold, **kwargs) ################################################# #### Predict using model with test_fold data #### ################################################# future_period = model.make_future_dataframe(freq=time_int, periods=horizon) forecast_df = model.predict(future_period) ### Now compare the actuals with predictions ###### y_pred = forecast_df['yhat'][-horizon:] concatenated = pd.DataFrame(np.c_[test_fold[actual].values, y_pred.values], columns=['original', 'predicted'], index=test_fold.index) if fold_number == 0: extra_concatenated = copy.deepcopy(concatenated) else: extra_concatenated = 
extra_concatenated.append( concatenated) rmse_fold, rmse_norm = print_dynamic_rmse( concatenated['original'].values, concatenated['predicted'].values, concatenated['original'].values) print('Cross Validation window: %d completed' % (fold_number + 1, )) rmse_folds.append(rmse_fold) norm_rmse_folds.append(rmse_norm) ###################################################### ### This is where you consolidate the CV results ##### ###################################################### fig = model.plot(forecast_df) #rmse_mean = np.mean(rmse_folds) #print('Average CV RMSE over %d windows (macro) = %0.5f' %(fold_number+1,rmse_mean)) #cv_micro = np.sqrt(mean_squared_error(y_trues.values, y_preds.values)) #print('Average CV RMSE of all predictions (micro) = %0.5f' %cv_micro) try: print_ts_model_stats(extra_concatenated['original'], extra_concatenated['predicted'], "Prophet") except: print('Error: Not able to plot Prophet CV results') forecast_df_folds = extra_concatenated['predicted'].values #print(" End of Prophet Cross Validation") print('Time Taken = %0.0f seconds' % ((time.time() - start_time))) #### Now you need to fit Prophet on the whole train data set ########## dftx = dft.head(len(dft)) model = Prophet(growth="linear") self.model = model self.model.fit(dftx, **kwargs) print(" End of Prophet Fit") #num_obs_folds = df_cv.groupby('cutoff')['ds'].count() # https://stackoverflow.com/questions/54405704/check-if-all-values-in-dataframe-column-are-the-same #a = num_obs_folds.to_numpy() #all_equal = (a[0] == a).all() #if not all_equal: #print("WARNING: All folds did not have the same number of observations in the validation sets.") #print("Num Test Obs Per fold") #print(num_obs_folds) #rmse_folds = [] #norm_rmse_folds = [] #forecast_df_folds = [] #df_cv_grouped = df_cv.groupby('cutoff') #for (_, loop_df) in df_cv_grouped: # rmse, norm_rmse = print_dynamic_rmse(loop_df['y'], loop_df['yhat'], dft['y']) # rmse_folds.append(rmse) # norm_rmse_folds.append(norm_rmse) # 
forecast_df_folds.append(loop_df) # print(f"RMSE Folds: {rmse_folds}") # print(f"Norm RMSE Folds: {norm_rmse_folds}") # print(f"Forecast DF folds: {forecast_df_folds}") # forecast = self.predict(simple=False, return_train_preds=True) # #### We are going to plot Prophet's forecasts differently since it is better # dfa = plot_prophet(dft, forecast); # # Prophet makes Incredible Predictions Charts! # ### There can't be anything simpler than this to make Forecasts! # #self.model.plot(forecast); # make sure to add semi-colon in the end to avoid plotting twice # # Also their Trend, Seasonality Charts are Spot On! # try: # self.model.plot_components(forecast) # except: # print('Error in FB Prophet components forecast. Continuing...') #rmse, norm_rmse = print_dynamic_rmse(dfa['y'], dfa['yhat'], dfa['y']) #return self.model, forecast, rmse, norm_rmse return self.model, forecast_df_folds, rmse_folds, norm_rmse_folds def refit(self, ts_df: pd.DataFrame) -> object: """ Refits an already trained model using a new dataset Useful when fitting to the full data after testing with cross validation :param ts_df The time series data to be used for fitting the model :type ts_df pd.DataFrame :rtype object """ def predict(self, testdata: Optional[pd.DataFrame] = None, forecast_period: Optional[int] = None, simple: bool = False, return_train_preds: bool = False) -> Optional[NDFrame]: """ Return the predictions :param testdata The test dataframe containing the exogenous variables to be used for prediction. :type testdata Optional[pd.DataFrame] :param forecast_period The number of periods to make a prediction for. :type forecast_period Optional[int] :param simple If True, this method just returns the predictions. If False, it will return the standard error, lower and upper confidence interval (if available) :type simple bool :param return_train_preds If True, this method just returns the train predictions along with test predictions. 
If False, it will return only test predictions :type return_train_preds bool :rtype NDFrame """ """ Return the predictions # TODO: What about future exogenous variables? # https://towardsdatascience.com/forecast-model-tuning-with-additional-regressors-in-prophet-ffcbf1777dda """ # if testdata is not None: # warnings.warn( # "Multivariate models are not supported by the AutoML prophet module." + # "Univariate predictions will be returned for now." # ) # Prophet is a Little Complicated - You need 2 steps to Forecast ## 1. You need to create a dataframe to hold the predictions which specifies datetime ## periods that you want to predict. It automatically creates one with both past ## and future dates. ## 2. You need to ask Prophet to make predictions for the past and future dates in ## that dataframe above. ## So if you had 2905 rows of data, and ask Prophet to predict for 365 periods, ## it will give you predictions of the past (2905) and an additional 365 rows ## of future (total: 3270) rows of data. ### This is where we take the first steps to make a forecast using Prophet: ## 1. Create a dataframe with datetime index of past and future dates # Next we ask Prophet to make predictions for those dates in the dataframe along with prediction intervals if self.time_interval in self.list_of_valid_time_ints: time_int = copy.deepcopy(self.time_interval) else: time_int = self.get_prophet_time_interval(for_cv=False) if self.univariate: if isinstance(testdata, int): forecast_period = testdata elif isinstance(testdata, pd.DataFrame): forecast_period = testdata.shape[0] if testdata.shape[0] != self.forecast_period: self.forecast_period = testdata.shape[0] else: forecast_period = self.forecast_period self.forecast_period = forecast_period future = self.model.make_future_dataframe( periods=self.forecast_period, freq=time_int) else: if isinstance(testdata, int) or testdata is None: print( "(Error): Model is Multivariate, hence test dataframe must be provided for prediction." 
) return None elif isinstance(testdata, pd.DataFrame): forecast_period = testdata.shape[0] if testdata.shape[0] != self.forecast_period: self.forecast_period = testdata.shape[0] future = self.prep_col_names_for_prophet(ts_df=testdata, test=True) print('Building Forecast dataframe. Forecast Period = %d' % self.forecast_period) ### This will work in both univariate and multi-variate cases now ###### forecast = self.model.predict(future) # Return values for the forecast period only if simple: if return_train_preds: forecast = forecast['yhat'] else: if forecast_period is None: forecast = forecast['yhat'] else: forecast = forecast.iloc[-forecast_period:]['yhat'] else: if return_train_preds: forecast = forecast else: if forecast_period is None: forecast = forecast['yhat'] else: forecast = forecast.iloc[-forecast_period:] return forecast # TODO: Update: This method will not be used in CV since it is in D always. # Hence Remove the 'for_cv' argument def get_prophet_time_interval(self, for_cv: bool = False) -> str: """ Returns the time interval in Prophet compatible format :param for_cv If False, this will return the format needed to make future dataframe (for univariate analysis) If True, this will return the format needed to be passed to the cross-validation object """ if self.time_interval in ['months', 'month', 'm']: time_int = 'M' elif self.time_interval in ['days', 'daily', 'd']: time_int = 'D' elif self.time_interval in ['weeks', 'weekly', 'w']: time_int = 'W' # TODO: Add time_int for other options if they are different for CV and for future forecasts elif self.time_interval in ['qtr', 'quarter', 'q']: time_int = 'Q' elif self.time_interval in ['years', 'year', 'annual', 'y', 'a']: time_int = 'Y' elif self.time_interval in ['hours', 'hourly', 'h']: time_int = 'H' elif self.time_interval in ['minutes', 'minute', 'min', 'n']: time_int = 'M' elif self.time_interval in ['seconds', 'second', 'sec', 's']: time_int = 'S' else: time_int = 'W' return time_int def 
prep_col_names_for_prophet(self, ts_df: pd.DataFrame, test: bool = False) -> pd.DataFrame: """ Renames the columns of the input dataframe to the right format needed by Prophet Target is renamed to 'y' and the time column is renamed to 'ds' # TODO: Complete docstring """ if self.time_col not in ts_df.columns: #### This happens when time_col is not found but it's actually the index. In that case, reset index data = ts_df.reset_index() else: data = ts_df.copy(deep=True) if self.time_col not in data.columns: print( "(Error): You have not provided the time_column values. This will result in an error" ) if test is False: data = data.rename(columns={ self.time_col: 'ds', self.original_target_col: 'y' }) else: data = data.rename(columns={self.time_col: 'ds'}) return data
# Download the workbook from blob storage into an in-memory buffer.
# The context manager releases the buffer even when the download or the
# Excel parse raises (the original only closed it on the success path).
with BytesIO() as byte_stream:
    blobservice.get_blob_to_stream(container_name='htflaskcontainer',
                                   blob_name='asgdu.xlsx',
                                   stream=byte_stream)
    byte_stream.seek(0)
    ser = pd.read_excel(byte_stream, index_col=0)

# Scale the first 21 rows of the first four columns by 1000.
# A vectorised multiply replaces the per-element apply(lambda ...).
for i in range(0, 4):
    ser.iloc[:21, i] = ser.iloc[:21, i] * 1000
    print(i)

######################## FBPROPHET ####################
# NOTE: revdf initially aliases ser, so the 'ds' column is also added to ser
# (kept as in the original); rename() then returns a new frame.
revdf = ser
revdf['ds'] = revdf.index
revdf = revdf.rename(columns={"Total Sum of Revenue": 'y'})

my_model = Prophet(interval_width=0.95, changepoint_prior_scale=4)
my_model.fit(revdf[['ds', 'y']])

# Forecast six month-start periods beyond the history.
future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
forecast = my_model.predict(future_dates)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

from sklearn.metrics import mean_squared_error

# In-sample RMSE: compare the history against the matching forecast rows.
rms = np.sqrt(
    mean_squared_error(revdf['y'], forecast['yhat'][:len(revdf['y'])]))
adrf = forecast['yhat'].tail(6)
my_model.plot(forecast, uncertainty=True)

########################### predicting FTE ##################################
A = ser['Total Sum of BFTE']
from statsmodels.tsa.seasonal import seasonal_decompose

# NOTE(review): newer statsmodels releases name this argument `period`
# instead of `freq`; confirm against the installed version before changing.
result = seasonal_decompose(A, model='additive', freq=12)
fig = result.plot()
from statsmodels.tsa.stattools import adfuller
# Give the two columns their descriptive headers.
furniture.columns = ["Order Date", "Sales"]

# Descriptive plot of the raw sales series.
furniture["Sales"].plot(figsize=(15, 6))
plt.show()

# Persist the cleaned input data.
furniture.to_csv(r'C:\Users\Gebruiker\Desktop\Data\Output.csv',
                 sep=',',
                 index=False)

# --- Prophet section ---
# Prophet requires the columns to be called 'ds' (date) and 'y' (value).
furniture = furniture.rename(columns={'Order Date': 'ds', 'Sales': 'y'})

furniture_model = Prophet(interval_width=0.95)
furniture_model.fit(furniture)

# Extend the history by 12 month-end periods, then predict over all dates.
future_frame = furniture_model.make_future_dataframe(periods=12, freq='M')
furniture_forecast = furniture_model.predict(future_frame)

plt.figure(figsize=(18, 6))
furniture_model.plot(furniture_forecast, xlabel='Date', ylabel='Sales')
plt.title('Furniture Sales')

# Show the forecast row for one specific date.
is_target_date = furniture_forecast['ds'] == "2018/1/31"
furniture_forecast_1 = furniture_forecast.loc[is_target_date]
print(furniture_forecast_1)

furniture_forecast.to_csv(r'C:\Users\Gebruiker\Desktop\Data\Output_f.csv',
                          sep=',',
                          index=False)
def train_test_prophet(self):
    '''
    Fit a Prophet model per (device, node) pair and forecast on the test set.

    1. The model is trained on the train split and predicts on the test split.
    2. growth = 'linear' is used. Logistic growth (appropriate for this
       problem) needs domain inputs such as the cap and floor of the
       Application Available, which should provide better results.
    3. dev_node is the whole dataframe for a specific device/node pair and has
       2 columns ['ds', 'y'], where ds = dates, y = application_available.

    Reads ``self.original_data[device][node]['train' | 'test']``, whose
    ``memory``, ``date`` and ``total`` attributes are converted to lists.
    Pairs with two or fewer training points are not modelled.

    NOTE(review): the original docstring says forecasts, model and future
    dates are returned, but no return statement is present in this block;
    it currently returns None.
    '''
    pred_prophet = {}
    for device in self.original_data:
        pred_prophet[device] = {}
        for node in self.original_data[device]:
            splits = self.original_data[device][node]
            app_data_train = list(splits['train'].memory)
            app_data_test = list(splits['test'].memory)
            date_data_train = list(splits['train'].date)
            date_data_test = list(splits['test'].date)
            total_data_test = list(splits['test'].total)  # currently unused

            pred_prophet[device][node] = {
                'expected': [],
                'lower': [],
                'upper': [],
                'error': [],
                'history': list(app_data_train),
                'history_date': list(date_data_train),
                'forecast': list(app_data_train),
                'total': list(splits['train'].total),
            }

            # Bug fix: this frame used to be assigned to `pred_prophet`,
            # replacing the accumulator dict with a DataFrame and breaking
            # the next `pred_prophet[device][node] = ...` assignment.
            dev_node = pd.DataFrame({
                'ds': date_data_train,
                'y': app_data_train
            })
            prediction = ()  # currently unused
            train = pd.DataFrame({
                'ds': date_data_train,
                'y': app_data_train
            })
            test = pd.DataFrame({'ds': date_data_test, 'y': app_data_test})

            if len(app_data_train) > 2:
                model = Prophet(growth='linear',
                                changepoints=None,
                                n_changepoints=25,
                                changepoint_range=0.8,
                                yearly_seasonality=False,
                                weekly_seasonality=True,
                                daily_seasonality=False,
                                holidays=None,
                                seasonality_mode='multiplicative',
                                seasonality_prior_scale=0.1,
                                holidays_prior_scale=0.1,
                                changepoint_prior_scale=0.05,
                                mcmc_samples=0,
                                interval_width=0.95,
                                uncertainty_samples=10)
                trained_model = model.fit(train)
                forecast = trained_model.predict(test)
                print("forecasting", forecast)

                # Weekly future dates strictly after the last test date,
                # limited to the first 54 rows and re-indexed from zero.
                future_weeks = model.make_future_dataframe(
                    periods=100, freq='W', include_history=False)
                future_weeks = future_weeks.loc[
                    future_weeks.ds > test.ds.max()]
                future_weeks = future_weeks.head(54).reset_index().drop(
                    ['index'], axis=1)
def stock():
    """Show the stock-forecast form (GET) or fit Prophet and show the result.

    On GET the selection form is rendered with the four ticker dictionaries.
    Otherwise the posted market/code is resolved to a company and ticker,
    closing prices for the requested learning period (in years) are
    downloaded, a Prophet model is fit, and the forecast plot is saved to
    ``static/img/stock.png`` before the result template is rendered.
    """
    menu = {
        'ho': 0, 'da': 0, 'ml': 10, 'se': 0, 'co': 0, 'cg': 0,
        'cr': 0, 'wc': 0, 'cf': 0, 'ac': 0, 're': 1, 'cu': 0
    }
    if request.method == 'GET':
        return render_template('regression/stock.html',
                               menu=menu,
                               weather=get_weather(),
                               kospi=kospi_dict,
                               kosdaq=kosdaq_dict,
                               nyse=nyse_dict,
                               nasdaq=nasdaq_dict)

    # Resolve the submitted market to (ticker code, company name). The two
    # Korean markets get a suffix appended to the code.
    market = request.form['market']
    if market == 'KS':
        code = request.form['kospi_code']
        company = kospi_dict[code]
        code += '.KS'
    elif market == 'KQ':
        code = request.form['kosdaq_code']
        company = kosdaq_dict[code]
        code += '.KQ'
    elif market == 'NY':
        code = request.form['nyse_code']
        company = nyse_dict[code]
    else:
        code = request.form['nasdaq_code']
        company = nasdaq_dict[code]

    learn_period = int(request.form['learn'])
    pred_period = int(request.form['pred'])
    current_app.logger.debug(
        f'{market}, {code}, {learn_period}, {pred_period}')

    # Learning window: `learn_period` years back, up to yesterday.
    today = datetime.now()
    start_learn = today - timedelta(days=learn_period * 365)
    end_learn = today - timedelta(days=1)

    stock_data = pdr.DataReader(code,
                                data_source='yahoo',
                                start=start_learn,
                                end=end_learn)
    current_app.logger.info(f"get stock data: {company}({code})")

    df = pd.DataFrame({'ds': stock_data.index, 'y': stock_data.Close})
    df.reset_index(inplace=True)
    # reset_index() turns the index into a column; drop it when it is
    # called 'Date'. Only a missing column is tolerated (was a bare except).
    try:
        del df['Date']
    except KeyError:
        current_app.logger.error('Date error')

    model = Prophet(daily_seasonality=True)
    model.fit(df)
    future = model.make_future_dataframe(periods=pred_period)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    img_file = os.path.join(current_app.root_path, 'static/img/stock.png')
    fig.savefig(img_file)
    # The file's mtime is passed to the template.
    mtime = int(os.stat(img_file).st_mtime)

    # NOTE(review): the GET branch calls get_weather() while this branch
    # calls get_weather_main() - confirm the difference is intended.
    return render_template('regression/stock_res.html',
                           menu=menu,
                           weather=get_weather_main(),
                           mtime=mtime,
                           company=company,
                           code=code)
def ts_outliers(y_df, t_col, y_col, coef=3.0, verbose=False, replace=False, ignore_dates=None, lbl_dict=None, r_val=1.0):
    """
    Find outliers in y_col, a time series, by applying an IQR filter to the
    residuals of a Prophet in-sample fit. Assumes y_col >= 0.

    :param y_df: DF with y_col (data) and t_col
    :param t_col: time column name.
    :param y_col: data column
    :param coef: IQR coefficient
    :param verbose: verbose
    :param replace: if True replace the outlier value(s) using the Prophet in-sample forecast.
                    If False, set the outlier to nan
    :param ignore_dates: do not adjust outliers for dates in this list
    :param lbl_dict: info dict (context), only used in messages and file names
    :param r_val: r_val = 1 replaces by the yhat_upr/yhat_lwr value, r_val = 0 replaces by yhat.
                  In between, a weighted avg
    :return: (DF, err, off). DF has either nan or fitted values in the adjusted outliers,
             err is the rounded percentage of outlier rows and off holds the adjusted rows
             (verbose only, else None). With 10 or fewer rows the input DF is returned
             unchanged with err = nan; when Prophet raises ValueError the DF is None.
    """
    if len(y_df) <= 10:
        su.my_print(str(os.getpid()) + ' WARNING: not enough points for outlier detection: ' + str(len(y_df)))
        return y_df, np.nan, None

    # look for outliers on a copy that uses Prophet's column names
    _y_df = y_df.copy()
    _y_df.rename(columns={t_col: 'ds', y_col: 'y'}, inplace=True)
    _y_df.reset_index(inplace=True, drop=True)
    try:
        if verbose:
            m = Prophet(changepoint_range=0.9)
            m.fit(_y_df[['ds', 'y']])
        else:
            with su.suppress_stdout_stderr():
                m = Prophet(changepoint_range=0.9)
                m.fit(_y_df[['ds', 'y']])
    except ValueError:
        # NOTE(review): the message announces the original DF and a '_prophet_df.par' file,
        # while the code returns None and saves to '~/my_tmp/_y_df' - confirm which is intended.
        su.my_print(str(os.getpid()) + ' ERROR: prophet err: returning original DF. Data len: ' + str(len(_y_df)) + ' Saving to ' + '~/my_tmp/_prophet_df.par')
        _y_df.rename(columns={'ds': t_col, 'y': y_col}, inplace=True)
        save_df(_y_df, '~/my_tmp/_y_df')
        return None, np.nan, None

    # in-sample forecast only (no future periods)
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)
    y_vals = _y_df['y'].copy()  # they will be filtered later
    _y_df['yhat'] = forecast['yhat']
    _y_df['resi'] = _y_df['y'] - _y_df['yhat']

    # use iqr or median filter
    # using Prophet's interval_width does not work as it is a quantile,
    # and about the same number of outliers is always found on avg ~ len * (1 - interval_width)
    upr, lwr = iqr_filter(_y_df['resi'], coef=coef, q_lwr=0.25, q_upr=0.75)  # iqr
    # upr, lwr = median_filter(_y_df['resi'], coef=coef)  # median filter
    _y_df['yhat_upr'] = forecast['yhat'] + upr
    _y_df['yhat_lwr'] = forecast['yhat'] + lwr
    _y_df.rename(columns={'ds': t_col, 'y': y_col}, inplace=True)

    # no outlier if yhat_lwr <= y <= yhat_upr
    _y_df['is_outlier'] = (y_vals > _y_df['yhat_upr']) | (y_vals < _y_df['yhat_lwr'])
    n_outliers = _y_df['is_outlier'].sum()
    err = np.round(100 * n_outliers / len(_y_df), 0)
    if ignore_dates is None:
        ignore_dates = list()

    off = None
    if n_outliers > 0:
        if verbose is True:
            save_df(_y_df, '~/my_tmp/outliers_DF_' + str(y_col) + '_' + str(lbl_dict))  # no outlier processing yet
        su.my_print(str(os.getpid()) + ' WARNING::column ' + y_col + ' has ' + str(len(_y_df)) + ' rows and ' + str(n_outliers) + ' outliers (' + str(err) + '%) for context ' + str(lbl_dict))
        b_dates = ~_y_df[t_col].isin(ignore_dates)  # boolean dates adjuster: when true, an outlier on that date can be adjusted
        b_adj = _y_df['is_outlier'] & b_dates       # boolean outlier adjuster: if true it is an outlier we can adjust
        if replace is False:
            # Bug fix: the previous `y_vals * (1 - b_adj) + np.nan * b_adj` evaluated to NaN for
            # EVERY row because nan * 0 is nan. mask() blanks only the adjustable outliers.
            _y_df[y_col] = y_vals.mask(b_adj)
        else:
            # keep non-outliers, pull high/low outliers towards the upper/lower bound (weighted by r_val)
            _y_df[y_col] = y_vals * (1 - b_adj) + \
                           (r_val * _y_df['yhat_upr'] + (1.0 - r_val) * _y_df['yhat']) * ((y_vals > _y_df['yhat_upr']) & b_dates) + \
                           (r_val * _y_df['yhat_lwr'] + (1.0 - r_val) * _y_df['yhat']) * ((y_vals < _y_df['yhat_lwr']) & b_dates)
        if verbose is True:
            # print outlier info: note that the values in y_col are already adjusted at this point
            off = _y_df[b_adj].copy()
            su.my_print('*************** outlier detail ************')
            print(off)

    _y_df.drop(['resi', 'yhat', 'yhat_upr', 'yhat_lwr', 'is_outlier'], axis=1, inplace=True)
    return _y_df, err, off
# Holiday table handed to Prophet: every playoff date is labelled
# 'superbowl', with a window covering the day itself and the next day.
holidays = pd.DataFrame({
    'holiday': 'superbowl',
    'ds': pd.to_datetime(playoff_dates),
    'lower_window': 0,
    'upper_window': 1,
})

# Split by position: the first `step` rows train, the next `n_test` rows test.
train = data_backup.iloc[0:step]
test = data_backup.iloc[step:step + n_test]
train.columns = ["ds", "y"]
test.columns = ["ds", "y"]
test = test[["y"]].values  # keep only the actuals as a 2-D array

m = Prophet(changepoint_range=1, interval_width=0.7, holidays=holidays)
# m.add_regressor('regressor', mode='additive')
m.fit(train)

# Daily dates starting the day after the last training date, one per test row.
last_train_day = datetime.datetime.strptime(train.iloc[-1, 0], '%Y-%m-%d')
first_future_day = last_train_day + datetime.timedelta(days=1)
future = pd.date_range(first_future_day, periods=len(test), freq='D')
future = pd.DataFrame({'ds': future})

preds = m.predict(future)
yhats = preds[['yhat']].values
# Negative predictions are clamped to zero.
yhats[yhats < 0] = 0
import pandas as pd
from fbprophet import Prophet

# One location for the payload file. The script previously wrote it to
# 'container/local_test/payload.csv' but read it back from 'payload.csv',
# so the round trip did not use the file that had just been written.
PAYLOAD_PATH = 'container/local_test/payload.csv'

df = pd.read_csv('data/shop.csv')
# 'ds' is converted from epoch seconds to datetimes.
df['ds'] = pd.to_datetime(df['ds'], unit='s')

m = Prophet()
m.fit(df)

# 1440 one-minute steps after the end of the history, without the history.
future = m.make_future_dataframe(periods=1440,
                                 freq='1min',
                                 include_history=False)
future.to_csv(PAYLOAD_PATH, header=True)

# Round-trip through the CSV payload before predicting.
t_future = pd.read_csv(PAYLOAD_PATH)
forecast = m.predict(t_future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
# fig1 = m.plot(forecast)
# fig2 = m.plot_components(forecast)
# Prophet regressor/seasonality setup, fit, and future-frame preparation.
# NOTE(review): line breaks and indentation were lost in this fragment, so it
# cannot be determined from here which statements are guarded by each
# `if price_regressor == True:` - confirm against the original before reformatting.
# In statement order: regressors 'price', 'weekends', 'snap', 'cum7', 'cum_max'
# and 'cum_zero' are registered on `m` (the cum* ones with standardize=False),
# 'monthly' (period 30.5) and 'quarterly' (period 91) seasonalities are added,
# `m` is fit on `time_series`, and a 28-period future frame is created and
# given 'price', 'snap' and 'weekends' columns.
# NOTE(review): 'cum7', 'cum_max' and 'cum_zero' are assigned on `time_series`
# after m.fit(...) rather than on `future` - verify this is intended.
if price_regressor == True: m.add_regressor('price') m.add_regressor('weekends') m.add_regressor('snap') m.add_regressor('cum7', standardize=False) # m.add_regressor('cum14') # m.add_regressor('cum28') # m.add_regressor('cum56') m.add_regressor('cum_max', standardize=False) m.add_regressor('cum_zero', standardize=False) m.add_seasonality(name='monthly', period=30.5, fourier_order=4) m.add_seasonality(name='quarterly', period=91, fourier_order=4) # new m.fit(time_series) future = m.make_future_dataframe(periods=28) if price_regressor == True: future['price'] = prices.iloc[i, start_date:].values future['snap'] = snap[i, (start_date) - 1:] future['weekends'] = weekends[start_date - 1:] time_series['cum7'] = cum7[i, (start_date - 1):] # time_series['cum14'] = cum14[i, (start_date-1):] # time_series['cum28'] = cum28[i, (start_date-1):] # time_series['cum56'] = cum56[i, (start_date-1):] time_series['cum_max'] = cum_max[i, (start_date - 1):] time_series['cum_zero'] = cum_zero[i, (start_date - 1):]
name="stock_open")) fig.add_trace( go.Scatter(x=data["Date"], y=data["Close"], name="stock_close")) fig.layout.update(title_text="Time Series Data", xaxis_rangeslider_visible=True) st.plotly_chart(fig) plot_raw_data() #forecasting with facebookprophet df_train = data[['Date', 'Close']] df_train = df_train.rename(columns={"Date": "ds", "Close": "y"}) m = Prophet() m.fit(df_train) future = m.make_future_dataframe(periods=period) forecast = m.predict(future) st.subheader("Forecast Data") st.write(forecast.tail()) st.write("Forecast Data") fig1 = plot_plotly(m, forecast) st.plotly_chart(fig1) st.write("Forecast Components") fig2 = m.plot_components(forecast) st.write(fig2)
def create_prophet_m(source_name, z1, delay):
    """Fit Prophet on ``z1.bw`` for a real-time forecast and a hold-out test.

    Two models are fit: one on the full series, which forecasts ``delay``
    hourly periods ahead, and one on everything except the last ``delay``
    observations, which is scored against that hold-out.

    :param source_name: label printed with the metrics and used as the index
                        of the metrics frame
    :param z1: frame whose ``bw`` column holds the series
               (assumes its index holds the timestamps - TODO confirm)
    :param delay: number of trailing observations held out for testing and
                  number of hourly periods forecast ahead
    :return: tuple ``(df, model, forecast, pred_df, pred_r)`` - metrics frame
             (empty when no test row could be matched), the model fit on the
             train split, its forecast, the merged actual/predicted test rows
             and the real-time predictions
    """
    import math

    train_end_index = len(z1.bw) - delay
    train_df = z1.bw.iloc[0:train_end_index]
    full_df = z1.bw.iloc[0:len(z1)]
    test_df = z1.bw.iloc[train_end_index:len(z1)]

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ['ds', 'y']

    #--- removing outliers in trainset ---#
    # When the maximum is far above the median, values at or above twice the
    # median are blanked out in the training data.
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    full_df = full_df.reset_index()
    full_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']

    ##-- Realtime prediction --##
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # rows after the history are the real-time forecast
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[len(z1):(len(z1) + delay)])
    pred_r = pred_r.reset_index()

    ##-- Hold-out evaluation --##
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:len(z1)])
    pred = pred.reset_index()

    # align predictions with the actuals on the timestamp; unmatched rows drop out
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()

        # Bug fix: these were computed with pred_df.quantile(...), i.e. over
        # every column of the frame (including the datetime 'ds' column),
        # instead of over the absolute-percentage-error column.
        min_error_rate = pred_df['APE'].quantile(0) / 100
        max_error_rate = pred_df['APE'].quantile(1) / 100
        median_error_rate = pred_df['APE'].quantile(.50) / 100
        std_MAPE = math.sqrt(((pred_df.APE - MAPE)**2).mean())

        print("App name:", source_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over the rows below the 98th percentile of APE
        q98 = pred_df['APE'].quantile(0.98)
        mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()

        df = pd.DataFrame({'length': len(z1),
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'std_mape': std_MAPE,  # standard deviation of mape
                           'min_error_rate': min_error_rate,
                           'max_error_rate': max_error_rate,
                           'median_error_rate': median_error_rate,
                           'test_mape_98': mape_q98},
                          index=[source_name])

    return (df, model, forecast, pred_df, pred_r)
# Load the county-level data with the dates as the index.
us_loc = pd.read_csv(file_source, parse_dates=['date'], index_col=['date'])
us_loc.tail()

is_nyc = us_loc["county"] == "New York City"
new_york = us_loc[is_nyc]
new_york.tail()

# Move the dates out of the index into a regular column.
df = new_york.reset_index()
from datetime import datetime

# Keep only rows after 2020-03-16 and use Prophet's column names.
mask = (df['date'] > '2020-03-16')
df = df.loc[mask].rename(columns={'date': 'ds', 'cases': 'y'})

# creating the predictions
m = Prophet(mcmc_samples=300)
m.fit(df)
future = m.make_future_dataframe(periods=36, freq='D')

# The forecast itself comes from a second model with a 95% interval width.
interval_model = Prophet(interval_width=0.95)
interval_model.fit(df)
forecast = interval_model.predict(future)
fig = m.plot_components(forecast)

# Creating the all new cases chart: difference between consecutive rows.
new_york_new_cases = new_york['cases'].diff()

# Move the dates out of the index into a regular column.
df1 = new_york_new_cases.reset_index()
mask = (df1['date'] > '2020-03-16')
df1 = df1.loc[mask].rename(columns={'date': 'ds', 'cases': 'y'})
# creating the predictions
def create_prophet_m(self, app_name, z1, delay=24):
    """Fit Prophet on ``z1.bw`` for a real-time forecast and a hold-out test.

    One model is fit on the full series and forecasts ``delay`` hourly
    periods ahead; a second one is fit on everything but the last ``delay``
    observations and scored against them.

    Returns ``(df, model, forecast, pred_df, pred_r)``: the metrics frame
    indexed by ``app_name`` (empty when no test row could be matched), the
    model fit on the train split, its forecast, the merged actual/predicted
    test rows and the real-time predictions.
    """
    import pandas as pd
    import pymysql
    import warnings
    warnings.filterwarnings("ignore")
    from datetime import datetime, timedelta
    import logging
    from tqdm import tqdm
    from fbprophet import Prophet
    from sklearn.metrics import mean_squared_error as mse
    import math

    def _blank_spikes(frame):
        # When the maximum is far above the median, blank out the values at
        # or above twice the upper quartile (modifies `frame` in place).
        median = frame.y.median()
        peak = frame.y.quantile(1)
        cutoff = 2 * frame.y.quantile(.75)
        if (peak - median) >= cutoff:
            frame.loc[frame.y >= cutoff, 'y'] = None

    def _fit_and_forecast(frame, horizon):
        # Fit a fresh model and predict over history plus `horizon` hours;
        # the forecast is indexed by its own 'ds' column.
        fitted = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
        fitted.fit(frame)
        horizon_frame = fitted.make_future_dataframe(periods=horizon, freq='H')
        predicted = fitted.predict(horizon_frame)
        predicted.index = predicted['ds']
        return fitted, predicted

    n_rows = len(z1)

    ### --- real-time prediction on the full series --- ###
    full_df = z1.bw.iloc[0:n_rows].reset_index()
    full_df.columns = ['ds', 'y']
    _blank_spikes(full_df)
    model_r, forecast_r = _fit_and_forecast(full_df, delay)
    pred_r = pd.DataFrame(forecast_r['yhat'][n_rows:(n_rows + delay)])
    pred_r = pred_r.reset_index()

    ### --- hold-out evaluation --- ###
    train_end_index = len(z1.bw) - delay
    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    test_df = z1.bw.iloc[train_end_index:n_rows].reset_index()
    train_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']
    _blank_spikes(train_df)

    model, forecast = _fit_and_forecast(train_df, len(test_df))
    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_rows])
    pred = pred.reset_index()

    # Pair each test row with its prediction; unmatched rows are dropped.
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        # MAPE restricted to rows below the 98th percentile of APE.
        q98 = pred_df['APE'].quantile(0.98)
        mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()
        metrics = {
            'length': n_rows,
            'test_rmse': RMSE,
            'test_mape': MAPE,
            'test_mape_98': mape_q98,
        }
        df = pd.DataFrame(metrics, index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
# Total items sold per month block.
ts = sales.groupby(["date_block_num"])["item_cnt_day"].sum()
print("ts")
print(ts.head())

# Prophet expects a date column (ds) and a value column (y): replace the
# block numbers with month-start dates, then move them into a column.
ts.index = pd.date_range(start='2013-01-01', end='2015-10-01', freq='MS')
ts = ts.reset_index()
# Fix the column names.
ts.columns = ['ds', 'y']
print("before modeling")
print(ts.head())

# Define the time series model.
# Parameter: the data has a yearly cycle.
model = Prophet(growth='linear', yearly_seasonality=True)
model.fit(ts)

# Forecast one month-start period past the history
# (the original comment reads "predict 2017/11").
future = model.make_future_dataframe(periods=1, freq='MS')
forecast = model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
print("forecast")
print(forecast.head())

# The last row is the forecast period
# (the original comment reads "2018/11 is the last period").
forecast_value = forecast['yhat'].iloc[-1]
sales_201411["result"] = sales_201411["percentage"] * forecast_value
print("calculation")
print(sales_201411.head())