def test_fit_changepoint_not_in_history(self):
    train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')].copy()
    # shift only the target column; adding 20 to the whole slice would also hit 'ds'
    train.loc[train['ds'] > '2014-01-01', 'y'] += 20
    future = pd.DataFrame({'ds': DATA['ds']})
    forecaster = Prophet(changepoints=['2013-06-06'])
    forecaster.fit(train)
    forecaster.predict(future)
def prophetForecast(rawData, startDate, modelDir, partitions):
    """Forecasting with fbprophet"""
    from fbprophet import Prophet
    from fbprophet.diagnostics import cross_validation

    partitions = int(partitions)
    # initialize the model
    prophet = Prophet()
    # attach hourly timestamps to the input data
    dates = pd.date_range(start=startDate, periods=len(rawData), freq="H")
    input_df = pd.DataFrame(rawData, columns=["y", "temp"])
    input_df["ds"] = dates.to_pydatetime()
    input_df.to_csv(pJoin(modelDir, "prophetin.csv"))
    # give prophet the input data
    with suppress_stdout_stderr():
        prophet.fit(input_df)
    # determine partition length for the cross-validation
    total_hours = len(input_df.ds)
    hp = total_hours // partitions  # horizon and period
    init = total_hours % partitions  # leftover hours form the initial training window
    # cross-validate with those partitions; durations must be passed as strings
    out_df = cross_validation(
        prophet,
        initial="%d hours" % init,
        horizon="%d hours" % hp,
        period="%d hours" % hp,
    )
    out_df.to_csv(pJoin(modelDir, "prophetout.csv"))
    return (list(out_df.yhat), list(out_df.yhat_lower), list(out_df.yhat_upper))
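# A minimal follow-up sketch: the cross-validation frame written above can be
# scored with fbprophet's performance_metrics helper. The rolling_window value
# and the score_prophet_cv name are assumptions for illustration, not part of
# the original pipeline.
from fbprophet.diagnostics import performance_metrics

def score_prophet_cv(out_df):
    # aggregate MSE/MAE/MAPE/coverage over a 10% rolling window of the horizon
    metrics_df = performance_metrics(out_df, rolling_window=0.1)
    return metrics_df[['horizon', 'mae', 'mape', 'coverage']]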
def test_cross_validation(self):
    m = Prophet()
    m.fit(self.__df)
    # Calculate the number of cutoff points (k)
    horizon = pd.Timedelta('4 days')
    period = pd.Timedelta('10 days')
    initial = pd.Timedelta('115 days')
    df_cv = diagnostics.cross_validation(
        m, horizon='4 days', period='10 days', initial='115 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
    self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
    self.assertTrue(min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
    dc = df_cv['cutoff'].diff()
    dc = dc[dc > pd.Timedelta(0)].min()
    self.assertTrue(dc >= period)
    self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
    # Each y in df_cv and self.__df with the same ds should be equal
    df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    self.assertAlmostEqual(
        np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
    df_cv = diagnostics.cross_validation(
        m, horizon='4 days', period='10 days', initial='135 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
    with self.assertRaises(ValueError):
        diagnostics.cross_validation(
            m, horizon='10 days', period='10 days', initial='140 days')
def add_prophet_features(df_shop):
    df = df_shop[['day', 'pays_count']].rename(columns={'day': 'ds', 'pays_count': 'y'})
    results = []
    biweek_max = df_shop.biweek_id.max()
    for m in range(biweek_max - 1, 0, -1):
        train_idx = df_shop.biweek_id >= m
        df_train = df[train_idx]
        not_null = ~df_train.y.isnull()
        if not_null.sum() < 7:
            continue
        p = Prophet().fit(df_train)
        future = p.make_future_dataframe(14, include_history=False)
        pred = p.predict(future)
        results.append(pred)
    df_res = pd.concat(results)
    df_res.columns = ['prophet_%s' % c for c in pred.columns]
    df_res = df_shop.merge(df_res, how='left', left_on='day', right_on='prophet_ds')
    del df_res['prophet_t'], df_res['prophet_ds']
    df_res.drop_duplicates('days_from_beginning', keep='last', inplace=True)
    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")
    return df_res
def get_predictions(validate, train):
    total_dates = train['date'].unique()
    result = pd.DataFrame(columns=['id', 'unit_sales'])
    problem_pairs = []
    example_items = [510052, 1503899, 2081175, 1047674, 215327, 1239746,
                     765520, 1463867, 1010755, 1473396]
    store47examples = validate.loc[(validate.store_nbr == 47)
                                   & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))
    # for name, y in validate.groupby(['item_nbr', 'store_nbr']):
    for name, y in store47examples.groupby(['item_nbr']):
        item_nbr = int(name)
        store_nbr = 47
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        CV_SIZE = 16  # if you make it bigger, fill any missing dates in cv with 0
        TRAIN_SIZE = 365
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-TRAIN_SIZE:]
        X = X[['date', 'unit_sales']]
        X.columns = ['ds', 'y']
        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue
        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        result = result.append(data[['id', 'unit_sales']])
    return (result, problem_pairs)
def test_performance_metrics(self):
    m = Prophet()
    m.fit(self.__df)
    df_cv = diagnostics.cross_validation(
        m, horizon='4 days', period='10 days', initial='90 days')
    # Aggregation level none
    df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
    self.assertEqual(
        set(df_none.columns),
        {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
    )
    self.assertEqual(df_none.shape[0], 16)
    # Aggregation level 0.2
    df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
    self.assertEqual(len(df_horizon['horizon'].unique()), 4)
    self.assertEqual(df_horizon.shape[0], 14)
    # Aggregation level all
    df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
    self.assertEqual(df_all.shape[0], 1)
    for metric in ['mse', 'mape', 'mae', 'coverage']:
        self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
    # Custom list of metrics
    df_horizon = diagnostics.performance_metrics(
        df_cv, metrics=['coverage', 'mse'],
    )
    self.assertEqual(
        set(df_horizon.columns),
        {'coverage', 'mse', 'horizon'},
    )
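# Hedged companion sketch: fbprophet also ships a plotting helper for these
# diagnostics. Plotting MAPE against horizon and the output filename are
# illustrative choices; df_cv is assumed to be a frame like the one produced
# above.
from fbprophet.plot import plot_cross_validation_metric

fig = plot_cross_validation_metric(df_cv, metric='mape')
fig.savefig('cv_mape.png')  # one point per forecast, plus the rolling mean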
def test_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(holidays=holidays)
    df = pd.DataFrame({
        'ds': pd.date_range('2016-12-20', '2016-12-31')
    })
    feats = model.make_holiday_features(df['ds'])
    # 2 columns generated (the holiday and the day before), both in range
    self.assertEqual(feats.shape, (df.shape[0], 2))
    self.assertEqual((feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [10],
    })
    feats = Prophet(holidays=holidays).make_holiday_features(df['ds'])
    # 12 columns generated even though only 8 overlap the date range
    self.assertEqual(feats.shape, (df.shape[0], 12))
def test_fit(self):
    train = pd.DataFrame({
        'ds': np.array(['2012-05-18', '2012-05-20']),
        'y': np.array([38.23, 21.25])
    })
    forecaster = Prophet(mcmc_samples=1)
    forecaster.fit(train)
def test_fit_predict(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2)
    future = DATA.tail(N // 2)
    forecaster = Prophet()
    forecaster.fit(train)
    forecaster.predict(future)
def test_fit_predict_no_seasons(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2)
    future = DATA.tail(N // 2)
    forecaster = Prophet(weekly_seasonality=False, yearly_seasonality=False)
    forecaster.fit(train)
    forecaster.predict(future)
def test_fit_predict_no_changepoints(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2)
    future = DATA.tail(N // 2)
    forecaster = Prophet(n_changepoints=0)
    forecaster.fit(train)
    forecaster.predict(future)
def test_fit_with_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
    })
    model = Prophet(holidays=holidays, uncertainty_samples=0)
    model.fit(DATA).predict()
def test_subdaily_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
    })
    m = Prophet(holidays=holidays)
    m.fit(DATA2)
    fcst = m.predict()
    self.assertEqual(sum(fcst['special_day'] == 0), 575)
def test_fit_predict_duplicates(self):
    N = DATA.shape[0]
    train1 = DATA.head(N // 2).copy()
    train2 = DATA.head(N // 2).copy()
    train2['y'] += 10
    train = train1.append(train2)
    future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
    forecaster = Prophet()
    forecaster.fit(train)
    forecaster.predict(future)
def test_override_n_changepoints(self):
    m = Prophet()
    history = DATA.head(20).copy()
    history = m.setup_dataframe(history, initialize_scales=True)
    m.history = history
    m.set_changepoints()
    self.assertEqual(m.n_changepoints, 15)
    cp = m.changepoints_t
    self.assertEqual(cp.shape[0], 15)
def test_get_zero_changepoints(self):
    m = Prophet(n_changepoints=0)
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()
    history = m.setup_dataframe(history, initialize_scales=True)
    m.history = history
    m.set_changepoints()
    cp = m.changepoints_t
    self.assertEqual(cp.shape[0], 1)
    self.assertEqual(cp[0], 0)
def hello():
    print('Hello, world!')
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])
    df.head()
    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    future.tail()
    forecast = m.predict(future)
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
    return forecast.to_json(orient='table')
def test_cross_validation_default_value_check(self):
    m = Prophet()
    m.fit(self.__df)
    # Default value of initial should be equal to 3 * horizon
    df_cv1 = diagnostics.cross_validation(
        m, horizon='32 days', period='10 days')
    df_cv2 = diagnostics.cross_validation(
        m, horizon='32 days', period='10 days', initial='96 days')
    self.assertAlmostEqual(
        ((df_cv1['y'] - df_cv2['y']) ** 2).sum(), 0.0)
    self.assertAlmostEqual(
        ((df_cv1['yhat'] - df_cv2['yhat']) ** 2).sum(), 0.0)
def test_setup_dataframe(self):
    m = Prophet()
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()
    history = m.setup_dataframe(history, initialize_scales=True)
    self.assertTrue('t' in history)
    self.assertEqual(history['t'].min(), 0.0)
    self.assertEqual(history['t'].max(), 1.0)
    self.assertTrue('y_scaled' in history)
    self.assertEqual(history['y_scaled'].max(), 1.0)
def test_logistic_floor(self):
    m = Prophet(growth='logistic')
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()
    history['floor'] = 10.
    history['cap'] = 80.
    future = DATA.tail(N // 2).copy()
    future['cap'] = 80.
    future['floor'] = 10.
    m.fit(history, algorithm='Newton')
    self.assertTrue(m.logistic_floor)
    self.assertTrue('floor' in m.history)
    self.assertAlmostEqual(m.history['y_scaled'][0], 1.)
    fcst1 = m.predict(future)
    m2 = Prophet(growth='logistic')
    history2 = history.copy()
    history2['y'] += 10.
    history2['floor'] += 10.
    history2['cap'] += 10.
    future['cap'] += 10.
    future['floor'] += 10.
    m2.fit(history2, algorithm='Newton')
    self.assertAlmostEqual(m2.history['y_scaled'][0], 1.)
    fcst2 = m2.predict(future)
    fcst2['yhat'] -= 10.
    # Check for approximate shift invariance
    self.assertTrue((np.abs(fcst1['yhat'] - fcst2['yhat']) < 1).all())
def run():
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0
    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            if str(post.amount.commodity) != "£":
                continue
            amount = amount + post.amount
        else:
            print(post.date, ",", amount)
            amount = 0
        last_post = post
    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])
    m = Prophet()
    m.fit(df)
    # build the frame of dates to predict (a one-year horizon is assumed here)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
    m.plot(forecast)
    m.plot_components(forecast)
def test_growth_init(self):
    model = Prophet(growth='logistic')
    history = DATA.iloc[:468].copy()
    history['cap'] = history['y'].max()
    history = model.setup_dataframe(history, initialize_scales=True)
    k, m = model.linear_growth_init(history)
    self.assertAlmostEqual(k, 0.3055671)
    self.assertAlmostEqual(m, 0.5307511)
    k, m = model.logistic_growth_init(history)
    self.assertAlmostEqual(k, 1.507925, places=4)
    self.assertAlmostEqual(m, -0.08167497, places=4)
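# A sketch of what the linear initialization tested above amounts to: a slope
# and offset fit through the first and last points of the scaled history.
# This mirrors Prophet's approach; treat it as an illustration, not the exact
# library source.
import numpy as np

def linear_growth_init_sketch(t, y_scaled):
    T = t[-1] - t[0]
    k = (y_scaled[-1] - y_scaled[0]) / T   # slope between the endpoints
    m = y_scaled[0] - k * t[0]             # offset so the line passes through the first point
    return k, m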
def test_get_changepoints(self):
    m = Prophet()
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()
    history = m.setup_dataframe(history, initialize_scales=True)
    m.history = history
    m.set_changepoints()
    cp = m.changepoints_t
    self.assertEqual(cp.shape[0], m.n_changepoints)
    self.assertEqual(len(cp.shape), 1)
    self.assertTrue(cp.min() > 0)
    cp_indx = int(np.ceil(0.8 * history.shape[0]))
    self.assertTrue(cp.max() <= history['t'].values[cp_indx])
def test_fourier_series_weekly(self):
    mat = Prophet.fourier_series(DATA['ds'], 7, 3)
    # These are from the R forecast package directly.
    true_values = np.array([
        0.7818315, 0.6234898, 0.9749279, -0.2225209, 0.4338837, -0.9009689,
    ])
    self.assertAlmostEqual(np.sum((mat[0] - true_values) ** 2), 0.0)
def test_fourier_series_yearly(self):
    mat = Prophet.fourier_series(DATA['ds'], 365.25, 3)
    # These are from the R forecast package directly.
    true_values = np.array([
        0.7006152, -0.7135393, -0.9998330, 0.01827656, 0.7262249, 0.6874572,
    ])
    self.assertAlmostEqual(np.sum((mat[0] - true_values) ** 2), 0.0)
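# The values asserted in the two tests above come from pairs of sin/cos terms.
# A standalone sketch of that expansion (dates are converted to days since the
# epoch; a Fourier order of 3 yields the 6 columns the tests compare against):
import numpy as np
import pandas as pd

def fourier_series_sketch(dates, period, series_order):
    # time in days since 1970-01-01
    t = np.array((dates - pd.Timestamp(1970, 1, 1)).dt.total_seconds() / (3600 * 24.0))
    return np.column_stack([
        fun(2.0 * (i + 1) * np.pi * t / period)
        for i in range(series_order)
        for fun in (np.sin, np.cos)
    ])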
def test_make_future_dataframe(self):
    N = 468
    train = DATA.head(N // 2)
    forecaster = Prophet()
    forecaster.fit(train)
    future = forecaster.make_future_dataframe(periods=3, freq='D',
                                              include_history=False)
    correct = pd.DatetimeIndex(['2013-04-26', '2013-04-27', '2013-04-28'])
    self.assertEqual(len(future), 3)
    for i in range(3):
        self.assertEqual(future.iloc[i]['ds'], correct[i])
    future = forecaster.make_future_dataframe(periods=3, freq='M',
                                              include_history=False)
    correct = pd.DatetimeIndex(['2013-04-30', '2013-05-31', '2013-06-30'])
    self.assertEqual(len(future), 3)
    for i in range(3):
        self.assertEqual(future.iloc[i]['ds'], correct[i])
def test_piecewise_linear(self):
    model = Prophet()
    t = np.arange(11.)
    m = 0
    k = 1.0
    deltas = np.array([0.5])
    changepoint_ts = np.array([5])
    y = model.piecewise_linear(t, deltas, k, m, changepoint_ts)
    y_true = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0,
                       6.5, 8.0, 9.5, 11.0, 12.5])
    self.assertEqual((y - y_true).sum(), 0.0)
    t = t[8:]
    y_true = y_true[8:]
    y = model.piecewise_linear(t, deltas, k, m, changepoint_ts)
    self.assertEqual((y - y_true).sum(), 0.0)
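# The trend evaluated above is the base rate k plus the sum of the changepoint
# adjustments active at each t, with offsets chosen to keep the line
# continuous. A vectorized sketch (an illustration consistent with the test's
# expected values, not the library source verbatim):
import numpy as np

def piecewise_linear_sketch(t, deltas, k, m, changepoint_ts):
    # indicator of which changepoints are active at each time step
    A = (t[:, None] >= changepoint_ts[None, :]) * 1.0
    k_t = k + A.dot(deltas)                    # slope after each changepoint
    m_t = m + A.dot(-changepoint_ts * deltas)  # offset keeping the trend continuous
    return k_t * t + m_t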
def test_auto_yearly_seasonality(self):
    # Should be True
    m = Prophet()
    self.assertEqual(m.yearly_seasonality, 'auto')
    m.fit(DATA)
    self.assertEqual(m.yearly_seasonality, True)
    # Should be False due to too short history
    N = 240
    train = DATA.head(N)
    m = Prophet()
    m.fit(train)
    self.assertEqual(m.yearly_seasonality, False)
    m = Prophet(yearly_seasonality=True)
    m.fit(train)
    self.assertEqual(m.yearly_seasonality, True)
def train_prophet(df, modelDir, confidence=0.99):
    # train and cache into modelDir
    m = Prophet(
        yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
    )
    with suppress_stdout_stderr():
        m.fit(df)
    # Predict the future.
    print("PREDICTING!")
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)
    # Merge in the historical data.
    forecast["y"] = df.y.astype(float)
    # Back up the model.
    forecast.to_csv(
        pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
    )
    return forecast
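# The suppress_stdout_stderr context manager used above is not defined in this
# snippet; a common file-descriptor-level implementation looks like this. It
# is an assumed helper shown for completeness, useful for silencing the Stan
# optimizer's console chatter during fitting.
import os

class suppress_stdout_stderr(object):
    def __init__(self):
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        self.save_fds = [os.dup(1), os.dup(2)]

    def __enter__(self):
        # redirect stdout (fd 1) and stderr (fd 2) to /dev/null
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)

    def __exit__(self, *_):
        # restore the original file descriptors and close the temporary ones
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        for fd in self.null_fds + self.save_fds:
            os.close(fd)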
def test_piecewise_logistic(self):
    model = Prophet()
    t = np.arange(11.)
    cap = np.ones(11) * 10
    m = 0
    k = 1.0
    deltas = np.array([0.5])
    changepoint_ts = np.array([5])
    y = model.piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
    y_true = np.array([5.000000, 7.310586, 8.807971, 9.525741, 9.820138,
                       9.933071, 9.984988, 9.996646, 9.999252, 9.999833,
                       9.999963])
    self.assertAlmostEqual((y - y_true).sum(), 0.0, places=5)
    t = t[8:]
    y_true = y_true[8:]
    cap = cap[8:]
    y = model.piecewise_logistic(t, cap, deltas, k, m, changepoint_ts)
    self.assertAlmostEqual((y - y_true).sum(), 0.0, places=5)
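# For reference, the logistic trend checked above is cap / (1 + exp(-k_t (t - m_t))),
# where the gamma offsets keep the curve continuous at each changepoint. A
# sketch consistent with the expected values in the test (illustrative, not
# the library source verbatim):
import numpy as np

def piecewise_logistic_sketch(t, cap, deltas, k, m, changepoint_ts):
    k_cum = np.concatenate((np.atleast_1d(k), np.cumsum(deltas) + k))
    gammas = np.zeros(len(changepoint_ts))
    for i, t_s in enumerate(changepoint_ts):
        # offset that makes the curve continuous across changepoint t_s
        gammas[i] = (t_s - m - np.sum(gammas)) * (1.0 - k_cum[i] / k_cum[i + 1])
    k_t, m_t = k * np.ones_like(t), m * np.ones_like(t)
    for s, t_s in enumerate(changepoint_ts):
        k_t[t >= t_s] += deltas[s]
        m_t[t >= t_s] += gammas[s]
    return cap / (1 + np.exp(-k_t * (t - m_t)))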
def test_added_regressors(self):
    m = Prophet()
    m.add_regressor('binary_feature', prior_scale=0.2)
    m.add_regressor('numeric_feature', prior_scale=0.5)
    m.add_regressor('numeric_feature2', prior_scale=0.5, mode='multiplicative')
    m.add_regressor('binary_feature2', standardize=True)
    df = DATA.copy()
    df['binary_feature'] = ['0'] * 255 + ['1'] * 255
    df['numeric_feature'] = range(510)
    df['numeric_feature2'] = range(510)
    with self.assertRaises(ValueError):
        # Require all regressors in df
        m.fit(df)
    df['binary_feature2'] = [1] * 100 + [0] * 410
    m.fit(df)
    # Check that standardizations are correctly set
    self.assertEqual(
        m.extra_regressors['binary_feature'],
        {
            'prior_scale': 0.2,
            'mu': 0,
            'std': 1,
            'standardize': 'auto',
            'mode': 'additive',
        },
    )
    self.assertEqual(m.extra_regressors['numeric_feature']['prior_scale'], 0.5)
    self.assertEqual(m.extra_regressors['numeric_feature']['mu'], 254.5)
    self.assertAlmostEqual(
        m.extra_regressors['numeric_feature']['std'], 147.368585, places=5)
    self.assertEqual(m.extra_regressors['numeric_feature2']['mode'], 'multiplicative')
    self.assertEqual(m.extra_regressors['binary_feature2']['prior_scale'], 10.)
    self.assertAlmostEqual(
        m.extra_regressors['binary_feature2']['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(
        m.extra_regressors['binary_feature2']['std'], 0.3974183, places=5)
    # Check that standardization is done correctly
    df2 = m.setup_dataframe(df.copy())
    self.assertEqual(df2['binary_feature'][0], 0)
    self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
    self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
    # Check that feature matrix and prior scales are correctly constructed
    seasonal_features, prior_scales, component_cols, modes = (
        m.make_all_seasonality_features(df2))
    self.assertEqual(seasonal_features.shape[1], 30)
    names = ['binary_feature', 'numeric_feature', 'binary_feature2']
    true_priors = [0.2, 0.5, 10.]
    for i, name in enumerate(names):
        self.assertIn(name, seasonal_features)
        self.assertEqual(sum(component_cols[name]), 1)
        self.assertEqual(
            sum(np.array(prior_scales) * component_cols[name]),
            true_priors[i],
        )
    # Check that forecast components are reasonable
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
        'numeric_feature2': [10],
    })
    with self.assertRaises(ValueError):
        m.predict(future)
    future['binary_feature2'] = 0
    fcst = m.predict(future)
    self.assertEqual(fcst.shape[1], 37)
    self.assertEqual(fcst['binary_feature'][0], 0)
    self.assertAlmostEqual(
        fcst['extra_regressors_additive'][0],
        fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['extra_regressors_multiplicative'][0],
        fcst['numeric_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['additive_terms'][0],
        fcst['yearly'][0] + fcst['weekly'][0] + fcst['extra_regressors_additive'][0],
    )
    self.assertAlmostEqual(
        fcst['multiplicative_terms'][0],
        fcst['extra_regressors_multiplicative'][0],
    )
    self.assertAlmostEqual(
        fcst['yhat'][0],
        fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0])
        + fcst['additive_terms'][0],
    )
    # Check that the fit works with a constant extra regressor at 0
    df['constant_feature'] = 0
    m = Prophet()
    m.add_regressor('constant_feature')
    m.fit(df)
    self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
import pandas as pd
import plotly.offline as py
import plotly.io as pio
from fbprophet import Prophet
from fbprophet.plot import plot_plotly

pio.renderers.default = "png"

df = pd.read_csv("example_wp_log_peyton_manning.csv")
m = Prophet()
m.fit(df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)

# py.init_notebook_mode()
fig = plot_plotly(m, forecast)  # This returns a plotly Figure
py.iplot(fig)
fig.show()
df_humidity = dfall_humidity_1
dfall_pressure_1 = dfall_beijing_day_pressure.rename(columns={
    'date': 'ds',
    'pressure': 'y'
})
# dfall['y'] = np.log(dfall['y'])
# min-max scale the pressure series into [0, 1]
dfall_pressure_1['y'] = (dfall_pressure_1['y'] - dfall_pressure_1['y'].min()) / (
    dfall_pressure_1['y'].max() - dfall_pressure_1['y'].min())
dfall_pressure_1['ds'] = pd.to_datetime(dfall_pressure_1['ds'])
df_pressure = dfall_pressure_1

m_temperature = Prophet(daily_seasonality=False, weekly_seasonality=False,
                        changepoint_prior_scale=0.01)
m_temperature.fit(df_temperature)
m_humidity = Prophet(daily_seasonality=False, weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_humidity.fit(df_humidity)
m_pressure = Prophet(daily_seasonality=False, weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_pressure.fit(df_pressure)

future_temperature = m_temperature.make_future_dataframe(periods=180)
future_temperature.tail()
# Python
import pandas as pd
from fbprophet import Prophet

# Read in the dataset
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv')
df.head()

# Fit the model
m = Prophet()
m.fit(df)

# Build the frame of dates to predict; periods=365 extends 365 days past the history
future = m.make_future_dataframe(periods=365)
future.tail()

# Predict
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Plot the forecast
m.plot(forecast)

# Plot the forecast components: trend, weekly effect, and yearly effect
m.plot_components(forecast)

print(forecast.columns)
# Index(['ds', 'trend', 'trend_lower', 'trend_upper', 'yhat_lower', 'yhat_upper',
#        'additive_terms', 'additive_terms_lower', 'additive_terms_upper',
#        'multiplicative_terms', 'multiplicative_terms_lower',
#        'multiplicative_terms_upper', 'weekly', 'weekly_lower', 'weekly_upper',
def test_seasonality_modes(self):
    # Model with holidays, seasonalities, and extra regressors
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    m = Prophet(seasonality_mode='multiplicative', holidays=holidays)
    m.add_seasonality('monthly', period=30, mode='additive', fourier_order=3)
    m.add_regressor('binary_feature', mode='additive')
    m.add_regressor('numeric_feature')
    # Construct seasonal features
    df = DATA.copy()
    df['binary_feature'] = [0] * 255 + [1] * 255
    df['numeric_feature'] = range(510)
    df = m.setup_dataframe(df, initialize_scales=True)
    m.history = df.copy()
    m.set_auto_seasonalities()
    seasonal_features, prior_scales, component_cols, modes = (
        m.make_all_seasonality_features(df))
    self.assertEqual(sum(component_cols['additive_terms']), 7)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 29)
    self.assertEqual(
        set(modes['additive']),
        {'monthly', 'binary_feature', 'additive_terms',
         'extra_regressors_additive'},
    )
    self.assertEqual(
        set(modes['multiplicative']),
        {'weekly', 'yearly', 'xmas', 'numeric_feature',
         'multiplicative_terms', 'extra_regressors_multiplicative',
         'holidays'},
    )
finish_date = datetime.strptime("2011-12-31 23:59", "%Y-%m-%d %H:%M")
number_tower = "Data_1st"
query = '''SELECT Date, %s FROM Data WHERE Date >= \'%s\' AND Date <= \'%s\';''' % (
    str(number_tower), start_date, finish_date)
conn = sqlite3.connect("mydatabase.db")
cursor = conn.cursor()
dataset = pd.read_sql_query(query, conn)
dataset.Date = dataset["Date"].apply(pd.to_datetime)
print(dataset)
predictions = 180
# bring the dataframe into the format Prophet expects
df = dataset
print(df.head())
df.columns = ['ds', 'y']
# hold out the last `predictions` points from the training set to measure quality on them
train_df = df[:-predictions]
m = Prophet()
m.fit(train_df)
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
plt.show()
def Table_generator():
    import pandas as pd
    import numpy as np
    from matplotlib import pyplot as plt
    from pylab import rcParams
    from sklearn import linear_model
    from fbprophet import Prophet
    from numpy import inf

    filepath = 'C:/weekly sales and labour cost for all shops 2013 to 20177.csv'
    df = pd.read_csv(filepath)
    df2 = df[df.sales_status != 0]
    nulldetect = df2.week_no.isnull()
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2
    len_week1 = []
    for i in df2.shop_id:
        len_week = len(df2.week_no[df2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    d = {'shop_id': df2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()
    dtt = pd.DataFrame(index=list(range(1, 53)),
                       columns=d2.shop_id[d2.len_of_weeks < 52].values)
    # shops with incomplete histories: fill from cluster averages
    for uu in d2.shop_id[d2.len_of_weeks < 52].values:
        df3 = df2[df2.shop_id == uu]
        dff4 = df3.set_index('start_date')
        a = df3[['week_no', 'shop_id', 'sales_amount', 'transactions',
                 'total_tax', 'item_sold']]
        dates = pd.date_range(dff4.index[-1], periods=52, freq='W-MON')
        dates1 = pd.DataFrame(dates)
        dates2 = pd.date_range(dff4.index[0], periods=len(dff4.index), freq='W-MON')
        mean_week_item = []
        for i in dates.week:
            mean_item_sold = a.item_sold[a.week_no == i].mean()
            mean_week_item.append(mean_item_sold)
        mean_week_item1 = pd.DataFrame(mean_week_item)
        trans_week_item = []
        for i1 in dates.week:
            mean_trans_sold = a.transactions[a.week_no == i1].mean()
            trans_week_item.append(mean_trans_sold)
        sales_week = []
        for ii1 in dates.week:
            mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
            sales_week.append(mean_sales_sold)
        dd = {'date': dates, 'weeks_no': dates.week, 'sales': sales_week,
              'mean_item': mean_week_item, 'mean_trans': trans_week_item}
        dd1 = pd.DataFrame(dd)
        dff1 = df[df.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        from sklearn.cluster import KMeans
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_  # clusters
        cen = kmeans_model.cluster_centers_
        y_hat1 = pd.DataFrame(y_hat)
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]
        fff = []
        for j in X_Cluster.shop_id:
            dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == j].mean()
            fff.append(dfdf)
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f4 = pd.DataFrame(fff)
        f5 = f4.drop_duplicates()
        f3['salle'] = f5.values
        Xx2 = f3[['shop_id', 'salle']]
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_  # clusters
        cen2 = kmeans_model2.cluster_centers_
        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]
        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]
        if (group_low_sales2.shop_id.values == uu).any() == True:
            cx = int(group_low_sales.sales_amount[group_low_sales.shop_id == uu].values.mean())
            trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is low sales" % uu)
            # print("Average sales per week of shop %s is" % uu, cx)
        elif (group_middle_sales2.shop_id.values == uu).any() == True:
            cx = int(group_middle_sales.sales_amount[group_middle_sales.shop_id == uu].values.mean())
            trt = group_middle_sales[group_middle_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is average sales" % uu)
            # print("Average sales per week of shop %s is" % uu, cx)
        elif (group_high_sales2.shop_id.values == uu).any() == True:
            cx = int(group_high_sales.sales_amount[group_high_sales.shop_id == uu].values.mean())
            trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
            trt2 = trt[trt.sales_amount < cx + 4000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is high sales" % uu)
            # print("Average sales per week of shop %s is" % uu, cx)
        drr = valid_cls
        itt = []
        trr = []
        sale = []
        for i3 in nullweeks:
            item = drr.item_sold[drr.week_no == i3].mean()
            trans = drr.transactions[drr.week_no == i3].mean()
            salee = drr.sales_amount[drr.week_no == i3].mean()
            itt.append(item)
            trr.append(trans)
            sale.append(salee)
        df_insert = {'sales_amountt': sale, 'ittem': itt, 'trans': trr,
                     'weeks_no': nullweeks}
        df_insert1 = pd.DataFrame(df_insert)
        forecastdf = dd1.fillna({'mean_item': df_insert1.ittem,
                                 'mean_trans': df_insert1.trans,
                                 'sales': df_insert1.sales_amountt})
        forecastdf1 = forecastdf.fillna({'mean_item': df_insert1.ittem.mean(),
                                         'mean_trans': df_insert1.trans.mean(),
                                         'sales': df_insert1.sales_amountt.mean()})
        regr3 = linear_model.LinearRegression()
        X = forecastdf1[['mean_item', 'mean_trans']]
        Y = forecastdf1.sales
        regr3.fit(X, Y)
        y_predictionss = regr3.predict(X)
        y_predictionss1 = pd.DataFrame(y_predictionss)
        pred_y = round(y_predictionss1, 2)
        # print(pred_y.values)
        forecastdf1['forecasted_sales'] = pred_y.values
        forecastdf1.sort_values('weeks_no', inplace=True)
        f = forecastdf1.set_index('weeks_no')
        dtt['shop_id'] = dtt.index.values
        dtt[[uu]] = f.forecasted_sales.values.reshape((52, 1))
    dtt1 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks < 52].values,
                        columns=list(range(1, 53)))
    for jj in dtt.index.values:
        dtt1.loc[:, jj] = dtt.loc[jj, :]
    data = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv',
        index_col='start_date', parse_dates=True)
    # shopID = input("Enter your shop id")
    df2 = data[data.sales_status != 0]
    nulldetect = df2.week_no.isnull()
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2
    len_week1 = []
    for i in df2.shop_id:
        len_week = len(df2.week_no[df2.shop_id == i])
        len_week1.append(len_week)
    len_week2 = pd.DataFrame(len_week1)
    d = {'shop_id': df2.shop_id, 'len_of_weeks': len_week1}
    d1 = pd.DataFrame(d)
    d2 = d1.drop_duplicates()
    dtt2 = pd.DataFrame(index=list(range(1, 53)),
                        columns=d2.shop_id[d2.len_of_weeks > 52].values)
    # shops with full histories: forecast with Prophet
    for j in d2.shop_id[d2.len_of_weeks >= 52].values:
        data2 = data[['sales_id', 'shop_id', 'week_no', 'sales_amount',
                      'item_sold', 'transactions', 'total_tax', 'sales_status']]
        df1 = data2[data2.shop_id == j]
        df2 = df1[df1.sales_status != 0]
        nulldetect = df1.week_no.isnull()
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        dff = df2[['sales_amount']]
        data3 = dff.reset_index()
        data4 = data3
        data5 = data4.rename(columns={'start_date': 'ds', 'sales_amount': 'y'})
        # log-transform the target; infinities from log(0) are zeroed below
        data5['y'] = np.log(data5['y'])
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        data5.set_index('ds', inplace=True)
        forecast.set_index('ds', inplace=True)
        viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
        dff.index = pd.to_datetime(dff.index)  # make sure our index is a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        mask = (forecast.index > connect_date)
        predict_df = forecast.loc[mask]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
        ii = len(dff.sales_amount) - 1
        predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
        predicted_future_sales1 = predicted_future_sales.rename(
            columns={'yhat_scaled': 'future_sales'})
        predicted_future_sales2 = predicted_future_sales1.reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales = predicted_future_sales2['future_sales']
        future_sales1 = round(future_sales, 2)
        start_date = predicted_future_sales2['index']
        predict_data = {'shop_id': int(df2.shop_id.mean()),
                        'future_sales': future_sales1,
                        'week_no': week_no,
                        'start_date': start_date}
        predict_data1 = pd.DataFrame(predict_data)
        predict_data1 = predict_data1.drop_duplicates(subset=['week_no'])
        predict_data1.sort_values('week_no', inplace=True)
        f1 = predict_data1.set_index('week_no')
        dtt2[[j]] = f1.future_sales.values.reshape((52, 1))
    dtt3 = pd.DataFrame(index=d2.shop_id[d2.len_of_weeks > 52].values,
                        columns=list(range(1, 53)))
    for qq in dtt.index.values:
        dtt3.loc[:, qq] = dtt2.loc[qq, :]
    tab = dtt1.append(dtt3)
    tab['shop_id'] = tab.index.values
    tab.sort_values('shop_id', inplace=True)
    tab_id = tab.shop_id
    tab = tab.drop('shop_id', axis=1)
    tab.insert(0, 'shop_id', tab_id)
    # writer = pd.ExcelWriter('output.xlsx')
    # tab.to_excel(writer, 'Sheet1')
    # writer.save()
    tab.to_json(path_or_buf='df.json', orient='records')
    memval2 = tab
def test_fit_predict_no_changepoints(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2)
    future = DATA.tail(N // 2)
    forecaster = Prophet(n_changepoints=0)
    forecaster.fit(train)
    forecaster.predict(future)
    forecaster = Prophet(n_changepoints=0, mcmc_samples=100)
    forecaster.fit(train)
    forecaster.predict(future)
def test_fit_predict_with_country_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
    })
    # Test with holidays and country_holidays
    model = Prophet(holidays=holidays, uncertainty_samples=0)
    model.add_country_holidays(country_name='US')
    model.fit(DATA).predict()
    # There are training holidays missing in the test set
    train = DATA.head(154)
    future = DATA.tail(355)
    model = Prophet(uncertainty_samples=0)
    model.add_country_holidays(country_name='US')
    model.fit(train).predict(future)
    # There are test holidays missing in the training set
    train = DATA.tail(355)
    future = DATA2
    model = Prophet(uncertainty_samples=0)
    model.add_country_holidays(country_name='US')
    model.fit(train).predict(future)
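# Hedged aside: on recent fbprophet versions, the holiday names actually used
# during training are exposed on the fitted model, which is handy for checking
# what the US calendar contributed alongside the custom 'seans-bday' entry.
m = Prophet(holidays=holidays, uncertainty_samples=0)
m.add_country_holidays(country_name='US')
m.fit(DATA)
print(m.train_holiday_names)  # the custom holiday plus the US holidays seen in training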
def test_fit_changepoint_not_in_history(self):
    train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')]
    future = pd.DataFrame({'ds': DATA['ds']})
    forecaster = Prophet(changepoints=['2013-06-06'])
    forecaster.fit(train)
    forecaster.predict(future)
from fbprophet import Prophet
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

df = pd.read_csv('oil_prices.csv')
date_format = '%d/%m/%Y'
df['ts'] = pd.to_datetime(df['ts'], format=date_format)
df = df.set_index(pd.DatetimeIndex(df['ts']))
df = df.rename(columns={'ts': 'ds', 'price': 'y'})

# set the uncertainty interval to 95% (the Prophet default is 80%)
my_model = Prophet(n_changepoints=10, interval_width=0.95,
                   daily_seasonality=False, weekly_seasonality=True,
                   yearly_seasonality=True, uncertainty_samples=1000)
forecast_period = 12  # number of forecasted time steps
my_model.fit(df)

# To obtain forecasts of our time series, we must provide Prophet with a new
# DataFrame containing a ds column that holds the dates for which we want predictions:
future_dates = my_model.make_future_dataframe(periods=forecast_period, freq='12MS')

# The DataFrame of future dates is then used as input to the predict method of our fitted model:
forecast = my_model.predict(future_dates)

'''
Prophet returns a large DataFrame with many interesting columns, but we subset
our output to the columns most relevant to forecasting, which are:
ds: the datestamp of the forecasted value
# print(templst)
# dfp = pd.DataFrame(templst)
temp = data3[['datep', 'pm2.5']]
temp.columns = ['ds', 'y']
temp['PRES'] = data3['PRES']
temp['DEWP'] = data3['DEWP']
temp['TEMP'] = data3['TEMP']
temp['Iws'] = data3['Iws']
# temp.y.plot()
plt.plot(temp.y)
print(temp.head())

# initialize the fbprophet model and fit the data
model = Prophet()
# model.add_regressor('PRES', standardize="auto", mode='additive')
model.add_regressor('PRES')
# model.add_regressor('DEWP')
# model.add_regressor('TEMP')
# model.add_regressor('Iws')
model.fit(temp)

# a second model to forecast the pressure series itself (kept under its own
# name so it does not overwrite the fitted pm2.5 model above)
temp_pres = data3[['datep', 'PRES']]
temp_pres.columns = ['ds', 'y']
model_pres = Prophet()
model_pres.fit(temp_pres)
# creating a separate dataframe for predicted values
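# Because PRES is registered as an extra regressor, any frame passed to
# predict must carry future PRES values. A minimal sketch of the predict-time
# step, assuming the auxiliary model_pres forecaster above supplies them; the
# hourly frequency and 72-step horizon are assumptions for illustration.
future = model.make_future_dataframe(periods=72, freq='H')
pres_forecast = model_pres.predict(future[['ds']])
future['PRES'] = pres_forecast['yhat'].values  # the regressor column is required
pm25_forecast = model.predict(future)
print(pm25_forecast[['ds', 'yhat']].tail())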
def make_model(self):
    self.model = Prophet()
crv.Show_prediction_by_day(future_dates_reported,
                           ' Bayesian Interpolation predictions for Greece',
                           predict_bayesian_gr, days_in_future)
daily_world_cases = np.array(daily_world_cases)
daily_world_cases.shape = (len(daily_world_cases), 1)
print(daily_world_cases.shape)
Dates_reported = np.array(Dates_reported)
Dates_reported.shape = (len(Dates_reported), 1)
print(Dates_reported.shape)
World_cases_perday = np.concatenate((Dates_reported, daily_world_cases), axis=1)
print(World_cases_perday.shape)
World_cases_perday = pd.DataFrame(data=World_cases_perday)
World_cases_perday.columns = ['ds', 'y']
print(World_cases_perday)
ph = Prophet(n_changepoints=41, changepoint_prior_scale=1, interval_width=1,
             daily_seasonality=True, yearly_seasonality=True,
             seasonality_mode='additive', seasonality_prior_scale=10)
ph.fit(World_cases_perday)
World_cases_perday.tail()
future_prediction = ph.make_future_dataframe(periods=10)
future_prediction.tail(10)
forecast = ph.predict(future_prediction)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)
y = np.array(forecast['yhat'])[-10:]
y_l = np.array(forecast['yhat_lower'])[-10:]
y_u = np.array(forecast['yhat_upper'])[-10:]
for i in range(len(y)):
    y[i] = int(y[i])
    y_l[i] = int(y_l[i])
    y_u[i] = int(y_u[i])  # truncate the upper bound as well
def test_conditional_custom_seasonality(self):
    m = Prophet(weekly_seasonality=False, yearly_seasonality=False)
    m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
                      prior_scale=2., condition_name='is_conditional_week')
    m.add_seasonality(name='normal_monthly', period=30.5, fourier_order=5,
                      prior_scale=2.)
    df = DATA.copy()
    with self.assertRaises(ValueError):
        # Require all condition names in df
        m.fit(df)
    df['is_conditional_week'] = [0] * 255 + [2] * 255
    with self.assertRaises(ValueError):
        # Require boolean-compatible values
        m.fit(df)
    df['is_conditional_week'] = [0] * 255 + [1] * 255
    m.fit(df)
    self.assertEqual(
        m.seasonalities['conditional_weekly'],
        {
            'period': 7,
            'fourier_order': 3,
            'prior_scale': 2.,
            'mode': 'additive',
            'condition_name': 'is_conditional_week',
        },
    )
    self.assertIsNone(m.seasonalities['normal_monthly']['condition_name'])
    seasonal_features, prior_scales, component_cols, modes = (
        m.make_all_seasonality_features(m.history))
    # Confirm that only rows where is_conditional_week is set have non-zero entries
    conditional_weekly_columns = seasonal_features.columns[
        seasonal_features.columns.str.startswith('conditional_weekly')]
    self.assertTrue(
        np.array_equal(
            (seasonal_features[conditional_weekly_columns] != 0).any(axis=1).values,
            df['is_conditional_week'].values))
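# Sketch of the predict-time counterpart: a future frame must carry the same
# condition column, flagging the rows where 'conditional_weekly' applies. The
# horizon and the date cutoff below are illustrative assumptions only.
future = m.make_future_dataframe(periods=60)
future['is_conditional_week'] = future['ds'] < '2014-01-01'  # assumed cutoff
fcst = m.predict(future)
# rows where the condition is False get zero conditional_weekly contribution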
def multi_input():
    shop_Id = request.form['shop_ids']
    shopID = [int(x) for x in shop_Id.split(',')]
    data = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv',
        index_col='start_date', parse_dates=True)
    # shopID = input("Enter your shop id")
    shopID1 = list(shopID)
    for j in shopID1:
        data2 = data[['sales_id', 'shop_id', 'week_no', 'sales_amount',
                      'item_sold', 'transactions', 'total_tax', 'sales_status']]
        df1 = data2[data2.shop_id == j]  # input no. 1
        df2 = df1[df1.sales_status != 0]
        nulldetect = df1.week_no.isnull()
        df2.week_no.loc[nulldetect == True] = 54
        df2['week_no'] = df2.week_no - 2
        if len(df2.week_no) > 51:
            dff = df2[['sales_amount']]
            data3 = dff.reset_index()
            data4 = data3
            data5 = data4.rename(columns={'start_date': 'ds',
                                          'sales_amount': 'y'})
            # log-transform the target; infinities from log(0) are zeroed below
            data5['y'] = np.log(data5['y'])
            data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
            model = Prophet()
            model.fit(data5)
            future = model.make_future_dataframe(periods=52, freq='w')
            forecast = model.predict(future)
            data5.set_index('ds', inplace=True)
            forecast.set_index('ds', inplace=True)
            viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']],
                              how='outer')
            viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
            dff.index = pd.to_datetime(dff.index)  # make sure our index is a datetime object
            connect_date = dff.index[-2]  # select the 2nd to last date
            mask = (forecast.index > connect_date)
            predict_df = forecast.loc[mask]
            viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
                              how='outer')
            viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
            ii = len(dff.sales_amount) - 1
            predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
            predicted_future_sales1 = predicted_future_sales.rename(
                columns={'yhat_scaled': 'future_sales'})
            predicted_future_sales2 = predicted_future_sales1.reset_index()
            week_no = predicted_future_sales2['index'].dt.week
            future_sales = predicted_future_sales2['future_sales']
            future_sales1 = round(future_sales, 2)
            start_date = predicted_future_sales2['index']
            predict_data = {'future_sales': future_sales1, 'week_no': week_no,
                            'start_date': start_date}
            predict_data1 = pd.DataFrame(predict_data)
            # weekNO = int(input("Enter week number for shop %s" % j))
            print("Predicted sales amount for shop #" + str(j) + ": " + str(
                float(predict_data1.future_sales[predict_data1.week_no == 23].values[0])))
            # fig, ax1 = plt.subplots()
            plt.plot(viz_df.sales_amount, label='Actual Sales shop %s' % j)
            plt.plot(viz_df.yhat_scaled, label='Forecasted Sales %s' % j)
            # ax1.fill_between(viz_df.index, np.exp(viz_df['yhat_upper']), np.exp(viz_df['yhat_lower']), alpha=0.5, color='darkgray')
            plt.title('Sales (Orange) vs Sales Forecast (Black) for shop ' + str(j))
            plt.ylabel('Dollar Sales')
            plt.xlabel('Dates')
            plt.legend()
        else:
            a = df2[['week_no', 'shop_id', 'sales_amount', 'transactions',
                     'total_tax', 'item_sold']]
            dates = pd.date_range(df2.index[-1], periods=52, freq='W-MON')
            dates1 = pd.DataFrame(dates)
            dates2 = pd.date_range(df2.index[0], periods=len(df2.index),
                                   freq='W-MON')
            mean_week_item = []
            for i in dates.week:
                mean_item_sold = a.item_sold[a.week_no == i].mean()
                mean_week_item.append(mean_item_sold)
            mean_week_item1 = pd.DataFrame(mean_week_item)
            trans_week_item = []
            for i1 in dates.week:
                mean_trans_sold = a.transactions[a.week_no == i1].mean()
                trans_week_item.append(mean_trans_sold)
            sales_week = []
            for ii1 in dates.week:
                mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
                sales_week.append(mean_sales_sold)
            dd = {'date': dates, 'weeks_no': dates.week, 'sales': sales_week,
                  'mean_item': mean_week_item, 'mean_trans': trans_week_item}
            dd1 = pd.DataFrame(dd)
            data1 = pd.read_csv(
                'C:/weekly sales and labour cost for all shops 2013 to 20177.csv')
            dff1 = data1[data1.sales_status != 0]
            nulldetect = dff1.week_no.isnull()
            dff1.week_no.loc[nulldetect == True] = 54
            dff1['week_no'] = dff1.week_no - 2
            X_Cluster = dff1[['shop_id', 'sales_amount']]
            from sklearn.cluster import KMeans
            kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
            y_hat = kmeans_model.labels_  # clusters
            cen = kmeans_model.cluster_centers_
            y_hat1 = pd.DataFrame(y_hat)
            group_low_sales = X_Cluster[y_hat == 0]
            group_middle_sales = X_Cluster[y_hat == 2]
            group_high_sales = X_Cluster[y_hat == 1]
            fff = []
            # note: the loop variable is renamed to jj to avoid shadowing the
            # shop id j from the outer loop
            for jj in X_Cluster.shop_id:
                dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == jj].mean()
                fff.append(dfdf)
            f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
            f4 = pd.DataFrame(fff)
            f5 = f4.drop_duplicates()
            f3['salle'] = f5.values
            Xx2 = f3[['shop_id', 'salle']]
            kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
            y_hat2 = kmeans_model2.labels_  # clusters
            cen2 = kmeans_model2.cluster_centers_
            group_middle_sales2 = Xx2[y_hat2 == 0]
            group_high_sales2 = Xx2[y_hat2 == 2]
            group_low_sales2 = Xx2[y_hat2 == 1]
            nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]
            q = int(a.shop_id.mean())
            if (group_low_sales2.shop_id.values == q).any() == True:
                cx = int(group_low_sales.sales_amount[group_low_sales.shop_id == q].values.mean())
                trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
                trt2 = trt[trt.sales_amount < cx + 3000]
                valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                                  'week_no']].loc[trt2.index.values]
                # print("Cluster of shop %s is low sales" % q)
            elif (group_middle_sales2.shop_id.values == q).any() == True:
                cx = int(group_middle_sales.sales_amount[group_middle_sales.shop_id == q].values.mean())
                trt = group_middle_sales[group_middle_sales.sales_amount > cx - 3000]
                trt2 = trt[trt.sales_amount < cx + 3000]
                valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                                  'week_no']].loc[trt2.index.values]
                # print("Cluster of shop %s is average sales" % q)
            elif (group_high_sales2.shop_id.values == q).any() == True:
                cx = int(group_high_sales.sales_amount[group_high_sales.shop_id == q].values.mean())
                trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
                trt2 = trt[trt.sales_amount < cx + 4000]
                valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                                  'week_no']].loc[trt2.index.values]
                # print("Cluster of shop %s is high sales" % q)
            drr = valid_cls
            itt = []
            trr = []
            sale = []
            for i3 in nullweeks:
                item = drr.item_sold[drr.week_no == i3].mean()
                trans = drr.transactions[drr.week_no == i3].mean()
                salee = drr.sales_amount[drr.week_no == i3].mean()
                itt.append(item)
                trr.append(trans)
                sale.append(salee)
            df_insert = {'sales_amountt': sale, 'ittem': itt, 'trans': trr,
                         'weeks_no': nullweeks}
            df_insert1 = pd.DataFrame(df_insert)
            forecastdf = dd1.fillna({'mean_item': df_insert1.ittem,
                                     'mean_trans': df_insert1.trans,
                                     'sales': df_insert1.sales_amountt})
            regr3 = linear_model.LinearRegression()
            X = forecastdf[['mean_item', 'mean_trans']]
            Y = forecastdf.sales
            regr3.fit(X, Y)
            y_predictionss = regr3.predict(X)
            y_predictionss1 = pd.DataFrame(y_predictionss)
            plt.plot(dates2, df2.sales_amount, label="actual sales shop %s" % q)
            plt.plot(dates, y_predictionss1, label="predicted sales shop %s" % q)
            plt.title('Comparison of actual and predicted sales over the whole period for shops %s' % shopID1)
            plt.xlabel('Weeks')
            plt.ylabel('Sales amount')
            plt.legend()
            figg = plt.gcf()
            figg.set_size_inches(13, 7)
    graphh = mpld3.fig_to_html(figg)
    return render_template('multinput.html', graphh=graphh, value2=shopID1)
        '2019-01-02', '2019-01-03', '2019-01-13', '2019-04-29', '2019-04-30',
        '2019-05-01', '2019-05-03', '2019-05-04', '2019-06-22', '2020-01-02',
        '2020-01-03', '2020-01-04', '2020-01-11']),
    'lower_window': 0,
    'upper_window': 1,
})
holidays = pd.concat((events, superholidays))

# Hyperparameters
model = Prophet(holidays=holidays,
                yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=False,
                seasonality_prior_scale=0.1,
                interval_width=0.95,
                holidays_prior_scale=10,
                changepoint_prior_scale=0.15)
model.add_country_holidays(country_name='JP')
model.fit(df_train)
forecast = model.predict(df)
forecast[['ds', 'yhat']].head()
model.plot_components(forecast)

fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(df_train['ds'], df_train['y'], c='grey', marker='o', ms=3,
        linestyle='-', label='Train')
scores = {}
train_regions, test_regions, prophets, predictions = {}, {}, {}, {}
train = train[['region', 'date', 'mortality_rate']]
train.columns = ['region', 'ds', 'y']
# train.y = np.log(train.y)
test = test[['Id', 'region', 'date']]
test.columns = ['Id', 'region', 'ds']
for r in sorted(train.region.unique()):
    train_regions[r] = train[train.region == r].copy()
    test_regions[r] = test[test.region == r].reset_index(drop=True)
    print(r, len(train_regions[r]), len(test_regions[r]))
for r in train_regions:
    prophets[r] = Prophet()
    prophets[r].fit(train_regions[r])
    predictions[r] = prophets[r].predict(test_regions[r])
    predictions[r]['mortality_rate'] = predictions[r].yhat
for r in train_regions:
    print(predictions[r].head())
    prophets[r].plot(predictions[r])
    # prophets[r].plot_components(predictions[r])
    plt.show()
submissions = []
for r in predictions:
    tmp = predictions[r][['Id', 'mortality_rate']]
    submissions.append(tmp)
class ProphetProfit:
    def __init__(self, engine, query, item):
        self.engine = engine
        self.query = query
        self.item = item
        self.df = None
        self.data = None
        self.m = None
        self.lmbda = None
        self.forecast = None
        self.ma = None
        self.item_list = None
        self.positive_trend = []
        self.negative_trend = []
        self.profit = pd.DataFrame()

    def sql_call(self):
        self.df = pd.read_sql(self.query, self.engine)
        self.df = self.df.sort_values(by='when')

    def prophet_fit(self, periods=31):
        mask = self.df['name_enus'] == self.item
        self.data = self.df[mask][['when', 'priceavg']].rename(columns={
            'when': 'ds',
            'priceavg': 'y'
        })
        self.data['ds'] = pd.to_datetime(self.data['ds'])
        # remove outliers
        std = self.data['y'].std() * 1.5
        mean = self.data['y'].mean()
        self.data = self.data[(self.data['y'] < mean + std)
                              & (self.data['y'] > mean - std)]
        # box-cox transformation
        # yt, self.lmbda = stats.boxcox(self.data['y'])
        # self.data['y'] = yt
        # fit
        self.m = Prophet(n_changepoints=20)
        self.m.add_seasonality(period=30.4, fourier_order=5, name='monthly')
        self.m.fit(self.data)
        future = self.m.make_future_dataframe(periods)
        self.forecast = self.m.predict(future)
        # create moving average column
        # self.forecast['yhat'] = inv_boxcox(self.forecast['yhat'], self.lmbda)
        # self.data['y'] = inv_boxcox(self.data['y'], self.lmbda)
        self.ma = pd.concat([
            self.data['y'].reset_index(drop=True),
            self.forecast[['ds', 'yhat', 'trend']]
        ], axis=1)
        self.ma['7day'] = self.ma['trend'].rolling(7).mean()
        self.ma.loc[(self.ma['trend'] > self.ma['7day']), 'trend_pos'] = 1
        self.ma.loc[(self.ma['trend'] < self.ma['7day']), 'trend_pos'] = -1

    def plot(self):
        self.sql_call()
        self.prophet_fit()
        fig1 = self.m.plot(self.forecast)
        fig2 = self.m.plot_components(self.forecast)
        plt.show()

    def make_lists(self, buy_date):
        self.sql_call()
        self.item_list = self.df.groupby('name_enus').mean().sort_values(
            'quantityavg')[::-1]
        buy_date = np.datetime64(
            datetime.datetime.strptime(buy_date, '%Y-%m-%d').date())
        for item in self.item_list.index:
            self.item = item
            if self.profit.shape[1] < 10:
                self.prophet_fit()
                if self.ma['trend_pos'].iloc[-31:-39:-1].sum() > 1:
                    try:
                        buy = self.data[self.data['ds'] == buy_date]['y'].values[0]
                    except IndexError:
                        buy = self.data[self.data['ds'] == (
                            buy_date - np.timedelta64(1, 'D'))]['y'].values[0]
                    profit_temp = self.forecast[
                        self.forecast['ds'] > buy_date][['ds', 'yhat']].reset_index(drop=True)
                    profit_temp['buy'] = buy
                    profit_temp[item] = profit_temp['yhat'] - profit_temp['buy']
                    self.profit = pd.concat([self.profit, profit_temp[item]],
                                            axis=1)
                elif self.ma['trend_pos'].iloc[-31:-39:-1].sum() < -1:
                    self.negative_trend.append(self.item)
            else:
                pickle.dump(self.profit, open('../data/profit_df.pkl', 'wb'))
                break

    def cross_val(self):
        df_cv = cross_validation(self.m, initial='62 days', period='1 days',
                                 horizon='7 days')
        # for col in ['yhat', 'yhat_lower', 'yhat_upper', 'y']:
        #     df_cv[col] = inv_boxcox(df_cv[col], lmbda)
        print(df_cv.sort_values('ds').tail())
        df_p = performance_metrics(df_cv)
        print(df_p)

    def mabp_random(self):
        df = pd.read_pickle('../data/profit_df.pkl')
        df = df.fillna(df.mean())
        scaler = RobustScaler().fit(df)
        df = scaler.transform(df)
        N = df.shape[0]
        d = df.shape[1]
        selected = []
        total_reward = 0
        total_profit = 0
        for n in range(0, N):
            item = random.randrange(d)
            selected.append(item)
            reward = df[n, item]
            profit = scaler.inverse_transform(df)[n, item]
            total_reward = total_reward + reward
            total_profit = total_profit + profit
        return pd.Series(selected).value_counts(normalize=True)

    def mapb_ucb(self):
        df = pd.read_pickle('../data/profit_df.pkl')
        df = df.fillna(df.mean())
        scaler = RobustScaler().fit(df)
        df = scaler.transform(df)
        N = df.shape[0]
        d = df.shape[1]
        selected = []
        numbers_of_selections = [0] * d
        sums_of_reward = [0] * d
        total_reward = 0
        total_profit = 0
        for n in range(0, N):
            item = 0
            max_upper_bound = 0
            for i in range(0, d):
                if numbers_of_selections[i] > 0:
                    average_reward = sums_of_reward[i] / numbers_of_selections[i]
                    delta_i = math.sqrt(2 * math.log(n + 1) / numbers_of_selections[i])
                    upper_bound = average_reward + delta_i
                else:
                    upper_bound = float('inf')  # force each arm to be tried once
                if upper_bound > max_upper_bound:
                    max_upper_bound = upper_bound
                    item = i
            selected.append(item)
            numbers_of_selections[item] += 1
            reward = df[n, item]
            profit = scaler.inverse_transform(df)[n, item]
            sums_of_reward[item] += reward
            total_reward += reward
            total_profit += profit
        return pd.Series(selected).value_counts(normalize=True)
# plot the avocado prices vs. regions for organic avocados
organic = sns.catplot('AveragePrice', 'region',
                      data=df[df['type'] == 'organic'],
                      hue='year', height=20)

# # TASK 4: PREPARE THE DATA BEFORE APPLYING FACEBOOK PROPHET TOOL
df_sample = df[['Date', 'AveragePrice']]
df_sample
df_sample = df_sample.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_sample

# # TASK 5: DEVELOP MODEL AND MAKE PREDICTIONS - PART A
m = Prophet()
m.fit(df_sample)

# Forecasting into the future
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast

figure = m.plot(forecast, xlabel='Date', ylabel='Price')
figure2 = m.plot_components(forecast)

# # TASK 6: DEVELOP MODEL AND MAKE PREDICTIONS (REGION SPECIFIC) - PART B
# Select specific regions
df_r1 = df[df['region'] == 'West']
df_r2 = df[df['region'] == 'Chicago']
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import DataReader
import datetime as dt
from fbprophet import Prophet

ticker = 'AAPL'
num_of_years = 20
start = dt.datetime.now() - dt.timedelta(int(365.25 * num_of_years))
now = dt.datetime.now()

data = DataReader(ticker, 'yahoo', start, now)
data = data.reset_index()
data = data[["Date", "Close"]]
data = data.rename(columns={"Date": "ds", "Close": "y"})

m = Prophet(daily_seasonality=True)
m.fit(data)
future = m.make_future_dataframe(periods=30)
prediction = m.predict(future)
m.plot(prediction)
plt.title(f"Prediction of {ticker}'s Stock Price using Prophet")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.show()
def test_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(holidays=holidays)
    df = pd.DataFrame({'ds': pd.date_range('2016-12-20', '2016-12-31')})
    feats, priors, names = model.make_holiday_features(
        df['ds'], model.holidays)
    # 2 columns generated (the holiday and the day before), both in range
    self.assertEqual(feats.shape, (df.shape[0], 2))
    self.assertEqual((feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)
    self.assertEqual(priors, [10., 10.])  # Default prior
    self.assertEqual(names, ['xmas'])
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [10],
    })
    m = Prophet(holidays=holidays)
    feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
    # 12 columns generated even though only 8 overlap the date range
    self.assertEqual(feats.shape, (df.shape[0], 12))
    self.assertEqual(priors, list(10. * np.ones(12)))
    self.assertEqual(names, ['xmas'])
    # Check prior specifications
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25', '2017-12-25']),
        'holiday': ['xmas', 'xmas'],
        'lower_window': [-1, -1],
        'upper_window': [0, 0],
        'prior_scale': [5., 5.],
    })
    m = Prophet(holidays=holidays)
    feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
    self.assertEqual(priors, [5., 5.])
    self.assertEqual(names, ['xmas'])
    # 2 different priors
    holidays2 = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
        'prior_scale': [8] * 2,
    })
    holidays2 = pd.concat((holidays, holidays2), sort=True)
    m = Prophet(holidays=holidays2)
    feats, priors, names = m.make_holiday_features(df['ds'], m.holidays)
    pn = zip(priors, [s.split('_delim_')[0] for s in feats.columns])
    for t in pn:
        self.assertIn(t, [(8., 'seans-bday'), (5., 'xmas')])
    holidays2 = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
    })
    holidays2 = pd.concat((holidays, holidays2), sort=True)
    feats, priors, names = Prophet(
        holidays=holidays2,
        holidays_prior_scale=4).make_holiday_features(df['ds'], holidays2)
    self.assertEqual(set(priors), {4., 5.})
    # Check incompatible priors
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25', '2016-12-27']),
        'holiday': ['xmasish', 'xmasish'],
        'lower_window': [-1, -1],
        'upper_window': [0, 0],
        'prior_scale': [5., 6.],
    })
    with self.assertRaises(ValueError):
        Prophet(holidays=holidays).make_holiday_features(df['ds'], holidays)
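# A rough sketch of the expansion these assertions describe: each holiday date
# is smeared over [lower_window, upper_window] into one indicator column per
# offset, named like 'xmas_delim_+1'. This is an illustrative re-implementation
# consistent with the column counts and '_delim_' naming checked above, not the
# library source.
import pandas as pd

def holiday_indicators_sketch(ds, holiday, dates, lower_window, upper_window):
    # ds: Series of timestamps to featurize; dates: Series of holiday dates
    cols = {}
    for offset in range(lower_window, upper_window + 1):
        sign = '+' if offset >= 0 else '-'
        name = '{}_delim_{}{}'.format(holiday, sign, abs(offset))
        cols[name] = ds.isin(dates + pd.Timedelta(days=offset)).astype(float)
    return pd.DataFrame(cols)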
def test_custom_seasonality(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
        'prior_scale': [4.],
    })
    m = Prophet(holidays=holidays)
    m.add_seasonality(name='monthly', period=30, fourier_order=5,
                      prior_scale=2.)
    self.assertEqual(
        m.seasonalities['monthly'],
        {
            'period': 30,
            'fourier_order': 5,
            'prior_scale': 2.,
            'mode': 'additive',
            'condition_name': None,
        },
    )
    # Names that collide with a holiday or a reserved component are rejected
    with self.assertRaises(ValueError):
        m.add_seasonality(name='special_day', period=30, fourier_order=5)
    with self.assertRaises(ValueError):
        m.add_seasonality(name='trend', period=30, fourier_order=5)
    # Built-in seasonality names may be overridden
    m.add_seasonality(name='weekly', period=30, fourier_order=5)

    # Test fourier order <= 0
    m = Prophet()
    with self.assertRaises(ValueError):
        m.add_seasonality(name='weekly', period=7, fourier_order=0)
    with self.assertRaises(ValueError):
        m.add_seasonality(name='weekly', period=7, fourier_order=-1)

    # Test priors
    m = Prophet(
        holidays=holidays,
        yearly_seasonality=False,
        seasonality_mode='multiplicative',
    )
    m.add_seasonality(name='monthly', period=30, fourier_order=5,
                      prior_scale=2., mode='additive')
    m.fit(DATA.copy())
    self.assertEqual(m.seasonalities['monthly']['mode'], 'additive')
    self.assertEqual(m.seasonalities['weekly']['mode'], 'multiplicative')
    seasonal_features, prior_scales, component_cols, modes = (
        m.make_all_seasonality_features(m.history))
    self.assertEqual(sum(component_cols['monthly']), 10)
    self.assertEqual(sum(component_cols['special_day']), 1)
    self.assertEqual(sum(component_cols['weekly']), 6)
    self.assertEqual(sum(component_cols['additive_terms']), 10)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 7)
    if seasonal_features.columns[0] == 'monthly_delim_1':
        true = [2.] * 10 + [10.] * 6 + [4.]
        self.assertEqual(sum(component_cols['monthly'][:10]), 10)
        self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
    else:
        true = [10.] * 6 + [2.] * 10 + [4.]
        self.assertEqual(sum(component_cols['weekly'][:6]), 6)
        self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
    self.assertEqual(prior_scales, true)
"""
@author: batesc

Takes a .csv of hourly feeder loading data and projects max and min
values out for a year, to be run in a time-series load-flow analysis.
"""
import pandas as pd
from fbprophet import Prophet

# instantiate the model and set parameters
model = Prophet(changepoint_prior_scale=0.01,
                interval_width=0.95,
                growth='linear',
                daily_seasonality=True,
                weekly_seasonality=False,
                yearly_seasonality=True,
                seasonality_mode='additive')

history_pd = pd.read_csv("load.csv")

# fit the model to historical data
model.fit(history_pd)

# project over 8760 hours - 1 year
future_pd = model.make_future_dataframe(periods=8760, freq='H',
                                        include_history=True)

# predict over the dataset
forecast_pd = model.predict(future_pd)
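The docstring promises yearly max and min loading values, but the script stops at the prediction step; one way to finish it, assuming the 95% interval bounds are used as proxies for max and min hourly loading (an assumption, since the original ends here):

# Assumed final step: derive max/min loading from the 95% interval bounds.
bounds = forecast_pd[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(8760)
yearly_max = bounds['yhat_upper'].max()
yearly_min = bounds['yhat_lower'].min()
print("Projected max load: %.2f, min load: %.2f" % (yearly_max, yearly_min))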
def test_auto_daily_seasonality(self):
    # Should be enabled
    m = Prophet()
    self.assertEqual(m.daily_seasonality, 'auto')
    m.fit(DATA2)
    self.assertIn('daily', m.seasonalities)
    self.assertEqual(
        m.seasonalities['daily'],
        {
            'period': 1,
            'fourier_order': 4,
            'prior_scale': 10.,
            'mode': 'additive',
            'condition_name': None,
        },
    )
    # Should be disabled due to too short history
    N = 430
    train = DATA2.head(N)
    m = Prophet()
    m.fit(train)
    self.assertNotIn('daily', m.seasonalities)
    m = Prophet(daily_seasonality=True)
    m.fit(train)
    self.assertIn('daily', m.seasonalities)
    m = Prophet(daily_seasonality=7, seasonality_prior_scale=3.)
    m.fit(DATA2)
    self.assertEqual(
        m.seasonalities['daily'],
        {
            'period': 1,
            'fourier_order': 7,
            'prior_scale': 3.,
            'mode': 'additive',
            'condition_name': None,
        },
    )
    m = Prophet()
    m.fit(DATA)
    self.assertNotIn('daily', m.seasonalities)
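The next snippet begins midway through its holiday-frame definitions; for orientation, Prophet's documented holidays pattern looks like the sketch below (the holiday names and the short date lists are illustrative placeholders, not recovered from the original):

# Illustrative only: the standard shape of Prophet holiday DataFrames.
playoffs = pd.DataFrame({
    'holiday': 'playoff',
    'ds': pd.to_datetime(['2016-01-17', '2016-01-24']),  # placeholder dates
    'lower_window': 0,
    'upper_window': 1,
})
superbowls = pd.DataFrame({
    'holiday': 'superbowl',
    'ds': pd.to_datetime(['2016-02-07', '2017-02-05']),  # placeholder dates
    'lower_window': 0,
    'upper_window': 0,
})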
            '2016-02-07', '2016-02-08', '2016-02-09', '2016-02-10',
            '2016-02-11', '2016-02-12', '2016-02-13', '2017-01-27',
            '2017-01-28', '2017-01-29', '2017-01-30', '2017-01-31',
            '2017-02-01', '2017-02-02',
        ]),
    'lower_window': 0,
    'upper_window': 0,
})
holidays = pd.concat((playoffs, superbowls))

for i in user:
    df2 = generatedata(df, i)
    # plt.figure(1)
    # plt.plot(df2['ds'], df2['y'])
    # plt.grid(True)
    prophet = Prophet()  # optionally: yearly_seasonality=True, holidays=holidays
    prophet.fit(df2)
    future = prophet.make_future_dataframe(periods=90)  # include_history=False
    df_cv = cross_validation(prophet, '90 days', initial='270 days',
                             period='90 days')
    print(df_cv)
    plt.figure(1)
    plt.plot(df_cv['ds'], df_cv['y'])
    plt.plot(df_cv['ds'], df_cv['yhat'])
    plt.grid(True)
    plt.show()
    break

forecast = prophet.predict(future)
def test_auto_weekly_seasonality(self):
    # Should be enabled
    N = 15
    train = DATA.head(N)
    m = Prophet()
    self.assertEqual(m.weekly_seasonality, 'auto')
    m.fit(train)
    self.assertIn('weekly', m.seasonalities)
    self.assertEqual(
        m.seasonalities['weekly'],
        {
            'period': 7,
            'fourier_order': 3,
            'prior_scale': 10.,
            'mode': 'additive',
            'condition_name': None,
        },
    )
    # Should be disabled due to too short history
    N = 9
    train = DATA.head(N)
    m = Prophet()
    m.fit(train)
    self.assertNotIn('weekly', m.seasonalities)
    m = Prophet(weekly_seasonality=True)
    m.fit(train)
    self.assertIn('weekly', m.seasonalities)
    # Should be disabled due to weekly spacing of the data
    train = DATA.iloc[::7, :]
    m = Prophet()
    m.fit(train)
    self.assertNotIn('weekly', m.seasonalities)
    m = Prophet(weekly_seasonality=2, seasonality_prior_scale=3.)
    m.fit(DATA)
    self.assertEqual(
        m.seasonalities['weekly'],
        {
            'period': 7,
            'fourier_order': 2,
            'prior_scale': 3.,
            'mode': 'additive',
            'condition_name': None,
        },
    )
# per the Prophet documentation, the input columns must have specific names
volume = volume.rename(columns={'date': 'ds', 'volume': 'y'})
volume.head()

# In[ ]:

# plot daily sales
ax = volume.set_index('ds').plot(figsize=(12, 4), color=c)
ax.set_ylabel('Daily volume of A.N')
ax.set_xlabel('Date')
plt.show()

# In[ ]:

# set the uncertainty interval to 95% (the Prophet default is 80%)
my_model = Prophet(interval_width=0.95)
my_model.fit(volume)

# dataframe that extends one day into the future
future_dates = my_model.make_future_dataframe(periods=1)
print("First day to forecast.")
future_dates

# predictions
forecast = my_model.predict(future_dates)

# predictions with uncertainty bounds
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# In[ ]:
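If a six-week horizon were wanted instead of a single day, the future frame would simply be extended (a hypothetical variation, not in the original cell):

# Hypothetical six-week horizon with daily steps.
future_dates = my_model.make_future_dataframe(periods=42)  # 6 weeks * 7 days
forecast = my_model.predict(future_dates)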
        'lower_window': 0,
        'upper_window': 1,
    })
else:
    extra_holiday = None  # Prophet expects None, not False, when there are no extra holidays

# changepoints=['2020-03-17', '2020-05-11', '2020-07-28', '2008-09-14']

start_time = time.time()
pro = Prophet(growth=param['growth'],                          # default = 'linear'
              yearly_seasonality=param['yearly_seasonality'],  # default = 'auto'
              weekly_seasonality=param['weekly_seasonality'],  # default = 'auto'
              daily_seasonality=param['daily_seasonality'],    # default = 'auto'
              holidays=extra_holiday,                          # default = None
              changepoint_range=param['changepoint_range'],    # default = 0.8
              changepoint_prior_scale=param['changepoint_prior_scale'])  # default = 0.05
pro.add_country_holidays(country_name=param['country_name'])
pro.fit(Data)
end_time = time.time()
print("Fitting time:", end_time - start_time)

future = pro.make_future_dataframe(periods=days2predict)
forecast = pro.predict(future)
fig1 = pro.plot(forecast)
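Since the snippet keeps a commented-out list of changepoint dates, one variation is to pin those dates explicitly rather than let Prophet place changepoints automatically; a sketch reusing the same param dict (hypothetical, not part of the original run, and the dates must fall inside the training history):

# Hypothetical: pass the commented-out dates as explicit changepoints.
pro_manual = Prophet(growth=param['growth'],
                     changepoints=['2020-03-17', '2020-05-11', '2020-07-28'],
                     changepoint_prior_scale=param['changepoint_prior_scale'])
pro_manual.fit(Data)
forecast_manual = pro_manual.predict(pro_manual.make_future_dataframe(periods=days2predict))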
def my_form_post():
    shop_Id = request.form['shop_id']
    week_No = request.form['week_no']
    ShopID = int(shop_Id)
    WeekNo = int(week_No)
    # %matplotlib inline
    # plt.rcParams['figure.figsize'] = (20, 10)
    plt.style.use('ggplot')
    data_f = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv')
    data = pd.read_csv(
        'C:/weekly sales and labour cost for all shops 2013 to 20177.csv',
        index_col='start_date', parse_dates=True)
    # shopID = input("Enter your shop id")
    shopID1 = ShopID
    # if shopID1 < min(data.shop_id) or shopID1 > max(data.shop_id):
    #     print("Enter correct shop id number")
    #     return select_model()
    WeekNo1 = WeekNo
    data2 = data[[
        'sales_id', 'shop_id', 'week_no', 'sales_amount', 'item_sold',
        'transactions', 'total_tax', 'sales_status'
    ]]
    df1 = data2[data2.shop_id == shopID1]  # input №1
    df2 = df1[df1.sales_status != 0]
    nulldetect = df1.week_no.isnull()
    df2.week_no.loc[nulldetect == True] = 54
    df2['week_no'] = df2.week_no - 2
    if len(df2.week_no) > 51:
        # enough history: forecast weekly sales with Prophet on log-sales
        dff = df2[['sales_amount']]
        data3 = dff.reset_index()
        data4 = data3
        data5 = data4.rename(columns={'start_date': 'ds',
                                      'sales_amount': 'y'})
        data5 = data5.replace([np.inf, -np.inf], np.nan).fillna(0)
        # y.plot()
        data5['y'] = np.log(data5['y'])
        model = Prophet()
        model.fit(data5)
        future = model.make_future_dataframe(periods=52, freq='w')
        forecast = model.predict(future)
        data5.set_index('ds', inplace=True)
        forecast.set_index('ds', inplace=True)
        viz_df = dff.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']],
                          how='outer')
        viz_df['yhat_rescaled'] = np.exp(viz_df['yhat'])
        dff.index = pd.to_datetime(dff.index)  # make sure the index is a datetime object
        connect_date = dff.index[-2]  # select the 2nd to last date
        mask = (forecast.index > connect_date)
        predict_df = forecast.loc[mask]
        viz_df = dff.join(predict_df[['yhat', 'yhat_lower', 'yhat_upper']],
                          how='outer')
        viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])
        ii = len(dff.sales_amount) - 1
        predicted_future_sales = pd.DataFrame(viz_df.yhat_scaled[ii:])
        predicted_future_sales1 = predicted_future_sales.rename(
            columns={'yhat_scaled': 'future_sales'})
        predicted_future_sales2 = predicted_future_sales1.reset_index()
        week_no = predicted_future_sales2['index'].dt.week
        future_sales = predicted_future_sales2['future_sales']
        future_sales1 = round(future_sales, 2)
        start_date = predicted_future_sales2['index']
        predict_data = {
            'future_sales': future_sales1,
            'week_no': week_no,
            'start_date': start_date
        }
        predict_data1 = pd.DataFrame(predict_data)
        predict_data2 = predict_data1.set_index('start_date')
        frames = [df2.sales_amount, predict_data2.future_sales]
        join = pd.concat(frames)
        detrend_sdata = signal.detrend(join)
        trend = join - detrend_sdata
        p2 = predict_data1.set_index('start_date')
        r = []
        for jj in pd.DataFrame(
                df2.index.year.values).drop_duplicates().index.values:
            sale_year = df2.sales_amount[str(
                int(pd.DataFrame(
                    df2.index.year).drop_duplicates().loc[jj]))].mean()
            r.append(sale_year)
        years = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values
        holday = []
        for t in years[0:len(years) - 1]:
            h = df2.sales_amount[df2.week_no >= 50][str(t)].mean(
            ) + df2.sales_amount[df2.week_no <= 3][str(int(t) + 1)].mean()
            holday.append(h / 2)
        year_last = p2.future_sales[p2.week_no >= 50][str(years[-1])].mean(
        ) + p2.future_sales[p2.week_no <= 3].mean()  # 2018
        holday.append(year_last / 2)
        N = len(r)
        Holiday_Means = holday
        All_Year_Means = r
        ind = np.arange(N)
        avg_sale = round(df2.sales_amount.mean(), 2)
        maxSale = round(max(df2.sales_amount), 2)
        minSale = round(min(df2.sales_amount), 2)
        itemTrans = round((df2.item_sold / df2.transactions).mean(), 2)
        fig, ax1 = plt.subplots(figsize=(7, 4))
        ax1.plot(viz_df.sales_amount)
        ax1.plot(viz_df.yhat_scaled, color='green')
        ax1.plot(join.index, trend, color='blue', alpha=0.5, label='Trend')
        # ax1.fill_between(viz_df.index, np.exp(viz_df['yhat_upper']),
        #                  np.exp(viz_df['yhat_lower']), alpha=0.5,
        #                  color='darkgray')
        ax1.set_title('Sales (Orange) vs Sales Forecast (Green) for shop ' +
                      str(shopID1))
        ax1.set_ylabel('Sales amount')
        ax1.set_xlabel('Dates')
        L = ax1.legend()  # get the legend
        L.get_texts()[0].set_text('Actual Sales')      # legend text for 1st plot
        L.get_texts()[1].set_text('Forecasted Sales')  # legend text for 2nd plot
        graph = mpld3.fig_to_html(fig)

        fig, ax2 = plt.subplots(figsize=(7, 4))
        bar_width = 0.4
        opacity = 0.8
        bar1 = ax2.bar(ind, Holiday_Means, bar_width, alpha=opacity,
                       label='Holidays')
        bar2 = ax2.bar(ind + bar_width, All_Year_Means, bar_width,
                       alpha=opacity, label='Avg sales per year')
        ticks = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values
        ax2.set_ylabel('Sales_amount')
        ax2.set_title(
            'Holiday sales (Xmas & NY) vs Average sales per year (shop #%s)'
            % shopID1)
        plt.xticks(ind + 0.25, ticks)
        ax2.legend()
        graph1 = mpld3.fig_to_html(fig)

        f_sale = str(
            float(predict_data1.future_sales[predict_data1.week_no ==
                                             WeekNo1].values[0]))
        n_week = WeekNo1
        id_shop = shopID1
        sale_mean = avg_sale
        max_sale = maxSale
        min_sale = minSale
        item_trans = itemTrans
    else:
        # short history: fall back to a linear regression on items/transactions
        a = df2[['sales_amount', 'shop_id', 'week_no', 'transactions',
                 'item_sold']]
        y = a.iloc[:, 0]
        x = a.iloc[:, 3:5]
        # print(df2)
        from sklearn import linear_model
        regr2 = linear_model.LinearRegression()
        X1 = x
        y1 = y
        regr2.fit(X1, y1)
        y_predictions = regr2.predict(X1)
        y_predictions1 = pd.DataFrame(y_predictions)
        d = {'actual sales': y, 'predicted sales': y_predictions1}
        d1 = np.array(d)
        dates = pd.date_range(y.index[-1], periods=52, freq='W-MON')
        dates1 = pd.DataFrame(dates)
        mean_week_item = []
        for i in dates.week:
            mean_item_sold = a.item_sold[a.week_no == i].mean()
            mean_week_item.append(mean_item_sold)
        mean_week_item1 = pd.DataFrame(mean_week_item)
        trans_week_item = []
        for i1 in dates.week:
            mean_trans_sold = a.transactions[a.week_no == i1].mean()
            trans_week_item.append(mean_trans_sold)
        sales_week = []
        for ii1 in dates.week:
            mean_sales_sold = a.sales_amount[a.week_no == ii1].mean()
            sales_week.append(mean_sales_sold)
        dd = {
            'date': dates,
            'weeks_no': dates.week,
            'sales': sales_week,
            'mean_item': mean_week_item,
            'mean_trans': trans_week_item
        }
        dd1 = pd.DataFrame(dd)
        dff1 = data_f[data_f.sales_status != 0]
        nulldetect = dff1.week_no.isnull()
        dff1.week_no.loc[nulldetect == True] = 54
        dff1['week_no'] = dff1.week_no - 2
        X_Cluster = dff1[['shop_id', 'sales_amount']]
        from sklearn.cluster import KMeans
        kmeans_model = KMeans(n_clusters=3, random_state=8).fit(X_Cluster)
        y_hat = kmeans_model.labels_  # clusters
        cen = kmeans_model.cluster_centers_
        y_hat1 = pd.DataFrame(y_hat)
        group_low_sales = X_Cluster[y_hat == 0]
        group_middle_sales = X_Cluster[y_hat == 2]
        group_high_sales = X_Cluster[y_hat == 1]
        fff = []
        for j in X_Cluster.shop_id:
            dfdf = X_Cluster.sales_amount[X_Cluster.shop_id == j].mean()
            fff.append(dfdf)
        f3 = pd.DataFrame(X_Cluster.shop_id.drop_duplicates())
        f4 = pd.DataFrame(fff)
        f5 = f4.drop_duplicates()
        f3['salle'] = f5.values
        # second clustering pass on per-shop mean sales
        Xx2 = f3[['shop_id', 'salle']]
        kmeans_model2 = KMeans(n_clusters=3, random_state=8).fit(Xx2)
        y_hat2 = kmeans_model2.labels_  # clusters
        cen2 = kmeans_model2.cluster_centers_
        group_middle_sales2 = Xx2[y_hat2 == 0]
        group_high_sales2 = Xx2[y_hat2 == 2]
        group_low_sales2 = Xx2[y_hat2 == 1]
        nullweeks = dd1.weeks_no[dd1.mean_trans.isnull() == True]
        if (group_low_sales2.shop_id.values == shopID1).any():
            cx = int(group_low_sales.sales_amount[group_low_sales.shop_id ==
                                                  shopID1].values.mean())
            trt = group_low_sales[group_low_sales.sales_amount > cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is low sales" % shopID1)
        elif (group_middle_sales2.shop_id.values == shopID1).any():
            cx = int(group_middle_sales.sales_amount[
                group_middle_sales.shop_id == shopID1].values.mean())
            trt = group_middle_sales[group_middle_sales.sales_amount >
                                     cx - 3000]
            trt2 = trt[trt.sales_amount < cx + 3000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is average sales" % shopID1)
        elif (group_high_sales2.shop_id.values == shopID1).any():
            cx = int(group_high_sales.sales_amount[
                group_high_sales.shop_id == shopID1].values.mean())
            trt = group_high_sales[group_high_sales.sales_amount > cx - 4000]
            trt2 = trt[trt.sales_amount < cx + 4000]
            valid_cls = dff1[['sales_amount', 'item_sold', 'transactions',
                              'week_no']].loc[trt2.index.values]
            # print("Cluster of shop %s is high sales" % shopID1)
        drr = valid_cls
        avg_sale = round(df2.sales_amount.mean(), 2)  # avg sales per week for whole period
        # avg_items_week = round(df2.item_sold[df2.week_no == 17].mean(), 2)  # avg items for input week
        # print('Avg items sold per week for whole period ',
        #       round(df2.item_sold.mean(), 2))  # avg items per week for whole period
        # avg_trans_week = round(df2.transactions[df2.week_no == 17].mean(), 2)  # avg trans for input week
        # print('Avg trans per week for whole period ',
        #       round(df2.transactions.mean(), 2))  # avg trans per week for whole period
        # avg_item_per_trans = round((df2.item_sold[df2.week_no == 17] /
        #                             df2.transactions[df2.week_no == 17]).mean(), 2)  # items per transaction
        itemTrans = round((df2.item_sold / df2.transactions).mean(), 2)
        # max_w = round(max(df2.sales_amount[df2.week_no == 17]), 2)
        # min_w = round(min(df2.sales_amount[df2.week_no == 17]), 2)
        maxSale = round(max(df2.sales_amount), 2)
        minSale = round(min(df2.sales_amount), 2)
        # worst = df2.week_no[df2.sales_amount > min(df2.sales_amount)]
        # print('Price of trans ', round((df2.sales_amount / df2.transactions).mean(), 2))
        # print('Price of item ', round((df2.sales_amount / df2.item_sold).mean(), 2))
        itt = []
        trr = []
        sale = []
        for i3 in nullweeks:
            item = drr.item_sold[drr.week_no == i3].mean()
            trans = drr.transactions[drr.week_no == i3].mean()
            salee = drr.sales_amount[drr.week_no == i3].mean()
            itt.append(item)
            trr.append(trans)
            sale.append(salee)
        df_insert = {
            'sales_amountt': sale,
            'ittem': itt,
            'trans': trr,
            'weeks_no': nullweeks
        }
        df_insert1 = pd.DataFrame(df_insert)
        forecastdf = dd1.fillna({
            'mean_item': df_insert1.ittem,
            'mean_trans': df_insert1.trans,
            'sales': df_insert1.sales_amountt
        })
        regr3 = linear_model.LinearRegression()
        X = forecastdf[['mean_item', 'mean_trans']]
        Y = forecastdf.sales
        regr3.fit(X, Y)
        y_predictionss = regr3.predict(X)
        y_predictionss1 = pd.DataFrame(y_predictionss)
        forecastdf['future_sales1'] = y_predictionss1.values
        f1 = forecastdf.set_index('date')
        frames1 = [df2.sales_amount, f1.future_sales1]
        join1 = pd.concat(frames1)
        detrend_sdata1 = signal.detrend(join1)
        trend1 = join1 - detrend_sdata1
        r1 = []
        for jj1 in pd.DataFrame(
                df2.index.year.values).drop_duplicates().index.values:
            sale_year1 = df2.sales_amount[str(
                int(pd.DataFrame(
                    df2.index.year).drop_duplicates().loc[jj1]))].mean()
            r1.append(sale_year1)
        years1 = pd.DataFrame(
            df2.index.year).drop_duplicates().start_date.values
        holday1 = []
        for t1 in years1[0:len(years1) - 1]:
            h1 = df2.sales_amount[df2.week_no >= 50][str(t1)].mean(
            ) + df2.sales_amount[df2.week_no <= 3][str(int(t1) + 1)].mean()
            holday1.append(h1 / 2)
        year_last1 = f1.future_sales1[f1.weeks_no >= 50][str(
            years1[-1])].mean() + f1.future_sales1[f1.weeks_no <= 3].mean()  # 2018
        holday1.append(year_last1 / 2)
        N1 = len(r1)
        Holiday_Means1 = holday1
        All_Year_Means1 = r1
        ind1 = np.arange(N1)
        f_sale = int(forecastdf.future_sales1[forecastdf.weeks_no ==
                                              WeekNo1].values[0])
        n_week = WeekNo1
        id_shop = shopID1
        sale_mean = avg_sale
        max_sale = maxSale
        min_sale = minSale
        item_trans = itemTrans
        # print(y.index)
        fig3, ax3 = plt.subplots(figsize=(7, 4))
        # dates = pd.date_range(y.index[0], periods=104, freq='W-MON')
        # plt.plot(y.index, y, color='blue', label="actual sales")
        ax3.plot(y.index, a.sales_amount, color='red', label="actual sales")
        ax3.plot(dates, y_predictionss1, color='green',
                 label="forecasted sales")
        ax3.plot(join1.index, trend1, color='blue', alpha=0.5, label='Trend')
        ax3.set_title('Comparison actual and predicted sales for whole '
                      'period of shop ' + str(shopID1) + '\n')
        ax3.set_xlabel('Weeks')
        ax3.set_ylabel('Sales amount')
        ax3.legend()
        graph = mpld3.fig_to_html(fig3)

        fig4, ax4 = plt.subplots(figsize=(7, 4))
        bar_width1 = 0.4
        opacity1 = 0.8
        ax4.bar(ind1, Holiday_Means1, bar_width1, alpha=opacity1,
                label='Holidays')
        ax4.bar(ind1 + bar_width1, All_Year_Means1, bar_width1,
                alpha=opacity1, label='Avg sales per year')
        ax4.set_ylabel('Sales_amount')
        ax4.set_title(
            'Holiday sales (Xmas & NY) vs Average sales per year (shop #%s)'
            % shopID1)
        plt.xticks(ind1 + 0.25,
                   pd.DataFrame(df2.index.year).drop_duplicates().start_date.values)
        ax4.legend()
        graph1 = mpld3.fig_to_html(fig4)
    return render_template('index.html', graph1=graph1, graph=graph,
                           value6=itemTrans, value5=min_sale,
                           value4=max_sale, value3=sale_mean,
                           value2=id_shop, value1=n_week, value=f_sale)
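This view function implies a surrounding Flask app; a minimal sketch of the scaffolding it appears to assume (route, module-level imports, and the Agg backend choice are guesses, not recovered from the original):

# Assumed scaffolding for the view above; all names here are hypothetical.
from flask import Flask, request, render_template
import matplotlib
matplotlib.use('Agg')  # render figures without a display, as a web server would
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
from scipy import signal
from fbprophet import Prophet

app = Flask(__name__)

# The view would then be registered along the lines of:
# @app.route('/predict', methods=['POST'])  # hypothetical route
# def my_form_post(): ...                   # body as above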