def save(self):
    """Pickle this object to ``./config/models/<id>``.

    While the pickle is written, ``self.fbmodel`` is temporarily replaced
    by the output of ``serialize.model_to_json`` so that the pickle holds
    the serialized form instead of the live model object. The original
    model is put back on ``self.fbmodel`` afterwards, whether or not the
    write succeeded.

    The file name is whatever ``self.id()`` returns.
    """
    fbmodel = self.fbmodel
    self.fbmodel = serialize.model_to_json(fbmodel)
    try:
        # exist_ok=True replaces the exists()/makedirs() pair, which could
        # fail if the directory was created between the check and the call.
        os.makedirs('./config/models', exist_ok=True)
        with open(f'./config/models/{self.id()}', 'wb+') as f:
            pickle.dump(self, f)
    finally:
        # Always restore the live model, even if writing raised.
        self.fbmodel = fbmodel
def _discover_model(self):
    """Grid-search Prophet hyper-parameters and save the best model.

    Every combination of ``changepoint_prior_scale`` and
    ``seasonality_prior_scale`` is fitted with logistic growth and scored
    by cross-validated RMSE. The best combination is refitted on the full
    training frame and written as JSON to
    ``<self.temp_output>/<file stem>_prf.json``.

    Side effects:
        Sets ``self.max_cap`` from ``self._transform_features``.

    Returns:
        dict: ``{'loss': <best RMSE>}``.

    Raises:
        ValueError: if no parameter combination could be evaluated.
    """
    df_train, self.max_cap = self._transform_features(self.log)

    # Horizon = time span between the row at the 80% position and the last
    # timestamp; cut-offs are spaced half a horizon apart.
    days = df_train.ds.max() - df_train.iloc[int(len(df_train) * 0.8)].ds
    periods = days * 0.5

    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }
    # Generate all combinations of parameters
    all_params = [
        dict(zip(param_grid.keys(), v))
        for v in itertools.product(*param_grid.values())
    ]

    rmses = []  # One entry per element of all_params, in the same order
    evaluated = 0
    for params in all_params:
        params['growth'] = 'logistic'
        try:
            m = Prophet(**params).fit(df_train)
            df_cv = cross_validation(m,
                                     horizon=days,
                                     period=periods,
                                     parallel="processes")
            df_p = performance_metrics(df_cv, rolling_window=1)
            rmse = df_p['rmse'].values[0]
            evaluated += 1
        except Exception:
            # Best effort: report the failure and keep searching. The slot
            # is filled with +inf so rmses stays aligned with all_params
            # and a failed combination can never be chosen as the best.
            traceback.print_exc()
            rmse = np.inf
        rmses.append(rmse)

    if not evaluated:
        raise ValueError(
            'cross-validation failed for every parameter combination')

    # Find the best parameters
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    best_idx = int(np.argmin(rmses))
    best_params = all_params[best_idx]

    m = Prophet(**best_params).fit(df_train)
    out_path = os.path.join(self.temp_output,
                            self.parms['file'].split('.')[0] + '_prf.json')
    with open(out_path, 'w') as fout:
        json.dump(model_to_json(m), fout, cls=NumpyEncoder)  # Save model

    return {'loss': tuning_results.iloc[best_idx].rmse}
def test_simple_serialize(self):
    """Round-trip a default model through JSON and compare with the original.

    Checks the serialized size and version tag, attribute-by-attribute
    equality of the restored model, and that both models produce the same
    ``yhat`` forecast.
    """
    m = Prophet()
    days = 30
    N = DATA.shape[0]
    df = DATA.head(N - days)
    m.fit(df)

    future = m.make_future_dataframe(2, include_history=False)
    fcst = m.predict(future)

    model_str = model_to_json(m)
    # Make sure json doesn't get too large in the future
    self.assertLess(len(model_str), 200000)
    z = json.loads(model_str)
    self.assertEqual(z['__fbprophet_version'], '1.0')

    m2 = model_from_json(model_str)

    # Check that m and m2 are equal
    self.assertEqual(m.__dict__.keys(), m2.__dict__.keys())
    for k, v in m.__dict__.items():
        if k in ['stan_fit', 'stan_backend']:
            # Checked separately below: expected to be None after loading.
            continue
        if k == 'params':
            self.assertEqual(v.keys(), m2.params.keys())
            for kk, vv in v.items():
                self.assertTrue(np.array_equal(vv, m2.params[kk]))
        elif k in PD_SERIES and v is not None:
            self.assertTrue(v.equals(m2.__dict__[k]))
        elif k in PD_DATAFRAME and v is not None:
            pd.testing.assert_frame_equal(v, m2.__dict__[k])
        elif k == 'changepoints_t':
            # Compare against the restored model (m2); comparing m with
            # itself would always pass.
            self.assertTrue(np.array_equal(v, m2.__dict__[k]))
        else:
            self.assertEqual(v, m2.__dict__[k])
    self.assertIsNone(m2.stan_fit)
    self.assertIsNone(m2.stan_backend)

    # Check that m2 makes the same forecast
    future2 = m2.make_future_dataframe(2, include_history=False)
    fcst2 = m2.predict(future2)
    self.assertTrue(
        np.array_equal(fcst['yhat'].values, fcst2['yhat'].values))
def train_prophet(train_df, model_location_prophet):
    """Fit a Prophet model on ``train_df`` and store it as JSON.

    The model uses linear growth, multiplicative seasonality, weekly
    seasonality, an extra 'monthly' seasonality and US country holidays.
    Every column of ``train_df`` other than ``ds`` and ``y`` is registered
    as an additional regressor.

    Args:
        train_df: training frame containing ``ds``, ``y`` and any
            regressor columns.
        model_location_prophet: path of the JSON file to write.

    Returns:
        The fitted Prophet model.
    """
    c.start()  # module-level timer; stopped when reporting below

    regressor_frame = train_df.drop(columns=['ds', 'y'])

    forecaster = Prophet(
        growth='linear',
        seasonality_mode='multiplicative',
        weekly_seasonality=True,
    )
    forecaster.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    forecaster.add_country_holidays(country_name='US')
    for regressor_name in regressor_frame.columns:
        forecaster.add_regressor(regressor_name)

    # Fit, then persist the serialized model
    forecaster.fit(train_df)
    with open(model_location_prophet, 'w') as file_out:
        serialized = model_to_json(forecaster)
        json.dump(serialized, file_out)

    elapsed = c.stop()
    print(f'Prophet model fitted: {elapsed} seconds')
    return forecaster
def test_full_serialize(self):
    """Round-trip a fully configured model through JSON.

    The model carries custom holidays, country holidays, explicit
    changepoints, conditional and regular extra seasonalities, and four
    extra regressors. The restored model must match attribute by
    attribute and reproduce the same ``yhat`` on the held-out rows.
    """
    # Construct a model with all attributes
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
    })
    # Test with holidays and country_holidays
    m = Prophet(
        holidays=holidays,
        seasonality_mode='multiplicative',
        changepoints=['2012-07-01', '2012-10-01', '2013-01-01'],
    )
    m.add_country_holidays(country_name='US')
    m.add_seasonality(name='conditional_weekly',
                      period=7,
                      fourier_order=3,
                      prior_scale=2.,
                      condition_name='is_conditional_week')
    m.add_seasonality(name='normal_monthly',
                      period=30.5,
                      fourier_order=5,
                      prior_scale=2.)
    df = DATA.copy()
    # The literal lengths below require DATA to have 510 rows.
    df['is_conditional_week'] = [0] * 255 + [1] * 255
    m.add_regressor('binary_feature', prior_scale=0.2)
    m.add_regressor('numeric_feature', prior_scale=0.5)
    m.add_regressor('numeric_feature2',
                    prior_scale=0.5,
                    mode='multiplicative')
    m.add_regressor('binary_feature2', standardize=True)
    df['binary_feature'] = ['0'] * 255 + ['1'] * 255
    df['numeric_feature'] = range(510)
    df['numeric_feature2'] = range(510)
    df['binary_feature2'] = [1] * 100 + [0] * 410

    train = df.head(400)
    test = df.tail(100)

    m.fit(train)
    # The returned frame is not used: predictions are made on `test`.
    m.make_future_dataframe(periods=100, include_history=False)
    fcst = m.predict(test)

    # Serialize!
    m2 = model_from_json(model_to_json(m))

    # Check that m and m2 are equal
    self.assertEqual(m.__dict__.keys(), m2.__dict__.keys())
    for k, v in m.__dict__.items():
        if k in ['stan_fit', 'stan_backend']:
            # Checked separately below: expected to be None after loading.
            continue
        if k == 'params':
            self.assertEqual(v.keys(), m2.params.keys())
            for kk, vv in v.items():
                self.assertTrue(np.array_equal(vv, m2.params[kk]))
        elif k in PD_SERIES and v is not None:
            self.assertTrue(v.equals(m2.__dict__[k]))
        elif k in PD_DATAFRAME and v is not None:
            pd.testing.assert_frame_equal(v, m2.__dict__[k])
        elif k == 'changepoints_t':
            # Compare against the restored model (m2); comparing m with
            # itself would always pass.
            self.assertTrue(np.array_equal(v, m2.__dict__[k]))
        else:
            self.assertEqual(v, m2.__dict__[k])
    self.assertIsNone(m2.stan_fit)
    self.assertIsNone(m2.stan_backend)

    # Check that m2 makes the same forecast
    m2.make_future_dataframe(periods=100, include_history=False)
    fcst2 = m2.predict(test)
    self.assertTrue(
        np.array_equal(fcst['yhat'].values, fcst2['yhat'].values))
# Forecast, plot, score the overlap with the observed series, then
# save and reload the model as JSON.
forecast = m.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

# Size the forecast figure directly; a separate plt.figure() call would
# only create an additional empty figure.
fig1 = m.plot(forecast, figsize=(15, 10))
plt.show()
a = plot_yearly(m)

# Compare timestamps as strings so the frames can be matched on 'ds'.
pm25_UP012_test['ds'] = [str(x) for x in pm25_UP012_test['ds']]
forecast['ds'] = [str(x) for x in forecast['ds']]
pm25_UP012['ds'] = [str(x) for x in pm25_UP012['ds']]


def _first_positions(values):
    """Map each value to the position of its first occurrence."""
    positions = {}
    for pos, value in enumerate(values):
        positions.setdefault(value, pos)
    return positions


# Build the lookups once instead of rebuilding lists and calling .index()
# for every row.
observed_pos = _first_positions(pm25_UP012['ds'])
forecast_pos = _first_positions(forecast['ds'])

y = []
yhat = []
for stamp in pm25_UP012['ds']:
    if stamp in forecast_pos:
        print(stamp)
        # Last column of each frame holds the value being compared.
        y.append(pm25_UP012.iloc[observed_pos[stamp], -1])
        yhat.append(forecast.iloc[forecast_pos[stamp], -1])

# Report the score instead of computing and discarding it.
score = r2_score(y, yhat)
print(f'R2 score: {score}')

with open('serialized_model.json', 'w') as fout:
    json.dump(model_to_json(m), fout)  # Save model

with open('serialized_model.json', 'r') as fin:
    m = model_from_json(json.load(fin))  # Load model
def save_model(model, path):
    """Write ``model`` as JSON to ``<path>/model.json``.

    Args:
        model: model object accepted by ``model_to_json``.
        path: directory in which ``model.json`` is written.

    Returns:
        str: confirmation message naming the written file.
    """
    destination = f"{path}/model.json"
    with open(destination, 'w') as fout:
        serialized = model_to_json(model)
        json.dump(serialized, fout)
    return f'saved model at {destination}'
def model_train(): """ function to train model call load aavail dataframe split loaded data by country train models and save to local storage log output """ ## start timer for runtime time_start = time.time() #remove print('calling load aavail data') ## data ingestion df = load_aavail_data() #remove print('splitting dfs into individual country dfs') #split df into dfs per country df_individual_countries = split_preprocessed_df(df, top10countries) #remove print('now about to train models') #train and save model to models directory for each country for dfname, df in df_individual_countries.items(): #remove print('to train: ' + str(dfname)) print(df.head()) print('NaNs:') print(df.isna().sum()) model = Prophet() model.fit(df) modelname = 'model_' + str(dfname) + '.json' #remove print(str(modelname) + ' trained') with open(os.path.join((MODEL_DIR), modelname), 'w') as f: modeltosave = model_to_json(model) json.dump(modeltosave, f) print('saved ' + modelname) m, s = divmod(time.time() - time_start, 60) h, m = divmod(m, 60) runtime = "%03d:%02d:%02d" % (h, m, s) #remove print('updating train log') ## update the log file update_train_log( df['ds'].shape, #eval_test, runtime, MODEL_VERSION, MODEL_VERSION_NOTE, ) #remove print('updated train log')