d[name]['Prev Close'] = d[name]['y'].shift(1)  # simplify prev close
d[name]['ActR'] = abs(d[name]['HIGH'] - d[name]['LOW'])  # define actual trading range
d[name]['TRHigh'] = abs(d[name]['HIGH'] - d[name]['Prev Close'])  # absolute value of diff prev C to H
d[name]['TRLow'] = abs(d[name]['LOW'] - d[name]['Prev Close'])  # absolute value of diff prev C to L
d[name]['True Range'] = d[name][['ActR', 'TRHigh', 'TRLow']].apply(max, axis=1)  # define true range
d[name]['ATR_E'] = d[name]['True Range'].ewm(span=ATR_Period).mean()  # exponential ATR
d[name]['MA_E'] = d[name]['y'].ewm(span=periods).mean()
d[name]['UB'] = d[name]['MA_E'] + (d[name]['ATR_E'] * sds)
d[name]['LB'] = d[name]['MA_E'] - (d[name]['ATR_E'] * sds)
d[name]['STARCWidth'] = d[name]['UB'] - d[name]['LB']
d[name]['STARC%'] = ((d[name]['y'] - d[name]['LB']) / d[name]['STARCWidth']) * 100

# build models and fit to data
m[name] = fbprophet.Prophet(weekly_seasonality=False,
                            yearly_seasonality=True,
                            interval_width=0.8)
m[name].fit(d[name])

# create new dataframes for predictions; still in transform scale
z[name] = m[name].make_future_dataframe(periods=15, freq='D')
z[name] = m[name].predict(z[name])

# move back to original scale
k[name] = z[name][['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
#k[name].loc[:, 'yhat'] = np.exp(k[name]['yhat'])
#k[name].loc[:, 'yhat_lower'] = np.exp(k[name]['yhat_lower'])
#k[name].loc[:, 'yhat_upper'] = np.exp(k[name]['yhat_upper'])
# k[name]['UB'] = d[name]['UB']
# k[name]['LB'] = d[name]['LB']
# k[name]['MA'] = d[name]['MA_E']
def forecasting(file_name):
    # read data
    data_set = pd.read_csv(file_name + "_ticker.csv", header=None)
    book_file = pd.read_csv(file_name + "_book.csv", header=None)
    trades_file = pd.read_csv(file_name + "_trades.csv", header=None)
    data_set['price'] = (data_set[7] + data_set[8]) / 2  # target
    data_set = data_set.drop([7, 8], axis=1)

    # calculate derived data from the *_book and *_trades files
    order_volume = []
    for i in range(0, len(book_file)):
        sum_of_orders = 0
        for j in range(3, 151, 3):
            sum_of_orders = sum_of_orders + book_file.loc[i, j]
        order_volume.append(sum_of_orders)
    mapping = dict(enumerate(order_volume))
    data_set['order_volume'] = data_set[1].map(mapping)

    turnover = []
    for i in range(0, len(trades_file)):
        sum_of_turnover = 0
        for j in range(3, 481, 4):
            sum_of_turnover = sum_of_turnover + trades_file.loc[i, j]
        turnover.append(sum_of_turnover)
    mapping = dict(enumerate(turnover))
    data_set['turnover'] = data_set[1].map(mapping)
    data_set['transaction_price'] = data_set[1].map(mapping)
    # print(data_set)

    # change date format
    date_list = []
    for i in range(0, len(data_set)):
        date_list.append(
            datetime.datetime.fromtimestamp(int(
                data_set[0][i])).strftime('%Y-%m-%d %H:%M'))
    date_series = pd.Series(date_list, name='date')
    data_set[0] = date_series

    # preprocessing: set NaN value for noisy data in pandas
    data_set.loc[(data_set['price'] == 0) & (data_set[0] == -1), 'price'] = None

    # split into train and test sets (90/10)
    test_set = data_set[round(len(data_set) * 0.9) + 1:len(data_set)]
    data_set = data_set[0:round(len(data_set) * 0.9)]

    # use a statistical model for forecasting
    data_set = data_set.rename(columns={0: 'ds', 'price': 'y'})
    A_prophet = fbprophet.Prophet()
    A_prophet.fit(data_set)
    A_forecast = A_prophet.make_future_dataframe(freq='H', periods=72)
    A_forecast = A_prophet.predict(A_forecast)
    A_prophet.plot(A_forecast, xlabel='Date', ylabel='Price')
    plt.title('prediction for ' + file_name)
    os.system('clear')

    # evaluate the prediction
    predicted_df = A_forecast.set_index('ds').join(test_set.set_index(0))
    predicted_df = predicted_df.dropna()
    predicted_df = predicted_df[["yhat", "price"]]
    y_pred = predicted_df['yhat'].tolist()
    y_true = predicted_df['price'].tolist()
    print("======================================================")
    print("mean squared error for " + file_name + ":",
          mean_squared_error(y_true, y_pred))
    print("======================================================\n")
    input("press any key to present the forecasting plot...\n"
          "close the figure to continue.")
    plt.plot(predicted_df.index, y_pred, color='red', label="predicted value")
    plt.plot(predicted_df.index, y_true, color='green', label="true value")
    plt.ylim((0, 150))
    plt.legend(loc="upper left",
               bbox_to_anchor=[0, 1],
               ncol=2,
               shadow=True,
               title="Legend",
               fancybox=True)
    plt.show()
    os.system('clear')
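# A minimal usage sketch for forecasting(), assuming the three input files the
# function reads exist in the working directory; 'btc_usd' is a hypothetical
# file prefix, i.e. btc_usd_ticker.csv, btc_usd_book.csv and btc_usd_trades.csv.
forecasting('btc_usd')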
def weekly_runoff_forecast(filename, wtd):
    # Import raw data
    def import_data():
        # creates a pandas DataFrame from the input file
        raw_data_df = pd.read_excel('data/' + filename + '.xlsx', header=0)
        return raw_data_df

    # In[3]:
    raw_data_df = import_data()
    raw_data_df.head()

    # In[4]:
    raw_data_df['Date'] = pd.to_datetime(raw_data_df['Date'])
    for i in range(1, len(raw_data_df.columns)):
        raw_data_df[raw_data_df.columns[i]] = raw_data_df[
            raw_data_df.columns[i]].fillna(
                raw_data_df[raw_data_df.columns[i]].mean())
    data = pd.DataFrame()
    data['Date'] = raw_data_df["Date"]
    data['weekly runoff'] = raw_data_df["weekly runoff"]
    data = data.set_index(['Date'])

    # In[5]:
    data.head()

    # In[7]:
    data.isnull().sum()

    # In[8]:
    data.dropna().describe()

    #---------------Resampling-------------------------------
    # In[9]:
    monthly = data.resample('M').sum()
    monthly.plot(style=[':', '--', '-'], title='Monthly Trends')

    # In[10]:
    # yearly = data.resample('Y').sum()
    # yearly.plot(style=[':', '--', '-'], title='Yearly Trends')
    # # In[11]:
    # yearly.head()

    # In[12]:
    weekly = data.resample('W').sum()
    #weekly.plot(style=[':', '--', '-'], title='Weekly Trends')

    # In[13]:
    daily = data.resample('D').sum()
    #daily.rolling(30, center=True).sum().plot(style=[':', '--', '-'], title='Daily Trends')
    daily.head()

    #----------------------Scaling-----------------------------
    # Use MinMaxScaler to normalize values to the range 0 to 1
    values = daily['weekly runoff'].values.reshape(-1, 1)
    values = values.astype('float32')
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # In[27]:
    scale = daily
    scale["weekly runoff"] = scaled
    scale.head()

    # In[28]:
    scale.shape

    # In[38]:
    #----------Making the data set for testing or training-------
    def making_dataset(i=1):
        # Testing the future prediction
        if i == 0:
            # Take the last two years of data as the test set
            df1 = scale.iloc[6940:, :]
            # Training data
            df2 = scale.iloc[:6940, :]
            df2.reset_index(inplace=True)
            # Prophet requires columns ds (Date) and y (value)
            df2 = df2.rename(columns={'Date': 'ds', 'weekly runoff': 'y'})
            return df1, df2
        else:
            # Predicting the future values after 2018
            df2 = scale.iloc[:, :]
            df2.reset_index(inplace=True)
            # Prophet requires columns ds (Date) and y (value)
            df2 = df2.rename(columns={'Date': 'ds', 'weekly runoff': 'y'})
            return df2, df2

    # In[39]:
    df1, df2 = making_dataset(wtd)
    df2.head()

    # In[40]:
    import warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)

    #----------------------Model (FbProphet)---------------------------------
    # Make the Prophet model and fit it on the data
    df2_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
    df2_prophet.fit(df2)
    path = 'trained/' + filename + '_weekly_runoff'
    joblib.dump(df2_prophet, path + '.pkl')
    warnings.resetwarnings()

    # Making the future DataFrame
    def predicting_data(i=1):
        if i == 0:
            # For testing: make a future dataframe for ~2 years
            df2_forecast = df2_prophet.make_future_dataframe(periods=30 * 25,
                                                             freq='D')
            # Make predictions
            df2_forecast = df2_prophet.predict(df2_forecast)
            df3 = df2_forecast[['ds', 'yhat']]
            df3.shape, df1.shape, df2.shape
            df4 = df3.iloc[6940:-20, :]
        else:
            # For future prediction of 2019: make a future dataframe for 12 months
            df2_forecast = df2_prophet.make_future_dataframe(
                periods=30 * 12, freq='D', include_history=False)
            # Make predictions
            df2_forecast = df2_prophet.predict(df2_forecast)
            df3 = df2_forecast[['ds', 'yhat']]
            #df3.shape, df1.shape, df2.shape
            df4 = df3.iloc[:, :]
        return df4, df2_forecast

    # In[46]:
    df4, df2_forecast = predicting_data(wtd)
    ypred = df4.iloc[:, 1:]
    ytest = df1.iloc[:, :]
    ypred.shape, ytest.shape
    df4.tail()

    # In[52]:
    ypred = df4.iloc[:, 1:]
    ytest = df1.iloc[:, :]
    ypred.shape, ytest.shape

    # In[47]:
    from sklearn.metrics import mean_absolute_error
    if wtd == 0:
        print("mean_absolute_error=", mean_absolute_error(ytest, ypred))

    # In[48]:
    df2_prophet.plot(df2_forecast, xlabel='Date', ylabel='weekly runoff')
    plt.title('simple test')
    # Plot the trends and patterns
    df2_prophet.plot_components(df2_forecast)
    df4.columns = ['Date', 'weekly runoff']

    # Get the values back into the original range
    values = df4['weekly runoff'].values.reshape(-1, 1)
    values = values.astype('float32')
    valu = scaler.inverse_transform(values)
    df4['weekly runoff'] = valu
    df4['weekly runoff'] = abs(df4['weekly runoff'])
    df4.to_csv('data/forecast/' + filename + '_weekly_runoff_forecast.csv',
               index=False)
    return df4
# During Q2 2017, Tesla [sold](https://en.wikipedia.org/wiki/Tesla,_Inc.#Production_and_sales)
# 22026 cars while GM [sold](http://gmauthority.com/blog/gm/general-motors-sales-numbers/)
# 725000. In Q3 2017, Tesla sold 26137 cars and GM sold 808000. In all of 2017,
# Tesla sold 103084 cars and GM sold 3002237. That means GM was valued less than
# Tesla in a year during which it sold 29 times more cars than Tesla!
# Interesting to say the least.

# # Prophet Models

# In[185]:

import fbprophet

# Prophet requires columns ds (Date) and y (value)
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})

# Put market cap in billions
gm['y'] = gm['y'] / 1e9

# Make the prophet models and fit on the data
# changepoint_prior_scale can be changed to achieve a better fit
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
gm_prophet.fit(gm)

# Repeat for the tesla data
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla['y'] = tesla['y'] / 1e9
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                  n_changepoints=10)
tesla_prophet.fit(tesla)

# ### Predictions for 2 Years

# In[186]:

# Make a future dataframe for 2 years
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
def modeling_n_prediction(df, device, position):
    # Filter the table by device and by position less than or equal to the cutoff
    data = df[(df['device'] == device) & (df['position'] <= position)]
    data['rank'] = data['position'].astype(int)

    # Order the table
    data = data.sort_values(["keyword", "date", "rank"],
                            ascending=(True, True, True))

    # Group by keyword, date and rank and calculate total clicks and impressions
    grouped_data = data.groupby(['keyword', 'date', 'rank']).agg(
        all_clicks=pd.NamedAgg(column='clicks', aggfunc=sum),
        all_impressions=pd.NamedAgg(column='impressions', aggfunc=sum))
    grouped_data = grouped_data.reset_index()
    grouped_data = grouped_data.sort_values(["keyword", "date", "rank"],
                                            ascending=(True, True, True))

    # Get the list of unique keywords in the Google Search Console data
    mobile_keywords = grouped_data['keyword'].unique()
    grouped_data['keyword'] = grouped_data['keyword'].astype(str)

    key_date_df_list = {}
    count = 1
    for key in list(keyword_master['keywords'].unique()):
        key_df_list = {}
        print(count)
        print('Processing for keyword: ', key)
        print()
        distance = [
            dis.get_jaro_distance(key, word) for word in mobile_keywords
        ]
        distance = np.array(distance)
        cluster = np.where(distance <= 0.3)
        total_count = len(mobile_keywords[cluster]) - 1
        words = '|'.join(mobile_keywords.tolist())
        key_df = pd.DataFrame(
            columns=['keyword', 'date', 'rank', 'clicks', 'impressions'])
        dt_list = list(grouped_data['date'].drop_duplicates().astype(str))
        dt_list.sort()
        apply1(dt_list, key_df_list, grouped_data, mobile_keywords, cluster,
               key)
        temp_df = pd.DataFrame(
            columns=['keyword', 'date', 'rank', 'clicks', 'impressions'])
        for k, val in key_df_list.items():
            temp_df = pd.concat([temp_df, val], ignore_index=True)
        key_date_df_list[key] = temp_df
        count = count + 1

    t_df = pd.DataFrame(
        columns=['keyword', 'date', 'rank', 'clicks', 'impressions'])
    for k, val in key_date_df_list.items():
        t_df = pd.concat([t_df, val], ignore_index=True)
    all_ranks_df = t_df

    if device == 'MOBILE':
        ctrs = ctr_df[['position', 'mobile_ctr']]
    else:
        ctrs = ctr_df[['position', 'web_ctr']]
    all_ranks_df['rank'] = all_ranks_df['rank'].astype(int)
    all_ranks_df['impressions'] = all_ranks_df['impressions'].astype(float)
    all_ranks_df = pd.merge(all_ranks_df, ctrs,
                            left_on="rank", right_on="position")

    # Calculate the max and average impressions for each keyword and date
    temp_all_ranks_df = all_ranks_df.groupby(['keyword', 'date']).agg(
        avg_impressions=pd.NamedAgg(column='impressions', aggfunc=round_mean),
        max_impressions=pd.NamedAgg(column='impressions', aggfunc=round_max))
    temp_all_ranks_df = temp_all_ranks_df.reset_index()
    all_ranks_df = pd.merge(all_ranks_df, temp_all_ranks_df,
                            on=['keyword', 'date'])

    # Replace NA values with the average impressions
    all_ranks_df['impressions'] = all_ranks_df['impressions'].fillna(
        all_ranks_df['avg_impressions'])
    all_ranks_df = all_ranks_df.sort_values(["keyword", "date", "rank"],
                                            ascending=(True, True, True))
    #df['First Season'] = np.where(df['First Season'] > 1990, 1, df['First Season'])
    all_ranks_df['impressions'] = np.where(
        all_ranks_df['impressions'] <= all_ranks_df['avg_impressions'],
        all_ranks_df['max_impressions'], all_ranks_df['impressions'])
    if device == 'MOBILE':
        all_ranks_df['clicks'] = (all_ranks_df['mobile_ctr'] *
                                  all_ranks_df['impressions']) / 100
    else:
        all_ranks_df['clicks'] = (all_ranks_df['web_ctr'] *
                                  all_ranks_df['impressions']) / 100
    all_ranks_df.clicks = all_ranks_df.clicks.round()
    all_ranks_df['clicks'] = all_ranks_df['clicks'].astype(int)
    if device == 'MOBILE':
        all_ranks_df['mobile_ctr'] = None
    else:
        all_ranks_df['web_ctr'] = None
    all_ranks_df['avg_impressions'] = None
    all_ranks_df['max_impressions'] = None
    all_ranks_df['keyword'] = all_ranks_df['keyword'].astype(str)
    all_ranks_df['impressions'] = all_ranks_df['impressions'].astype(int)
    all_ranks_df['date'] = all_ranks_df['date'].astype(str)

    casted_df = all_ranks_df.pivot_table(index=['keyword', 'date'],
                                         columns='rank',
                                         values=['clicks', 'impressions'])
    casted_df.columns = [
        "{0}_{1}".format(l1, l2) for l1, l2 in casted_df.columns
    ]
    casted_df = casted_df.reset_index()
    casted_df['keyword'] = casted_df['keyword'].astype('category')

    key_pred_list = {}
    for key in list(keyword_master['keywords'].unique()):
        print('Forecasting for keyword - ', key)
        print()
        pred_pos_list = {}
        for position in range(1, 11):
            print('Position - ', position)
            print()
            key_sub = casted_df[casted_df['keyword'] == key]
            key_sub['date'] = pd.to_datetime(key_sub['date'])
            clicks_trend = key_sub[['clicks_' + str(position), 'date']]
            clicks_trend.columns = ["y", "ds"]
            prediction_days = 14
            pred_len = 0
            totalRow = len(clicks_trend)
            pred_range = [totalRow - pred_len + 1, totalRow]
            pre_views = clicks_trend.head(totalRow - pred_len)
            post_views = clicks_trend.tail(pred_len)
            m = fbprophet.Prophet()
            m.fit(pre_views)
            future = m.make_future_dataframe(periods=prediction_days)
            fcast = m.predict(future)
            pred_df = fcast[['ds', 'yhat', 'yhat_lower',
                             'yhat_upper']].tail(prediction_days)
            pred_df['position'] = position
            pred_df['keyword'] = key
            pred_df.columns = [
                "date", "clicks", "clicks_lower", "clicks_upper", "position",
                "keyword"
            ]
            pred_df = pred_df[[
                "keyword", "date", "position", "clicks", "clicks_lower",
                "clicks_upper"
            ]]
            pred_df.clicks_upper = pred_df.clicks_upper.round()
            pred_df.clicks_lower = pred_df.clicks_lower.round()
            #fig1 = m.plot(fcast)
            pred_pos_list[position] = pred_df
        t1_df = pd.DataFrame(columns=[
            "keyword", "date", "position", "clicks", "clicks_lower",
            "clicks_upper"
        ])
        for k, val in pred_pos_list.items():
            t1_df = pd.concat([t1_df, val], ignore_index=True)
        key_pred_list[key] = t1_df
        print('\n')

    t2_df = pd.DataFrame(columns=[
        "keyword", "date", "position", "clicks", "clicks_lower", "clicks_upper"
    ])
    for k, val in key_pred_list.items():
        t2_df = pd.concat([t2_df, val], ignore_index=True)
    pred_key_df = t2_df

    casted_pred_df = pred_key_df.pivot_table(
        index=['keyword', 'date'],
        columns='position',
        values=['clicks', 'clicks_lower', 'clicks_upper'])
    casted_pred_df.columns = [
        "{0}_{1}".format(l1, l2) for l1, l2 in casted_pred_df.columns
    ]
    casted_pred_df = casted_pred_df.reset_index()
    casted_pred_df = pd.merge(keywords_df, casted_pred_df,
                              left_on="keywords", right_on="keyword")
    # casted_df['impressions'] = np.where(all_ranks_df['impressions'] <= all_ranks_df['avg_impressions'], all_ranks_df['max_impressions'], all_ranks_df['impressions'])
    #print(casted_pred_df['date'])
    casted_pred_df['date'] = casted_pred_df['date'].astype(str)
    casted_pred_df = casted_pred_df.astype(int, errors='ignore')
    #casted_pred_df['date'] = casted_pred_df['date'].astype(str)
    num = casted_pred_df._get_numeric_data()
    num[num < 0] = 0
    #print(casted_pred_df['date'])
    casted_pred_df.to_json(r'FinalResults_UK_' + device + '.json',
                           orient='records')
    return list([casted_df, casted_pred_df])
    encoding='utf-8', index_col='date', parse_dates=True)
print(stock_df.head())

df = stock_df.reset_index()
print(df.head())
df = df.rename(columns={'date': 'ds', 'close': 'y'})
print(df.head())
df.set_index('ds').y.plot()
plt.show()

# # applying prophet model #
model = Prophet.Prophet(changepoint_range=1, changepoint_prior_scale=0.05)
model.fit(df)

# Create future dataframe
future = model.make_future_dataframe(periods=90)
print(future.tail())

# Forecast for future dataframe
forecast = model.predict(future)
print(forecast.tail())
print('Forecast: \n',
      forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

model.plot(forecast)
model.plot_components(forecast)
plt.show()
#df['ds'] = pd.to_datetime(df['ds'], format='%Y%M%D')
#Set Date as index
#df = df.set_index('ds')

plt.figure(figsize=(20, 10))
plt.plot(df.ds, df.y)
plt.title('AirPassengers')
#plt.savefig('test2png.png', dpi=100)

###Prophet Model
# set the uncertainty interval to 95% (the Prophet default is 80%)
my_model = Prophet.Prophet(interval_width=0.95,
                           weekly_seasonality=True,
                           daily_seasonality=True,
                           growth='logistic')

# logistic growth requires a carrying capacity ('cap') column on the
# training data as well, not just on the future dataframe
df['cap'] = 50

#fit df to model
my_model.fit(df)
"""
In order to obtain forecasts of our time series, we must provide Prophet
with a new DataFrame containing a ds column that holds the dates for which
we want predictions. Conveniently, we do not have to concern ourselves with
manually creating this DataFrame, as Prophet provides the
make_future_dataframe helper function:
"""
future_dates = my_model.make_future_dataframe(periods=90, freq='D')
future_dates['cap'] = 50
future_dates.tail()
df_train = full_df.loc[full_df['date'] < '2019-06-01']
df_test = full_df.loc[full_df['date'] >= '2019-06-01']

#%%
df_train = df_train[['date', 'total_amount']]
df_test = df_test[['date', 'total_amount']]

# prophet standard column names
df_train.columns = ['ds', 'y']
df_test.columns = ['ds', 'y']

df_train = df_train.groupby('ds', as_index=False)['y'].sum()
df_test = df_test.groupby('ds', as_index=False)['y'].sum()

#%%
# fit a basic prophet model
model = prop.Prophet()
model.fit(df_train)

#%%
# test it with roughly half a year (210 days) of future dates
future = model.make_future_dataframe(periods=210)
forecast = model.predict(future)
fig_forecast = model.plot(forecast)

#%%
# check components
fig_components = model.plot_components(forecast)

#%%
preds = forecast.loc[forecast['ds'] >= '2019-06-01']
def predict_caps(request):
    plt.clf()
    quandl.ApiConfig.api_key = 'hSBzwas1PTzHyjs58m3G'
    ticker2 = request.POST.get('ticker2')
    gm = quandl.get('WIKI/' + ticker2)
    gm = gm["2010":]

    # Yearly average number of shares outstanding for Tesla, GM and Apple
    tesla_shares = {
        2018: 168e6, 2017: 162e6, 2016: 144e6, 2015: 128e6, 2014: 125e6,
        2013: 119e6, 2012: 107e6, 2011: 100e6, 2010: 51e6
    }
    gm_shares = {
        2018: 1.42e9, 2017: 1.50e9, 2016: 1.54e9, 2015: 1.59e9, 2014: 1.61e9,
        2013: 1.39e9, 2012: 1.57e9, 2011: 1.54e9, 2010: 1.50e9
    }
    apple_shares = {
        2018: 4.927e9, 2017: 5.252e9, 2016: 5.5e9, 2015: 5.793e9,
        2014: 6.123e9, 2013: 6.522e9, 2012: 6.617e9, 2011: 6.557e9,
        2010: 6.473e9
    }
    company_shares = {}
    if ticker2 == 'TSLA':
        company_shares = tesla_shares
    elif ticker2 == 'GM':
        company_shares = gm_shares
    else:
        company_shares = apple_shares

    # Create a year column
    gm['Year'] = gm.index.year

    # Take dates from the index and move them to a Date column
    gm.reset_index(inplace=True)
    gm['cap'] = 0
    for i, year in enumerate(gm['Year']):
        # Retrieve the shares for the year
        shares = company_shares.get(year)
        # Update the cap column to shares times the price
        # (.loc replaces the long-deprecated .ix indexer)
        gm.loc[i, 'cap'] = shares * gm.loc[i, 'Adj. Close']

    # Prophet requires columns ds (Date) and y (value)
    gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})

    # Put market cap in billions
    gm['y'] = gm['y'] / 1e9

    # Make the prophet models and fit on the data
    # changepoint_prior_scale can be changed to achieve a better fit
    gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
    gm_prophet.fit(gm)

    # Make a future dataframe for 2 years
    gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')

    # Make predictions
    gm_forecast = gm_prophet.predict(gm_forecast)
    gm_prophet.plot(gm_forecast,
                    xlabel='Date',
                    ylabel='Market Cap (billions $)')
    plt.title('Market Cap of ' + ticker2)
    fi = cStringIO.StringIO()
    plt.savefig(fi, format="png", facecolor=(0.95, 0.95, 0.95))
    plt.clf()
    return HttpResponse(fi.getvalue(), content_type="image/png")
    ['year-month']).sum().reset_index().rename(columns={
        'year-month': 'ds',
        'active_listing_count': 'y'
    })

# In[4]:

profile = pp.ProfileReport(local_data, 'mls profile report')

# In[5]:

local_listing_sum.tail()  # looks good

# In[6]:

m = fbprophet.Prophet()

# we have an inventory floor of 0, so let's establish that
local_listing_sum['floor'] = 0
m.fit(local_listing_sum)

# future does NOT predict, it simply makes timestamps into the future
# on a monthly frequency
future = m.make_future_dataframe(periods=12, freq='MS')

# In[7]:

forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(15)

# In[8]:

fig1 = m.plot(forecast)
decomp_plot = m.plot_components(forecast)
con = None
con = psycopg2.connect(database=dbname, user=username)

for site in site_no:
    sql_query = """
        SELECT * FROM n""" + site + """;
        """
    site_data_from_sql = pd.read_sql_query(sql_query, con)

    # replace zero readings with the mean of the nonzero readings
    nonzero_mean = site_data_from_sql.loc[site_data_from_sql.y != 0, "y"].mean()
    site_data_from_sql.loc[site_data_from_sql.y == 0, "y"] = nonzero_mean

    df_site = site_data_from_sql
    df_site = df_site.rename(columns={'datetime': 'ds'})
    df_site['y'] = np.log(df_site['y'])

    df_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                   yearly_seasonality=True,
                                   interval_width=0.75)
    df_prophet.fit(df_site)

    df_forecast = df_prophet.make_future_dataframe(periods=450 * 1, freq='D')
    df_forecast = df_prophet.predict(df_forecast)

    df_site.set_index('ds', inplace=True)
    df_forecast.set_index('ds', inplace=True)

    site_data_from_sql = pd.DataFrame(site_data_from_sql)
    site_data_from_sql.set_index('datetime', inplace=True)

    viz_df = site_data_from_sql.join(
        df_forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    viz_df['ds'] = viz_df.index
def run(self):
    model = fbprophet.Prophet(
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=True,
        changepoint_prior_scale=0.05,
        changepoints=None,
    )
    model.add_seasonality(name="monthly", period=30.5, fourier_order=5)

    stock = pdr.get_data_yahoo(self.mainwindow.ui.lineEdit.text(),
                               start=datetime.datetime(2006, 10, 1),
                               end=datetime.datetime.now()).reset_index()
    stock["ds"] = stock["Date"]
    stock["y"] = stock["Adj Close"]
    max_date = max(stock["Date"])

    # Fit on the stock history for self.training_years number of years
    stock_history = stock[stock["Date"] > (max_date - pd.DateOffset(years=3))]
    model.fit(stock_history)

    # Make and predict for next year with future dataframe
    future = model.make_future_dataframe(periods=self.days, freq="D")
    future = model.predict(future)

    title = "%s Historical and Predicted Stock Price" % get_company_name(
        self.mainwindow.ui.lineEdit.text())

    ##-- plotly
    self.fig_model = go.Figure([
        go.Scatter(x=stock_history["ds"],
                   y=stock_history["y"],
                   mode="lines",
                   opacity=0.8,
                   name="Observations",
                   line=dict(width=2, color='Black')),
        go.Scatter(x=future["ds"],
                   y=future["yhat"],
                   name="Modeled",
                   mode="lines",
                   line=dict(width=2.4, color='Green')),
        go.Scatter(name="Upper Bound",
                   x=future["ds"],
                   y=future["yhat_upper"],
                   mode="lines",
                   showlegend=False,
                   line=dict(color='Green')),
        go.Scatter(name="Lower Bound",
                   x=future["ds"],
                   y=future["yhat_lower"],
                   mode="lines",
                   fillcolor='rgba(67,255,1, 0.3)',
                   fill='tonexty',
                   showlegend=False,
                   line=dict(color='Green'))
    ])
    self.fig_model.update_layout(
        title=title,
        # xaxis_title="Time",
        yaxis_title="Price, $",
        font=dict(family="Courier New, monospace",
                  size=14,
                  color="RebeccaPurple"))
spain_ts = spain_counts.set_index('incident_date')
spain_ts = spain_ts.reindex(idx, fill_value=0)
spain_ts.head()
spain_ts.describe()

import fbprophet
import numpy as np

spain_fb = spain_ts.copy()
spain_fb['index1'] = spain_fb.index
spain_fb.columns = ['y', 'ds']
spain_fb.head()

prophet1 = fbprophet.Prophet(changepoint_prior_scale=0.15,
                             daily_seasonality=True)
prophet1.fit(spain_fb)

# Specify 1460 days (four years) out to predict
future_data = prophet1.make_future_dataframe(periods=1460, freq='D')

# Predict the values
forecast_data = prophet1.predict(future_data)
print(forecast_data[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
print('--*-- Conversions --*--')
print(np.exp(forecast_data[['yhat', 'yhat_lower', 'yhat_upper']]))

prophet1.plot(forecast_data, xlabel='Date', ylabel='Attacks')
plt.title('Predicted Terrorist Attacks in Spain', fontsize=10)
plt.savefig("Global.png")
prophet1.plot_components(forecast_data)
def __init__(self):
    super().__init__()
    self.model = fbp.Prophet()
    self.y = None
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={'DateTime': 'ds', 'KWH_hh': 'y'})
display(daily_df.head())

# In[84]:

# In prophet, the changepoint_prior_scale parameter is used
# to control how sensitive the trend is to changes,
# with a higher value being more sensitive and
# a lower value less sensitive.
# https://facebook.github.io/prophet/docs/trend_changepoints.html
prophet_inst = fbprophet.Prophet(changepoint_prior_scale=0.10)
prophet_inst.fit(daily_df)

# In[86]:

forecast = prophet_inst.make_future_dataframe(periods=30 * 2, freq='D')
forecast = prophet_inst.predict(forecast)

# In[88]:

# The black dots represent the actual values,
# the blue line indicates the forecasted values,
# and the light blue shaded region is the uncertainty.
prophet_inst.plot(forecast)
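# The comment above describes how changepoint_prior_scale trades trend
# flexibility against smoothness. A minimal sketch of comparing two settings
# side by side; daily_df is reused from above, and the 0.01/0.5 values are
# illustrative assumptions, not taken from the source.
import fbprophet
from matplotlib import pyplot as plt

for prior in (0.01, 0.5):
    m = fbprophet.Prophet(changepoint_prior_scale=prior)
    m.fit(daily_df)  # daily_df already has the ds/y columns prepared above
    fcst = m.predict(m.make_future_dataframe(periods=60, freq='D'))
    m.plot(fcst)
    plt.title('changepoint_prior_scale = %s' % prior)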
def predict_testcounts(
    testcounts: pandas.Series,
    *,
    country: str,
    region: typing.Optional[typing.Union[str, typing.List[str]]],
    regional_holidays: bool = False,
    keep_data: bool,
    ignore_before: typing.Optional[typing.Union[datetime.datetime,
                                                pandas.Timestamp, str]] = None,
    **kwargs,
) -> ForecastingResult:
    """ Predict/smooth missing test counts with Prophet.

    Implemented by Laura Helleckes and Michael Osthege.

    Parameters
    ----------
    testcounts : pandas.Series
        date-indexed series of observed testcounts
    country : str
        name or short code of country (as used by https://github.com/dr-prodigy/python-holidays)
    region : optional, [str]
        if None or []: only nation-wide
        if "all": nation-wide and all regions
        if "CA": nation-wide and those for region "CA"
        if ["CA", "NY", "FL"]: nation-wide and those for all listed regions
    regional_holidays : bool, default False
        if True, fetch regional holidays for each region, if `region` is not
        set to None or to only one region.
        if False (default), fetch only national holidays (useful for countries
        where test data is available at the regional level, but which only have
        national holidays).
    keep_data : bool
        if True, existing entries are kept
        if False, existing entries are also predicted, resulting in a smoothed profile
    ignore_before : timestamp
        all dates before this are ignored
        Use this argument to prevent an unrealistic upwards trend due to
        initial testing ramp-up
    **kwargs
        optional kwargs for the `fbprophet.Prophet`. For example:
        * growth: 'linear' or 'logistic' (default)
        * seasonality_mode: 'additive' or 'multiplicative' (default)

    Returns
    -------
    result : pandas.Series
        the date-indexed series of smoothed/predicted testcounts
    m : fbprophet.Prophet
        the prophet model
    forecast : pandas.DataFrame
        contains the model prediction
    holidays : dict of { datetime : str }
        dictionary of the holidays that were used in the model
    """
    testcounts.index.name = "date"
    testcounts.name = "total"
    if not ignore_before:
        ignore_before = testcounts.index[0]

    mask_fit = testcounts.index >= ignore_before
    if keep_data:
        mask_predict = numpy.logical_and(testcounts.index >= ignore_before,
                                         numpy.isnan(testcounts.values))
    else:
        mask_predict = testcounts.index >= ignore_before

    years = set([testcounts.index[0].year, testcounts.index[-1].year])
    regions = numpy.atleast_1d(region)
    if region != "all" and len(regions) <= 1 and regional_holidays:
        raise ValueError(
            "Predicting test counts only at national level or for one region only. "
            "Can't ask for regional holidays. Set the `regional_holidays` kwarg to False."
        )
    # the last condition is needed because some countries have only national
    # holidays for all regions:
    if (region == "all" or len(regions) > 1) and regional_holidays:
        # distinguish between national and regional holidays
        all_holidays = get_holidays(country, region, years=years)
        national_holidays = get_holidays(country, region=None, years=years)
        holiday_df = pandas.DataFrame(
            data=[(
                date,
                name,
                "national" if date in national_holidays.keys() else "regional",
            ) for date, name in all_holidays.items()],
            columns=["ds", "name", "holiday"],
        )
    else:
        # none, or only one region -> no distinction between national/regional holidays
        all_holidays = get_holidays(country, region=None, years=years)
        holiday_df = pandas.DataFrame(
            dict(
                holiday="holiday",
                name=list(all_holidays.values()),
                ds=pandas.to_datetime(list(all_holidays.keys())),
            ))

    # Config settings of the forecast model
    days = (testcounts.index[-1] - testcounts.index[0]).days
    prophet_kwargs = dict(
        growth="logistic",
        seasonality_mode="multiplicative",
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=False,
        holidays=holiday_df,
        mcmc_samples=500,
        # restrict the number of potential changepoints:
        n_changepoints=int(numpy.ceil(days / 30)),
    )
    # override defaults with user-specified kwargs
    prophet_kwargs.update(kwargs)
    m = fbprophet.Prophet(**prophet_kwargs)

    # fit only the selected subset of the data
    df_fit = (testcounts.loc[mask_fit].reset_index().rename(columns={
        "date": "ds",
        "total": "y"
    }))
    if prophet_kwargs["growth"] == "logistic":
        cap = numpy.max(testcounts) * 1
        df_fit["floor"] = 0
        df_fit["cap"] = cap
    m.fit(df_fit)

    # predict for all dates in the input
    df_predict = testcounts.reset_index().rename(columns={"date": "ds"})
    if prophet_kwargs["growth"] == "logistic":
        df_predict["floor"] = 0
        df_predict["cap"] = cap
    forecast = m.predict(df_predict)

    # make a series of the result that has the same index as the input
    result = pandas.Series(index=testcounts.index,
                           data=testcounts.copy().values,
                           name="testcount")
    result.loc[mask_predict] = numpy.clip(
        forecast.set_index("ds").yhat, 0, forecast.yhat.max())

    # the full-length result series, model and forecast are returned
    return result, m, forecast, all_holidays
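# A minimal usage sketch for predict_testcounts, assuming the surrounding
# module (including its get_holidays helper and the pandas/numpy imports used
# above) is available; the synthetic series and the "DE" country code are
# placeholders.
import numpy
import pandas

dates = pandas.date_range("2020-03-01", "2020-06-30", freq="D")
counts = pandas.Series(numpy.nan, index=dates)
counts.iloc[:90] = numpy.linspace(500, 1500, 90)  # observed; last month missing

result, m, forecast, holidays = predict_testcounts(
    counts,
    country="DE",                # placeholder country code
    region=None,                 # nation-wide holidays only
    keep_data=True,              # keep observed entries; predict only the NaNs
    ignore_before="2020-03-15",  # skip the initial testing ramp-up
)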
logger.info('Bitcoin Analysis\n')
logger.info('Time period: %s to %s' %
            (btc_data_year.index[0].strftime('%Y-%m-%d'),
             btc_data_year.index[-1].strftime('%Y-%m-%d')))
logger.info('Weighted Price Stats')
logger.info('Max: %.2f, Min: %.2f, Mean: %.2f, Std: %.2f' %
            (btc_year_price_stats['Weighted Price']['max'],
             btc_year_price_stats['Weighted Price']['min'],
             btc_year_price_stats['Weighted Price']['mean'],
             btc_year_price_stats['Weighted Price']['std']))

# Perform basic timeseries analysis using fbprophet
btc_fbprophet = btc_data.copy()
btc_fbprophet.reset_index(level=0, inplace=True)
btc_fbprophet = btc_fbprophet.rename(columns={'Date': 'ds'})

# Drop zero values
btc_fbprophet = btc_fbprophet[btc_fbprophet['Weighted Price'] != 0]

# Fit to the log of the data
btc_fbprophet['y'] = np.log(btc_fbprophet['Weighted Price'])
btc_data_prophet_log = fbprophet.Prophet(yearly_seasonality=True,
                                         weekly_seasonality=True,
                                         changepoint_prior_scale=0.15)

# Pickle: load a saved model if one exists, otherwise fit and save
if os.path.isfile(directory + 'Data/Models/fbprophet_logweightprice.model.sav'):
    btc_data_prophet_log = pickle.load(
        open(directory + 'Data/Models/fbprophet_logweightprice.model.sav', 'rb'))
else:
    btc_data_prophet_log.fit(btc_fbprophet)
    pickle.dump(
        btc_data_prophet_log,
        open(directory + 'Data/Models/fbprophet_logweightprice.model.sav', 'wb'))

btc_data_forecast_log = btc_data_prophet_log.make_future_dataframe(
    periods=365 * 2, freq='D')
btc_data_forecast_log = btc_data_prophet_log.predict(btc_data_forecast_log)

# Identify change points
btc_changepoints = btc_data_prophet_log.changepoints

# Work out whether they are +ve or -ve
# (.loc replaces the long-deprecated .ix indexer)
c_data = btc_fbprophet.loc[btc_changepoints, :]
deltas = btc_data_prophet_log.params['delta'][0]
c_data['delta'] = deltas
# set out route for forecast tables
out = []
# set out route for 2016 & 2017 train predictions
train_preds = []

# make a DataFrame of column values as datetimes
datetimes = pd.DataFrame(data=pd.to_datetime(pd.Series(data=train_df.columns)))

# go through each place in train_df
for i in range(len(train_df)):
    if i % 5 == 0:
        print(i)
    # extract the DataFrame for that place
    df = train_df.iloc[i]
    # add datetime values to the DataFrame
    df = pd.concat([df.reset_index(), datetimes], axis=1)
    # use fbprophet to make a Prophet model
    place_prophet = fbprophet.Prophet(changepoint_prior_scale=0.1)
    # rename the place df's columns to agree with prophet formatting
    df.columns = ['drop', 'y', 'ds']
    # adjust df; forget the index column (drop)
    df = df[['ds', 'y']]
    # fit the place on the prophet model
    place_prophet.fit(df)
    # make a future dataframe for the 2016 & 2017 years
    place_forecast = place_prophet.make_future_dataframe(periods=30, freq='Y')
    # establish predictions
    forecast = place_prophet.predict(place_forecast)
    # tag and bag (forecast table)
    out.append(forecast)
    # store the 2016 and 2017 predictions
    train_preds.append([
        forecast.loc[forecast.ds == '2016-12-31'].yhat.values[0],
def create_prophet_model(self, days=0, resample=False):
    # self.reset_plot()
    try:
        # UIFunctions.simple_strategy_utils_not_visible()
        self.ui.lineEdit_long.setVisible(False)
        self.ui.spinBox_long.setVisible(False)
        self.ui.horizontalSlider_long.setVisible(False)
        self.ui.lineEdit_short.setVisible(False)
        self.ui.spinBox_short.setVisible(False)
        self.ui.horizontalSlider_short.setVisible(False)
        self.ui.spinBox_fbprophet.setVisible(False)
        self.ui.comboBox.setVisible(False)
        self.ui.button_fb.setVisible(True)
        self.ui.spinBox_fbprophet.setVisible(True)

        model = fbprophet.Prophet(
            daily_seasonality=False,
            weekly_seasonality=False,
            yearly_seasonality=True,
            changepoint_prior_scale=0.05,
            changepoints=None,
        )
        if True:
            # Add monthly seasonality
            model.add_seasonality(name="monthly", period=30.5, fourier_order=5)

        stock = pdr.get_data_yahoo(
            self.ui.lineEdit.text(),
            start=datetime.datetime(2006, 10, 1),
            end=datetime.datetime.now()).reset_index()
        stock["ds"] = stock["Date"]
        stock["y"] = stock["Adj Close"]
        training_years = 3
        max_date = max(stock["Date"])

        # Fit on the stock history for training_years number of years
        stock_history = stock[stock["Date"] > (
            max_date - pd.DateOffset(years=training_years))]
        model.fit(stock_history)

        # Make and predict for next year with future dataframe
        future = model.make_future_dataframe(periods=days, freq="D")
        future = model.predict(future)

        if days > 0:
            title = "%s Historical and Predicted Stock Price" % self.ui.lineEdit.text()
        else:
            title = "%s Historical and Modeled Stock Price" % self.ui.lineEdit.text()

        ##-- plotly
        fig_model = go.Figure([
            go.Scatter(x=stock_history["ds"],
                       y=stock_history["y"],
                       mode="lines",
                       opacity=0.8,
                       name="Observations",
                       line=dict(width=1.4, color='Black')),
            go.Scatter(x=future["ds"],
                       y=future["yhat"],
                       name="Modeled",
                       mode="lines",
                       line=dict(width=2.4, color='Green')),
            go.Scatter(name="Upper Bound",
                       x=future["ds"],
                       y=future["yhat_upper"],
                       mode="lines",
                       showlegend=False,
                       line=dict(color='Green')),
            go.Scatter(name="Lower Bound",
                       x=future["ds"],
                       y=future["yhat_lower"],
                       mode="lines",
                       fillcolor='rgba(67,255,1, 0.3)',
                       fill='tonexty',
                       showlegend=False,
                       line=dict(color='Green'))
        ])
        fig_model.update_layout(title=title,
                                xaxis_title="Date",
                                yaxis_title="Price",
                                font=dict(family="Courier New, monospace",
                                          size=14,
                                          color="RebeccaPurple"))
        self.ui.browser.setHtml(fig_model.to_html(include_plotlyjs='cdn'))
        self.ui.stackedWidget.setCurrentWidget(self.ui.browser)
    except Exception as e:
        pass
plt.figure(figsize=(10, 8))
plt.plot(cars['Date'], cars['gm_cap'], 'b-', label='GM')
plt.plot(cars['Date'], cars['tesla_cap'], 'r-', label='TESLA')
plt.xlabel('Date')
plt.ylabel('Market Cap (Billions $)')
plt.title('Market Cap of GM and Tesla')
# plt.show()

# Prophet requires columns ds (Date) and y (value)
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})

# Put market cap in billions
gm['y'] = gm['y'] / 1e9
tesla['y'] = tesla['y'] / 1e9

# Make the prophet models and fit on the data
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
gm_prophet.fit(gm)
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
tesla_prophet.fit(tesla)

# Make future dataframes for 2 years
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
tesla_forecast = tesla_prophet.make_future_dataframe(periods=365 * 2, freq='D')

# Make predictions
gm_forecast = gm_prophet.predict(gm_forecast)
tesla_forecast = tesla_prophet.predict(tesla_forecast)

gm_prophet.plot(gm_forecast, xlabel='Date', ylabel='Market Cap (billions $)')
tesla_prophet.plot(tesla_forecast,
                   xlabel='Date',
                   ylabel='Market Cap (billions $)')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import fbprophet

df = pd.read_csv('2330_v1.csv', header=0, sep='\t', encoding='utf-8')
df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d')

# The adjusted close accounts for stock splits, so that is what we should graph
plt.plot(df['date'], df['close'])
plt.title('TW 2330 Stock Price')
plt.ylabel('Price ($)')
plt.show()

df.columns = ['ds', 'y']
df.tail()

df_new = df.iloc[:, :]
m = fbprophet.Prophet(changepoint_prior_scale=0.95)
m.fit(df_new)
future = m.make_future_dataframe(periods=90)
forecast = m.predict(future)
m.plot(forecast)
#m.plot_components(forecast)
plt.show()
def prophet_model(self):
    model = fbprophet.Prophet(daily_seasonality=False,
                              yearly_seasonality=False)
    return model
df = sids.get_historical(fields, start_date, end_date)
df = df.resample('B').last()
df = df.interpolate('linear')
df = df.fillna(method='ffill')
df.columns = df.columns.droplevel(-1)
#df = df.stack(level=0, dropna=False)
#df['y_orig'] = df['CPI YOY Index']
#df['CPI YOY Index'] = np.log(df['CPI YOY Index'])
df = df.rename(columns={'date': 'ds', 'CPI YOY Index': 'y'})
df['ds'] = df.index

df_prophet = fbprophet.Prophet(interval_width=0.80
                               #weekly_seasonality=False, yearly_seasonality=True,
                               ).fit(df)
#df_prophet.add_regressor(df['VOLUME'])
#df_prophet.fit(df)

# Make a future dataframe for 180 days
df_forecast = df_prophet.make_future_dataframe(periods=180)  #freq='MS')

# Make predictions
df_forecast = df_prophet.predict(df_forecast)

fig = df_prophet.plot(df_forecast, xlabel='Date', ylabel='CPI')
plt.title('CPI Price Action')
#for cp in df_prophet.changepoints:
#    plt.axvline(cp, c='gray', ls='--', lw=2)
#df_cv = cross_validation(df_prophet, horizon='30 days')
import pandas
from matplotlib import pyplot as plt
import fbprophet

RATES = pandas.read_csv('/storage/bin/exrates.csv', header=0)
RATES['ds'] = pandas.to_datetime(RATES['time.time'], unit='s')
RATES['y'] = RATES['www.exchangerates.org.uk']
maxts = max(RATES['time.time'])
RATES = RATES.drop(['hryvna.today', 'time.time', 'www.exchangerates.org.uk'],
                   axis=1)

model = fbprophet.Prophet()
model.fit(RATES)

future = list()
for i in range(1, 7):  # 6 days ahead
    future.append(maxts + 86400 * i)
future = pandas.DataFrame(future)
current = pandas.DataFrame(RATES['ds'])
future.columns = ['ds']
future['ds'] = pandas.to_datetime(future['ds'], unit='s')
data = pandas.concat([current, future], axis=0)

forecast = model.predict(data)
model.plot(forecast, xlabel='Date', ylabel='UAHs for 1 USD')
plt.show()
def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
    """Fits the fbprophet model.

    Parameters
    ----------
    X : `pandas.DataFrame`
        Input timeseries, with timestamp column, value column, and any
        additional regressors. The value column is the response, included in
        X to allow transformation by `sklearn.pipeline.Pipeline`.
    y : ignored
        The original timeseries values, ignored.
        (The y for fitting is included in ``X``.)
    time_col : `str`
        Time column name in ``X``.
    value_col : `str`
        Value column name in ``X``.
    fit_params : `dict`
        Additional parameters for the null model.

    Returns
    -------
    self : self
        Fitted model is stored in ``self.model``.
    """
    super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)

    if self.add_regressor_dict is None:
        fit_columns = [time_col, value_col]
    else:
        reg_cols = list(self.add_regressor_dict.keys())
        fit_columns = [time_col, value_col] + reg_cols

    fit_df = X.reset_index(drop=True)[fit_columns]
    # Prophet expects these column names. Other estimators can use TIME_COL, etc.
    fit_df.rename(columns={time_col: "ds", value_col: "y"}, inplace=True)

    # Uses coverage instead of interval_width to set the prediction band width.
    # This ensures a common interface for parameters shared by every
    # BaseForecastEstimator, which are usually also needed for forecast
    # evaluation. The model must be initialized here, not in __init__, so that
    # parameters are updated during grid search.
    self.model = fbprophet.Prophet(
        growth=self.growth,
        changepoints=self.changepoints,
        n_changepoints=self.n_changepoints,
        changepoint_range=self.changepoint_range,
        yearly_seasonality=self.yearly_seasonality,
        weekly_seasonality=self.weekly_seasonality,
        daily_seasonality=self.daily_seasonality,
        holidays=self.holidays,
        seasonality_mode=self.seasonality_mode,
        seasonality_prior_scale=self.seasonality_prior_scale,
        holidays_prior_scale=self.holidays_prior_scale,
        changepoint_prior_scale=self.changepoint_prior_scale,
        mcmc_samples=self.mcmc_samples,
        interval_width=self.coverage,
        uncertainty_samples=self.uncertainty_samples)

    # If extra regressors are given, add them to the temporal features data.
    # This implementation assumes that the regressor(s) are provided in the
    # time series df, alongside the target column.
    if self.add_regressor_dict is not None:
        for reg_col, reg_params in self.add_regressor_dict.items():
            self.model.add_regressor(name=reg_col, **reg_params)

    # If custom seasonality is provided, supply it to the Prophet model
    if self.add_seasonality_dict is not None:
        for seasonality_type, seasonality_params in self.add_seasonality_dict.items():
            self.model.add_seasonality(name=seasonality_type,
                                       **seasonality_params)

    self.model.fit(fit_df)
    return self
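# A hedged usage sketch for this estimator's fit method. The snippet above only
# shows the method body, so the class name ProphetEstimator, the constructor
# signature, and the sample columns here are assumptions for illustration.
import pandas as pd

df = pd.DataFrame({
    "ts": pd.date_range("2020-01-01", periods=100, freq="D"),  # assumed time column
    "value": range(100),                                       # assumed value column
})
estimator = ProphetEstimator(coverage=0.9)  # hypothetical class/constructor
estimator.fit(df, time_col="ts", value_col="value")
future = estimator.model.make_future_dataframe(periods=30)
forecast = estimator.model.predict(future)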
    1,
})

for j in states:
    df2 = pd.read_csv('Statewise/' + j + '.csv')

    # Prophet requires columns ds (Date) and y (value)
    df2 = df2.rename(columns={'Date': 'ds', 'Confirmed': 'y'})
    # Put market cap in billions
    #gm['y'] = gm['y'] / 1e9

    # Make the prophet model and fit on the data
    df_prophet = fbprophet.Prophet(changepoint_prior_scale=0.6,
                                   holidays=holidays,
                                   holidays_prior_scale=40,
                                   seasonality_mode='multiplicative',
                                   seasonality_prior_scale=10,
                                   daily_seasonality=False,
                                   yearly_seasonality=False,
                                   weekly_seasonality=False).add_seasonality(
                                       name='daily', period=1,
                                       fourier_order=15).add_seasonality(
                                           name='incubation_period',
                                           period=5,
                                           fourier_order=20)
    df_prophet.fit(df2)

    future = df_prophet.make_future_dataframe(periods=10, freq='D')
    forecast = df_prophet.predict(future)
    df_prophet.plot(forecast)

    cv = cross_validation(df_prophet,
                          initial=str(len(df2) - 5) + ' days',
                          horizon='2 days')
import pandas as pd
import matplotlib.pyplot as plt
import fbprophet

df = pd.read_html(
    'https://raw.githubusercontent.com/diazonic/vegetables_price_forecast/main/Grlic.html'
)[-1]
df.drop(df.tail(1).index, inplace=True)
df['Modal Price (Rs/q)'] = df['Modal Price (Rs/q)'].astype(int)
df['date'] = df['Month Name'] + '-' + df['Year']
df['date'] = pd.to_datetime(df['date'])

df_new = df[['date', 'Modal Price (Rs/q)']]
df_new = df_new.rename(columns={'date': 'ds', 'Modal Price (Rs/q)': 'y'})
df_new = df_new.sort_values(by='ds')

plt.plot(df_new['ds'], df_new['y'])
plt.show()

m = fbprophet.Prophet(changepoint_prior_scale=1, seasonality_prior_scale=1)
m.fit(df_new)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

fig1 = m.plot(forecast, xlabel='Date', ylabel='Price of Garlic')
plt.figure(figsize=(20, 10))
plt.scatter(df_new['ds'], df_new['y'], s=5, c='k')
plt.plot(forecast['ds'], forecast['yhat'])
def population_by_place(years=20,
                        n_places=1000,
                        changepoint_prior=0.15,
                        indicate=False,
                        time=False):
    # total population by place (1970 to 2010)
    pop_by_place = pd.read_csv(
        '../../data/NHGIS/nhgis0002_csv/nhgis0002_ts_nominal_place.csv',
        encoding='ISO-8859-1')
    """
    inputs)
    >> years > number of years to forecast
    >> n_places > number of places to forecast +1
       >> e.g. 99 = first 100 places (max==25102)
    >> changepoint_prior > set changepoint_prior_scale for the prophet model
    >> indicate > default False
       > if True, print the number of the place forecasted after each forecast
    >> time > default False
       > if True, prints the time the function took to run right before
         returning the output

    function:
    >> generate DataFrame of population:
       > from 1970 to 2010
       > by unique place (use NHGISCODE as Id)
    >> drop
       > places with fewer than 2 measurements
       > can only predict places which have been measured 2+ times
    >> extract list of places
       > each as a DataFrame ready for prediction
       > column0='ds', column1='y'
    >> make and fit a prophet model on each place
    >> return the prophet model's predictions
       > of each place
       > for {years} years
    """
    if time == True:
        # import under an alias so the module does not shadow the `time` parameter
        import time as time_module
        now = time_module.time()

    # df by NHGISCODE with measurements by decade (31436 rows × 5 columns)
    unique_places = pop_by_place.copy()[[
        'NHGISCODE', 'AV0AA1970', 'AV0AA1980', 'AV0AA2000', 'AV0AA2010'
    ]]

    # drop NaN rows @ thresh=3 due to NHGISCODE being non-NaN
    # (25103 rows × 5 columns; 6333 non-measurable)
    measureable_unique_places = unique_places.dropna(axis=0, thresh=3)

    # convert NaN values to 0 (note: there are 270 'dead' counties ('A00AA2010' == 0))
    measureable_unique_places = measureable_unique_places.fillna(0)

    # generate list of remaining NHGISCODE codes
    codes_of_measureable_unique_places = [
        code for code in measureable_unique_places.NHGISCODE
    ]

    # drop the NHGISCODE column (25103 rows × 4 columns)
    measureable_unique_places = measureable_unique_places.drop('NHGISCODE',
                                                               axis=1)

    # list of str column names as years (for conversion to datetime)
    year_only_columns = [i[5:] for i in measureable_unique_places.columns]

    # convert year_only_columns to a DatetimeIndex of Timestamps
    dt_columns = pd.to_datetime(arg=year_only_columns)

    # convert dt_columns into a dataframe
    datetime_df = pd.DataFrame(dt_columns).T

    # with columns, so it is concatable with measureable_unique_places
    datetime_df.columns = measureable_unique_places.columns

    # generate list of remaining places (each as a pd.Series)
    dfs_of_measureable_unique_places = [
        measureable_unique_places.iloc[place]
        for place in range(len(measureable_unique_places))
    ]

    # add datetime_df to each dataframe as the first row
    prophet_places = [
        pd.concat((datetime_df, pd.DataFrame(place).T), axis=0)
        for place in dfs_of_measureable_unique_places
    ]

    # then transpose to 2 rows x 23 columns
    prophet_almost_ready_places = [place.T for place in prophet_places]

    # set collection of prophets
    prophet_by_place = []

    # run a prophet model on the first n_places places
    for place in range(len(prophet_almost_ready_places[:n_places])):
        # make the prophet model
        place_prophet = fbprophet.Prophet(
            changepoint_prior_scale=changepoint_prior)
        # identify the place
        a = prophet_almost_ready_places[place]
        # rename the place df's columns to agree with prophet formatting
        a.columns = ['ds', 'y']
        # fit the place on the prophet model
        b = place_prophet.fit(a)
        # make a future dataframe for {years} years
        place_forecast = place_prophet.make_future_dataframe(periods=1 * years,
                                                             freq='Y')
        # establish predictions
        place_forecast = place_prophet.predict(place_forecast)
        # add to the collection
        prophet_by_place.append(place_forecast)
        # did we ask for indication?
        # (hint: do this if calculating for > 1000 places unless you enjoy anxiety)
        if indicate == True:
            # let us know the count
            print(place)

    if time == True:
        then = time_module.time()
        print(f'now = {now}\nthen = {then}\ntime = {then - now}')

    # return forecasts
    return prophet_by_place
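# A minimal usage sketch for population_by_place, assuming the NHGIS CSV is
# present at the path hard-coded above; the argument values are illustrative.
forecasts = population_by_place(years=20,
                                n_places=100,
                                changepoint_prior=0.15,
                                indicate=True,
                                time=True)
# each entry is a Prophet forecast frame; peek at the final predicted year
print(forecasts[0][['ds', 'yhat']].tail(1))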
    first_date.date(), last_date.date()))

'''During Q2 2017, Tesla sold 22026 cars while GM sold 725000.
In Q3 2017, Tesla sold 26137 cars and GM sold 808000.
In all of 2017, Tesla sold 103084 cars and GM sold 3002237.
That means GM was valued less than Tesla in a year during which it sold
29 times more cars than Tesla! Interesting to say the least.'''

import fbprophet

# Prophet requires columns ds (Date) and y (value)
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})

# Put market cap in billions
gm['y'] = gm['y'] / 1e9

# Make the prophet models and fit on the data
# changepoint_prior_scale can be changed to achieve a better fit
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
gm_prophet.fit(gm)

# Repeat for the tesla data
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla['y'] = tesla['y'] / 1e9
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                  n_changepoints=10)
tesla_prophet.fit(tesla)

# Make a future dataframe for 2 years
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')

# Make predictions
gm_forecast = gm_prophet.predict(gm_forecast)

tesla_forecast = tesla_prophet.make_future_dataframe(periods=365 * 2, freq='D')
plt.legend()

import numpy as np

# The time when Reliance was valued the highest till now
highest_date = market_cap.loc[market_cap['cap'].idxmax(), 'Date']
print("Reliance was valued the highest in {}.".format(highest_date.date()))

import fbprophet

# Prophet requires columns ds (Date) and y (value)
reliance = reliance.rename(columns={'Date': 'ds', 'cap': 'y'})

# Put market cap in billions of rupees
reliance['y'] = reliance['y'] / 1e9

# Make the prophet model and fit on the data
reliance_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
reliance_prophet.fit(reliance)

# Make a future dataframe for 2 years
reliance_forecast = reliance_prophet.make_future_dataframe(periods=365 * 2,
                                                           freq='D')

# Make predictions
reliance_forecast = reliance_prophet.predict(reliance_forecast)

reliance_prophet.plot(reliance_forecast,
                      xlabel='Date',
                      ylabel='Market Cap (billions of Rupees)')
plt.title('Market Cap of Reliance')

# Plot the trends and patterns
reliance_prophet.plot_components(reliance_forecast)