Exemplo n.º 1
0
    d[name]['Prev Close'] = d[name]['y'].shift(1) #simplify prev close
    d[name]['ActR'] = abs(d[name]['HIGH']-d[name]['LOW']) #define actual trading range
    d[name]['TRHigh'] = abs(d[name]['HIGH']-d[name]['Prev Close']) #absolute value of diff prev C to H
    d[name]['TRLow'] = abs(d[name]['LOW']-d[name]['Prev Close']) #absolute value of diff prev C to L
    d[name]['True Range'] = d[name][['ActR','TRHigh','TRLow']].apply(max,axis=1) #define true range
    
    d[name]['ATR_E'] = d[name]['True Range'].ewm(span=ATR_Period).mean() #exponential ATR
    d[name]['MA_E'] = d[name]['y'].ewm(span=periods).mean()
    d[name]['UB']=d[name]['MA_E']+(d[name]['ATR_E']*sds)
    d[name]['LB']=d[name]['MA_E']-(d[name]['ATR_E']*sds)
    d[name]['STARCWidth']=d[name]['UB']-d[name]['LB']
    d[name]['STARC%'] = ((d[name]['y'] - d[name]['LB'])/d[name]['STARCWidth'])*100
    
    #build models and fit to data
    m[name] = fbprophet.Prophet(
                 weekly_seasonality=False, yearly_seasonality=True,
                 interval_width=0.8)
    m[name].fit(d[name])
    
    #create new dataframes for predictions; still in transform scale
    z[name] = m[name].make_future_dataframe(periods=15, freq='D')
    z[name] = m[name].predict(z[name])
    
    #move back to original scale
    k[name] = z[name][['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
    #k[name].loc[:,'yhat'] = np.exp(k[name]['yhat'])
    #k[name].loc[:, 'yhat_lower'] = np.exp(k[name]['yhat_lower'])
    #k[name].loc[:, 'yhat_upper'] = np.exp(k[name]['yhat_upper'])
#    k[name]['UB'] = d[name]['UB']
#    k[name]['LB'] = d[name]['LB']
#    k[name]['MA'] = d[name]['MA_E']
Exemplo n.º 2
0
def forecasting(file_name):
    """Fit Prophet on ticker mid-prices and score it on a held-out tail.

    Reads ``<file_name>_ticker.csv``, ``<file_name>_book.csv`` and
    ``<file_name>_trades.csv`` (all header-less), derives the target price
    as the mean of ticker columns 7 and 8, fits ``fbprophet.Prophet`` on the
    first 90% of the rows, forecasts 72 hourly periods past the training
    data, prints the mean squared error against the remaining rows and shows
    the forecast plots.

    Parameters
    ----------
    file_name : str
        Common prefix of the three CSV files; also used in the plot title
        and in the console output.

    Returns
    -------
    None
        The function only prints and plots. It blocks on ``input()`` and on
        ``plt.show()``.
    """
    # reading data (header=None, so columns are addressed by integer label)
    data_set = pd.read_csv(file_name + "_ticker.csv", header=None)
    book_file = pd.read_csv(file_name + "_book.csv", header=None)
    trades_file = pd.read_csv(file_name + "_trades.csv", header=None)
    data_set['price'] = (data_set[7] + data_set[8]) / 2  # target
    data_set = data_set.drop([7, 8], axis=1)

    # Per-row totals over every 3rd book column (3..150) and every 4th trades
    # column (3..479). skipna=False keeps a NaN total for rows with gaps, as a
    # plain running sum would.
    order_volume = book_file[list(range(3, 151, 3))].sum(axis=1, skipna=False)
    data_set['order_volume'] = data_set[1].map(dict(enumerate(order_volume)))

    turnover = trades_file[list(range(3, 481, 4))].sum(axis=1, skipna=False)
    turnover_by_row = dict(enumerate(turnover))
    data_set['turnover'] = data_set[1].map(turnover_by_row)
    # NOTE(review): this column receives exactly the same values as
    # 'turnover' -- confirm whether a different source was intended.
    data_set['transactionـprice'] = data_set[1].map(turnover_by_row)

    # change date format: epoch seconds -> 'YYYY-MM-DD HH:MM' (local time)
    data_set[0] = [
        datetime.datetime.fromtimestamp(int(stamp)).strftime('%Y-%m-%d %H:%M')
        for stamp in data_set[0]
    ]

    # preprocessing
    # set NaN value for noisy data in pandas
    # NOTE(review): column 0 already holds formatted date strings here, so
    # `data_set[0] == -1` can never match and this mask selects nothing --
    # confirm the intended condition.
    data_set.loc[(data_set['price'] == 0) & (data_set[0] == -1),
                 'price'] = None

    # split for train and test: first 90% / remaining 10%, with no row
    # dropped at the boundary
    split_at = round(len(data_set) * 0.9)
    test_set = data_set[split_at:]
    data_set = data_set[:split_at]

    # use statistic model for forecasting
    data_set = data_set.rename(columns={0: 'ds', 'price': 'y'})
    A_prophet = fbprophet.Prophet()
    A_prophet.fit(data_set)

    A_forecast = A_prophet.make_future_dataframe(freq='H', periods=72)
    A_forecast = A_prophet.predict(A_forecast)

    A_prophet.plot(A_forecast, xlabel='Date', ylabel='Price')
    plt.title('prediction for ' + file_name)

    os.system('clear')

    # Evaluate prediction: keep only timestamps present in both the forecast
    # and the held-out rows.
    predicted_df = A_forecast.set_index('ds').join(test_set.set_index(0))
    predicted_df = predicted_df.dropna()
    predicted_df = predicted_df[["yhat", "price"]]
    y_pred = predicted_df['yhat'].tolist()
    y_true = predicted_df['price'].tolist()
    print("======================================================")
    print("mean squar error for " + file_name + ":",
          mean_squared_error(y_true, y_pred))
    print("======================================================\n")
    input(
        "press any key to present forecasting plot...\nand to continue close the figure."
    )

    plt.plot(predicted_df.index, y_pred, color='red', label="predicted value")
    plt.plot(predicted_df.index, y_true, color='green', label="true value")
    plt.ylim((0, 150))
    plt.legend(loc="upper left",
               bbox_to_anchor=[0, 1],
               ncol=2,
               shadow=True,
               title="Legend",
               fancybox=True)
    plt.show()
    os.system('clear')
Exemplo n.º 3
0
def weekly_runoff_forecast(filename, wtd, train_rows=6940):
    """Forecast the ``weekly runoff`` series of ``data/<filename>.xlsx``.

    The workbook is loaded, gaps in every column after the first are filled
    with the column mean, the ``weekly runoff`` column is resampled to daily
    sums and min-max scaled to [0, 1]. A Prophet model is fitted, pickled to
    ``trained/<filename>_weekly_runoff.pkl``, and its forecast is converted
    back to the original scale and written to
    ``data/forecast/<filename>_weekly_runoff_forecast.csv``.

    Parameters
    ----------
    filename : str
        Base name (without extension) of the workbook under ``data/``; also
        used for the pickle and CSV output names.
    wtd : int
        Mode switch. ``0`` trains on the first ``train_rows`` daily rows,
        forecasts 750 days, and prints the mean absolute error of rows
        ``train_rows:-20`` of that forecast against the held-out rows (still
        in scaled units). Any other value trains on the full series and
        forecasts the next 360 days only.
    train_rows : int, optional
        Number of leading daily rows used for training when ``wtd == 0``.
        Defaults to 6940, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``weekly runoff`` with the forecast in original
        units; values are passed through ``abs``.
    """
    import warnings

    from sklearn.metrics import mean_absolute_error

    # ---------------- Import raw data ----------------
    raw_data_df = pd.read_excel('data/' + filename + '.xlsx', header=0)
    raw_data_df['Date'] = pd.to_datetime(raw_data_df['Date'])

    # Fill missing values in every column except the first with its mean.
    for column in raw_data_df.columns[1:]:
        raw_data_df[column] = raw_data_df[column].fillna(
            raw_data_df[column].mean())

    data = raw_data_df[['Date', 'weekly runoff']].set_index('Date')

    # ---------------- Resampling ----------------
    monthly = data.resample('M').sum()
    monthly.plot(style=[':', '--', '-'], title='Monthly Trends')
    daily = data.resample('D').sum()

    # ---------------- Scaling ----------------
    # Use MinMaxScaler to normalize to range from 0 to 1
    values = daily['weekly runoff'].values.reshape(-1, 1).astype('float32')
    scaler = MinMaxScaler(feature_range=(0, 1))
    scale = daily
    scale["weekly runoff"] = scaler.fit_transform(values).ravel()

    # ---------------- Training / hold-out split ----------------
    if wtd == 0:
        # Rows from train_rows onward are held out for evaluation.
        df1 = scale.iloc[train_rows:, :]
        df2 = scale.iloc[:train_rows, :].reset_index()
    else:
        df2 = scale.reset_index()
    # Prophet requires columns ds (Date) and y (value)
    df2 = df2.rename(columns={'Date': 'ds', 'weekly runoff': 'y'})
    if wtd != 0:
        df1 = df2

    # ---------------- Model (FbProphet) ----------------
    # Silence FutureWarning only while fitting/saving; catch_warnings restores
    # the caller's filters afterwards instead of wiping them all.
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=FutureWarning)
        df2_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
        df2_prophet.fit(df2)
        path = 'trained/' + filename + '_weekly_runoff'
        joblib.dump(df2_prophet, path + '.pkl')

    # ---------------- Forecast ----------------
    if wtd == 0:
        # History plus 750 future days; rows train_rows:-20 are compared
        # with the held-out rows.
        future = df2_prophet.make_future_dataframe(periods=30 * 25, freq='D')
        df2_forecast = df2_prophet.predict(future)
        df4 = df2_forecast[['ds', 'yhat']].iloc[train_rows:-20, :].copy()

        ypred = df4.iloc[:, 1:]
        ytest = df1.iloc[:, :]
        print("mean_absolute_error=", mean_absolute_error(ytest, ypred))
    else:
        # 360 future days only, no history rows.
        future = df2_prophet.make_future_dataframe(
            periods=30 * 12, freq='D', include_history=False)
        df2_forecast = df2_prophet.predict(future)
        df4 = df2_forecast[['ds', 'yhat']].copy()

    df2_prophet.plot(df2_forecast, xlabel='Date', ylabel='weekly runoff')
    plt.title('simple test')

    # Plot the trends and patterns
    df2_prophet.plot_components(df2_forecast)

    # ---------------- Back to the original range ----------------
    df4.columns = ['Date', 'weekly runoff']
    values = df4['weekly runoff'].values.reshape(-1, 1).astype('float32')
    df4['weekly runoff'] = abs(scaler.inverse_transform(values)).ravel()
    df4.to_csv('data/forecast/' + filename + '_weekly_runoff_forecast.csv',
               index=False)

    return df4
# During Q2 2017, Tesla [sold](https://en.wikipedia.org/wiki/Tesla,_Inc.#Production_and_sales) 22026 cars while GM [sold](http://gmauthority.com/blog/gm/general-motors-sales-numbers/) 725000. In Q3 2017, Tesla sold 26137 cars and GM sold 808000. In all of 2017, Tesla sold 103084 cars and GM sold 3002237. That means GM was valued less than Tesla in a year during which it sold 29 times more cars than Tesla! Interesting to say the least.

# # Prophet Models

# In[185]:

import fbprophet

# Prophet requires columns ds (Date) and y (value)
# Rename the GM frame's 'Date' and 'cap' columns to the names Prophet
# expects ('ds' and 'y'). `gm` and `tesla` are defined outside this excerpt.
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})
# Put market cap in billions
gm['y'] = gm['y'] / 1e9

# Make the prophet models and fit on the data
# changepoint_prior_scale can be changed to achieve a better fit
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
gm_prophet.fit(gm)

# Repeat for the tesla data; this model additionally sets n_changepoints=10.
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla['y'] = tesla['y'] / 1e9
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                  n_changepoints=10)
tesla_prophet.fit(tesla)

# ### Predictions for 2 Years

# In[186]:

# Make a future dataframe for 2 years (365 * 2 daily periods); this only
# builds the frame of dates, no prediction is made on this line.
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
Exemplo n.º 5
0
def modeling_n_prediction(df, device, position):
    """Estimate and forecast clicks per keyword and search rank.

    Pipeline:

    1. Keep rows of ``df`` for ``device`` with ``position <= position`` and
       aggregate clicks/impressions per keyword, date and integer rank.
    2. For every keyword in the module-level ``keyword_master`` table, call
       ``apply1`` to fill a dict of per-date frames and stack them.
    3. Join the per-rank CTR from the module-level ``ctr_df`` and derive
       ``clicks = ctr * impressions / 100``.
    4. Pivot to one ``clicks_<rank>`` / ``impressions_<rank>`` column per rank.
    5. For every keyword and rank 1..10, fit a Prophet model on the
       ``clicks_<rank>`` series and keep the 14 forecast days.
    6. Pivot the forecasts wide, merge with the module-level ``keywords_df``,
       clamp negative numbers to 0 and write
       ``FinalResults_UK_<device>.json``.

    Depends on names defined elsewhere in the module: ``keyword_master``,
    ``ctr_df``, ``keywords_df``, ``dis``, ``apply1``, ``round_mean`` and
    ``round_max``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``device``, ``position``, ``keyword``, ``date``,
        ``clicks`` and ``impressions`` columns (the ones read below).
    device : str
        Device value to filter on; ``'MOBILE'`` selects ``mobile_ctr``, any
        other value selects ``web_ctr``.
    position : int or float
        Upper bound (inclusive) on ``df['position']``.

    Returns
    -------
    list
        ``[casted_df, casted_pred_df]``: the wide historical frame and the
        wide forecast frame.
    """
    rank_cols = ['keyword', 'date', 'rank', 'clicks', 'impressions']
    pred_cols = [
        "keyword", "date", "position", "clicks", "clicks_lower", "clicks_upper"
    ]
    sort_keys = ["keyword", "date", "rank"]
    prediction_days = 14

    # Filter by device and maximum position. The copy keeps the new 'rank'
    # column from being written into a slice of the caller's frame.
    data = df[(df['device'] == device) & (df['position'] <= position)].copy()
    data['rank'] = data['position'].astype(int)
    data = data.sort_values(sort_keys)

    # Group keywords, date and rank and calculate sum of clicks and impressions
    grouped_data = data.groupby(['keyword', 'date', 'rank']).agg(
        all_clicks=pd.NamedAgg(column='clicks', aggfunc='sum'),
        all_impressions=pd.NamedAgg(column='impressions', aggfunc='sum'))
    grouped_data = grouped_data.reset_index().sort_values(sort_keys)

    # Unique keywords seen in the search-console data (taken before the
    # str cast below, as the clustering call receives these raw values).
    mobile_keywords = grouped_data['keyword'].unique()
    grouped_data['keyword'] = grouped_data['keyword'].astype(str)

    master_keywords = list(keyword_master['keywords'].unique())
    # Sorted distinct dates as strings; the same list applies to every keyword.
    dt_list = sorted(grouped_data['date'].drop_duplicates().astype(str))

    # Collect every per-date frame and concatenate once at the end. The empty
    # frame goes first so the result still has the expected columns when
    # nothing was produced.
    rank_frames = [pd.DataFrame(columns=rank_cols)]
    for count, key in enumerate(master_keywords, start=1):
        print(count)
        print('Processing for keyword: ', key)
        print()

        distance = np.array(
            [dis.get_jaro_distance(key, word) for word in mobile_keywords])
        # NOTE(review): selects words whose score is <= 0.3; confirm whether
        # get_jaro_distance returns a distance or a similarity.
        cluster = np.where(distance <= 0.3)

        key_df_list = {}
        # apply1 is expected to fill key_df_list in place (its return value
        # was never used); it gets its own copy of the date list.
        apply1(list(dt_list), key_df_list, grouped_data, mobile_keywords,
               cluster, key)
        rank_frames.extend(key_df_list.values())
    all_ranks_df = pd.concat(rank_frames, ignore_index=True)

    ctr_col = 'mobile_ctr' if device == 'MOBILE' else 'web_ctr'
    ctrs = ctr_df[['position', ctr_col]]

    all_ranks_df['rank'] = all_ranks_df['rank'].astype(int)
    all_ranks_df['impressions'] = all_ranks_df['impressions'].astype(float)
    all_ranks_df = pd.merge(all_ranks_df,
                            ctrs,
                            left_on="rank",
                            right_on="position")

    # Calculate the max and avg impressions for the keyword for each date
    temp_all_ranks_df = all_ranks_df.groupby(['keyword', 'date']).agg(
        avg_impressions=pd.NamedAgg(column='impressions', aggfunc=round_mean),
        max_impressions=pd.NamedAgg(column='impressions', aggfunc=round_max))
    temp_all_ranks_df = temp_all_ranks_df.reset_index()
    all_ranks_df = pd.merge(all_ranks_df,
                            temp_all_ranks_df,
                            on=['keyword', 'date'])

    # Replace NA values with avg impressions
    all_ranks_df['impressions'] = all_ranks_df['impressions'].fillna(
        all_ranks_df['avg_impressions'])
    all_ranks_df = all_ranks_df.sort_values(sort_keys)
    # Rows at or below the per-date average are lifted to the per-date max.
    all_ranks_df['impressions'] = np.where(
        all_ranks_df['impressions'] <= all_ranks_df['avg_impressions'],
        all_ranks_df['max_impressions'], all_ranks_df['impressions'])

    # Estimated clicks = CTR (in percent) * impressions.
    all_ranks_df['clicks'] = (all_ranks_df[ctr_col] *
                              all_ranks_df['impressions']) / 100
    all_ranks_df['clicks'] = all_ranks_df['clicks'].round().astype(int)

    # Helper columns are blanked out rather than dropped.
    all_ranks_df[ctr_col] = None
    all_ranks_df['avg_impressions'] = None
    all_ranks_df['max_impressions'] = None

    all_ranks_df['keyword'] = all_ranks_df['keyword'].astype(str)
    all_ranks_df['impressions'] = all_ranks_df['impressions'].astype(int)
    all_ranks_df['date'] = all_ranks_df['date'].astype(str)

    # One row per (keyword, date); one clicks_/impressions_ column per rank.
    casted_df = all_ranks_df.pivot_table(index=['keyword', 'date'],
                                         columns='rank',
                                         values=['clicks', 'impressions'])
    casted_df.columns = [
        "{0}_{1}".format(l1, l2) for l1, l2 in casted_df.columns
    ]
    casted_df = casted_df.reset_index()
    casted_df['keyword'] = casted_df['keyword'].astype('category')

    pred_frames = [pd.DataFrame(columns=pred_cols)]
    for key in master_keywords:
        print('Forecasting for keyword - ', key)
        print()

        # The keyword's history is the same for every rank: build it once.
        key_sub = casted_df[casted_df['keyword'] == key].copy()
        key_sub['date'] = pd.to_datetime(key_sub['date'])

        # NOTE(review): ranks 1..10 are always forecast, independent of the
        # `position` argument; a missing clicks_<rank> column raises KeyError.
        for rank in range(1, 11):
            print('Position - ', rank)
            print()

            clicks_trend = key_sub[['clicks_' + str(rank), 'date']].copy()
            clicks_trend.columns = ["y", "ds"]

            # The whole history is used for fitting (no rows held back).
            m = fbprophet.Prophet()
            m.fit(clicks_trend)
            future = m.make_future_dataframe(periods=prediction_days)
            fcast = m.predict(future)

            pred_df = fcast[['ds', 'yhat', 'yhat_lower',
                             'yhat_upper']].tail(prediction_days).copy()
            pred_df['position'] = rank
            pred_df['keyword'] = key
            pred_df.columns = [
                "date", "clicks", "clicks_lower", "clicks_upper", "position",
                "keyword"
            ]
            # Only the interval bounds are rounded; 'clicks' stays as predicted.
            pred_df['clicks_upper'] = pred_df['clicks_upper'].round()
            pred_df['clicks_lower'] = pred_df['clicks_lower'].round()
            pred_frames.append(pred_df[pred_cols])

    print('\n')
    pred_key_df = pd.concat(pred_frames, ignore_index=True)
    casted_pred_df = pred_key_df.pivot_table(
        index=['keyword', 'date'],
        columns='position',
        values=['clicks', 'clicks_lower', 'clicks_upper'])
    casted_pred_df.columns = [
        "{0}_{1}".format(l1, l2) for l1, l2 in casted_pred_df.columns
    ]
    casted_pred_df = casted_pred_df.reset_index()

    casted_pred_df = pd.merge(keywords_df,
                              casted_pred_df,
                              left_on="keywords",
                              right_on="keyword")

    casted_pred_df['date'] = casted_pred_df['date'].astype(str)
    casted_pred_df = casted_pred_df.astype(int, errors='ignore')
    # Negative forecasts make no sense for clicks: clamp numeric columns at 0
    # through public API instead of mutating a private "view" of the frame.
    numeric_cols = casted_pred_df.select_dtypes(include='number').columns
    casted_pred_df[numeric_cols] = casted_pred_df[numeric_cols].clip(lower=0)
    casted_pred_df.to_json(r'FinalResults_UK_' + device + '.json',
                           orient='records')

    return [casted_df, casted_pred_df]
Exemplo n.º 6
0
    encoding='utf-8',
    index_col='date',
    parse_dates=True)
# `stock_df` is created by the read call that ends just above this excerpt,
# with 'date' as its index.
print(stock_df.head())
# Move the 'date' index back into a column so it can be renamed for Prophet.
df = stock_df.reset_index()
print(df.head())
# Prophet expects the timestamp column to be 'ds' and the target to be 'y'.
df = df.rename(columns={'date': 'ds', 'close': 'y'})
print(df.head())
# Quick look at the raw closing-price series before modelling.
df.set_index('ds').y.plot()
plt.show()

#
# applying prophet model
#

# changepoint_range=1 is the proportion of history in which changepoints may
# be placed, i.e. the full history here.
model = Prophet.Prophet(changepoint_range=1, changepoint_prior_scale=0.05)
model.fit(df)

# Create future dataframe: the history plus 90 further periods
# (no freq is passed, so Prophet's default frequency is used).
future = model.make_future_dataframe(periods=90)
print(future.tail())

# Forecast for future dataframe
forecast = model.predict(future)
print(forecast.tail())
print('Forecast: \n', forecast[['ds', 'yhat', 'yhat_lower',
                                'yhat_upper']].tail())

# Forecast plot followed by the component breakdown.
model.plot(forecast)
model.plot_components(forecast)
plt.show()
Exemplo n.º 7
0
#df['ds'] = pd.to_datetime(df['ds'], format = '%Y%M%D')

#Set Date as index
#df = df.set_index('ds')

# Plot the raw series; `df` (with 'ds' and 'y' columns) is defined outside
# this excerpt.
plt.figure(figsize=(20, 10))
plt.plot(df.ds, df.y)
plt.title('AirPassengers')

#plt.savefig('test2png.png', dpi=100)

###Prophet Model

# set the uncertainty interval to 95% (the Prophet default is 80%)
# Weekly and daily seasonality are switched on and the trend is logistic.
my_model = Prophet.Prophet(interval_width=0.95,
                           weekly_seasonality=True,
                           daily_seasonality=True,
                           growth='logistic')

#fit df to model
# NOTE(review): Prophet requires a 'cap' column in the training frame for
# growth='logistic'; no line in this excerpt adds one to `df` -- confirm it
# is set before this point.
my_model.fit(df)
"""
In order to obtain forecasts of our time series, 
we must provide Prophet with a new DataFrame containing a ds column that holds 
the dates for which we want predictions. Conveniently, we do not have to concern 
ourselves with manually creating this DataFrame,
 as Prophet provides the make_future_dataframe helper function:"""

# Build the prediction frame: 90 further daily periods (history is included
# by default).
future_dates = my_model.make_future_dataframe(periods=90, freq='D')
# Carrying capacity used by the logistic trend for every row of the frame.
future_dates['cap'] = 50
# Notebook-style inspection; the value is discarded when run as a script.
future_dates.tail()
Exemplo n.º 8
0
# Split by date: rows before 2019-06-01 train the model, the rest are the
# test period. `full_df` is defined outside this excerpt.
df_train = full_df.loc[(full_df['date'] < '2019-06-01')]
df_test = full_df.loc[(full_df['date'] >= '2019-06-01')]

#%%
# Keep only the timestamp and the target column.
df_train = df_train[['date', 'total_amount']]
df_test = df_test[['date', 'total_amount']]
# Prophet's standard column names: 'ds' (timestamp) and 'y' (target).
df_train.columns = ['ds', 'y']
df_test.columns = ['ds', 'y']

# Collapse to one row per timestamp by summing the amounts.
df_train = df_train.groupby('ds', as_index=False)['y'].sum()
df_test = df_test.groupby('ds', as_index=False)['y'].sum()

#%%
# Fit a basic Prophet model with default settings.
model = prop.Prophet()
model.fit(df_train)

#%%
# Forecast 210 periods (roughly seven months at Prophet's default daily
# frequency) past the end of the training data.
future = model.make_future_dataframe(periods=210)

forecast = model.predict(future)
fig_forecast = model.plot(forecast)

#%%
# Check the fitted components.
fig_components = model.plot_components(forecast)

#%%
# Forecast rows that fall in the test period.
preds = forecast.loc[forecast['ds'] >= '2019-06-01']
Exemplo n.º 9
0
def predict_caps(request):
    """Return a PNG with a two-year Prophet forecast of a ticker's market cap.

    The ticker is read from ``request.POST['ticker2']``, its price history
    from 2010 onwards is fetched from Quandl's ``WIKI`` database, and the
    market cap is approximated as ``Adj. Close`` times the yearly average
    number of shares outstanding. ``'TSLA'`` and ``'GM'`` use their own share
    tables; every other ticker falls back to the Apple table.

    Parameters
    ----------
    request
        Request object exposing ``POST.get``.

    Returns
    -------
    HttpResponse
        The rendered plot with content type ``image/png``.
    """
    import io

    plt.clf()
    # NOTE(security): the API key is hard-coded in source; it should be
    # supplied through configuration instead.
    quandl.ApiConfig.api_key = 'hSBzwas1PTzHyjs58m3G'
    ticker2 = request.POST.get('ticker2')
    gm = quandl.get('WIKI/' + ticker2)
    # Copy the slice so the columns added below do not write into a view.
    gm = gm["2010":].copy()

    # Yearly average number of shares outstanding for Tesla and GM
    tesla_shares = {
        2018: 168e6,
        2017: 162e6,
        2016: 144e6,
        2015: 128e6,
        2014: 125e6,
        2013: 119e6,
        2012: 107e6,
        2011: 100e6,
        2010: 51e6
    }

    gm_shares = {
        2018: 1.42e9,
        2017: 1.50e9,
        2016: 1.54e9,
        2015: 1.59e9,
        2014: 1.61e9,
        2013: 1.39e9,
        2012: 1.57e9,
        2011: 1.54e9,
        2010: 1.50e9
    }

    apple_shares = {
        2018: 4.927e9,
        2017: 5.252e9,
        2016: 5.5e9,
        2015: 5.793e9,
        2014: 6.123e9,
        2013: 6.522e9,
        2012: 6.617e9,
        2011: 6.557e9,
        2010: 6.473e9
    }

    # Any ticker other than TSLA / GM uses the Apple table.
    shares_by_ticker = {'TSLA': tesla_shares, 'GM': gm_shares}
    company_shares = shares_by_ticker.get(ticker2, apple_shares)

    # Create a year column
    gm['Year'] = gm.index.year

    # Take Dates from index and move to Date column
    gm.reset_index(inplace=True)

    # Market cap = shares outstanding for the row's year * adjusted close.
    # Years missing from the share table produce NaN instead of raising.
    gm['cap'] = gm['Year'].map(company_shares) * gm['Adj. Close']

    # Prophet requires columns ds (Date) and y (value)
    gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})
    # Put market cap in billions
    gm['y'] = gm['y'] / 1e9

    # Make the prophet models and fit on the data
    # changepoint_prior_scale can be changed to achieve a better fit
    gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
    gm_prophet.fit(gm)

    # Make a future dataframe for 2 years
    gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
    # Make predictions
    gm_forecast = gm_prophet.predict(gm_forecast)
    gm_prophet.plot(gm_forecast,
                    xlabel='Date',
                    ylabel='Market Cap (billions $)')

    plt.title('Market Cap of ' + ticker2)
    # PNG data is binary, so render into an in-memory bytes buffer.
    fi = io.BytesIO()
    plt.savefig(fi, format="png", facecolor=(0.95, 0.95, 0.95))
    plt.clf()
    return HttpResponse(fi.getvalue(), content_type="image/png")
Exemplo n.º 10
0
    ['year-month']).sum().reset_index().rename(columns={
        'year-month': 'ds',
        'active_listing_count': 'y'
    })

# In[4]:

# Build a profiling report for `local_data` (defined outside this excerpt).
profile = pp.ProfileReport(local_data, 'mls profile report')

# In[5]:

local_listing_sum.tail()  # looks good

# In[6]:

m = fbprophet.Prophet()
# we have an inventory floor of 0, so let's establish that
# NOTE(review): Prophet documents 'floor' for logistic growth together with a
# 'cap' column; this model uses the default growth setting -- confirm the
# column has the intended effect.
local_listing_sum['floor'] = 0
m.fit(local_listing_sum)
# future does NOT predict, it simply makes timestamps into the future on a monthly frequency
# ('MS' = month start; 12 periods are appended to the history)
future = m.make_future_dataframe(periods=12, freq='MS')

# In[7]:

forecast = m.predict(future)
# Notebook-style inspection of the last 15 forecast rows.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(15)

# In[8]:

# Forecast plot and component breakdown.
fig1 = m.plot(forecast)
decomp_plot = m.plot_components(forecast)
Exemplo n.º 11
0
# One database connection is shared by all per-site queries.
# `dbname`, `username` and `site_no` are defined outside this excerpt.
con = psycopg2.connect(database=dbname, user=username)

for site in site_no:
    # NOTE(security): a table name cannot be bound as a query parameter, so
    # `site` is concatenated into the SQL text; it must come from a trusted
    # list of site ids.
    sql_query = """
    SELECT * FROM n"""+site+""";
    """
    site_data_from_sql = pd.read_sql_query(sql_query, con)

    # Replace zero readings with the mean of the non-zero readings of `y`
    # (a scalar), so that the log transform below is defined.
    nonzero_mean = site_data_from_sql.loc[site_data_from_sql.y != 0, "y"].mean()
    site_data_from_sql.loc[site_data_from_sql.y == 0, "y"] = nonzero_mean

    # Prophet input: a renamed copy with log-transformed target; the original
    # frame keeps 'datetime' and the untransformed values.
    df_site = site_data_from_sql.rename(columns={'datetime': 'ds'})
    df_site['y'] = np.log(df_site['y'])

    df_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                   yearly_seasonality=True,
                                   interval_width=0.75)
    df_prophet.fit(df_site)

    # History plus 450 future days.
    df_forecast = df_prophet.make_future_dataframe(periods=450 * 1, freq='D')
    df_forecast = df_prophet.predict(df_forecast)

    df_site.set_index('ds', inplace=True)
    df_forecast.set_index('ds', inplace=True)
    site_data_from_sql.set_index('datetime', inplace=True)

    # Observed values (original scale) next to the forecast columns (log
    # scale), aligned on the timestamp index.
    viz_df = site_data_from_sql.join(
        df_forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')

    viz_df['ds'] = viz_df.index
Exemplo n.º 12
0
    def run(self):
        """Fit Prophet on recent prices and build the forecast figure.

        Downloads the adjusted close of the ticker typed into
        ``self.mainwindow.ui.lineEdit`` from Yahoo (from 2006-10-01 until
        now), fits the model on the last three years of that history,
        forecasts ``self.days`` further days and stores a plotly figure with
        the observations, the modelled line and the uncertainty band in
        ``self.fig_model``.
        """
        # Yearly seasonality plus a custom 30.5-day "monthly" cycle; the
        # built-in daily and weekly seasonalities are disabled.
        prophet_options = dict(
            daily_seasonality=False,
            weekly_seasonality=False,
            yearly_seasonality=True,
            changepoint_prior_scale=0.05,
            changepoints=None,
        )
        model = fbprophet.Prophet(**prophet_options)
        model.add_seasonality(name="monthly", period=30.5, fourier_order=5)

        stock = pdr.get_data_yahoo(
            self.mainwindow.ui.lineEdit.text(),
            start=datetime.datetime(2006, 10, 1),
            end=datetime.datetime.now(),
        )
        stock = stock.reset_index()
        stock["ds"] = stock["Date"]
        stock["y"] = stock["Adj Close"]

        # Train only on the three years preceding the newest available date.
        cutoff = max(stock["Date"]) - pd.DateOffset(years=3)
        stock_history = stock[stock["Date"] > cutoff]
        model.fit(stock_history)

        # Extend the history by self.days daily periods and predict on it.
        future = model.predict(
            model.make_future_dataframe(periods=self.days, freq="D"))

        title = "%s Historical and Predicted Stock Price" % get_company_name(
            self.mainwindow.ui.lineEdit.text())

        ##-- plotly
        observed = go.Scatter(
            x=stock_history["ds"],
            y=stock_history["y"],
            mode="lines",
            opacity=0.8,
            name="Observations",
            line=dict(width=2, color='Black'),
        )
        modeled = go.Scatter(
            x=future["ds"],
            y=future["yhat"],
            name="Modeled",
            mode="lines",
            line=dict(width=2.4, color='Green'),
        )
        upper_bound = go.Scatter(
            name="Upper Bound",
            x=future["ds"],
            y=future["yhat_upper"],
            mode="lines",
            showlegend=False,
            line=dict(color='Green'),
        )
        # 'tonexty' fills the area between this trace and the previous one.
        lower_bound = go.Scatter(
            name="Lower Bound",
            x=future["ds"],
            y=future["yhat_lower"],
            mode="lines",
            fillcolor='rgba(67,255,1, 0.3)',
            fill='tonexty',
            showlegend=False,
            line=dict(color='Green'),
        )
        self.fig_model = go.Figure(
            [observed, modeled, upper_bound, lower_bound])

        # The x-axis title is intentionally left unset.
        label_font = dict(family="Courier New, monospace",
                          size=14,
                          color="RebeccaPurple")
        self.fig_model.update_layout(title=title,
                                     yaxis_title="Цена, $",
                                     font=label_font)
Exemplo n.º 13
0
# Index the counts by incident date, then reindex on `idx` so that dates
# without an entry get a count of 0. `spain_counts` and `idx` are defined
# outside this excerpt.
spain_ts = spain_counts.set_index('incident_date')

spain_ts = spain_ts.reindex(idx, fill_value=0)
spain_ts.head()

spain_ts.describe()

import fbprophet
import numpy as np

# Prophet input: copy the series, expose the index as a column and rename
# the two columns to 'y' (value) and 'ds' (date). This assumes spain_ts has
# exactly one column before 'index1' is added.
spain_fb = spain_ts.copy()
spain_fb['index1'] = spain_fb.index
spain_fb.columns = ['y', 'ds']

spain_fb.head()
prophet1 = fbprophet.Prophet(changepoint_prior_scale=0.15,
                             daily_seasonality=True)
prophet1.fit(spain_fb)
# Specify 1460 days (about four years) out to predict
future_data = prophet1.make_future_dataframe(periods=1460, freq='D')

# Predict the values
forecast_data = prophet1.predict(future_data)
print(forecast_data[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
print('--*-- Convertions --*--')
# NOTE(review): the forecast is exponentiated here, but no log transform of
# 'y' is visible in this excerpt -- confirm the model was fit on log counts.
print(np.exp(forecast_data[['yhat', 'yhat_lower', 'yhat_upper']]))

# Forecast plot, saved to Global.png.
prophet1.plot(forecast_data, xlabel='Date', ylabel='Attacks')
plt.title('Predicted Terrorist Attacks in World', fontsize=10)
plt.savefig("Global.png")

prophet1.plot_components(forecast_data)
Exemplo n.º 14
0
 def __init__(self):
     """Run the parent initialiser, then create a new ``fbp.Prophet`` model.

     ``self.y`` starts out as ``None``.
     """
     super().__init__()
     self.model, self.y = fbp.Prophet(), None
Exemplo n.º 15
0
# Move the index into a regular column so it can be renamed for Prophet.
# `daily_df` is defined outside this excerpt.
daily_df = daily_df.reset_index()

# Prophet expects the timestamp column as 'ds' and the target as 'y'.
daily_df = daily_df.rename(columns={'DateTime': 'ds', 'KWH_hh': 'y'})

display(daily_df.head())

# In[84]:

# In prophet, the changepoint_prior_scale parameter is used
# to control how sensitive the trend is to changes,
# with a higher value being more sensitive and
# a lower value less sensitive.

# https://facebook.github.io/prophet/docs/trend_changepoints.html

prophet_inst = fbprophet.Prophet(changepoint_prior_scale=0.10)
prophet_inst.fit(daily_df)

# In[86]:

# History plus 60 further daily periods; at this point `forecast` only holds
# the dates to predict for.
forecast = prophet_inst.make_future_dataframe(periods=30 * 2, freq='D')

# Replace the date frame with the model's predictions for those dates.
forecast = prophet_inst.predict(forecast)

# In[88]:

# The black dots represent the actual values
# the blue line indicates the forecasted values
# the light blue shaded region is the uncertainty.

prophet_inst.plot(forecast)
Exemplo n.º 16
0
def predict_testcounts(
    testcounts: pandas.Series,
    *,
    country: str,
    region: typing.Optional[typing.Union[str, typing.List[str]]],
    regional_holidays: bool = False,
    keep_data: bool,
    ignore_before: typing.Optional[typing.Union[datetime.datetime,
                                                pandas.Timestamp, str]] = None,
    **kwargs,
) -> ForecastingResult:
    """ Predict/smooth missing test counts with Prophet.

    Implemented by Laura Helleckes and Michael Osthege.

    The input series is not modified: all renaming happens on a copy.

    Parameters
    ----------
    testcounts : pandas.Series
        date-indexed series of observed testcounts (sorted by date)
    country : str
        name or short code of country (as used by https://github.com/dr-prodigy/python-holidays)
    region : optional, [str]
        if None or []: only nation-wide
        if "all": nation-wide and all regions
        if "CA": nation-wide and those for region "CA"
        if ["CA", "NY", "FL"]: nation-wide and those for all listed regions
    regional_holidays: bool, default False
        if True, fetch regional holidays for each region, if `region` is not set to None or to only
        one region.
        if False (default), fetch only national holidays (useful for countries where test data is
        available at the regional-level, but which only have national holidays).
    keep_data : bool
        if True, existing entries are kept
        if False, existing entries are also predicted, resulting in a smoothed profile
    ignore_before : timestamp
        all dates before this are ignored
        Use this argument to prevent an unrealistic upwards trend due to initial testing ramp-up
    **kwargs
        optional kwargs for the `fbprophet.Prophet`. For example:
        * growth: 'linear' or 'logistic' (default)
        * seasonality_mode: 'additive' or 'multiplicative' (default)

    Returns
    -------
    result : pandas.Series
        the date-indexed series of smoothed/predicted testcounts
    m : fbprophet.Prophet
        the prophet model
    forecast : pandas.DataFrame
        contains the model prediction
    holidays : dict of { datetime : str }
        dictionary of the holidays that were used in the model

    Raises
    ------
    ValueError
        if `regional_holidays` is requested although at most one region
        (and not "all") was given
    """
    # Rename on a copy so the caller's series and its index keep their names.
    testcounts = testcounts.rename("total").rename_axis("date")
    if not ignore_before:
        ignore_before = testcounts.index[0]

    mask_fit = testcounts.index >= ignore_before
    if keep_data:
        # only fill the gaps; observed values stay untouched
        mask_predict = numpy.logical_and(mask_fit,
                                         numpy.isnan(testcounts.values))
    else:
        mask_predict = mask_fit

    # Every calendar year covered by the data, not just the first and the last
    # one: otherwise holidays of intermediate years are missing from the model.
    first_year = testcounts.index[0].year
    last_year = testcounts.index[-1].year
    years = set(range(first_year, last_year + 1))
    regions = numpy.atleast_1d(region)
    multiple_regions = region == "all" or len(regions) > 1

    if regional_holidays and not multiple_regions:
        raise ValueError(
            "Predicting test counts only at national level or for one region only. "
            "Can't ask for regional holiday. Set `regional_holidays` kwarg to False."
        )
    # need last condition because some countries only national holidays for all regions:
    if multiple_regions and regional_holidays:
        # distinguish between national and regional holidays
        all_holidays = get_holidays(country, region, years=years)
        national_holidays = get_holidays(country, region=None, years=years)

        holiday_df = pandas.DataFrame(
            data=[(
                date,
                name,
                "national" if date in national_holidays else "regional",
            ) for date, name in all_holidays.items()],
            columns=["ds", "name", "holiday"],
        )
    else:
        # none, or only one region -> no distinction between national/regional holidays
        all_holidays = get_holidays(country, region=None, years=years)
        holiday_df = pandas.DataFrame(
            dict(
                holiday="holiday",
                name=list(all_holidays.values()),
                ds=pandas.to_datetime(list(all_holidays.keys())),
            ))

    # Config settings of forecast model
    days = (testcounts.index[-1] - testcounts.index[0]).days
    prophet_kwargs = dict(
        growth="logistic",
        seasonality_mode="multiplicative",
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=False,
        holidays=holiday_df,
        mcmc_samples=500,
        # restrict number of potential changepoints:
        n_changepoints=int(numpy.ceil(days / 30)),
    )
    # override defaults with user-specified kwargs
    prophet_kwargs.update(kwargs)
    m = fbprophet.Prophet(**prophet_kwargs)
    logistic_growth = prophet_kwargs["growth"] == "logistic"

    # fit only the selected subset of the data
    df_fit = (testcounts.loc[mask_fit].reset_index().rename(columns={
        "date": "ds",
        "total": "y"
    }))

    if logistic_growth:
        # logistic growth needs a saturation level: the highest observed count
        cap = numpy.max(testcounts)
        df_fit["floor"] = 0
        df_fit["cap"] = cap
    m.fit(df_fit)

    # predict for all dates in the input
    df_predict = testcounts.reset_index().rename(columns={"date": "ds"})
    if logistic_growth:
        df_predict["floor"] = 0
        df_predict["cap"] = cap
    forecast = m.predict(df_predict)

    # make a series of the result that has the same index as the input
    result = pandas.Series(index=testcounts.index,
                           data=testcounts.copy().values,
                           name="testcount")
    # negative predictions are not meaningful for counts -> clip at zero
    # (the assignment aligns the prediction with `result` on the date index)
    result.loc[mask_predict] = forecast.set_index("ds").yhat.clip(lower=0)
    # full-length result series, model and forecast are returned
    return result, m, forecast, all_holidays
Exemplo n.º 17
0
logger.info('Bitcoin Analysis\n')
logger.info('Time period: %s to %s' % (btc_data_year.index[0].strftime('%Y-%m-%d'), btc_data_year.index[-1].strftime('%Y-%m-%d')))
logger.info('Weight Price Stats')
logger.info('Max: %.2f, Min: %.2f, Mean: %.2f, Std: %.2f' % (btc_year_price_stats['Weighted Price']['max'], btc_year_price_stats['Weighted Price']['min'], btc_year_price_stats['Weighted Price']['mean'], btc_year_price_stats['Weighted Price']['std']))

# Perform basic timeseries analysis using fbprophet
btc_fbprohpet = btc_data.copy()
btc_fbprohpet.reset_index(level=0, inplace=True)
btc_fbprohpet = btc_fbprohpet.rename(columns={'Date': 'ds'})
# Drop zero values (their logarithm is not finite)
btc_fbprohpet = btc_fbprohpet[btc_fbprohpet['Weighted Price'] != 0].copy()

# Fit to the log of the data
btc_fbprohpet['y'] = np.log(btc_fbprohpet['Weighted Price'])
btc_data_prophet_log = fbprophet.Prophet(yearly_seasonality=True, weekly_seasonality=True, changepoint_prior_scale=0.15)

# Cache the fitted model on disk so that later runs skip the fit.
# NOTE: unpickling executes arbitrary code -- only load model files that this
# script wrote itself.
btc_model_path = directory + 'Data/Models/fbprophet_logweightprice.model.sav'
if os.path.isfile(btc_model_path):
    with open(btc_model_path, 'rb') as btc_model_file:
        btc_data_prophet_log = pickle.load(btc_model_file)
else:
    btc_data_prophet_log.fit(btc_fbprohpet)
    with open(btc_model_path, 'wb') as btc_model_file:
        pickle.dump(btc_data_prophet_log, btc_model_file)
btc_data_forecast_log = btc_data_prophet_log.make_future_dataframe(periods=365*2, freq='D')
btc_data_forecast_log = btc_data_prophet_log.predict(btc_data_forecast_log)

# Identify change points
btc_changepoints = btc_data_prophet_log.changepoints
# Work out if they are +ve or -ve
# `DataFrame.ix` no longer exists in pandas >= 1.0. The change points are
# timestamps, so select the observations whose `ds` is one of them; sorting
# by `ds` keeps the rows in the same order as the change points.
# NOTE(review): assumes `ds` is a datetime column with unique values and that a
# cached model was fitted on this same data -- otherwise the number of rows
# will not match the number of deltas.
c_data = btc_fbprohpet[btc_fbprohpet['ds'].isin(btc_changepoints)].sort_values('ds').copy()
deltas = btc_data_prophet_log.params['delta'][0]
c_data['delta'] = deltas
 # set out route for forecast tables
 out = []
 # set out route for 2016 & 2017 Train predictions
 train_preds = []
 # make DataFrame of column values as datetime
 datetimes = pd.DataFrame(data=pd.to_datetime(pd.Series(data=train_df.columns)))
 # go though each place in train_df
 for i in range(len(train_df)):
     if i % 5 == 0:
         print(i)
     # extract DataFrame for that place
     df = train_df.iloc[i]
     # add datetime values to DataFrame
     df = pd.concat([df.reset_index(),datetimes],axis=1)
     # use fbprophet to make Prophet model
     place_prophet = fbprophet.Prophet(changepoint_prior_scale=0.1)
     # rename Place df's columns to agree with prophet formatting
     df.columns = ['drop','y','ds']
     # adjust df ; forget index column (drop)
     df = df[['ds','y']]
     # fit place on prophet model 
     place_prophet.fit(df)
     # make a future dataframe for 2016 & 2017 years
     place_forecast = place_prophet.make_future_dataframe( periods=30, freq='Y' )
     # establish predictions
     forecast = place_prophet.predict(place_forecast)
     # tag and bag (forecast table)
     out.append(forecast)
     # store 2016 and 2017 predictions
     train_preds.append([
         forecast.loc[forecast.ds == '2016-12-31'].yhat.values[0],
Exemplo n.º 19
0
    def create_prophet_model(self, days=0, resample=False):
        """Fit a Prophet model to recent prices of the entered ticker and plot it.

        The ticker symbol is read from ``self.ui.lineEdit``. Prices are fetched
        with ``pdr.get_data_yahoo`` starting 2006-10-01, the model is fitted on
        the last three years of the ``Adj Close`` column, and the observations,
        the model curve and its uncertainty band are rendered as a Plotly
        figure inside ``self.ui.browser``.

        Parameters
        ----------
        days : int
            number of daily periods to predict past the last observation;
            with 0 only the history is modelled
        resample : bool
            currently unused; kept so that existing callers keep working

        Returns
        -------
        None
            Any failure (download, fit, rendering) is logged and otherwise
            ignored, so the UI stays usable.
        """
        import logging

        try:
            # Hide the controls of the other strategies ...
            for widget in (
                    self.ui.lineEdit_long,
                    self.ui.spinBox_long,
                    self.ui.horizontalSlider_long,
                    self.ui.lineEdit_short,
                    self.ui.spinBox_short,
                    self.ui.horizontalSlider_short,
                    self.ui.comboBox,
            ):
                widget.setVisible(False)
            # ... and show the Prophet ones.
            self.ui.button_fb.setVisible(True)
            self.ui.spinBox_fbprophet.setVisible(True)

            model = fbprophet.Prophet(
                daily_seasonality=False,
                weekly_seasonality=False,
                yearly_seasonality=True,
                changepoint_prior_scale=0.05,
                changepoints=None,
            )
            # Add monthly seasonality
            model.add_seasonality(name="monthly",
                                  period=30.5,
                                  fourier_order=5)

            ticker = self.ui.lineEdit.text()
            stock = pdr.get_data_yahoo(
                ticker,
                start=datetime.datetime(2006, 10, 1),
                end=datetime.datetime.now()).reset_index()
            # Prophet expects the columns `ds` (date) and `y` (value)
            stock["ds"] = stock["Date"]
            stock["y"] = stock["Adj Close"]

            # Fit on the most recent `training_years` years of history only
            training_years = 3
            cutoff = stock["Date"].max() - pd.DateOffset(years=training_years)
            stock_history = stock[stock["Date"] > cutoff]

            model.fit(stock_history)

            # Model the history and predict `days` further daily periods
            future = model.make_future_dataframe(periods=days, freq="D")
            future = model.predict(future)

            kind = "Predicted" if days > 0 else "Modeled"
            title = "%s Historical and %s Stock Price" % (ticker, kind)

            ##-- plotly
            fig_model = go.Figure([
                go.Scatter(x=stock_history["ds"],
                           y=stock_history["y"],
                           mode="lines",
                           opacity=0.8,
                           name="Observations",
                           line=dict(width=1.4, color='Black')),
                go.Scatter(x=future["ds"],
                           y=future["yhat"],
                           name="Modeled",
                           mode="lines",
                           line=dict(width=2.4, color='Green')),
                go.Scatter(name="Upper Bound",
                           x=future["ds"],
                           y=future["yhat_upper"],
                           mode="lines",
                           showlegend=False,
                           line=dict(color='Green')),
                # fill='tonexty' shades the band between this trace and the
                # previous one (the upper bound)
                go.Scatter(name="Lower Bound",
                           x=future["ds"],
                           y=future["yhat_lower"],
                           mode="lines",
                           fillcolor='rgba(67,255,1, 0.3)',
                           fill='tonexty',
                           showlegend=False,
                           line=dict(color='Green'))
            ])
            fig_model.update_layout(title=title,
                                    xaxis_title="Date",
                                    yaxis_title="Price",
                                    font=dict(family="Courier New, monospace",
                                              size=14,
                                              color="RebeccaPurple"))
            self.ui.browser.setHtml(fig_model.to_html(include_plotlyjs='cdn'))
            self.ui.stackedWidget.setCurrentWidget(self.ui.browser)
        except Exception:
            # Best effort: a failed download or fit must not take the UI down,
            # but the reason should not vanish silently either.
            logging.getLogger(__name__).exception(
                "Could not create the Prophet model")
# Plot both market-cap columns of `cars` over time in one figure.
plt.figure(figsize=(10, 8))
plt.plot(cars['Date'], cars['gm_cap'], 'b-', label='GM')
plt.plot(cars['Date'], cars['tesla_cap'], 'r-', label='TESLA')
plt.xlabel('Date')
plt.ylabel('Market Cap (Billions $)')
plt.title('Market Cap of GM and Tesla')
# plt.show()

# Prophet requires columns ds (Date) and y (value)
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})
# Put market cap in billions
gm['y'] = gm['y'] / 1e9
tesla['y'] = tesla['y'] / 1e9
# Make one prophet model per company and fit each on its own data
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
gm_prophet.fit(gm)
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
tesla_prophet.fit(tesla)

# Make a future dataframe for 2 years (730 daily periods)
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
tesla_forecast = tesla_prophet.make_future_dataframe(periods=365 * 2, freq='D')
# Make predictions (the *_forecast names are reused for the results)
gm_forecast = gm_prophet.predict(gm_forecast)
tesla_forecast = tesla_prophet.predict(tesla_forecast)

# One forecast plot per company
gm_prophet.plot(gm_forecast, xlabel='Date', ylabel='Market Cap (billions $)')
tesla_prophet.plot(tesla_forecast,
                   xlabel='Date',
                   ylabel='Market Cap (billions $)')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import fbprophet

# Tab-separated file; the code below relies on it having exactly the two
# columns `date` and `close`.
# Only `sep` is passed: `delimiter` is an alias of it and recent pandas
# versions reject a call that specifies both.
df = pd.read_csv('2330_v1.csv',
                 header=0,
                 sep='\t',
                 encoding='utf-8')
df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d')

# Plot the closing price against the date column. `date` stays a regular
# column on purpose: the rename below needs both columns.
plt.plot(df['date'], df['close'])
plt.title('TW 2330 Stock Price')
plt.ylabel('Price ($)')
plt.show()

# Prophet requires the columns ds (date) and y (value)
df.columns = ['ds', 'y']
df.tail()

df_new = df.iloc[:, :]
m = fbprophet.Prophet(changepoint_prior_scale=0.95)
m.fit(df_new)
# Predict 90 further periods past the history
future = m.make_future_dataframe(periods=90)
forecast = m.predict(future)
m.plot(forecast)
#m.plot_components(forecast)
plt.show()
Exemplo n.º 22
0
 def prophet_model(self):
     """Return a new Prophet model with daily and yearly seasonality switched off."""
     return fbprophet.Prophet(daily_seasonality=False,
                              yearly_seasonality=False)
Exemplo n.º 23
0
# Fetch the history, then bring it onto a gap-free business-day grid:
# last value per business day, linear interpolation, forward fill for the rest.
df = sids.get_historical(fields, start_date, end_date)
df = df.resample('B').last()
df = df.interpolate('linear')
df = df.fillna(method = 'ffill')

# Drop the innermost level of the column index.
df.columns = df.columns.droplevel(-1)
#df = df.stack(level = 0, dropna=False)
#df['y_orig'] = df['CPI YOY Index']
#df['CPI YOY Index'] = np.log(df['CPI YOY Index'])

# Prophet requires the columns ds (date) and y (value); `ds` is taken from
# the index on the next line.
df = df.rename(columns={'date': 'ds', 'CPI YOY Index': 'y'})
df['ds'] = df.index

# Build the model with an 80 % uncertainty interval and fit it in one go.
df_prophet = fbprophet.Prophet(interval_width = 0.80
                 #weekly_seasonality=False, yearly_seasonality=True,
                 ).fit(df)
#df_prophet.add_regressor(df['VOLUME'])
#df_prophet.fit(df)

# Make a future dataframe with 180 further periods (no freq is passed, so
# Prophet's default frequency applies)
df_forecast = df_prophet.make_future_dataframe(periods=180) #freq = 'MS')
# Make predictions
df_forecast = df_prophet.predict(df_forecast)

fig = df_prophet.plot(df_forecast, xlabel = 'Date', ylabel = 'CPI')
plt.title('CPI Price Action')
#for cp in df_prophet.changepoints:
#    plt.axvline(cp, c='gray', ls = '--', lw=2)

#df_cv = cross_validation(df_prophet, horizon = '30 days')
Exemplo n.º 24
0
import pandas
from matplotlib import pyplot as plt
import fbprophet

# Load the exchange rates; `time.time` holds timestamps in seconds
# (it is converted with unit='s' below).
RATES = pandas.read_csv('/storage/bin/exrates.csv', header=0)
# Prophet requires the columns ds (timestamp) and y (value)
RATES['ds'] = pandas.to_datetime(RATES['time.time'], unit='s')
RATES['y'] = RATES['www.exchangerates.org.uk']
# Remember the latest raw timestamp before the source columns are dropped.
maxts = max(RATES['time.time'])
RATES = RATES.drop(['hryvna.today', 'time.time', 'www.exchangerates.org.uk'],
                   axis=1)

model = fbprophet.Prophet()
model.fit(RATES)
# Timestamps to predict: one per day after the latest observation.
future = list()
for i in range(1, 7):  # 6 days: range(1, 7) yields 1..6 (86400 s = one day)
    future.append(maxts + 86400 * i)

future = pandas.DataFrame(future)
current = pandas.DataFrame(RATES['ds'])

future.columns = ['ds']
future['ds'] = pandas.to_datetime(future['ds'], unit='s')

# Predict on the observed dates followed by the future ones.
data = pandas.concat([current, future], axis=0)
forecast = model.predict(data)
model.plot(forecast, xlabel='Date', ylabel='UAHs for 1 USD')

plt.show()
Exemplo n.º 25
0
    def fit(self,
            X,
            y=None,
            time_col=TIME_COL,
            value_col=VALUE_COL,
            **fit_params):
        """Fit the wrapped fbprophet model on the time series in ``X``.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Input time series holding the timestamp column, the value
            column and the columns of any additional regressors.
            The response is part of ``X`` so that it can be transformed
            by `sklearn.pipeline.Pipeline`.
        y : ignored
            The original time series values; not used because the
            response for fitting is taken from ``X``.
        time_col : `str`
            Name of the time column in ``X``
        value_col : `str`
            Name of the value column in ``X``
        fit_params : `dict`
            additional parameters for null model

        Returns
        -------
        self : self
            The fitted model is available as ``self.model``.
        """
        super().fit(X,
                    y=y,
                    time_col=time_col,
                    value_col=value_col,
                    **fit_params)

        # Columns handed to Prophet: time, response, then any extra regressors.
        selected_columns = [time_col, value_col]
        if self.add_regressor_dict is not None:
            selected_columns = selected_columns + list(
                self.add_regressor_dict.keys())

        # Prophet expects the names "ds" and "y"; other estimators can keep
        # using TIME_COL, etc.
        prophet_names = {time_col: "ds", value_col: "y"}
        fit_df = X.reset_index(drop=True)[selected_columns]
        fit_df = fit_df.rename(columns=prophet_names)

        # ``coverage`` (not ``interval_width``) sets the prediction band width,
        # which keeps the parameter interface common to every
        # BaseForecastEstimator.
        # The model is created here rather than in __init__ so that parameters
        # changed by a grid search take effect.
        prophet_params = dict(
            growth=self.growth,
            changepoints=self.changepoints,
            n_changepoints=self.n_changepoints,
            changepoint_range=self.changepoint_range,
            yearly_seasonality=self.yearly_seasonality,
            weekly_seasonality=self.weekly_seasonality,
            daily_seasonality=self.daily_seasonality,
            holidays=self.holidays,
            seasonality_mode=self.seasonality_mode,
            seasonality_prior_scale=self.seasonality_prior_scale,
            holidays_prior_scale=self.holidays_prior_scale,
            changepoint_prior_scale=self.changepoint_prior_scale,
            mcmc_samples=self.mcmc_samples,
            interval_width=self.coverage,
            uncertainty_samples=self.uncertainty_samples,
        )
        self.model = fbprophet.Prophet(**prophet_params)

        # Register extra regressors; their values are expected in the time
        # series frame, alongside the target column.
        if self.add_regressor_dict is not None:
            for regressor_name, regressor_kwargs in self.add_regressor_dict.items():
                self.model.add_regressor(name=regressor_name,
                                         **regressor_kwargs)

        # Register custom seasonalities, if any were configured.
        if self.add_seasonality_dict is not None:
            for seasonality_name, seasonality_kwargs in self.add_seasonality_dict.items():
                self.model.add_seasonality(name=seasonality_name,
                                           **seasonality_kwargs)

        self.model.fit(fit_df)
        return self
Exemplo n.º 26
0
    1,
})
# Fit, forecast and cross-validate one model per entry of `states`.
for j in states:
    # Each state has its own CSV file named after it.
    df2 = pd.read_csv('Statewise/' + j + '.csv')
    # Prophet requires columns ds (Date) and y (value)
    df2 = df2.rename(columns={'Date': 'ds', 'Confirmed': 'y'})
    # Make the prophet model: the built-in seasonalities are switched off and
    # replaced by two custom ones with periods of 1 and 5 (in days).
    df_prophet = fbprophet.Prophet(changepoint_prior_scale=0.6,
                                   holidays=holidays,
                                   holidays_prior_scale=40,
                                   seasonality_mode='multiplicative',
                                   seasonality_prior_scale=10,
                                   daily_seasonality=False,
                                   yearly_seasonality=False,
                                   weekly_seasonality=False).add_seasonality(
                                       name='daily',
                                       period=1,
                                       fourier_order=15).add_seasonality(
                                           name='incubation_period',
                                           period=5,
                                           fourier_order=20)

    df_prophet.fit(df2)
    # Forecast 10 further daily periods and plot the result
    future = df_prophet.make_future_dataframe(periods=10, freq='D')
    forecast = df_prophet.predict(future)
    df_prophet.plot(forecast)
    # Cross-validate with an initial window of (number of rows - 5) days and
    # a 2-day horizon.
    # NOTE(review): this equals "all but the last five days" only if the file
    # has exactly one row per day -- confirm.
    cv = cross_validation(df_prophet,
                          initial=str(len(df2) - 5) + ' days',
                          horizon='2 days')
Exemplo n.º 27
0
import pandas as pd
import matplotlib.pyplot as plt
import fbprophet
# Load the last table found in the garlic price page.
df = pd.read_html(
    'https://raw.githubusercontent.com/diazonic/vegetables_price_forecast/main/Grlic.html'
)[-1]
# Drop the final row of the table before the prices are converted to int
# (presumably a non-data summary row -- TODO confirm).
df.drop(df.tail(1).index, inplace=True)
df['Modal Price (Rs/q)'] = df['Modal Price (Rs/q)'].astype(int)
# Build a date from the month name and the year.
df['date'] = df['Month Name'] + '-' + df['Year']
df['date'] = pd.to_datetime(df['date'])
# Prophet requires the columns ds (date) and y (value), sorted here by date.
df_new = df[['date', 'Modal Price (Rs/q)']]
df_new = df_new.rename(columns={'date': 'ds', 'Modal Price (Rs/q)': 'y'})
df_new = df_new.sort_values(by='ds')
plt.plot(df_new['ds'], df_new['y'])
plt.show()
m = fbprophet.Prophet(changepoint_prior_scale=1, seasonality_prior_scale=1)
m.fit(df_new)
# Predict 365 further periods past the history
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# The data is the garlic table, so label the axis accordingly
# (the label used to say "Potato").
fig1 = m.plot(forecast, xlabel='Date', ylabel='Price of Garlic')

# Observations as black dots with the predicted curve on top
plt.figure(figsize=(20, 10))
plt.scatter(df_new['ds'], df_new['y'], s=5, c='k')
plt.plot(forecast['ds'], forecast['yhat'])
Exemplo n.º 28
0
def population_by_place(years=20,
                        n_places=1000,
                        changepoint_prior=0.15,
                        indicate=False,
                        time=False):
    """Forecast the total population of individual places with Prophet.

    Reads the NHGIS nominal-place time series, keeps the places that were
    measured at least twice, fits one Prophet model per place and predicts
    ``years`` yearly periods past the last measurement.

    Parameters
    ----------
    years : int
        number of yearly periods to forecast
    n_places : int
        number of places to forecast: the first ``n_places`` measurable
        places of the table are used
    changepoint_prior : float
        ``changepoint_prior_scale`` of every Prophet model
    indicate : bool
        if True, print the position of each place once it has been forecast
    time : bool
        if True, print start time, end time and elapsed seconds right
        before returning

    Returns
    -------
    list of pandas.DataFrame
        the Prophet forecast of each place, in table order
    """
    # The `time` flag shadows the module name, so the module needs an alias.
    # (A plain `import time` would rebind the flag and silently disable the
    # timing report at the end.)
    import time as time_module

    # total population by place
    pop_by_place = pd.read_csv(
        '../../data/NHGIS/nhgis0002_csv/nhgis0002_ts_nominal_place.csv',
        encoding='ISO-8859-1')

    started = time_module.time() if time else None

    # One column per measurement; characters after the 5-letter prefix are the year.
    # NOTE(review): there is no 1990 column in this selection -- confirm that
    # this is intended.
    measurement_columns = ['AV0AA1970', 'AV0AA1980', 'AV0AA2000', 'AV0AA2010']
    unique_places = pop_by_place[['NHGISCODE'] + measurement_columns]

    # thresh=3 counts the NHGISCODE cell as well, i.e. it keeps the places
    # with at least 2 measurements; remaining gaps are treated as 0.
    measurable_places = unique_places.dropna(axis=0, thresh=3).fillna(0)
    measurable_places = measurable_places.drop('NHGISCODE', axis=1)

    # Timestamps of the measurements, parsed from the year in the column name
    measurement_dates = pd.to_datetime(
        arg=[column[5:] for column in measurement_columns])

    # Only the requested places are prepared and fitted.
    selected_places = measurable_places.iloc[:n_places]

    prophet_by_place = []
    for position, (_, counts) in enumerate(selected_places.iterrows()):
        # Prophet requires the columns ds (date) and y (value)
        history = pd.DataFrame({
            'ds': measurement_dates,
            'y': counts.to_numpy(),
        })
        place_prophet = fbprophet.Prophet(
            changepoint_prior_scale=changepoint_prior)
        place_prophet.fit(history)
        # extend the history by `years` yearly periods and predict
        place_forecast = place_prophet.make_future_dataframe(periods=1 * years,
                                                             freq='Y')
        prophet_by_place.append(place_prophet.predict(place_forecast))
        # progress indication (useful when forecasting many places)
        if indicate:
            print(position)

    if time:
        finished = time_module.time()
        print(f'now = {started}\nthen = {finished}\ntime = {finished - started}')

    # return forecasts
    return prophet_by_place
    first_date.date(), last_date.date()))
'''During Q2 2017, Tesla sold 22026 cars while GM sold 725000. In Q3 2017, Tesla sold 
   26137 cars and GM sold 808000. In all of 2017, Tesla sold 103084 cars and GM sold 3002237.
   That means GM was valued less than Tesla in a year during which it sold 29 times more cars 
   than Tesla! Interesting to say the least.'''

import fbprophet

# Prophet requires columns ds (Date) and y (value)
gm = gm.rename(columns={'Date': 'ds', 'cap': 'y'})
# Put market cap in billions
gm['y'] = gm['y'] / 1e9

# Make the prophet models and fit on the data
# changepoint_prior_scale can be changed to achieve a better fit
gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05)
gm_prophet.fit(gm)

# Repeat for the tesla data (here the number of potential changepoints is
# additionally limited to 10)
tesla = tesla.rename(columns={'Date': 'ds', 'cap': 'y'})
tesla['y'] = tesla['y'] / 1e9
tesla_prophet = fbprophet.Prophet(changepoint_prior_scale=0.05,
                                  n_changepoints=10)
tesla_prophet.fit(tesla)

# Make a future dataframe for 2 years (730 daily periods)
gm_forecast = gm_prophet.make_future_dataframe(periods=365 * 2, freq='D')
# Make predictions
gm_forecast = gm_prophet.predict(gm_forecast)

# Same future frame for tesla; no prediction is made for it in this snippet
tesla_forecast = tesla_prophet.make_future_dataframe(periods=365 * 2, freq='D')
Exemplo n.º 30
0
plt.legend()

import numpy as np

# The time when Reliance was valued the highest till now
highest_date = market_cap.loc[market_cap['cap'].idxmax(), 'Date']
print("Reliance was valued the highest in {} .".format(highest_date.date()))

import fbprophet
# Prophet requires columns ds (Date) and y (value)
reliance = reliance.rename(columns={
    'Date': 'ds',
    'cap': 'y'
})
# Scale the market cap down by 1e9.
# NOTE(review): the plot label below says "Rupees" although the values are
# divided by 1e9 here -- confirm the intended unit.
reliance[
    'y'] = reliance['y'] / 1e9
# Make the prophet model and fit on the data
reliance_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15)
reliance_prophet.fit(reliance)

# Make a future dataframe for 2 years
reliance_forecast = reliance_prophet.make_future_dataframe(
    periods=365 * 2, freq='D')
# Make predictions
reliance_forecast = reliance_prophet.predict(reliance_forecast)

reliance_prophet.plot(reliance_forecast,
                      xlabel='Date',
                      ylabel='Market Cap (Rupees)')
plt.title('Market Cap of Reliance')

# Plot the trends and patterns
reliance_prophet.plot_components(reliance_forecast)