Exemplo n.º 1
0
def run():
    """Print per-day GBP totals from the ledger journal, then fit and plot a Prophet model.

    The first half walks every posting returned by ``journal.query("")`` and
    prints one ``<date> , <total>`` line per day, counting only postings whose
    commodity is "£".  The second half reads ``./testing.csv``, scales its
    ``y`` column by 100, fits a default ``Prophet`` model and plots the
    forecast and its components.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    current_date = None
    amount = 0

    for post in journal.query(""):
        # Only postings denominated in pounds take part in the totals.
        if str(post.amount.commodity) != "£":
            continue
        if current_date is not None and post.date != current_date:
            # The day changed: report the finished day under its own date
            # before starting the next total.
            print("%s , %s" % (current_date, amount))
            amount = 0
        current_date = post.date
        # Start each day's total with the posting that opened it.
        amount = amount + post.amount

    # Flush the last day, which has no following posting to trigger a print.
    if current_date is not None:
        print("%s , %s" % (current_date, amount))

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # NOTE(review): `future` is not defined inside this function; presumably it
    # is a module-level frame built with m.make_future_dataframe(...) -- confirm.
    forecast = m.predict(future)

    m.plot(forecast)
    m.plot_components(forecast)
Exemplo n.º 2
0
# Peek at the last ten forecast rows (only rendered when run in a notebook).
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

# Copy the last ten point forecasts and interval bounds into plain arrays.
y = np.array(forecast['yhat'])[-10:]
y_l = np.array(forecast['yhat_lower'])[-10:]
y_u = np.array(forecast['yhat_upper'])[-10:]

# Truncate every value toward zero, writing the result back into each array.
for series in (y, y_l, y_u):
    for pos, value in enumerate(series):
        series[pos] = int(value)

for series in (y, y_l, y_u):
    print(series)

# Forecast plot followed by the trend/seasonality breakdown.
future_plot = ph.plot(forecast,
                      figsize=(14, 8),
                      xlabel='days',
                      ylabel='cases')

trends_weekly_dayly_plot = ph.plot_components(forecast, figsize=(14, 8))

!pip install statsmodels

#evaluate an ARIMA model using a walk-forward validation
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
import statsmodels.api as sm
#import statsmodels.tsa.arima #.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
# Split the daily world case counts: the first 90% of rows train the model,
# the remainder is held out for testing.
data_a = Daily_World_cases
labels = np.array(data_a)
size = int(0.9 * len(labels))
train = labels[:size]
test = labels[size:]
Exemplo n.º 3
0
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False,

            seasonality_prior_scale=0.1,
            interval_width=0.95,
            holidays_prior_scale=10,
            changepoint_prior_scale=0.15)

# Register Japanese holidays, then fit on the training split.
model.add_country_holidays(country_name='JP')
model.fit(df_train)

# Predict for every date present in the full frame.
forecast = model.predict(df)
forecast[['ds', 'yhat']].head()

model.plot_components(forecast)

# Overlay train, test and forecast on one axis; all three share a line style.
_line_style = dict(marker='o', ms=3, linestyle='-')
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(df_train['ds'], df_train['y'], c='grey', label='Train', **_line_style)
ax.plot(df_test['ds'], df_test['y'], c='red', label='Test', **_line_style)
ax.plot(forecast['ds'],
        forecast['yhat'],
        c='blue',
        label='Forecast',
        alpha=0.5,
        **_line_style)
ax.legend()
ax.set_xlabel('Date')
ax.set_ylabel('Sales')

forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Put predictions next to the observed values, joined on the date.
_observed = df.set_index('ds')['y']
metric_df = forecast[['ds', 'yhat']].set_index('ds').join(_observed).reset_index()

metric_df.tail(5)
# Rows with a missing value in either column are dropped in place.
metric_df.dropna(inplace=True)
metric_df.tail()
Exemplo n.º 4
0
class ProphetProfit:
    """Forecast item prices with Prophet and estimate per-item profit.

    Workflow implemented by the methods below: load price history through SQL
    (``sql_call``), fit a Prophet model for one item (``prophet_fit``), collect
    projected profit for up-trending items (``make_lists``) and simulate item
    selection with a random and a UCB bandit strategy (``mabp_random`` /
    ``mapb_ucb``).
    """

    # Pickle file written by make_lists and read back by the bandit methods.
    _PROFIT_PICKLE = '../data/profit_df.pkl'

    def __init__(self, engine, query, item):
        """Store the data source and the item to model.

        :param engine: connection/engine handed to ``pd.read_sql``
        :param query: SQL text handed to ``pd.read_sql``
        :param item: value of ``name_enus`` selecting the item to fit
        """
        self.engine = engine
        self.query = query
        self.item = item
        self.df = None  # raw query result, set by sql_call
        self.data = None  # per-item frame with columns ds / y
        self.m = None  # fitted Prophet model
        self.lmbda = None  # reserved for the (currently disabled) Box-Cox step
        self.forecast = None  # output of Prophet.predict
        self.ma = None  # observed y + forecast + 7-row rolling trend
        self.item_list = None  # per-item means, highest quantityavg first
        self.positive_trend = []
        self.negative_trend = []
        self.profit = pd.DataFrame()  # one column of projected profit per item

    def sql_call(self):
        """Run the query and keep the result sorted by the ``when`` column."""
        self.df = pd.read_sql(self.query, self.engine)
        self.df = self.df.sort_values(by='when')

    def prophet_fit(self, periods=31):
        """Fit Prophet on the current item's prices and forecast ahead.

        :param periods: number of future periods passed to
            ``make_future_dataframe``
        Sets ``self.data``, ``self.m``, ``self.forecast`` and ``self.ma``.
        """
        mask = self.df['name_enus'] == self.item
        self.data = self.df.loc[mask, ['when', 'priceavg']].rename(columns={
            'when': 'ds',
            'priceavg': 'y'
        })
        self.data['ds'] = pd.to_datetime(self.data['ds'])

        # Remove outliers: keep prices strictly within 1.5 standard
        # deviations of the mean.
        std = self.data['y'].std() * 1.5
        mean = self.data['y'].mean()
        self.data = self.data[(self.data['y'] < mean + std)
                              & (self.data['y'] > mean - std)]

        # The Box-Cox transformation is disabled, so self.lmbda stays None.
        self.m = Prophet(n_changepoints=20)
        self.m.add_seasonality(period=30.4, fourier_order=5, name='monthly')
        self.m.fit(self.data)
        future = self.m.make_future_dataframe(periods)
        self.forecast = self.m.predict(future)

        # Observed prices and forecast side by side (aligned by position),
        # plus a 7-row rolling mean of the trend.
        self.ma = pd.concat([
            self.data['y'].reset_index(drop=True),
            self.forecast[['ds', 'yhat', 'trend']]
        ],
                            axis=1)
        self.ma['7day'] = self.ma['trend'].rolling(7).mean()
        # +1 where the trend is above its rolling mean, -1 where below.
        self.ma.loc[(self.ma['trend'] > self.ma['7day']), 'trend_pos'] = 1
        self.ma.loc[(self.ma['trend'] < self.ma['7day']), 'trend_pos'] = -1

    def plot(self):
        """Reload the data, refit the current item and show the Prophet plots."""
        self.sql_call()
        self.prophet_fit()
        self.m.plot(self.forecast)
        self.m.plot_components(self.forecast)
        plt.show()

    def make_lists(self, buy_date):
        """Collect projected profit columns for up-trending items.

        Items are visited from highest to lowest mean ``quantityavg``.  Once
        ten profit columns have been collected the table is pickled and the
        scan stops.

        :param buy_date: purchase date as a ``'%Y-%m-%d'`` string
        :raises IndexError: if an up-trending item has no price on
            ``buy_date`` nor on the day before
        """
        self.sql_call()
        self.item_list = self.df.groupby('name_enus').mean().sort_values(
            'quantityavg')[::-1]
        buy_date = np.datetime64(
            datetime.datetime.strptime(buy_date, '%Y-%m-%d').date())
        for item in self.item_list.index:
            self.item = item
            if self.profit.shape[1] >= 10:
                # NOTE(review): the table is only persisted on this path, so
                # nothing is written when fewer than ten items qualify.
                with open(self._PROFIT_PICKLE, 'wb') as handle:
                    pickle.dump(self.profit, handle)
                break

            self.prophet_fit()
            # Sum of the eight trend_pos flags read backwards from row -31.
            trend_score = self.ma['trend_pos'].iloc[-31:-39:-1].sum()
            if trend_score > 1:
                try:
                    buy = self.data[self.data['ds'] ==
                                    buy_date]['y'].values[0]
                except IndexError:
                    # No price on the buy date: fall back to the day before.
                    buy = self.data[self.data['ds'] == (
                        buy_date - np.timedelta64(1, 'D'))]['y'].values[0]
                profit_temp = self.forecast[
                    self.forecast['ds'] > buy_date][['ds', 'yhat'
                                                     ]].reset_index(drop=True)
                profit_temp['buy'] = buy
                profit_temp[item] = profit_temp['yhat'] - profit_temp['buy']
                self.profit = pd.concat([self.profit, profit_temp[item]],
                                        axis=1)
                # Mirror the bookkeeping done for down-trending items.
                self.positive_trend.append(self.item)
            elif trend_score < -1:
                self.negative_trend.append(self.item)

    def cross_val(self):
        """Cross-validate the fitted model and print the performance metrics."""
        df_cv = cross_validation(self.m,
                                 initial='62 days',
                                 period='1 days',
                                 horizon='7 days')
        print(df_cv.sort_values('ds').tail())
        df_p = performance_metrics(df_cv)
        print(df_p)

    def _load_scaled_profit(self):
        """Return the pickled profit table as a RobustScaler-transformed array."""
        # NOTE: unpickling can execute arbitrary code; only load files that
        # make_lists wrote itself.
        df = pd.read_pickle(self._PROFIT_PICKLE)
        # fillna returns a new frame; the result has to be kept.
        df = df.fillna(df.mean())
        return RobustScaler().fit(df).transform(df)

    def mabp_random(self):
        """Pick one item uniformly at random per row of the profit table.

        :return: share of rounds in which each column index was picked
        """
        rewards = self._load_scaled_profit()
        n_rounds, n_items = rewards.shape
        selected = [random.randrange(n_items) for _ in range(n_rounds)]
        return pd.Series(selected).value_counts(normalize=True)

    def mapb_ucb(self):
        """Pick one item per row of the profit table with the UCB rule.

        :return: share of rounds in which each column index was picked
        """
        rewards = self._load_scaled_profit()
        n_rounds, n_items = rewards.shape
        selected = []
        numbers_of_selections = [0] * n_items
        sums_of_reward = [0] * n_items

        for n in range(n_rounds):
            item = 0
            # Scaled rewards can be negative, so the running maximum must
            # start below every possible bound rather than at zero.
            max_upper_bound = -math.inf
            for i in range(n_items):
                if numbers_of_selections[i] > 0:
                    average_reward = (sums_of_reward[i] /
                                      numbers_of_selections[i])
                    delta_i = math.sqrt(2 * math.log(n + 1) /
                                        numbers_of_selections[i])
                    upper_bound = average_reward + delta_i
                else:
                    # Untried items win so that each is sampled once.
                    upper_bound = math.inf
                if upper_bound > max_upper_bound:
                    max_upper_bound = upper_bound
                    item = i
            selected.append(item)
            numbers_of_selections[item] += 1
            sums_of_reward[item] += rewards[n, item]
        return pd.Series(selected).value_counts(normalize=True)

    # Correctly spelled alias matching mabp_random; the original name stays.
    mabp_ucb = mapb_ucb
Exemplo n.º 5
0
    future = prophet.make_future_dataframe(
        periods=90)  #, include_history=False
    df_cv = cross_validation(prophet,
                             '90 days',
                             initial='270 days',
                             period='90 days')
    print(df_cv)
    plt.figure(1)
    plt.plot(df_cv['ds'], df_cv['y'])
    plt.plot(df_cv['ds'], df_cv['yhat'])
    plt.grid(True)
    plt.show()
    break
    forecast = prophet.predict(future)
    # print(forecast)
    prophet.plot_components(forecast)
    # plt.grid(True)
    # plt.figure(3)
    # plt.plot(forecast['ds'], forecast['yhat'])
    # plt.grid(True)
    # plt.show()
    remain = forecast.ix[89, ['yhat']]
    res[i] = remain
# One row per key of `res`, with the single value column named 'a'.
df3 = pd.DataFrame.from_dict(res, orient='index')
df3.columns = ['a']
# Sort descending and keep the first twenty rows; x holds their keys.
p = df3.sort_values('a', ascending=False).iloc[:20]
x = p.index

# def save_result(re_na, QRTA, ISIR, VOLUME, top20):
#     with open(re_na+'.txt', 'w') as fr:
#         fr.writelines(['QRTA'+'\t'+str(QRTA)+'\n', 'ISIR'+'\t'+str(ISIR)+'\n', 'VOLUME'+'\t'+str(VOLUME)+'\n'])
Exemplo n.º 6
0
def mod_prophet(train,
                test,
                dependent_var_col,
                outpath,
                name,
                changepoints=None,
                freq='D',
                n_changepoints=10,
                reg_cols=None,
                country_iso_code='ES',
                change_scale=0.05):
    """
    Train a Prophet model, score it on the test split and save diagnostic plots.

    :param train: dataset with train data, indexed by date
    :param test: dataset with test data. The columns should be the same ones than in the train data
    :param dependent_var_col: name of the column with the objective variable
    :param outpath: path to save the files and plots in
    :param name: name of the series to use when saving the plots
    :param changepoints: list of dates where a break in the series is added manually. Defaults to None.
    :param freq: frequency of the series ('D' for daily, 'W' for weekly, 'M' for monthly). Defaults to daily.
    :param n_changepoints: Number of changepoints to be used in the model. Defaults to 10.
    :param reg_cols: list of names of the columns in the dataframe to be added as regressors in the model.
    :param country_iso_code: country code to use the holidays of each one.
    :param change_scale: value passed to Prophet as ``changepoint_prior_scale``. Defaults to 0.05
    :return: mae, rmse, mape, name, predictions, conf_intervals
    """
    # Output folder <outpath>/<name>/; the trailing separator is kept so file
    # names can simply be appended below.
    report_output_path = os.path.join(str(outpath), str(name), '')
    if not os.path.isdir(report_output_path):
        os.makedirs(report_output_path)
        print('creating output folder in: \n', report_output_path)

    # Work on copies so the caller's frames keep their original index.
    train = train.copy()
    test = test.copy()
    train.index = pd.to_datetime(train.index)
    test.index = pd.to_datetime(test.index)
    # Join both dataframes to plot when the model is done.
    orig_df = pd.concat([train, test])

    if changepoints is not None:
        # Only changepoints strictly before the end of the train data are
        # kept; an empty result falls back to automatic changepoints.
        last_train_date = train.index.max()
        changepoints = [
            date for date in pd.to_datetime(changepoints)
            if date < last_train_date
        ] or None

    shared_kwargs = dict(yearly_seasonality=True,
                         changepoint_prior_scale=change_scale,
                         changepoint_range=0.95,
                         seasonality_mode='additive')
    if changepoints is None:
        mod = Prophet(n_changepoints=n_changepoints, **shared_kwargs)
    else:
        mod = Prophet(changepoints=changepoints, **shared_kwargs)

    mod.add_country_holidays(country_name=country_iso_code)

    # Private copy: the caller's list is never mutated.
    regressors = list(reg_cols) if reg_cols is not None else []
    for regressor in regressors:
        mod.add_regressor(regressor, standardize=False, mode='multiplicative')
        print('adding regressor: ', regressor, '\n')
    tr_df = train[regressors + [dependent_var_col]].reset_index(drop=False)

    # reset_index() puts the former date index first; Prophet needs it as
    # 'ds' whatever the index was called.
    date_col = 'Date' if 'Date' in tr_df.columns else tr_df.columns[0]
    tr_df = tr_df.rename(columns={date_col: 'ds', dependent_var_col: 'y'})

    print(tr_df.head())
    # fit the data
    mod.fit(tr_df)

    # forecast
    n_test = test.shape[0]
    future = mod.make_future_dataframe(periods=n_test, freq=freq)
    for column in regressors:
        # NOTE(review): placeholder kept from the original ("change after
        # testing"): every regressor is set to 0 for all forecast dates.
        future[str(column)] = 0

    forecast = mod.predict(future)

    y_true = test[dependent_var_col]
    y_pred = forecast['yhat'].tail(n_test)
    mae = mean_absolute_error(y_pred=y_pred, y_true=y_true)
    rmse = np.sqrt(mean_squared_error(y_pred=y_pred, y_true=y_true))
    mape = mean_absolute_percentage_error(y_pred=y_pred, y_true=y_true)

    # forecast plot with the changepoints drawn on top
    plot = mod.plot(forecast, xlabel='Date', ylabel=dependent_var_col)
    add_changepoints_to_plot(plot.gca(), mod, forecast)
    plt.savefig(report_output_path + 'fc_plot_' + name + '.png')
    plt.close('all')

    # components plot
    mod.plot_components(forecast)
    plt.savefig(report_output_path + name + 'components_plot.png')
    plt.close('all')

    # NOTE(review): this changepoint-magnitude chart is built but never
    # saved; it is discarded by the final plt.close('all').
    deltas = mod.params['delta'].mean(0)
    fig = plt.figure(facecolor='w')
    ax = fig.add_subplot(111)
    ax.bar(range(len(deltas)), deltas)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_ylabel('Rate change')
    ax.set_xlabel('Potential changepoint')
    fig.tight_layout()

    # interactive plot
    fig = plot_plotly(mod, forecast)
    py.plot(fig,
            filename=report_output_path + name + 'fbprophet_plot.html',
            auto_open=False)

    # Observed values and forecast columns side by side, joined on the date.
    aux_res_df = forecast.set_index('ds', drop=True)
    aux_res_df.index = pd.to_datetime(aux_res_df.index)
    aux_res_df = orig_df.merge(aux_res_df, left_index=True, right_index=True)

    predictions = aux_res_df.tail(n_test)
    conf_intervals = forecast[['yhat_lower', 'yhat_upper']]

    # Zoomed plot over the last 180 rows, with the test window shaded.
    test_start = test.index.min()
    aux_res_df = aux_res_df.tail(180)
    fig, ax = plt.subplots(figsize=(15, 10))
    plt.plot(aux_res_df["yhat"], color='darkred', label='Forecast')
    plt.plot(aux_res_df[dependent_var_col], color='darkblue', label='Real')
    plt.plot(aux_res_df['yhat_lower'],
             color='darkgreen',
             label='Lower',
             alpha=0.5)
    plt.plot(aux_res_df['yhat_upper'],
             color='darkgreen',
             label='Upper',
             alpha=0.5)
    ax.fill_between(test.index,
                    aux_res_df['yhat_lower'].tail(n_test),
                    aux_res_df['yhat_upper'].tail(n_test),
                    facecolor='darkgreen',
                    alpha=0.2,
                    interpolate=True)
    plt.axvline(x=test_start, color='grey', linestyle='--')
    ax.xaxis.set_major_locator(mticker.MultipleLocator(10000))
    plt.gcf().autofmt_xdate()

    # Text box with the error metrics, placed in the upper left (axes coords).
    props = dict(boxstyle='round', facecolor='white')
    textstr = 'MAE:' + str(round(mae, 0)) + '\n' + 'MAPE:' + str(round(
        mape, 2))
    ax.text(0.05,
            0.95,
            textstr,
            transform=ax.transAxes,
            fontsize=14,
            verticalalignment='top',
            bbox=props)

    plt.legend()
    plt.savefig(report_output_path + 'pfc_' + name + '_' +
                test_start.strftime('%Y-%m-%d') + '.png')

    plt.close('all')

    return mae, rmse, mape, name, predictions, conf_intervals
Exemplo n.º 7
0
plt.ylabel('Number of Crimes')

# Count the rows falling in each month and turn the result into a frame.
data_prophet = data.resample('M').size().reset_index()

data_prophet
data_prophet.columns = ['Date', 'Crime Count']
data_prophet
data_prophet_df = pd.DataFrame(data_prophet)
data_prophet_df

# Prophet expects the date column as 'ds' and the target as 'y'.
_prophet_names = {'Date': 'ds', 'Crime Count': 'y'}
data_prophet_df2 = data_prophet_df.rename(columns=_prophet_names)

data_prophet_df2

m = Prophet()
m.fit(data_prophet_df2)

# Extend the timeline 365 periods past the history and predict over it.
# NOTE(review): no freq is passed although the history is monthly -- confirm
# that forecasting at make_future_dataframe's default frequency is intended.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

forecast

# Forecast plot.
figure = m.plot(forecast, xlabel='Date', ylabel='Crime Rate')

# Component breakdown of the forecast.
figure3 = m.plot_components(forecast)
Exemplo n.º 8
0
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Forecast over the prepared future dates.
prediction = model.predict(future_dates)

prediction.head()

_forecast_columns = ["ds", "yhat", "yhat_lower", "yhat_upper"]
prediction[_forecast_columns].tail()

prediction[_forecast_columns].tail()

# Forecast plot.
model.plot(prediction)

# Component plots of the forecast.
model.plot_components(prediction)

df.shape

# Cross-validation: 1095-day initial window, a new cutoff every 180 days,
# each forecasting 730 days ahead.
df_cv = cross_validation(model,
                         initial='1095 days',
                         period='180 days',
                         horizon='730 days')
df_cv.head()

# Error metrics over the cross-validation results, then the MSE plot.
df_performance = performance_metrics(df_cv)
df_performance.head()
fig = plot_cross_validation_metric(df_cv, metric='mse')
def main():
    df = load_data()

    #page = st.sidebar.radio("Choose a page", ["Homepage", "SignUp"])

    verified = "True"
    result = "F.A.S.T. WebApp - For Interview Demo"
    st.sidebar.title(result)
    st.sidebar.write(
        "Created By: Akash M Dubey [LinkedIn](https://www.linkedin.com/in/akashmdubey/)"
    )
    st.sidebar.write(
        "Checkout more projects at [www.akashmdubey.com/projects](https://akashmdubey.com/)"
    )

    page = st.sidebar.radio("Choose a Function", [
        "About the Project", "Live News Sentiment", "Company Basic Details",
        "Company Advanced Details", "Stock Future Prediction",
        "Google Trends with Forecast", "Twitter Trends",
        "Meeting Summarization"
    ])

    if page == "Google Trends with Forecast":
        st.sidebar.write("""
        ## Choose a keyword and a prediction period 
        """)
        keyword = st.sidebar.text_input("Keyword", "Company name")
        periods = st.sidebar.slider('Prediction time in days:', 7, 365, 90)

        # main section
        st.write("""
        # Welcome to Trend Predictor App
        ### This app predicts the **Google Trend** you want!
        """)
        st.image(
            'https://s3.eu-west-2.amazonaws.com/cdn.howtomakemoneyfromhomeuk.com/wp-content/uploads/2020/10/Google-Trends.jpg',
            width=350,
            use_column_width=200)
        st.write("Evolution of interest:", keyword)

        df = get_data(keyword)
        forecast, fig1, fig2 = make_pred(df, periods)

        st.pyplot(fig1)

        st.write("Trends Over the Years and Months")
        st.pyplot(fig2)

    elif page == "About the Project":

        st.title('Data Sources')
        st.write("""
        ### Our F.A.S.T application have 3 data sources for two different use cases:
        #### 1. Web Scrapping to get Live News Data
        #### 2. Twitter API to get Real time Tweets
        #### 3. Google Trends API to get Real time Trends
        """)
        st.text('')

        link = '[Project Report](https://codelabs-preview.appspot.com/?file_id=1qxniFjwkDir6NT17KkvS1zDbmIgawcrEEwbbfCtAk8k#1)'
        st.markdown(link, unsafe_allow_html=True)

        st.title('AWS Data Architecture')
        st.image('./Images/Architecture Final AWS_FAST.jpg',
                 width=900,
                 use_column_width=1200)

        st.title('Dashboard')
        import streamlit.components.v1 as components
        components.iframe(
            "https://app.powerbi.com/view?r=eyJrIjoiZjMzMGUyZTEtM2RiMS00NzFlLWE3MWMtZDgzMjIxNTgxYmY3IiwidCI6ImE4ZWVjMjgxLWFhYTMtNGRhZS1hYzliLTlhMzk4YjkyMTVlNyIsImMiOjN9&pageName=ReportSection842eec15de524192b588",
            height=600,
            width=900)

    elif page == "Meeting Summarization":

        symbols = [
            './Audio Files/Meeting 1.mp3', './Audio Files/Meeting 2.mp3',
            './Audio Files/Meeting 3.mp3', './Audio Files/Meeting 4.mp3'
        ]

        track = st.selectbox('Choose a the Meeting Audio', symbols)

        st.audio(track)
        data_dir = './inference-data/'

        ratiodata = st.text_input(
            "Please Enter a Ratio you want summary by: (TRY: 0.01)")
        if st.button("Generate a Summarized Version of the Meeting"):
            time.sleep(2.4)
            #st.success("This is the Summarized text of the Meeting Audio Files xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx  xxxxxxgeeeeeeeeeeeeeee eeeeeeeeeeeeeehjjjjjjjjjjjjjjjsdbjhvsdk vjbsdkvjbsdvkb skbdv")

            if track == "./Audio Files/Meeting 2.mp3":
                user_input = "NKE"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                        #print()
                        st.warning("Sentiment: Negative")
                except:
                    st.text("Please Enter a valid Decimal value like 0.01")

            else:
                user_input = "AGEN"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                        #print()
                        st.success("Sentiment: Positive")
                except:
                    st.text("Please Enter a valid Decimal value like 0.01")

    elif page == "Twitter Trends":

        st.write("""
        # Welcome to Twitter Sentiment App
        ### This app predicts the **Twitter Sentiments** you want!
        """)
        st.image(
            'https://assets.teenvogue.com/photos/56b4f21327a088e24b967bb6/3:2/w_531,h_354,c_limit/twitter-gifs.gif',
            width=250,
            use_column_width=200)

        #st.subheader("Select a topic which you'd like to get the sentiment analysis on :")

        ################# Twitter API Connection #######################
        consumer_key = "MaA51EmeZbgYazwFYOZxNRZR5"
        consumer_secret = "6ZUmFGFhiNzePsbkiPlKRBF7R9nq2dkDqfyfx7uU5eNgDhR8ci"
        access_token = "1359729189700722691-g8oMz8ONW6qtvibbQqqc6OAXJCIjeE"
        access_token_secret = "FzXBd0XRy2yZmtpH90GuyMaclmnyPGQEdxfCBO68BI0nb"

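        # Use the above credentials to authenticate the API.
        # SECURITY: these keys are hard-coded in source; rotate them and load
        # them from environment variables or a secrets store instead.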

        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        ################################################################

        df = pd.DataFrame(columns=[
            "Date", "User", "IsVerified", "Tweet", "Likes", "RT",
            'User_location'
        ])

        # Write a Function to extract tweets:
        def get_tweets(Topic, Count):
            """Store at most ``Count`` tweets matching ``Topic`` in ``df``.

            Each tweet becomes one row of the enclosing ``df`` (row labels
            0, 1, 2, ...), filled in place column by column.

            :param Topic: search query passed to the API as ``q``
            :param Count: maximum number of tweets to store; nothing is
                fetched when it is zero or negative
            """
            if Count <= 0:
                return
            # Let the cursor enforce the limit so exactly Count tweets are
            # read (the earlier manual counter let one extra tweet through).
            tweets = tweepy.Cursor(api.search,
                                   q=Topic,
                                   count=100,
                                   lang="en",
                                   exclude='retweets').items(Count)
            for i, tweet in enumerate(tweets):
                row = {
                    "Date": tweet.created_at,
                    "User": tweet.user.name,
                    "IsVerified": tweet.user.verified,
                    "Tweet": tweet.text,
                    "Likes": tweet.favorite_count,
                    "RT": tweet.retweet_count,
                    "User_location": tweet.user.location,
                }
                for column, value in row.items():
                    df.loc[i, column] = value

        # Function to Clean the Tweet.
        def clean_tweet(tweet):
            """Return ``tweet`` lower-cased with noise removed.

            Removed: @mentions, URLs, the standalone retweet marker "rt" and
            every character that is not an ASCII letter, digit, space or tab.
            Runs of whitespace collapse to single spaces.
            """
            # The text is lower-cased before matching, so the retweet marker
            # has to be matched as the whole word "rt".
            noise = r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)|(\brt\b)'
            return ' '.join(re.sub(noise, ' ', tweet.lower()).split())

        # Function to analyze sentiment
        def analyze_sentiment(tweet):
            """Label ``tweet`` by the sign of its TextBlob polarity.

            Returns 'Positive' for a polarity above zero, 'Neutral' for
            exactly zero and 'Negative' otherwise.
            """
            polarity = TextBlob(tweet).sentiment.polarity
            if polarity > 0:
                return 'Positive'
            if polarity == 0:
                return 'Neutral'
            return 'Negative'

        # Function to pre-process data for the word cloud
        def prepCloud(Topic_text, Topic):
            """Return ``Topic_text`` without stopwords or the topic's own words.

            :param Topic_text: whitespace-separated text to filter
            :param Topic: search topic; it is lower-cased, stripped of
                punctuation and split into words, which are added to the
                stopword set so they do not appear in the word cloud
            :return: the remaining words joined by single spaces
            """
            topic_words = re.sub(r'([^0-9A-Za-z \t])', ' ',
                                 str(Topic).lower()).split()
            stopwords = set(STOPWORDS)
            stopwords.update(topic_words)
            return " ".join(txt for txt in Topic_text.split()
                            if txt not in stopwords)

        # Collect Input from user :
        Topic = str()
        Topic = str(
            st.sidebar.text_input(
                "Enter the topic you are interested in (Press Enter once done)",
                "enter company name"))

        if len(Topic) > 0:

            # Call the function to extract the data. pass the topic and filename you want the data to be stored in.
            with st.spinner("Please wait, Tweets are being extracted"):
                get_tweets(Topic, Count=200)
            st.success('Tweets have been Extracted !!!!')

            # Call function to get Clean tweets
            df['clean_tweet'] = df['Tweet'].apply(lambda x: clean_tweet(x))

            # Call function to get the Sentiments
            df["Sentiment"] = df["Tweet"].apply(lambda x: analyze_sentiment(x))

            # Write Summary of the Tweets
            st.write("Total Tweets Extracted for Topic '{}' are : {}".format(
                Topic, len(df.Tweet)))
            st.write("Total Positive Tweets are : {}".format(
                len(df[df["Sentiment"] == "Positive"])))
            st.write("Total Negative Tweets are : {}".format(
                len(df[df["Sentiment"] == "Negative"])))
            st.write("Total Neutral Tweets are : {}".format(
                len(df[df["Sentiment"] == "Neutral"])))

            # See the Extracted Data :
            if st.button("See the Extracted Data"):
                #st.markdown(html_temp, unsafe_allow_html=True)
                st.success("Below is the Extracted Data :")
                st.write(df.head(50))

            # get the countPlot
            if st.button("Get Count Plot for Different Sentiments"):
                st.success("Generating A Count Plot")
                st.subheader(" Count Plot for Different Sentiments")
                st.write(sns.countplot(df["Sentiment"], palette="Blues"))
                st.pyplot()

            # Piechart
            if st.button("Get Pie Chart for Different Sentiments"):
                st.success("Generating A Pie Chart")
                a = len(df[df["Sentiment"] == "Positive"])
                b = len(df[df["Sentiment"] == "Negative"])
                c = len(df[df["Sentiment"] == "Neutral"])
                d = np.array([a, b, c])
                explode = (0.1, 0.0, 0.1)
                st.write(
                    plt.pie(d,
                            shadow=True,
                            explode=explode,
                            labels=["Positive", "Negative", "Neutral"],
                            autopct='%1.2f%%'))
                st.pyplot()

            # get the countPlot Based on Verified and unverified Users
            if st.button(
                    "Get Count Plot Based on Verified and unverified Users"):
                st.success(
                    "Generating A Count Plot (Verified and unverified Users)")
                st.subheader(
                    " Count Plot for Different Sentiments for Verified and unverified Users"
                )
                st.write(sns.countplot(df["Sentiment"], hue=df.IsVerified))
                st.pyplot()

            ## Points to add 1. Make Backgroud Clear for Wordcloud 2. Remove keywords from Wordcloud

            # Create a Worlcloud
            if st.button("Get WordCloud for all things said about {}".format(
                    Topic)):
                st.success(
                    "Generating A WordCloud for all things said about {}".
                    format(Topic))
                text = " ".join(review for review in df.clean_tweet)
                stopwords = set(STOPWORDS)
                text_newALL = prepCloud(text, Topic)
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Blues",
                    background_color="black").generate(text_newALL)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            #Wordcloud for Positive tweets only
            if st.button(
                    "Get WordCloud for all Positive Tweets about {}".format(
                        Topic)):
                st.success(
                    "Generating A WordCloud for all Positive Tweets about {}".
                    format(Topic))
                text_positive = " ".join(review for review in df[
                    df["Sentiment"] == "Positive"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_positive = prepCloud(text_positive, Topic)
                #text_positive=" ".join([word for word in text_positive.split() if word not in stopwords])
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Greens",
                    background_color="black").generate(text_new_positive)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            #Wordcloud for Negative tweets only
            if st.button(
                    "Get WordCloud for all Negative Tweets about {}".format(
                        Topic)):
                st.success(
                    "Generating A WordCloud for all Positive Tweets about {}".
                    format(Topic))
                text_negative = " ".join(review for review in df[
                    df["Sentiment"] == "Negative"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_negative = prepCloud(text_negative, Topic)
                #text_negative=" ".join([word for word in text_negative.split() if word not in stopwords])
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Reds",
                    background_color="black").generate(text_new_negative)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

        #st.sidebar.subheader("Scatter-plot setup")
        #box1 = st.sidebar.selectbox(label= "X axis", options = numeric_columns)
        #box2 = st.sidebar.selectbox(label="Y axis", options=numeric_columns)
        #sns.jointplot(x=box1, y= box2, data=df, kind = "reg", color= "red")
        #st.pyplot()

    elif page == "Stock Future Prediction":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")

        st.title('Stock Forecast App')

        st.image(
            'https://media2.giphy.com/media/JtBZm3Getg3dqxK0zP/giphy-downsized-large.gif',
            width=250,
            use_column_width=200)

        # stocks = ('GOOG', 'AAPL', 'MSFT', 'GME', 'W', 'TSLA')
        # selected_stock = st.selectbox('Select dataset for prediction', stocks)

        n_years = st.slider('Years of prediction:', 1, 4)
        period = n_years * 365

        st.title('Stock Forecast App To Do part in stockapp.py')

        data_load_state = st.text('Loading data...')

        data = yf.download(ticker, START, TODAY)
        data.reset_index(inplace=True)
        data_load_state.text('Loading data... done!')

        st.subheader('Raw data')
        st.write(data.tail())

        # Plot raw data
        def plot_raw_data():
            """Draw the downloaded open and close prices as a Plotly line chart.

            Reads the enclosing ``data`` frame (columns ``Date``, ``Open`` and
            ``Close``) and renders the figure, with a range slider on the x
            axis, through Streamlit.
            """
            traces = [
                go.Scatter(x=data['Date'], y=data['Open'], name="stock_open"),
                go.Scatter(x=data['Date'], y=data['Close'],
                           name="stock_close"),
            ]
            chart = go.Figure()
            for trace in traces:
                chart.add_trace(trace)
            chart.layout.update(title_text='Time Series data with Rangeslider',
                                xaxis_rangeslider_visible=True)
            st.plotly_chart(chart)

        plot_raw_data()

        # Predict forecast with Prophet.
        df_train = data[['Date', 'Close']]
        df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})

        m = Prophet()
        m.fit(df_train)
        future = m.make_future_dataframe(periods=period)
        forecast = m.predict(future)

        # Show and plot forecast
        st.subheader('Forecast data')
        st.write(forecast.tail())

        st.write(f'Forecast plot for {n_years} years')
        fig1 = plot_plotly(m, forecast)
        st.plotly_chart(fig1)

        st.write("Forecast components")
        fig2 = m.plot_components(forecast)
        st.write(fig2)

    elif page == "Company Advanced Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        stock = yf.Ticker(ticker)

        def calcMovingAverage(data, size):
            """Return a copy of ``data`` extended with moving averages.

            Two columns are derived from ``'Adj Close'``: ``sma`` (rolling mean
            over ``size`` rows) and ``ema`` (exponentially weighted mean with
            ``span=size`` and ``min_periods=size``). Rows containing any
            missing value are dropped. ``data`` itself is left untouched.
            """
            prices = data['Adj Close']
            averaged = data.copy()
            averaged['sma'] = prices.rolling(size).mean()
            averaged['ema'] = prices.ewm(span=size, min_periods=size).mean()
            return averaged.dropna()

        def calc_macd(data):
            """Return a copy of ``data`` extended with MACD indicator columns.

            From ``'Adj Close'`` this derives ``ema12`` and ``ema26``
            (exponentially weighted means with spans 12 and 26), ``macd``
            (``ema12 - ema26``) and ``signal`` (span-9 exponentially weighted
            mean of ``macd``). Rows containing any missing value are dropped.
            ``data`` itself is left untouched.
            """
            prices = data['Adj Close']
            fast_ema = prices.ewm(span=12, min_periods=12).mean()
            slow_ema = prices.ewm(span=26, min_periods=26).mean()
            macd_line = fast_ema - slow_ema

            enriched = data.copy()
            enriched['ema12'] = fast_ema
            enriched['ema26'] = slow_ema
            enriched['macd'] = macd_line
            enriched['signal'] = macd_line.ewm(span=9, min_periods=9).mean()
            return enriched.dropna()

        def calcBollinger(data, size):
            """Return a copy of ``data`` extended with Bollinger band columns.

            Using a rolling window of ``size`` rows over ``'Adj Close'``, adds
            ``sma`` (window mean), ``bolu`` / ``bold`` (mean plus / minus two
            population standard deviations, ``ddof=0``) and ``width``
            (``bolu - bold``). Rows containing any missing value are dropped.
            ``data`` itself is left untouched.
            """
            window = data['Adj Close'].rolling(size)
            middle = window.mean()
            band = 2 * window.std(ddof=0)

            bands = data.copy()
            bands["sma"] = middle
            bands["bolu"] = middle + band
            bands["bold"] = middle - band
            bands["width"] = bands["bolu"] - bands["bold"]
            return bands.dropna()

        st.title('Company Stocks Advanced Details')
        st.subheader('Moving Average')

        coMA1, coMA2 = st.beta_columns(2)

        with coMA1:
            numYearMA = st.number_input('Insert period (Year): ',
                                        min_value=1,
                                        max_value=10,
                                        value=2,
                                        key=0)

        with coMA2:
            windowSizeMA = st.number_input('Window Size (Day): ',
                                           min_value=5,
                                           max_value=500,
                                           value=20,
                                           key=1)

        start = dt.datetime.today() - dt.timedelta(numYearMA * 365)
        end = dt.datetime.today()
        dataMA = yf.download(ticker, start, end)
        df_ma = calcMovingAverage(dataMA, windowSizeMA)
        df_ma = df_ma.reset_index()

        figMA = go.Figure()

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['Adj Close'],
                       name="Prices Over Last " + str(numYearMA) + " Year(s)"))

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['sma'],
                       name="SMA" + str(windowSizeMA) + " Over Last " +
                       str(numYearMA) + " Year(s)"))

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['ema'],
                       name="EMA" + str(windowSizeMA) + " Over Last " +
                       str(numYearMA) + " Year(s)"))

        figMA.update_layout(
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))

        figMA.update_layout(legend_title_text='Trend')
        figMA.update_yaxes(tickprefix="$")

        st.plotly_chart(figMA, use_container_width=True)

        st.subheader('Moving Average Convergence Divergence (MACD)')
        numYearMACD = st.number_input('Insert period (Year): ',
                                      min_value=1,
                                      max_value=10,
                                      value=2,
                                      key=2)

        startMACD = dt.datetime.today() - dt.timedelta(numYearMACD * 365)
        endMACD = dt.datetime.today()
        dataMACD = yf.download(ticker, startMACD, endMACD)
        df_macd = calc_macd(dataMACD)
        df_macd = df_macd.reset_index()

        figMACD = make_subplots(rows=2,
                                cols=1,
                                shared_xaxes=True,
                                vertical_spacing=0.01)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['Adj Close'],
                                     name="Prices Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['ema12'],
                                     name="EMA 12 Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['ema26'],
                                     name="EMA 26 Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['macd'],
                                     name="MACD Line"),
                          row=2,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['signal'],
                                     name="Signal Line"),
                          row=2,
                          col=1)

        figMACD.update_layout(legend=dict(
            orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))

        figMACD.update_yaxes(tickprefix="$")
        st.plotly_chart(figMACD, use_container_width=True)

        st.subheader('Bollinger Band')
        coBoll1, coBoll2 = st.beta_columns(2)
        with coBoll1:
            numYearBoll = st.number_input('Insert period (Year): ',
                                          min_value=1,
                                          max_value=10,
                                          value=2,
                                          key=6)

        with coBoll2:
            windowSizeBoll = st.number_input('Window Size (Day): ',
                                             min_value=5,
                                             max_value=500,
                                             value=20,
                                             key=7)

        startBoll = dt.datetime.today() - dt.timedelta(numYearBoll * 365)
        endBoll = dt.datetime.today()
        dataBoll = yf.download(ticker, startBoll, endBoll)
        df_boll = calcBollinger(dataBoll, windowSizeBoll)
        df_boll = df_boll.reset_index()
        figBoll = go.Figure()
        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'], y=df_boll['bolu'],
                       name="Upper Band"))

        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'],
                       y=df_boll['sma'],
                       name="SMA" + str(windowSizeBoll) + " Over Last " +
                       str(numYearBoll) + " Year(s)"))

        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'], y=df_boll['bold'],
                       name="Lower Band"))

        figBoll.update_layout(legend=dict(
            orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))

        figBoll.update_yaxes(tickprefix="$")
        st.plotly_chart(figBoll, use_container_width=True)

    elif page == "Live News Sentiment":

        st.image('https://www.visitashland.com/files/latestnews.jpg',
                 width=250,
                 use_column_width=200)

        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        if st.button("Click here to See Latest News about " + ticker + ""):

            st.header('Latest News')

            def newsfromfizviz(temp):
                """Scrape finviz headlines for one ticker and score their sentiment.

                Downloads the finviz quote page for ``temp``, reads the rows of
                the element with id ``news-table`` and scores every headline
                with ``SentimentIntensityAnalyzer.polarity_scores``.

                Args:
                    temp: Ticker symbol appended to the finviz quote URL.

                Returns:
                    A DataFrame with the columns ``ticker``, ``date``, ``time``
                    and ``headline``, the polarity scores joined alongside, and
                    a ``Sentiment`` column that is ``'Positive'`` when
                    ``compound`` is above zero, ``'Neutral'`` when it equals
                    zero and ``'Negative'`` otherwise.
                """
                finwiz_url = 'https://finviz.com/quote.ashx?t='

                news_tables = {}
                for symbol in [temp]:
                    req = Request(url=finwiz_url + symbol,
                                  headers={'user-agent': 'my-app/0.0.1'})
                    # Parse inside the ``with`` block so the HTTP response is
                    # closed even when parsing raises.
                    with urlopen(req) as response:
                        html = BeautifulSoup(response)
                    # Keep the 'news-table' element for this ticker.
                    news_tables[symbol] = html.find(id='news-table')

                parsed_news = []

                for file_name, news_table in news_tables.items():
                    # The ticker is the part of the key before the first '_'.
                    symbol = file_name.split('_')[0]
                    # Rows that only carry a time reuse the date of the most
                    # recent row that had one. Starting from None means a
                    # leading time-only row no longer hits an unbound variable.
                    news_date = None
                    for row in news_table.findAll('tr'):
                        # Rows without a headline link or a timestamp cell
                        # cannot be parsed; skip them instead of crashing.
                        if row.a is None or row.td is None:
                            continue
                        stamp = row.td.text.split()
                        if not stamp:
                            continue
                        if len(stamp) == 1:
                            news_time = stamp[0]
                        else:
                            news_date, news_time = stamp[0], stamp[1]
                        parsed_news.append(
                            [symbol, news_date, news_time,
                             row.a.get_text()])

                vader = SentimentIntensityAnalyzer()

                columns = ['ticker', 'date', 'time', 'headline']
                parsed_and_scored_news = pd.DataFrame(parsed_news,
                                                      columns=columns)

                # One dict of polarity scores per headline.
                scores = parsed_and_scored_news['headline'].apply(
                    vader.polarity_scores).tolist()
                scores_df = pd.DataFrame(scores)

                # Put the scores next to the headline they belong to.
                parsed_and_scored_news = parsed_and_scored_news.join(
                    scores_df, rsuffix='_right')

                # Convert the scraped date strings to ``datetime.date`` values.
                parsed_and_scored_news['date'] = pd.to_datetime(
                    parsed_and_scored_news.date).dt.date

                parsed_and_scored_news['Sentiment'] = np.where(
                    parsed_and_scored_news['compound'] > 0, 'Positive',
                    (np.where(parsed_and_scored_news['compound'] == 0,
                              'Neutral', 'Negative')))

                return parsed_and_scored_news

            df = newsfromfizviz(ticker)
            df_pie = df[['Sentiment', 'headline']].groupby('Sentiment').count()
            fig = px.pie(df_pie,
                         values=df_pie['headline'],
                         names=df_pie.index,
                         color=df_pie.index,
                         color_discrete_map={
                             'Positive': 'green',
                             'Neutral': 'darkblue',
                             'Negative': 'red'
                         })

            st.subheader('Dataframe with Latest News')
            st.dataframe(df)

            st.subheader('Latest News Sentiment Distribution using Pie Chart')
            st.plotly_chart(fig)

            plt.rcParams['figure.figsize'] = [11, 5]

            # Group by date and ticker columns from scored_news and calculate the mean
            mean_scores = df.groupby(['ticker', 'date']).mean()

            # Unstack the column ticker
            mean_scores = mean_scores.unstack()

            # Get the cross-section of compound in the 'columns' axis
            mean_scores = mean_scores.xs('compound',
                                         axis="columns").transpose()

            # Plot a bar chart with pandas
            mean_scores.plot(kind='bar')

            plt.grid()

            st.set_option('deprecation.showPyplotGlobalUse', False)

            st.subheader('Sentiments over Time')
            st.pyplot()

    elif page == "Company Basic Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        stock = yf.Ticker(ticker)
        stock = yf.Ticker(ticker)
        info = stock.info
        st.title('Company Basic Details')
        st.subheader(info['longName'])
        st.markdown('** Sector **: ' + info['sector'])
        st.markdown('** Industry **: ' + info['industry'])
        st.markdown('** Phone **: ' + info['phone'])
        st.markdown('** Address **: ' + info['address1'] + ', ' +
                    info['city'] + ', ' + info['zip'] + ', ' + info['country'])
        st.markdown('** Website **: ' + info['website'])
        st.markdown('** Business Summary **')
        st.info(info['longBusinessSummary'])

        fundInfo = {
            'Enterprise Value (USD)': info['enterpriseValue'],
            'Enterprise To Revenue Ratio': info['enterpriseToRevenue'],
            'Enterprise To Ebitda Ratio': info['enterpriseToEbitda'],
            'Net Income (USD)': info['netIncomeToCommon'],
            'Profit Margin Ratio': info['profitMargins'],
            'Forward PE Ratio': info['forwardPE'],
            'PEG Ratio': info['pegRatio'],
            'Price to Book Ratio': info['priceToBook'],
            'Forward EPS (USD)': info['forwardEps'],
            'Beta ': info['beta'],
            'Book Value (USD)': info['bookValue'],
            'Dividend Rate (%)': info['dividendRate'],
            'Dividend Yield (%)': info['dividendYield'],
            'Five year Avg Dividend Yield (%)':
            info['fiveYearAvgDividendYield'],
            'Payout Ratio': info['payoutRatio']
        }

        fundDF = pd.DataFrame.from_dict(fundInfo, orient='index')
        fundDF = fundDF.rename(columns={0: 'Value'})
        st.subheader('Fundamental Info')
        st.table(fundDF)

        st.subheader('General Stock Info')
        st.markdown('** Market **: ' + info['market'])
        st.markdown('** Exchange **: ' + info['exchange'])
        st.markdown('** Quote Type **: ' + info['quoteType'])

        start = dt.datetime.today() - dt.timedelta(2 * 365)
        end = dt.datetime.today()
        df = yf.download(ticker, start, end)
        df = df.reset_index()
        fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Adj Close']))
        fig.update_layout(
            title={
                'text': "Stock Prices Over Past Two Years",
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            })
        st.plotly_chart(fig, use_container_width=True)

        marketInfo = {
            "Volume": info['volume'],
            "Average Volume": info['averageVolume'],
            "Market Cap": info["marketCap"],
            "Float Shares": info['floatShares'],
            "Regular Market Price (USD)": info['regularMarketPrice'],
            'Bid Size': info['bidSize'],
            'Ask Size': info['askSize'],
            "Share Short": info['sharesShort'],
            'Short Ratio': info['shortRatio'],
            'Share Outstanding': info['sharesOutstanding']
        }

        marketDF = pd.DataFrame(data=marketInfo, index=[0])
        st.table(marketDF)

    else:
        verified = "False"
        result = "Please enter valid Username, Password and Acess Token!!"

        st.title(result)
Exemplo n.º 10
0
            y=bikerides['rain'],
            name='Rain',
        ))

# Optionally overlay the temperature series on the figure.
if temp_con == True:
    fig.add_trace(
        go.Scatter(
            x=bikerides['ds'],
            y=bikerides['temp'],
            name='Temp',
        ))

# Render the assembled figure.
st.write(fig)

# Show the model's forecast plot and its component breakdown.
st.write(m.plot(forecast))
st.write(m.plot_components(forecast))

# Column-wise mean of whatever getPerformanceMetrics returns for the model.
st.write(getPerformanceMetrics(m).mean())

st.header('Forecasting Rides')
# Let the user choose the forecast horizon (slider bounds: 1 to 365).
period = st.slider(
    'Forecast Periods in days',
    1,
    365,
)
st.write("Forecast Periods ", period, " days")

# Build a data frame holding the dates to forecast. With freq='D' the
# `periods` argument counts days, so the horizon is the slider value above.
future = m.make_future_dataframe(periods=period, freq='D')

# Left-join the bikerides columns onto the future dates by 'ds'; dates with no
# match in bikerides get missing values in those columns.
future = future.merge(bikerides, on='ds', how='left')
def _clip_negative_forecast(forecast):
    """Floor ``yhat`` and both interval bounds of ``forecast`` at zero, in place."""
    for column in ("yhat", "yhat_lower", "yhat_upper"):
        forecast[column] = forecast[column].clip(lower=0)
    return forecast


def _add_temperature_features(frame, temperature):
    """Attach the temperature regressor columns to ``frame``, in place.

    Adds ``temperature``, its square, and one-step lags of both. The first
    lagged value has no predecessor and is filled with the mean temperature
    (squared for the squared lag).
    """
    frame["temperature"] = temperature
    frame["temperature2"] = frame["temperature"]**2
    mean_temperature = frame["temperature"].mean()
    frame["temperature_lag"] = frame["temperature"].shift(
        1, fill_value=mean_temperature)
    frame["temperature2_lag"] = frame["temperature2"].shift(
        1, fill_value=mean_temperature**2)
    return frame


def _rmse(actual, predicted):
    """Return the root mean squared error between two aligned sequences."""
    return np.sqrt(mean_squared_error(actual, predicted))


def compare_models(data, variable, test_size):
    """Fit several forecasters on ``data[variable]`` and print their test RMSE.

    The last ``test_size`` rows of ``data`` are held out as the test set. Five
    predictors are compared: the overall training mean, the training mean
    grouped by (month, weekday, hour), a default Prophet model, a Prophet model
    with the built-in "FRA" country holidays, and a Prophet model with
    temperature regressors. Negative Prophet predictions are floored at zero.

    Args:
        data: Frame whose index exposes ``month``, ``weekday`` and ``hour``
            and which contains ``variable`` and a ``"temperature"`` column.
        variable: Name of the column to forecast.
        test_size: Number of trailing rows used as the test set; also the
            number of hourly periods forecast.

    Side effects:
        Stores the forecast and component plots of each Prophet model in the
        module-level globals ``forecast_plot_*`` / ``component_plot_*`` and
        prints one RMSE line per predictor.
    """
    global forecast_plot_simple, component_plot_simple
    global forecast_plot_holiday, component_plot_holiday
    global forecast_plot_temp, component_plot_temp

    test_split = len(data) - test_size

    # Baseline: mean of `variable` per (month, weekday, hour) on training rows.
    data_train = data[:test_split][[variable]]
    data_train_grouped = data_train.groupby([
        data_train.index.month, data_train.index.weekday, data_train.index.hour
    ]).mean()
    data_train_grouped.index.names = ["month", "weekday", "hour"]

    test_index = data[test_split:].index
    data_test = pd.DataFrame(
        data={
            "month": test_index.month,
            "weekday": test_index.weekday,
            "hour": test_index.hour,
        },
        index=test_index,
    )

    mean_grouped_predictions = data_test.join(data_train_grouped,
                                              how="left",
                                              on=["month", "weekday",
                                                  "hour"])[variable]

    # Prophet expects the timestamp in 'ds' and the target in 'y'.
    df = data[variable].reset_index(level=0)
    df.columns = ["ds", "y"]

    # Copy the training slice: regressor columns are added to it further down,
    # and writing to a plain slice of `df` triggers SettingWithCopyWarning.
    df_train = df[:test_split].copy()
    df_test = df[test_split:]

    # Default Prophet model.
    m_simple = Prophet()
    m_simple.fit(df_train)
    future_simple = m_simple.make_future_dataframe(periods=test_size, freq="H")
    forecast_simple = _clip_negative_forecast(m_simple.predict(future_simple))

    forecast_plot_simple = m_simple.plot(forecast_simple)
    component_plot_simple = m_simple.plot_components(forecast_simple)

    # Built-in holidays are used because they apply to predictions as well.
    m_holiday = Prophet()
    m_holiday.add_country_holidays(country_name="FRA")
    m_holiday.fit(df_train)
    future_holiday = m_holiday.make_future_dataframe(periods=test_size,
                                                     freq="H")
    forecast_holiday = _clip_negative_forecast(
        m_holiday.predict(future_holiday))

    forecast_plot_holiday = m_holiday.plot(forecast_holiday)
    component_plot_holiday = m_holiday.plot_components(forecast_holiday)

    # Prophet with temperature, squared temperature and their lags as
    # additional regressors.
    m_temp = Prophet()
    for regressor in ("temperature", "temperature2", "temperature_lag",
                      "temperature2_lag"):
        m_temp.add_regressor(regressor)
    _add_temperature_features(df_train,
                              data["temperature"][:test_split].to_numpy())
    m_temp.fit(df_train)
    future_temp = m_temp.make_future_dataframe(periods=test_size, freq="H")
    _add_temperature_features(
        future_temp, data["temperature"][-len(future_temp):].to_numpy())
    forecast_temp = _clip_negative_forecast(m_temp.predict(future_temp))

    forecast_plot_temp = m_temp.plot(forecast_temp)
    component_plot_temp = m_temp.plot_components(forecast_temp)

    # Report RMSE. The square root matters: mean_squared_error alone is the
    # MSE, which does not match the "RMSE" labels printed below.
    print(
        "Mean RMSE: ",
        _rmse(df_test.y, np.repeat(df_train.y.mean(), len(df_test))),
    )
    print("Mean grouped RMSE: ", _rmse(df_test.y, mean_grouped_predictions))
    print(
        "Simple Prophet: ",
        _rmse(df_test.y, forecast_simple.yhat[test_split:]),
    )
    print(
        "Holiday Prophet: ",
        _rmse(df_test.y, forecast_holiday.yhat[test_split:]),
    )
    print(
        "Temperature Prophet: ",
        _rmse(df_test.y, forecast_temp.yhat[test_split:]),
    )
Exemplo n.º 12
0
# Bare expression; presumably a notebook cell that displays the frame.
organic_df

# Plot the organic average price over time, hiding the x-axis ticks.
plt.figure(figsize=(20,5))
plt.plot(organic_df['Date'], organic_df['AveragePrice'])
plt.xticks([])

# Prophet expects the timestamp in 'ds' and the target in 'y'.
organic_df = organic_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})

# Fit on the organic prices and forecast 365 periods past the training data.
m = Prophet()
m.fit(organic_df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

figure = m.plot(forecast, xlabel='Date', ylabel='Price')

decomposed_figure = m.plot_components(forecast)

"""##Price Prediction with Conventional Type"""

# Repeat the same steps for the rows whose type is 'conventional'.
conventional_df = df[df['type']=='conventional']
conventional_df

plt.figure(figsize=(20,5))
plt.plot(conventional_df['Date'], conventional_df['AveragePrice'])
plt.xticks([])

conventional_df = conventional_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})

# Note: `m` and `future` are rebound here, replacing the organic model above.
m = Prophet()
m.fit(conventional_df)
future = m.make_future_dataframe(periods=365)
Exemplo n.º 13
0
def main():
    """Render a Streamlit page with price, dividend, social and forecast views.

    For the hard-coded symbol this shows the price history, closing-price and
    volume charts, yearly dividend totals, a closing-price comparison of a few
    favourite tickers, the messages returned by the StockTwits symbol stream,
    and a 365-period Prophet forecast of the closing price.
    """
    st.header("Online Stock Price Ticker")

    # The symbol is hard-coded; a text input could replace it.
    symbol = 'AMZN'

    data = yf.Ticker(symbol)

    today = datetime.now().date().isoformat()  # as an ISO-format string
    print(today)

    # ---------------------- price history ----------------------
    df = data.history(start='2010-06-01', end=today)

    st.dataframe(df)
    # ---------------------- closing-price chart ----------------------
    st.subheader('종가')
    st.line_chart(df['Close'])

    # ---------------------- volume chart ----------------------
    st.subheader('거래량')
    st.line_chart(df['Volume'])

    # Other attributes of the yfinance Ticker object that could be shown:
    # info, calendar, major_holders, institutional_holders, recommendations.
    div_df = data.dividends  # dividend information
    st.dataframe(div_df.resample('Y').sum())

    new_df = div_df.reset_index()
    new_df['Year'] = new_df['Date'].dt.year

    st.dataframe(new_df)

    fig = plt.figure()
    plt.bar(new_df['Year'], new_df['Dividends'])
    st.pyplot(fig)

    # Show several stocks at once.
    favorites = ['msft', 'aapl', 'amzn', 'tsla', 'nvda']

    f_df = pd.DataFrame()
    for stock in favorites:
        f_df[stock] = yf.Ticker(stock).history(start='2010-01-01',
                                               end=today)['Close']

    st.dataframe(f_df)
    # Draw the comparison chart.
    st.line_chart(f_df)

    # ============================= stocktwits =============================
    # Call the StockTwits API. The timeout keeps an unresponsive server from
    # hanging the page indefinitely.
    res = requests.get(
        'https://api.stocktwits.com/api/2/streams/symbol/{}.json'.format(
            symbol),
        timeout=10)
    # The response is JSON, so decode it into Python dicts and lists.
    res_data = res.json()

    # A payload without a 'messages' key is treated as having no messages
    # rather than raising KeyError.
    for message in res_data.get('messages', []):

        col1, col2 = st.beta_columns([1, 4])  # two columns, 1:4 width ratio

        with col1:
            st.image(message['user']['avatar_url'])  # avatar image
        with col2:
            st.write('유저이름 : ' + message['user']['username'])
            st.write('트윗 내용 : ' + message['body'])
            st.write('올린 시간 : ' + message['created_at'])

    # Prophet expects the timestamp in 'ds' and the target in 'y'.
    p_df = df.reset_index()
    p_df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

    st.dataframe(p_df)

    # Fit the forecaster and predict 365 periods past the history.
    m = Prophet()
    m.fit(p_df)

    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)

    st.dataframe(forecast)

    fig1 = m.plot(forecast)
    st.pyplot(fig1)

    fig2 = m.plot_components(forecast)
    st.pyplot(fig2)
# In[77]:

# Trend comparison. Each line gets a label so plt.legend() has entries to
# show; without labels the legend is empty.
plt.figure(figsize=(10, 7))
plt.plot(forecast['Date'], forecast['furniture_trend'], 'b-',
         label='Furniture')
plt.plot(forecast['Date'], forecast['office_trend'], 'r-',
         label='Office Supplies')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Sales Trend')

# In[78]:

# Estimate (yhat) comparison, labelled the same way.
plt.figure(figsize=(10, 7))
plt.plot(forecast['Date'], forecast['furniture_yhat'], 'b-',
         label='Furniture')
plt.plot(forecast['Date'], forecast['office_yhat'], 'r-',
         label='Office Supplies')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Estimate')

# # Trends and Patterns
# Now, we can use the Prophet Models to inspect different trends of these two categories in the data.

# In[79]:

furniture_model.plot_components(furniture_forecast)

# In[80]:

office_model.plot_components(office_forecast)
Exemplo n.º 15
0
import pandas as pd
import numpy as np
from fbprophet import Prophet

# Prep the dataset

data = pd.read_csv("/home/dusty/Econ8310/DataSets/chicagoBusRiders.csv")
# Copy the filtered rows so the assignments below modify an independent frame
# instead of a slice of `data`.
route3 = data[data.route=='3'][['date','rides']].copy()
route3.date = pd.to_datetime(route3.date, infer_datetime_format=True)
# Prophet looks up the columns 'ds' and 'y' by name. A flat list is required:
# the nested list [['ds', 'y']] builds a MultiIndex instead of plain names.
route3.columns = ['ds', 'y']

# Initialize Prophet instance and fit to data

m = Prophet()
m.fit(route3)

# Create timeline for 1 year in future, then generate predictions based on that timeline

future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Create plots of forecast and truth, as well as component breakdowns of the trends

# NOTE: `plt` here is the object returned by m.plot, not the matplotlib.pyplot module.
plt = m.plot(forecast)
plt.show()

comp = m.plot_components(forecast)
comp.show()
Exemplo n.º 16
0
# Move 'ds' out of the index and back into a regular column.
prophet_pred = prophet_pred.reset_index('ds')


# In[79]:


#plot prophet yhat, yhat_lower, and yhat_upper

pro.plot(prophet_pred)
# Save the current matplotlib figure to a hard-coded absolute path.
plt.savefig(r'/home/songy4/Documents/prophet_log_range.png')


# In[80]:


# Plot the forecast components and save that figure as well.
pro.plot_components(prophet_pred)
plt.savefig(r'/home/songy4/Documents/prophet_components.png')


# In[81]:


#calculate rmse, mse errors and mean value

prophet_rmse_error = rmse(test_US['Total_case'], test_US['Prophet_yhat'])
# MSE is obtained by squaring the RMSE computed above.
prophet_mse_error = prophet_rmse_error **2
mean_value = df_US['Total_case'].mean()

print(f'RMSE Error: {prophet_rmse_error} \n  MSE Error: {prophet_mse_error} \n Mean: {mean_value}')

Exemplo n.º 17
0
pjme_test_fcst.tail()

# In[15]:

# Draw the forecast on a 15 x 5 inch canvas
f, ax = plt.subplots(1, figsize=(15, 5))
fig = model.plot(pjme_test_fcst, ax=ax)
plt.show()

# In[16]:

# Break the forecast down into its components
fig = model.plot_components(pjme_test_fcst)

# In[17]:

# Overlay the forecast on the observed values (red dots), same canvas size
f, ax = plt.subplots(1, figsize=(15, 5))
ax.scatter(pjme_test.index, pjme_test['PJME_MW'], color='r')
fig = model.plot(pjme_test_fcst, ax=ax)

# In[40]:

# Plot the forecast with the actuals
f, ax = plt.subplots(1)
f.set_figheight(5)
Exemplo n.º 18
0
# Build the model: linear growth, yearly seasonality of order 10, no weekly or
# daily seasonality, multiplicative seasonality mode. It is fitted on `train`.
m = Prophet(growth="linear",
            yearly_seasonality=10,
            weekly_seasonality=False,
            daily_seasonality=False,
            seasonality_mode="multiplicative").fit(train)

# Forecast horizon: 24 further periods at month-start ("MS") frequency.
future = m.make_future_dataframe(periods=24, freq="MS")
forecast = m.predict(df=future)
fig = m.plot(forecast)
fig.savefig("figure/02_prophet_forecast.png")
# Clear the figure after saving it.
fig.clf()

# Trend and yearly seasonality (component plot).
fig = m.plot_components(forecast)
fig.savefig("figure/03_prophet_components.png")
fig.clf()

# Observed values, predicted values and the uncertainty interval.
plt.scatter(df.index,
            df["#Passengers"],
            color="black",
            s=10,
            label="#Passengers")
plt.plot(forecast["ds"], forecast["yhat"], label="yhat")
plt.fill_between(df.index,
                 forecast["yhat_upper"],
                 forecast["yhat_lower"],
                 color="blue",
                 alpha=.1,
Exemplo n.º 19
0
# Model with custom holidays plus the built-in holidays for Chile; weekly
# seasonality only, and 20 potential changepoints.
m = Prophet(holidays=holidays, weekly_seasonality=True,
            daily_seasonality=False,
            yearly_seasonality=False, n_changepoints=20)
m.add_country_holidays(country_name='Chile')
m.fit(df)

# Build the frame of dates to predict: the history plus 7 future periods.
future = m.make_future_dataframe(periods=7)
future.tail()

# Forecast
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(14)

# Plot the forecast components.
fig2 = m.plot_components(forecast)
# plt.title acts on the current axes of the component figure.
plt.title('Componentes del forecast sin tuning')
plt.show()

# Plot the forecast and overlay the changepoints, i.e. where the trend changes.
fig3 = m.plot(forecast)
a = add_changepoints_to_plot(fig3.gca(), m, forecast)
plt.title('Pronóstico con changepoints modelo sin tunear')
plt.show()

# Cross-validation: 30 days of initial history, 7-day horizon, a cutoff every
# day, run in parallel processes.
df_cv = cross_validation(m, initial='30 days', horizon='7 days',
                         parallel='processes', period='1 days')

# rolling_window=1 aggregates the metrics over the whole horizon.
df_p = performance_metrics(df_cv, rolling_window=1)
Exemplo n.º 20
0
# NOTE(review): forecastHorizon is not used in the visible code; the horizon
# below is hard-coded as 40 periods / '40 days' - confirm which one is intended.
forecastHorizon = 24
forecastModel = Prophet()
forecastModel.fit(train_X)

# The forecast frame built below holds the point estimate and its interval
# bounds in the columns 'ds', 'yhat', 'yhat_lower' and 'yhat_upper'.

# Drop weekend dates from the future frame because the stock market is closed
# then (weekday 0-4 is Monday-Friday).
future = forecastModel.make_future_dataframe(periods=40)
future['day'] = future['ds'].dt.weekday
future = future[future['day']<=4]
forecast = forecastModel.predict(future)

# Plot the forecast, its components, and the changepoints on the forecast plot.
forecastPlot = forecastModel.plot(forecast)
forecastComponentPlot = forecastModel.plot_components(forecast)
plotWithChangepoints = add_changepoints_to_plot(forecastPlot.gca(), forecastModel, forecast)

# The Prophet constructor argument changepoint_prior_scale controls how much
# flexibility the trend is allowed at changepoints: larger values make the
# model more flexible, smaller values less flexible. It therefore affects
# over- and under-fitting and should be chosen carefully.
# Prophet defaults this parameter to 0.05.

# Candidate scales to compare via cross-validation.
# NOTE(review): a prior scale of 0.00 may be rejected by the fitting backend - confirm.
change_point_scale = [0.00,0.25,0.50,0.75,1.00,1.25,1.50]
# One (metrics, raw cross-validation frame) pair per candidate, in list order.
performances = []
for changePoint in change_point_scale:
    forecastModelCP = Prophet(changepoint_prior_scale=changePoint)
    forecastModelCP.fit(train_X)
    forecastCV = cross_validation(forecastModelCP, horizon='40 days')
    forecastPM = performance_metrics(forecastCV)
    performances.append((forecastPM,forecastCV))
Exemplo n.º 21
0
# Finally, we create the matplotlib figure.
figure = model.plot(forecast, xlabel='Date', ylabel='Points')

# Get the D3 HTML graph.
# with open("test.html", "w") as file:
#     fig = pd.Series(figure).to_json()
#     html = mpld3.fig_to_html(fig)
#     file.write(html)

# And then save it to an image file.
# pyplot.savefig writes the current figure, i.e. the forecast figure created above.
matplotlib.pyplot.savefig("out.png")
pp.pprint(figure)

# Components figure.
figure_components = model.plot_components(forecast)

# And then save it to an image file.
# The components figure is now the current figure.
matplotlib.pyplot.savefig("out-components.png")
# Print the components figure; previously `figure` was printed a second time.
pp.pprint(figure_components)

with open("predictions.json", "w") as file:
    forecast_data_orig = forecast
    #forecast_data_orig['yhat'] = np.exp(forecast_data_orig['yhat'])
    forecast_data_orig['yhat_lower'] = np.exp(forecast_data_orig['yhat_lower'])
    forecast_data_orig['yhat_upper'] = np.exp(forecast_data_orig['yhat_upper'])

    # print(forecast_data_orig)
    d = forecast_data_orig['yhat'].to_dict()
    predictions = []
# Bare expressions such as the next line only display the value in a notebook.
df_sample
# Rename the date and price columns to the 'ds' / 'y' names Prophet works with.
df_sample = df_sample.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_sample

# # TASK 5: DEVELOP MODEL AND MAKE PREDICTIONS - PART A

m = Prophet()
m.fit(df_sample)

# Forecast 365 periods into the future
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast

figure = m.plot(forecast, xlabel='Date', ylabel='Price')
figure2 = m.plot_components(forecast)

# # TASK 6: DEVELOP MODEL AND MAKE PREDICTIONS (REGION SPECIFIC) - PART B

# Select two specific regions from the full frame
df_r1 = df[df['region'] == 'West']
df_r2 = df[df['region'] == 'Chicago']

# Order both by date so the line plots below run chronologically
df_r1 = df_r1.sort_values('Date')
df_r2 = df_r2.sort_values('Date')

plt.plot(df_r1['Date'], df_r1['AveragePrice'])
plt.plot(df_r2['Date'], df_r2['AveragePrice'])

# Same 'ds' / 'y' renaming as above, for the two regional frames
df_r1 = df_r1.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_r2 = df_r2.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
Exemplo n.º 23
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author = 'wyx'
@time = 2017/6/22 10:04
@annotation = ''
"""
import numpy as np
import pandas as pd
from fbprophet import Prophet
from matplotlib import pyplot as plot

"""
Prophet allows you to make forecasts using a logistic growth trend model, with a specified carrying capacity.


"""
df = pd.read_csv('example_wp_R.csv')
# The model is fitted on the natural log of the observations.
df['y'] = np.log(df['y'])
# Logistic growth needs a carrying capacity column; it is on the same (log)
# scale as 'y'.
df['cap'] = 8.5
m = Prophet(growth='logistic')
m.fit(df)

# Extend the timeline by 1826 periods; the future frame needs the same
# capacity column as the training frame.
future = m.make_future_dataframe(periods=1826)
future['cap'] = 8.5
fcst = m.predict(future)
m.plot_components(fcst)

# `plot` is matplotlib.pyplot (see the import at the top of this script).
plot.show()
Exemplo n.º 24
0
"""

import pandas as pd
from fbprophet import Prophet

# instantiate the model and set parameters:
# 95% uncertainty interval, linear growth, additive daily and yearly
# seasonality, no weekly seasonality
model = Prophet(changepoint_prior_scale=0.01,
                interval_width=0.95,
                growth='linear',
                daily_seasonality=True,
                weekly_seasonality=False,
                yearly_seasonality=True,
                seasonality_mode='additive')

# NOTE(review): load.csv presumably already has the 'ds' and 'y' columns - confirm.
history_pd = pd.read_csv("load.csv")

# fit the model to historical data
model.fit(history_pd)

# projects over 8760 hours - 1 year; the history rows are kept in the frame
future_pd = model.make_future_dataframe(periods=8760,
                                        freq='H',
                                        include_history=True)

# predict over the dataset
forecast_pd = model.predict(future_pd)

# forecast plot and component plot
fig1 = model.plot(forecast_pd, xlabel='date', ylabel='load')
fig2 = model.plot_components(forecast_pd)
# Instantiate Prophet and fit it on df
model = Prophet()
model.fit(df)
# Predict 10 further daily periods
future_data = model.make_future_dataframe(periods=10, freq='D')  #dropdown
future_data.tail()
forecast_data = model.predict(future_data)
forecast_data[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(5)

# make sure we save the original forecast data
# NOTE(review): this is an alias, not a copy - the np.exp assignments below
# also overwrite the columns of `forecast_data`. Use .copy() if the
# untransformed forecast must be kept.
forecast_data_orig = forecast_data
# Undo the log scale on the prediction and on both interval bounds.
forecast_data_orig['yhat'] = np.exp(forecast_data_orig['yhat'])
forecast_data_orig['yhat_lower'] = np.exp(forecast_data_orig['yhat_lower'])
forecast_data_orig['yhat_upper'] = np.exp(forecast_data_orig['yhat_upper'])
fig = model.plot(forecast_data_orig)

fig2 = model.plot_components(forecast_data_orig)

# Keep the values the model was fitted on as 'y_log' and put the 'y_orig'
# values back into 'y'.
df['y_log'] = df['y']
df['y'] = df['y_orig']

# Python
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(model, forecast_data_orig)  # This returns a plotly Figure
py.iplot(fig)

# Traces comparing the observed 'y_orig' values with the predicted 'yhat'.
# NOTE(review): `go` is not imported in this fragment - confirm it is plotly.graph_objs.
final_df = pd.DataFrame(forecast_data_orig)
actual_chart = go.Scatter(y=df["y_orig"], name='Actual')
predict_chart = go.Scatter(y=final_df["yhat"], name='Predicted')
Exemplo n.º 26
0
finish_date = datetime.strptime("2011-12-31 23:59", "%Y-%m-%d %H:%M")
# Name of the value column to read from the Data table.
number_tower = "Data_1st"

# The column name and both date bounds are interpolated straight into the SQL
# text; `start_date` is defined outside this fragment.
query = '''SELECT Date, %s FROM Data WHERE  Date >=  \'%s\' AND Date <= \'%s\';''' % (
    str(number_tower), start_date, finish_date)
conn = sqlite3.connect("mydatabase.db")
# NOTE(review): the cursor is not used and the connection is not closed in the
# visible code.
cursor = conn.cursor()
dataset = pd.read_sql_query(query, conn)
dataset.Date = dataset["Date"].apply(pd.to_datetime)

print(dataset)

# Number of trailing points held out of training and then forecast.
predictions = 180
# Bring the dataframe into the format Prophet expects ('ds', 'y').
# Note: `df` is the same object as `dataset`, so the rename applies to both.
df = dataset
print(df.head())
df.columns = ['ds', 'y']
# Cut the last `predictions` points off the training set so that quality can
# be measured on them.
train_df = df[:-predictions]

m = Prophet()
m.fit(train_df)

# Forecast exactly as many periods as were held out.
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)

m.plot(forecast)
m.plot_components(forecast)

plt.show()
Exemplo n.º 27
0
# Column names of the states / union territories in Confirmed.csv (title case).
_CONFIRMED_REGIONS = [
    'Maharashtra', 'Delhi', 'Gujrat', 'Madhya Pradesh', 'Tamilnadu',
    'Rajasthan', 'Uttar Pradesh', 'Telengana', 'Andhra Pradesh', 'Kerala',
    'Karnataka', 'Jammu And Kashmir', 'West Bengal', 'Harayana', 'Punjab',
    'Bihar', 'Orissa', 'Uttarakhand', 'Himachal Pradesh', 'Chattisgarh',
    'Assam', 'Jharkhand', 'Chandigarh', 'Ladakh', 'Andaman And Nicobar',
    'Meghalaya', 'Goa', 'Puducherry', 'Manipur', 'Tripura',
    'Arunachal Pradesh', 'Mizoram', 'Nagaland', 'Dadra And Nagar Haveli',
    'Daman And Deu', 'Lakshadweep',
]
# Deceased.csv and Recovered.csv use the same names in upper case.
_UPPERCASE_REGIONS = [name.upper() for name in _CONFIRMED_REGIONS]


def _series_for_prophet(table, column):
    """Build a Prophet-style ``ds`` / ``y`` frame from one column of ``table``.

    ``table`` is a transposed case sheet: its index holds the date labels and
    ``column`` holds the counts. Thousands separators are stripped from the
    values before they are converted to float.
    """
    from datetime import datetime

    # NOTE(review): '20' is appended to every label before parsing with
    # '%d-%b-%Y', so the label's trailing digits become the first half of the
    # year - confirm this against the date labels in the CSV headers.
    rows = [
        [datetime.strptime(label + '20', '%d-%b-%Y'),
         float(str(value).replace(',', ''))]
        for label, value in zip(np.array(table.index), np.array(table[column]))
    ]
    return pd.DataFrame(data=rows, index=list(range(len(rows))),
                        columns=['ds', 'y'])


def _save_history_plot(series, title, path, xlabel=None, ylabel=None):
    """Plot ``y`` over ``ds``, save the figure to ``path`` and close it."""
    pd.plotting.register_matplotlib_converters()
    axes = series.plot('ds', 'y')
    plt.title(title)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    plt.savefig(path)
    # Close the figure so repeated calls do not pile up open figures.
    plt.close(axes.get_figure())


def _fit_and_save_forecast(series, ylabel, forecast_path, components_path,
                           **prophet_kwargs):
    """Fit Prophet on ``series`` and save the 30-day forecast and component plots."""
    from fbprophet import Prophet

    model = Prophet(**prophet_kwargs)
    model.fit(series)
    forecast = model.predict(model.make_future_dataframe(periods=30))

    forecast_fig = model.plot(forecast)
    plt.ylabel(ylabel)
    forecast_fig.savefig(forecast_path)

    components_fig = model.plot_components(forecast)
    components_fig.savefig(components_path)

    plt.close(forecast_fig)
    plt.close(components_fig)


def sol():
    """Regenerate every chart for India and for each state / union territory.

    Reads ``Confirmed.csv``, ``Deceased.csv`` and ``Recovered.csv`` from
    ``./Datasets``. For the national column and for every region column it
    writes a history plot, a 30-day Prophet forecast plot and the forecast's
    component plot below ``./static/content``. Every figure is closed after it
    has been saved. Returns nothing.
    """
    confirmed = pd.read_csv('./Datasets/Confirmed.csv', header=0, index_col=0)
    deceased = pd.read_csv('./Datasets/Deceased.csv', header=0, index_col=0)
    recovered = pd.read_csv('./Datasets/Recovered.csv', header=0, index_col=0)

    # NOTE(review): the previous code computed mean/var/std of the confirmed
    # table here but never drew them, so these three files only show empty
    # axes with a y-label. The unused statistics were dropped; the files are
    # still written so anything that links to them keeps working. Plot the
    # statistics here if that was the intent.
    plt.figure()
    for ylabel, path in (
        ("Mean of Daily cases in India",
         './static/content/Images_India/India_mean_daily_Confirm_cases.png'),
        ("Variance of Daily cases",
         './static/content/Images_India/India_variance_daily_Confirm_cases.png'),
        ("Standard Deviation Of daily cases",
         './static/content/Images_India/India_standard_daily_Confirm_cases.png'),
    ):
        plt.ylabel(ylabel)
        plt.savefig(path)
    plt.close()

    # After transposing, the date labels are the index and each region is a column.
    confirmed = confirmed.T
    deceased = deceased.T
    recovered = recovered.T

    # ---- Confirmed cases -------------------------------------------------
    india = _series_for_prophet(confirmed, 'India')
    # Dates run along the x axis and counts along the y axis.
    _save_history_plot(
        india, "Trend in Increase of Confirmed Cases for India",
        './static/content/Images_India/India_confirmed_trend.png',
        xlabel='Date', ylabel='Number of Confirmed cases')
    _fit_and_save_forecast(
        india, "No. of Cases for India",
        './static/content/Images_India/india_confirm.png',
        './static/content/Images_India/India_confirm_components.png')

    for region in _CONFIRMED_REGIONS:
        series = _series_for_prophet(confirmed, region)
        _fit_and_save_forecast(
            series, "No. of Confirmed Cases " + region,
            './static/content/Images_Confirmed/' + region + '_seasonality.png',
            './static/content/Images_Confirmed/' + region + '_components.png')
        _save_history_plot(
            series, "Cases for " + region,
            './static/content/Images_statewise_confirmed_plot/' + region + '_mean.png')

    # ---- Deceased --------------------------------------------------------
    india = _series_for_prophet(deceased, 'INDIA')
    _save_history_plot(
        india, "Trend in Increase of Deceased Cases for India",
        './static/content/Images_India/India_deaths_trend.png',
        xlabel='Date', ylabel='Number of Deceased cases')
    _fit_and_save_forecast(
        india, "No. of Cases for India",
        './static/content/Images_India/india_deceased_seasonalities.png',
        './static/content/Images_India/India_deceased_components.png')

    for region in _UPPERCASE_REGIONS:
        series = _series_for_prophet(deceased, region)
        _save_history_plot(
            series, "Deaths for " + region,
            './static/content/Images_statewise_deaths_plot/' + region + '_deathtrend.png')
        # Per-region death forecasts use a 95% uncertainty interval.
        _fit_and_save_forecast(
            series, "No. of deaths for " + region,
            './static/content/Images_Death/' + region + '_seasonalities.png',
            './static/content/Images_Death/' + region + '_components.png',
            interval_width=0.95)

    # ---- Recovered -------------------------------------------------------
    india = _series_for_prophet(recovered, 'INDIA')
    _save_history_plot(
        india, "Trend in Increase of Recovered Cases for India",
        './static/content/Images_India/India_recovered_trend.png',
        xlabel='Date', ylabel='Number of Recovered cases')
    _fit_and_save_forecast(
        india, "No. of Cases for India",
        './static/content/Images_India/india_recovered_seasonalities.png',
        './static/content/Images_India/India_recovered_components.png')

    for region in _UPPERCASE_REGIONS:
        series = _series_for_prophet(recovered, region)
        _save_history_plot(
            series, "Recovery Cases for " + region,
            './static/content/Images_statewise_recovered_plot/' + region + '_normal.png')
        # Per-region recovery forecasts use a 95% uncertainty interval.
        _fit_and_save_forecast(
            series, "No. of Recovered cases for " + region,
            './static/content/Images_Recovered/' + region + '.png',
            './static/content/Images_Recovered/' + region + '_components.png',
            interval_width=0.95)
Exemplo n.º 28
0
import plotly.io as pio

from fbprophet import Prophet
from fbprophet.plot import plot_plotly

pio.renderers.default = "png"

df = pd.read_csv("example_wp_log_peyton_manning.csv")
m = Prophet()
m.fit(df)

future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)

#py.init_notebook_mode()

fig = plot_plotly(m, forecast)  # This returns a plotly Figure
py.iplot(fig)
fig.show()

"""
import matplotlib.pyplot as plt

x = 2

plt.plot([4, 7, 9, 15])
plt.ylabel('some numbers')
plt.savefig("scatch.png")
Exemplo n.º 29
0
    df_deaths = df_country.loc[:,['ds','death_day']]
    df_deaths.rename(columns={'death_day':'y'}, inplace =True)
    
    # fiting the model and making prediction
    m_cases = Prophet(yearly_seasonality=False, daily_seasonality=False, interval_width=0.95, growth='linear')
    m_cases.fit(df_cases)
    m_deaths = Prophet(yearly_seasonality=False, daily_seasonality=False, interval_width=0.95, growth='linear')
    m_deaths.fit(df_deaths)
    
    future_cases = m_cases.make_future_dataframe(periods=daysToPredict, freq='D', include_history=False)
    future_deaths = m_deaths.make_future_dataframe(periods=daysToPredict, freq='D', include_history=False)
    
    forecast_cases = m_cases.predict(future_cases)
    forecast_deaths = m_deaths.predict(future_deaths)
    if country == 'Brazil':
        fig = m_cases.plot_components(forecast_cases)
        fig.savefig('../predictions/brazil_prophet_cases.png')
        fig = m_deaths.plot_components(forecast_deaths)
        fig.savefig('../predictions/brazil_prophet_deaths.png')
        
    p = forecast_cases.loc[:,['ds','yhat']]
    p.rename(columns={'yhat': 'y'}, inplace= True)
    t = df_cases.append(p[['ds','y']], ignore_index=True)
    
    p = forecast_deaths.loc[:,['ds','yhat']]
    p.rename(columns={'yhat': 'y'}, inplace= True)
    s = df_deaths.append(p[['ds','y']], ignore_index=True)

    t['ds'] = t['ds'].astype('datetime64[ns]')
    t.rename(columns={'y': 'case_day'}, inplace= True)
    t['case_day'] = t['case_day'].astype('int32')
Exemplo n.º 30
0
class ProphetForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using Prophet for forecasting,ref. to https://facebook.github.io/prophet

    Attributes
    ----------
    _prophet_interval_width: float
         The width of the uncertainty intervals (by default 80%), also
         ref. to https://facebook.github.io/prophet/docs/uncertainty_intervals.html
    _yearly_seasonality: bool
        Consider yearly seasonality yes/no
    _monthly_seasonality: bool
        Consider monthly seasonality yes/no
    _quarterly_seasonality: bool
       Consider quarterly seasonality yes/no
    _weekly_seasonality:
       Consider weekly seasonality yes/no
    _daily_seasonality: bool
       Consider daily seasonality yes/no
    _weekend_seasonality: bool#
       Consider week-end seasonality yes/no.
       ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _changepoint_prior_scale: float
       If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility),
       you can adjust the strength of the sparse prior using this argument.
       By default, this parameter is set to 0.05. Increasing it will make the trend more flexible.
       Decreasing it will make the trend less flexible.
       ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet

    _changepoint_range: float
        By default changepoints are only inferred for the first 80% of the time series in order to have plenty of runway
        for projecting the trend forward and to avoid overfitting fluctuations at the end of the time series.
        This default works in many situations but not all, and can be changed using the changepoint_range argument.
        For example, m = Prophet(changepoint_range=0.9) will place potential changepoints in
        the first 90% of the time series.
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
     _add_change_points: bool
        Whether to add change points to the plots
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet

     _diagnose: bool
         Whether to run cross validation yes/no
     _history: str
         Amount of historic data in days for cross validation,
         Corresponds to initial in  https://facebook.github.io/prophet/docs/diagnostics.html
     _step: str
         Corresponds to period in the link above. Defines the step in days by which the historic data is shifted
     _horizon: str
         Forecasting horizon in days for each cross validation run
    _consider_holidays: bool
         Whether to consider holidays yes/no
         ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _country: str
         The country for which holidays are to be considered

    _prophet_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the Prophet model to the time series
    ts_diagnose()
       Diagnoses the fitted model. Cross validation is started
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecasts()
       Plots forecasted time-series
    """
    def __init__(self,
                 prophet_interval_width=0.95,
                 yearly_seasonality=False,
                 monthly_seasonality=False,
                 quarterly_seasonality=False,
                 weekly_seasonality=False,
                 daily_seasonality=False,
                 weekend_seasonality=False,
                 changepoint_prior_scale=0.001,
                 changepoint_range=0.9,
                 add_change_points=True,
                 diagnose=False,
                 history=None,
                 step=None,
                 horizon=None,
                 consider_holidays=True,
                 country='DE',
                 **kwds):
        """Initializes the object ProphetForecaster.

        Each named argument is stored on the instance under the same name with
        a leading underscore. All remaining keyword arguments are forwarded to
        the parent class initializer.

        Parameters
        ----------
        prophet_interval_width: float
            Width of the uncertainty intervals
        yearly_seasonality, monthly_seasonality, quarterly_seasonality,
        weekly_seasonality, daily_seasonality, weekend_seasonality: bool
            Whether to consider the respective seasonality
        changepoint_prior_scale: float
            Strength of the sparse prior on trend changes
        changepoint_range: float
            Share of the time series in which changepoints are inferred
        add_change_points: bool
            Whether to add change points to the plots
        diagnose: bool
            Whether to run cross validation
        history, step, horizon: str
            Cross validation settings (initial history, shift step, horizon)
        consider_holidays: bool
            Whether to consider holidays
        country: str
            The country for which holidays are to be considered
        **kwds
            Passed on to the parent class initializer
        """
        # Created first because the except branch below logs through it.
        self._prophet_logger = Logger('prophet')

        try:
            super(ProphetForecaster, self).__init__(**kwds)
        except TypeError:
            # The error is logged, not re-raised: construction continues.
            self._prophet_logger.exception(
                "TypeError occurred, Arguments missing")

        # No model yet at construction time.
        self._model = None

        self._prophet_interval_width = prophet_interval_width
        self._yearly_seasonality = yearly_seasonality
        self._monthly_seasonality = monthly_seasonality
        self._quarterly_seasonality = quarterly_seasonality
        self._weekly_seasonality = weekly_seasonality
        self._daily_seasonality = daily_seasonality
        self._weekend_seasonality = weekend_seasonality

        self._changepoint_prior_scale = changepoint_prior_scale
        self._changepoint_range = changepoint_range
        self._add_change_points = add_change_points

        self._diagnose = diagnose
        self._history = history
        self._step = step
        self._horizon = horizon
        # NOTE(review): presumably filled with cross validation results by
        # ts_diagnose - confirm.
        self._prophet_cv = None
        self._prophet_p = None

        self._consider_holidays = consider_holidays
        self._country = country

        # Identifier of this forecaster type.
        self._id = 'Prophet'

    def __copy__(self):
        """Return the parent's copy with all Prophet-specific attributes carried over."""
        duplicate = super(ProphetForecaster, self).__copy__()

        # Each attribute is re-bound on the copy (same object, no deep copy),
        # in this order.
        carried_over = (
            '_model',
            '_prophet_interval_width',
            '_yearly_seasonality',
            '_monthly_seasonality',
            '_quarterly_seasonality',
            '_weekly_seasonality',
            '_daily_seasonality',
            '_weekend_seasonality',
            '_changepoint_prior_scale',
            '_changepoint_range',
            '_add_change_points',
            '_diagnose',
            '_history',
            '_step',
            '_horizon',
            '_prophet_cv',
            '_prophet_p',
            '_consider_holidays',
            '_country',
            '_prophet_logger',
        )
        for attr in carried_over:
            setattr(duplicate, attr, getattr(self, attr))

        return duplicate

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'timeformat':
                self.time_format = v
            elif k == "prophet_interval_width":
                self._prophet_interval_width = v
            elif k == "yearly_seasonality":
                self._yearly_seasonality = v
            elif k == "monthly_seasonality":
                self._monthly_seasonality = v
            elif k == "quarterly_seasonality":
                self._quarterly_seasonality = v
            elif k == "weekly_seasonality":
                self._weekly_seasonality = v
            elif k == "daily_seasonality":
                self._daily_seasonality = v
            elif k == "weekend_seasonality":
                self._weekend_seasonality = v
            elif k == "changepoint_prior_scale":
                self._changepoint_prior_scale = v
            elif k == "changepoint_range":
                self._changepoint_range = v
            elif k == "add_change_points":
                self._add_change_points = v
            elif k == "diagnose":
                self._diagnose = v
            elif k == "history":
                self._history = v
            elif k == "step":
                self._step = v
            elif k == "horizon":
                self._horizon = v
            elif k == "consider_holidays":
                self._consider_holidays = v
            elif k == "country":
                self._country = v

        return self

    def get_params_dict(self):
        """Gets parameters as a dictionary"""
        return {
            'prophet_interval_width': self._prophet_interval_width,
            'yearly_seasonality': self._yearly_seasonality,
            'monthly_seasonality': self._monthly_seasonality,
            'quarterly_seasonality': self._quarterly_seasonality,
            'weekly_seasonality': self._weekly_seasonality,
            'daily_seasonality': self._daily_seasonality,
            'weekend_seasonality': self._weekend_seasonality,
            'changepoint_prior_scale': self._changepoint_prior_scale,
            'changepoint_range': self._changepoint_range,
            'add_change_points': self._add_change_points,
            'diagnose': self._diagnose,
            'history': self._history,
            'step': self._step,
            'horizon': self._horizon,
            'consider_holidays': self._consider_holidays,
            'country': self._country
        }

    @staticmethod
    def we_season(ds):
        """Lambda function to prepare weekend_seasonality for  Prophet"""
        date = pd.to_datetime(ds)
        return date.weekday() >= 5

    def ts_fit(self, suppress=False):
        """Fit Prophet to the time series data.

        When ``hyper_params`` is set, a grid search is run through
        ``self._gs`` instead and the attributes of the best forecaster found
        are copied into this object. Otherwise ``_prepare_fit()`` and
        ``ts_split()`` are called, a Prophet model is configured from the
        instance parameters (optional country holidays, monthly / quarterly /
        weekend seasonalities) and fitted to the training data. Fitted values,
        residuals and the in-sample interval bounds are stored on the
        instance.

        Parameters:
        ----------
        suppress: bool
            Suppress or not some of the output messages

        Returns:
        ----------
        ``self`` on success, ``-1`` when fitting or the in-sample prediction
        raised an exception (the exception is logged).

        Note: the process is terminated via ``sys.exit("STOP")`` when
        diagnosing is requested without 'step'/'horizon', or when holidays
        are requested for an unsupported country.
        """
        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._prophet_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._prophet_logger.info("***** Finished grid search *****")
            return self

        self._prepare_fit()
        self._model = None
        self.ts_split()

        ts_df = self._train_dt.copy()
        ts_test_df = self._test_dt
        # sanity check: drop the weekend condition columns if they are
        # already present; they are re-created below when needed
        if 'on_weekend' in ts_df.columns:
            ts_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)

        self._prophet_logger.info("Trying to fit the Prophet model....")
        try:
            if not suppress:
                self._prophet_logger.info("...via using parameters\n")
                print_attributes(self)
            # diagnose on? Explicit check instead of ``assert`` so that the
            # validation also runs under ``python -O``.
            if self._diagnose and (self._step is None
                                   or self._horizon is None):
                self._prophet_logger.warning(
                    "You want to diagnose the Prophet model. Please provide parameters "
                    "'step' and 'horizon' within object initialization!"
                )
                sys.exit("STOP")

            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols
            if ts_test_df is not None and not ts_test_df.empty:
                ts_test_df = ts_test_df.reset_index()
                ts_test_df.columns = self._ts_df_cols

            weekly_s = self._weekly_seasonality
            if self._weekend_seasonality:
                # the two conditional weekend seasonalities added below
                # replace the built-in weekly one
                weekly_s = False

            prophet_kwargs = dict(
                interval_width=self._prophet_interval_width,
                yearly_seasonality=self._yearly_seasonality,
                weekly_seasonality=weekly_s,
                daily_seasonality=self._daily_seasonality,
                changepoint_range=self._changepoint_range,
                changepoint_prior_scale=self._changepoint_prior_scale)
            if self._consider_holidays:
                if self._country not in ('AT', 'DE', 'US'):
                    self._prophet_logger.error(
                        "Unsupported country. Right now, Austria (AT), "
                        "Germany(DE) and USA (US) supported.")
                    sys.exit("STOP")
                prophet_kwargs['holidays'] = self._build_holidays_df()
            self._model = Prophet(**prophet_kwargs)

            if self._monthly_seasonality:
                self._model.add_seasonality(name='monthly',
                                            period=30.5,
                                            fourier_order=20)
                if not suppress:
                    self._prophet_logger.info("Added monthly seasonality.")

            if self._quarterly_seasonality:
                self._model.add_seasonality(name='quarterly',
                                            period=91.5,
                                            fourier_order=20)
                if not suppress:
                    self._prophet_logger.info("Added quarterly seasonality.")

            if self._weekend_seasonality:
                # condition columns must be present in the frame Prophet is
                # fitted on, and are mirrored into the stored train/test data
                on_weekend = ts_df['ds'].apply(self.we_season)
                ts_df['on_weekend'] = on_weekend
                ts_df['off_weekend'] = ~on_weekend
                self._train_dt = ts_df.copy()
                self._train_dt.set_index('ds', inplace=True)
                #
                if ts_test_df is not None and not ts_test_df.empty:
                    test_on_weekend = ts_test_df['ds'].apply(self.we_season)
                    ts_test_df['on_weekend'] = test_on_weekend
                    ts_test_df['off_weekend'] = ~test_on_weekend
                    self._test_dt = ts_test_df.copy()
                    self._test_dt.set_index('ds', inplace=True)
                # and add
                self._model.add_seasonality(name='weekend_on_season',
                                            period=7,
                                            fourier_order=5,
                                            condition_name='on_weekend')
                self._model.add_seasonality(name='weekend_off_season',
                                            period=7,
                                            fourier_order=5,
                                            condition_name='off_weekend')

                if not suppress:
                    self._prophet_logger.info("Added week-end seasonality.")

            # tic
            start = time()
            self.model_fit = self._model.fit(ts_df)
            # toc
            if not suppress:
                self._prophet_logger.info(
                    "Time elapsed: {} sec.".format(time() - start))
        except Exception:
            self._prophet_logger.exception("Prophet error...")
            return -1

        self._prophet_logger.info("Model successfully fitted to the data!")

        # Fitted values
        self._prophet_logger.info("Computing fitted values and residuals...")
        # in-sample predict
        try:
            self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
        except Exception:
            self._prophet_logger.exception("Prophet predict error...")
            # Without fresh fitted values the residuals and interval bounds
            # below would be computed from stale or missing data.
            return -1
        # Residuals
        try:
            # use fittedvalues to fill in the model dictionary
            self.residuals = pd.Series(
                np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
                index=self._train_dt.index)
        except (KeyError, AttributeError):
            self._prophet_logger.exception(
                "Model was not fitted or ts has other structure...")
        #
        self.lower_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_lower']),
            index=self._train_dt.index)
        self.upper_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_upper']),
            index=self._train_dt.index)

        self._prophet_logger.info("Done.")
        return self

    def _build_holidays_df(self):
        """Build the Prophet ``holidays`` frame for ``self._country``.

        The calendar covers every year present in ``self.ts_df.index``. The
        caller must already have checked that the country is supported.
        """
        calendars = {
            'AT': holidays.AT,
            'DE': holidays.DE,
            'US': holidays.US,
        }
        years = list(np.unique(np.asarray(self.ts_df.index.year)))
        calendar = calendars[self._country](state=None, years=years)
        # one row per holiday date, in chronological order
        df_holi = pd.DataFrame.from_dict(data=dict(sorted(calendar.items())),
                                         orient='index').reset_index()
        df_holi.columns = ['ds', 'holiday']
        df_holi['lower_window'] = 0
        df_holi['upper_window'] = 0
        return df_holi

    def ts_diagnose(self):
        """Diagnose the fitted model.

        Plots the residuals. If the ``diagnose`` parameter is on, the user is
        asked on the console whether to run Prophet cross validation; on
        'y' the cross validation is run with the ``history`` (optional),
        ``step`` and ``horizon`` parameters, the resulting forecast windows
        are printed, the MAPE metric is plotted and the performance metrics
        are stored in ``self._prophet_p``.

        Terminates the process via ``sys.exit("STOP")`` when no fitted model
        is available. A failing cross validation is logged and the method
        returns without computing metrics.
        """
        # Explicit check instead of ``assert`` so that the validation also
        # runs under ``python -O``.
        if self.model_fit is None:
            self._prophet_logger.error(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

        if not self._diagnose:
            return

        answer = input(
            "Run cross validation y/n? Note, depending on parameters provided "
            "this can take some time...")
        if answer.strip().lower() != 'y':
            self._prophet_logger.info("OK")
            return

        start = time()
        self._prophet_logger.info(
            "Running cross validation using parameters provided....")
        use_history = self._history is not None
        try:
            cv_kwargs = dict(period=self._step, horizon=self._horizon)
            if use_history:
                cv_kwargs['initial'] = self._history
            self._prophet_cv = cross_validation(self.model_fit, **cv_kwargs)
        except Exception:
            if use_history:
                self._prophet_logger.exception(
                    "Prophet cross validation error: check your "
                    "parameters 'history', 'horizon', 'step'!")
            else:
                self._prophet_logger.exception(
                    "Prophet cross validation error: "
                    "check your parameters 'horizon', 'step'!")
            # no (fresh) cross validation result to report on
            return

        self._prophet_logger.info("Time elapsed: {}".format(time() - start))
        simu_intervals = self._prophet_cv.groupby('cutoff')['ds'].agg([
            ('forecast_start', 'min'), ('forecast_till', 'max')
        ])
        self._prophet_logger.info(
            "Following time windows and cutoffs have been set-up:\n")
        print(simu_intervals)
        #
        plot_cross_validation_metric(self._prophet_cv, metric='mape')
        #
        self._prophet_logger.info("Running performance metrics...")
        self._prophet_p = performance_metrics(self._prophet_cv)

    def plot_residuals(self):
        """Draw the residual plots of the in-sample fit and show them.

        Delegates the drawing to the base-class ``_plot_residuals`` using the
        training targets and the fitted ``yhat`` values.
        """
        draw = super(ProphetForecaster, self)._plot_residuals
        observed = np.asarray(self._train_dt['y'])
        fitted = np.asarray(self.fittedvalues['yhat'])
        _fig, _axes = draw(y=observed, yhat=fitted, _id="Prophet")

        current_figure = plt.gcf()
        current_figure.autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Evaluate the fitted model on the test data, if the base-class check passes.

        Stores the prediction in ``self.forecast``, appends the test-period
        interval bounds to ``lower_conf_int`` / ``upper_conf_int``, stores the
        test residuals, computes the RMSE and optionally plots the forecast.
        """
        check_result = super(ProphetForecaster, self)._check_ts_test()
        if check_result < 0:
            return

        self._prophet_logger.info(
            "Evaluating the fitted Prophet model on the test data...")
        # Prophet gets the test frame without the target column
        test_inputs = self._test_dt.copy().reset_index().drop('y', axis=1)
        self.forecast = self._model.predict(test_inputs)

        # confidence intervals: extend the in-sample bounds by the test period
        bounds = (('lower_conf_int', 'yhat_lower'),
                  ('upper_conf_int', 'yhat_upper'))
        for attr_name, column in bounds:
            in_sample = getattr(self, attr_name)
            out_of_sample = pd.Series(np.asarray(self.forecast[column]),
                                      index=self._test_dt.index)
            setattr(self, attr_name,
                    pd.concat([in_sample, out_of_sample], axis=0))

        observed = np.asarray(self._test_dt['y'])
        predicted = np.asarray(self.forecast['yhat'])
        self.residuals_forecast = pd.Series(observed - predicted,
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._prophet_logger.info("RMSE on test data: {}".format(self.rmse))

        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress):
        """Forecast the time series ``n_forecast`` periods into the future.

        The horizon is passed through the base-class ``_check_ts_forecast``,
        the forecaster is switched to 'forecast' mode and refitted through
        ``ts_fit``. The prediction is stored in ``self.forecast``, the
        interval bounds are extended accordingly and the forecast is plotted.
        """
        horizon = super(ProphetForecaster, self)._check_ts_forecast(n_forecast)

        self._prophet_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        announcement = "".join(
            ("Forecasting next ", str(horizon), str(self.ts_df.index.freq)))
        self._prophet_logger.info(announcement)

        future = self._model.make_future_dataframe(periods=horizon,
                                                   freq=self.freq)
        if self._weekend_seasonality:
            # the conditional seasonalities need their condition columns
            weekend_flags = future['ds'].apply(self.we_season)
            future['on_weekend'] = weekend_flags
            future['off_weekend'] = ~weekend_flags

        self.forecast = self._model.predict(future)

        # confidence intervals
        bounds = (('lower_conf_int', 'yhat_lower'),
                  ('upper_conf_int', 'yhat_upper'))
        for attr_name, column in bounds:
            previous = getattr(self, attr_name)
            predicted = pd.Series(np.asarray(self.forecast[column]),
                                  index=future.ds)
            setattr(self, attr_name, pd.concat([previous, predicted], axis=0))

        # no test residuals exist for a pure forecast; plot_forecast uses
        # this to choose the Prophet-native plots
        self.residuals_forecast = None
        self.plot_forecast()

    def plot_forecast(self):
        """Plot the forecast stored in ``self.forecast`` and show the figure.

        Without test residuals the Prophet-native forecast and component
        plots are drawn (optionally with change points); otherwise the
        base-class ``_plot_forecast`` is used.
        """
        if self.residuals_forecast is None:
            forecast_figure = self._model.plot(self.forecast)
            self._model.plot_components(self.forecast)
            if self._add_change_points:
                add_changepoints_to_plot(forecast_figure.gca(), self._model,
                                         self.forecast)
        else:
            draw = super(ProphetForecaster, self)._plot_forecast
            predicted = pd.Series(np.asarray(self.forecast['yhat']),
                                  index=self.forecast['ds'])
            _fig, _axes = draw(y=np.asarray(self._train_dt['y']),
                               yhat=np.asarray(self.fittedvalues['yhat']),
                               forecast=predicted,
                               _id='Prophet')

        current_figure = plt.gcf()
        current_figure.autofmt_xdate()
        plt.grid(True)
        plt.show()
Exemplo n.º 31
0
# Notebook export. `a` (the frame being fitted) and `future` (the frame being
# predicted on) are created in cells that are not part of this excerpt.

# In[11]:


# Fit a model with yearly_seasonality=13 and plot its yearly component.
# NOTE(review): per the Prophet docs a number here is the count of Fourier
# terms for the yearly seasonality -- confirm against the installed version.
k=Prophet(yearly_seasonality=13).fit(a)
j=plot_yearly(k)


# In[16]:


# Default model: fit, predict on `future`, plot the forecast components.
m2= Prophet()


forecast4 = m2.fit(a).predict(future)
fig = m2.plot_components(forecast4)


# In[33]:


# Blank out every column of the rows whose y exceeds 280 (the indexer selects
# whole rows, so all columns of those rows are set to None, not only y).
a.loc[a.loc[:]['y'] > 280]=None


# In[35]:


# Refit a default model on the modified frame and plot its forecast.
g=Prophet()
model=g.fit(a)
fig=model.plot(model.predict(future))
## `forecast1`, `forecast2`, `m`, `view_hour` and the `pplt` alias are defined
## outside this excerpt (pplt is presumably matplotlib.pyplot -- confirm).

## Now merge to bring the ds back into the df
## Without the "on" keyword the join key is implicitly the index which is what we're doing here
forecast2 = forecast2.join(forecast1['ds'], how='inner')



#%%
## This works
## This will create a plot that includes Forecasted, C.I.'s, and Actual values
m.plot(forecast1)

#%%
## I think it is unnecessary to review exponentiated components
## Plus the complexity of joining forecast2 with forecast1
m.plot_components(forecast1);

#%%
## It was necessary, in the fill_between, to use a datetime index associated with
## the first parameter of the function.
## This necessitated converting the existing ds datetime element to an index
pplt.subplots(figsize=(30,10))
forecast2.set_index('ds',inplace=True)

## If using the view_hour data it will be REQUIRED to exponentiate the forecasts (i.e., forecast2)
pplt.plot(view_hour['distinct_freq_sum'], label='Original', color='black');

## Forecast line plus a shaded band between the upper and lower bounds
pplt.plot(forecast2.yhat, color='red', label='Forecast');
pplt.fill_between(forecast2.index, forecast2['yhat_upper'], forecast2['yhat_lower'], color='gray', alpha=0.25)
pplt.ylabel('Distinct Freq Sums');
pplt.xlabel('Hours');
Exemplo n.º 33
0
def main():
    """Customer Lifetime Value & Sales Revenue Forecasting"""

    st.title("Customer Lifetime Value & Sales Revenue Forecasting")
    st.subheader(
        "Built with Streamlit,Lifetimes, fbProphet and Plotly library")

    # Menu
    menu = [
        'Exploratory Data Analysis', 'Customer Lifetime Value',
        'Sales Revenue Forecasting', 'About'
    ]
    choices = st.sidebar.selectbox('Select Menu', menu)

    if choices == 'Exploratory Data Analysis':
        st.subheader('Exploratory Data Analysis')

        clean = pd.read_csv('data/clean_df.csv')
        clean = clean.drop('Unnamed: 0', axis=1)
        clean = clean.rename(columns={"Price": "Revenue"})
        clean["Date"] = pd.to_datetime(clean["Date"])
        clean["Month"] = clean["Date"].dt.strftime("%B")

        if st.checkbox('View Data'):
            st.dataframe(clean)

        st.subheader("Annual Aggregation")

        if st.checkbox('View Top 10 Items By Revenue'):
            revenue = clean.groupby(
                "Description")["Revenue"].sum().reset_index().sort_values(
                    by="Revenue", ascending=False)
            revenue_head = revenue.head(10).sort_values(by="Revenue")
            fig1 = px.bar(revenue_head,
                          x="Revenue",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig1)

        if st.checkbox('View Bottom 10 Items By Revenue'):
            revenue = clean.groupby(
                "Description")["Revenue"].sum().reset_index().sort_values(
                    by="Revenue", ascending=False)
            revenue_tail = revenue.tail(10).sort_values(by="Revenue")
            fig2 = px.bar(revenue_tail,
                          x="Revenue",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig2)

        if st.checkbox('View Top 10 Popular Items'):
            quantity = clean.groupby(
                "Description")["Quantity"].sum().reset_index().sort_values(
                    by="Quantity", ascending=False)
            quantity_head = quantity.head(10).sort_values(by="Quantity")
            fig3 = px.bar(quantity_head,
                          x="Quantity",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig3)

        if st.checkbox('View Least Popular Items'):
            qty1 = st.selectbox("Select Total Quantity Sold",
                                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                                key="qty1")
            quantity = clean.groupby(
                "Description")["Quantity"].sum().reset_index().sort_values(
                    by="Quantity", ascending=False)
            quantity_tail = quantity[quantity["Quantity"] == qty1].reset_index(
                drop=True)
            st.dataframe(quantity_tail[["Description"]])

        st.subheader("Monthly Aggregation")

        if st.checkbox('View Monthly Top 10 Items By Revenue'):
            mth1 = st.selectbox("Select Month", [
                "January", "February", "March", "April", "May", "June", "July",
                "August", "September", "October", "November", "December"
            ],
                                key="mth1")
            monthrevenue = clean.groupby(["Month", "Description"
                                          ])["Revenue"].sum().reset_index()
            month_revenue = monthrevenue[monthrevenue["Month"] ==
                                         mth1].sort_values(by="Revenue",
                                                           ascending=False)
            month_revenue_head = month_revenue.head(10).sort_values(
                by="Revenue")
            fig4 = px.bar(month_revenue_head,
                          x="Revenue",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig4)

        if st.checkbox('View Monthly Bottom 10 Items by Revenue'):
            mth2 = st.selectbox("Select Month", [
                "January", "February", "March", "April", "May", "June", "July",
                "August", "September", "October", "November", "December"
            ],
                                key="mth2")
            monthrevenue = clean.groupby(["Month", "Description"
                                          ])["Revenue"].sum().reset_index()
            month_revenue = monthrevenue[monthrevenue["Month"] ==
                                         mth2].sort_values(by="Revenue",
                                                           ascending=False)
            month_revenue_tail = month_revenue.tail(10).sort_values(
                by="Revenue")
            fig5 = px.bar(month_revenue_tail,
                          x="Revenue",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig5)

        if st.checkbox('View Monthly Top 10 Popular Items'):
            mth3 = st.selectbox("Select Month", [
                "January", "February", "March", "April", "May", "June", "July",
                "August", "September", "October", "November", "December"
            ],
                                key="mth3")
            monthquantity = clean.groupby(["Month", "Description"
                                           ])["Quantity"].sum().reset_index()
            month_quantity = monthquantity[monthquantity["Month"] ==
                                           mth3].sort_values(by="Quantity",
                                                             ascending=False)
            month_quantity_head = month_quantity.head(10).sort_values(
                by="Quantity")
            fig6 = px.bar(month_quantity_head,
                          x="Quantity",
                          y="Description",
                          orientation="h")
            st.plotly_chart(fig6)

        if st.checkbox('View Monthly Least Popular Items'):
            mth4 = st.selectbox("Select Month", [
                "January", "February", "March", "April", "May", "June", "July",
                "August", "September", "October", "November", "December"
            ],
                                key="mth4")
            qty2 = st.selectbox("Select Total Quantity Sold",
                                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                                key="qty2")
            monthquantity = clean.groupby(["Month", "Description"
                                           ])["Quantity"].sum().reset_index()
            month_quantity_tail = monthquantity[
                (monthquantity["Month"] == mth4)
                & (monthquantity["Quantity"] == qty2)].reset_index(drop=True)
            st.dataframe(month_quantity_tail[["Description"]])

    if choices == 'Customer Lifetime Value':
        st.subheader('Customer Lifetime Value')
        st.subheader("Model Based On 30 Days")

        output = pd.read_csv('data/output_df.csv')
        output["predicted_purchases"] = output["predicted_purchases"].round()
        output["expected_total_monetary_value"] = output[
            "predicted_purchases"] * output["expected_monetary_value"]
        #output=output.rename(columns={"probability":"probability_alive"})

        if st.checkbox('View Predictions'):
            #st.dataframe(output[["CustomerID","predicted_purchases","expected_monetary_value","expected_total_monetary_value","probability_alive"]])
            st.dataframe(output[[
                "CustomerID", "predicted_purchases", "expected_monetary_value",
                "expected_total_monetary_value"
            ]])

            def get_table_download_link(df):
                """Return an HTML anchor embedding ``df`` as a base64-encoded CSV.

                The frame is serialised without its index and placed in a
                ``data:`` URI, so the link needs no server-side file.
                """
                csv_bytes = df.to_csv(index=False).encode()
                payload = base64.b64encode(csv_bytes).decode()
                anchor = ('<a href="data:file/csv;base64,' + payload +
                          '" download="data/output_df.csv">Download</a>')
                return anchor

            st.markdown(get_table_download_link(output),
                        unsafe_allow_html=True)

        if st.checkbox('View More On Expected Total Monetary Value'):
            exp_tot = output["expected_total_monetary_value"].describe(
            ).to_frame()
            st.dataframe(exp_tot)

            st.subheader("Boxplot")
            fig7 = px.box(output, y="expected_total_monetary_value")
            st.plotly_chart(fig7)

            st.subheader("Histogram")
            fig8 = px.histogram(output, x="expected_total_monetary_value")
            st.plotly_chart(fig8)

    if choices == 'Sales Revenue Forecasting':
        st.subheader('Sales Revenue Forecasting')

        df_load_state = st.text('Loading data...')
        df = load_data('data/data.csv')
        df_load_state.text('Loading data... done!')

        chart = df.groupby(['InvoiceDate'])[['Revenue']].sum()

        def plot_fig():
            """Render the ``chart`` revenue series in Streamlit and return the figure.

            The figure holds a single scatter trace over the index of
            ``chart`` and has the x-axis range slider switched on.
            """
            figure = go.Figure()
            revenue_trace = go.Scatter(x=chart.index,
                                       y=chart['Revenue'],
                                       name="Revenue")
            figure.add_trace(revenue_trace)
            figure.layout.update(title_text='UK Revenue for year 2011 ',
                                 xaxis_rangeslider_visible=True)
            st.plotly_chart(figure)
            return figure

        # plotting the figure of Actual Data
        plot_fig()

        if st.checkbox('Show raw data'):
            st.subheader('Raw data')
            st.write(chart)

        #shape the df w.r.t requirement by fbProphet
        df_prophet = df.groupby(['InvoiceDate'],
                                as_index=False)[['Revenue']].sum()

        #remove negative value
        #fbprophet works with 'None'
        df_prophet.iloc[21, 1] = None
        df_prophet.columns = ['ds', 'y']

        #function to remove outliers
        def outliers_to_na(ts, devs):
            """Replace outliers in ``ts['y']`` with missing values, in place.

            A value is an outlier when it lies more than ``devs`` standard
            deviations (``np.std`` of the column) away from the column
            median. Values that are already missing are left untouched.

            Parameters
            ----------
            ts : pandas.DataFrame
                Frame with a numeric ``'y'`` column; it is modified in place.
            devs : float
                Allowed distance from the median, in standard deviations.

            Returns
            -------
            pandas.DataFrame
                The same ``ts`` object, for convenience.
            """
            values = ts['y']
            median = values.median()
            std = np.std(values)
            is_outlier = ((values < median - devs * std) |
                          (values > median + devs * std))
            # A single .loc write instead of chained ``ts['y'][x] = None``:
            # chained assignment is not guaranteed to reach ``ts`` (it is a
            # no-op under pandas copy-on-write), and the boolean mask works
            # for any index, not only the default 0..n-1 one.
            ts.loc[is_outlier, 'y'] = np.nan
            return ts

        # remove outliers based on 2 std dev
        outliers_to_na(df_prophet, 2)

        #st.write(df_prophet)

        #season_choice = st.selectbox('Seasonality Mode',['additive','multiplicative'])
        #model_choice = st.selectbox('Model Choice',['Logistic Regression','Neural Network'])

        #if changepoint_prior_scale == 'additive':
        m = Prophet(seasonality_mode='additive', changepoint_prior_scale=0.11)
        m.fit(df_prophet)
        future = m.make_future_dataframe(periods=3, freq='M')
        future = m.predict(future)

        #plot forecast
        fig1 = plot_plotly(m, future)
        if st.checkbox('Show forecast data'):
            st.subheader('forecast data')
            st.write(future.loc[305:, ['ds', 'yhat']])
            st.write(
                'Quarterly Sales Revenue for Dec 2011, Jan 2012 , Feb 2012')
            st.plotly_chart(fig1)

        #plot component wise forecast
        st.write("Component wise forecast")
        fig2 = m.plot_components(future)
        st.write(fig2)

    if choices == 'About':
        st.subheader('About')