Пример #1
0
    def get_last_trade_data(self, tkr, exchange, duration):
        """Fetch price data for a single ticker on a single exchange.

        Args:
            tkr: Ticker symbol; should be valid for ``exchange``.
            exchange: Exchange code the ticker is listed under.
            duration: Passed through unchanged as the second argument of
                ``gfc.get_prices_data``.

        Returns:
            Whatever ``gfc.get_prices_data`` returns for the one-item query.
        """
        # Note that the message is emitted before the request is issued.
        self.logger.info("Queried data for " + tkr)

        query = {'q': tkr, 'x': exchange}
        return gfc.get_prices_data([query], duration)
Пример #2
0
def Stock_name(cmpyname):
    """Fit a linear regression on 5 years of prices and chart a forecast.

    Downloads price data for ``cmpyname``, trains a ``LinearRegression`` to
    predict the ``<cmpyname>_Close`` value ``forecast_out`` rows ahead,
    appends the predictions as dated ``Forecast`` rows and saves a chart of
    close versus forecast to ``static/images/var4``.

    Args:
        cmpyname: Ticker symbol; also the prefix of the ``_Close`` column.

    Returns:
        The frame (history plus forecast rows, with ``Date`` as a regular
        column) from positional row 1050 onwards.
    """
    from datetime import timedelta

    from googlefinance.client import get_prices_data

    close_col = cmpyname + '_Close'
    df = get_prices_data([{'q': cmpyname}], "5Y")
    df.fillna(-99999, inplace=True)

    # Look ahead by 9% of the available history (rounded up).
    forecast_out = int(math.ceil(0.09 * len(df)))
    df['label'] = df[close_col].shift(-forecast_out)

    # NOTE(review): only the close column is dropped, so 'label' itself stays
    # in the feature matrix, and X_lately is sliced *after* X was truncated,
    # i.e. from rows that are also used for training. Both look unintended;
    # confirm with the model owner before relying on the forecast values.
    X = np.array(df.drop([close_col], axis=1))
    X = X[:-forecast_out]
    X_lately = X[-forecast_out:]
    # Rows whose label was shifted past the end of the data hold NaN.
    df.dropna(inplace=True)
    Y = np.array(df['label'])

    clf = LinearRegression(n_jobs=-1)
    X_train, _, Y_train, _ = cross_validation.train_test_split(
        X, Y, test_size=0.2)
    clf.fit(X_train, Y_train)
    forecast_set = clf.predict(X_lately)

    # Append one row per forecast value, dated on consecutive calendar days
    # after the last remaining row. Stepping with timedelta keeps every date
    # at midnight; adding 86400 seconds to a local timestamp drifts by an
    # hour whenever the range crosses a DST change.
    df['Forecast'] = np.nan
    last_date = datetime.combine(df.iloc[-1].name, datetime.min.time())
    one_day = timedelta(days=1)
    next_date = last_date + one_day
    blank_cells = [np.nan] * (len(df.columns) - 1)
    for value in forecast_set:
        df.loc[next_date] = blank_cells + [value]
        next_date += one_day

    # Normalise the mixed index into a proper datetime 'Date' index.
    df.reset_index(level=None, inplace=True)
    df = df.rename(index=str, columns={"index": "Date"})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

    style.use('ggplot')
    fig = plt.figure(figsize=(20, 8))
    df[close_col].plot()
    df['Forecast'].plot()
    plt.legend(loc=4)
    plt.xlabel('Date')
    plt.ylabel('Price')
    fig.savefig('static/images/var4')
    # Release the figure; pyplot keeps every figure alive until it is closed.
    plt.close(fig)

    df.reset_index(level=None, inplace=True)
    # NOTE(review): 1050 is a hard-coded row offset; frames shorter than that
    # yield an empty result.
    return df[1050:]
Пример #3
0
 def example_2(self):
     """Print the price frame for three market indices over period "1Y".

     The queried (ticker, exchange) pairs are the Dow Jones, the NYSE
     Composite (DJ) and the S&P 500.
     """
     indices = (
         (".DJI", "INDEXDJX"),       # Dow Jones
         ("NYA", "INDEXNYSEGIS"),    # NYSE COMPOSITE (DJ)
         (".INX", "INDEXSP"),        # S&P 500
     )
     queries = [{'q': ticker, 'x': exchange} for ticker, exchange in indices]
     # open, high, low, close, volume data, returned as a pandas dataframe
     prices = get_prices_data(queries, "1Y")
     print(prices)
Пример #4
0
 def fetch_nyse_index(self):
     """Fetch the latest row of the daily NYSE Composite index.

     Requests period "5d" for NYA on INDEXNYSEGIS, sets the frame's index
     name to the module-level ``INDEX`` value and keeps only the last row.

     :return: one-row pandas DataFrame on success; ``-1`` if anything
         raises (the exception is printed, not re-raised).
     """
     failure_code = -1
     nyse_query = {'q': "NYA", 'x': "INDEXNYSEGIS"}  # NYSE COMPOSITE (DJ)
     try:
         frame = get_prices_data([nyse_query], "5d")
         frame.index.name = INDEX
         latest = frame.tail(n=1)
     except Exception as err:
         print(err)
         return failure_code
     return latest
Пример #5
0
        'x': "INDEXDJX",
    },
    # NYSE COMPOSITE (DJ)
    {
        'q': "NYA",
        'x': "INDEXNYSEGIS",
    },
    # S&P 500
    {
        'q': ".INX",
        'x': "INDEXSP",
    }
]
# "1Y" is presumably a one-year look-back; confirm against the googlefinance
# client documentation.
period = "1Y"
# get open, high, low, close, volume data (return pandas dataframe);
# the sample output below shows date rows and <ticker>_<field> columns
df = get_prices_data(params, period)
print(df)
#            .DJI_Open  .DJI_High  .DJI_Low  .DJI_Close  .DJI_Volume  \
# 2016-07-20   18503.12   18562.53  18495.11    18559.01    85840786
# 2016-07-21   18582.70   18622.01  18555.65    18595.03    93233337
# 2016-07-22   18589.96   18590.44  18469.67    18517.23    86803016
# 2016-07-23   18524.15   18571.30  18491.59    18570.85    87706622
# 2016-07-26   18554.49   18555.69  18452.62    18493.06    76807470
# ...               ...        ...       ...         ...         ...

params = [
    # Dow Jones
    {
        'q': ".DJI",
        'x': "INDEXDJX",
    },
Пример #6
0
def _rolling_arima_forecast(history, steps):
    """Forecast ``steps`` values one at a time with ARIMA(3, 1, 0).

    Each one-step forecast is appended to the history before the model is
    refitted, so later forecasts build on earlier forecasts rather than on
    observed values.
    """
    predictions = np.empty((0), dtype=np.float32)
    for _ in range(steps):
        # disp=False silences the optimiser's convergence output.
        fitted = ARIMA(history, order=(3, 1, 0)).fit(disp=False)
        next_value = fitted.forecast()[0]
        predictions = np.hstack([predictions, next_value])
        history = np.hstack([history, next_value])
    return predictions


def _forecast_frame(train, test, column, prediction_label):
    """Return observed ``column`` values next to a rolling ARIMA forecast.

    The prediction column is NaN over the training rows and holds the
    forecast over the test rows. The result is indexed by ``Date``.
    """
    predictions = _rolling_arima_forecast(train[column].values, len(test))
    observed = pd.concat([train[column], test[column]])
    # Built outside the loop so an empty test window is handled as well.
    padding = np.full(len(train), np.nan)
    frame = pd.DataFrame({
        'Date': observed.index,
        'Original': observed.values,
        prediction_label: np.hstack([padding, predictions]),
    })
    return frame.set_index('Date')


def Stock_name(cmpyname):
    """Chart an ARIMA forecast of the closing price and render the page.

    Downloads 5 years of prices for ``cmpyname``, forecasts the
    ``<cmpyname>_Close`` series over two hard-coded train/test windows and
    saves the chart of the second window to
    ``static/images/firstimage.png``.

    Args:
        cmpyname: Ticker symbol; also the prefix of the ``_Close`` column.

    Returns:
        The result of rendering ``PredictionPage.html`` with ``imagename``
        set to the saved chart path.
    """
    from googlefinance.client import get_prices_data

    close_col = cmpyname + '_Close'
    # get open, high, low, close, volume data (return pandas dataframe)
    df = get_prices_data([{'q': cmpyname}], "5Y")
    df.reset_index(level=None, inplace=True)
    df = df.rename(index=str, columns={"index": "Date"})
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')

    previous_close = df[close_col].shift()
    df['Close_diff'] = df[close_col] - previous_close
    df['Close_diff_log'] = np.log1p(df[close_col]) - np.log1p(previous_close)
    df = df[[close_col, 'Close_diff', 'Close_diff_log']]
    # shift() leaves the first row without a previous close; drop such rows.
    df = df.dropna()

    # Window 1: rows 0-879 train, everything after is forecast.
    full_history = _forecast_frame(df[0:880], df[880:], close_col,
                                   'Prediction')
    axes = full_history.plot(color=['blue', 'red'])
    # NOTE(review): this chart is never saved or returned, yet it costs one
    # ARIMA fit per test row. Confirm whether it is still needed.
    plt.close(axes.get_figure())

    ## 1 year
    recent = _forecast_frame(df[734:1100], df[1100:], close_col,
                             'Prediction(trend)')
    axes = recent.plot(color=['blue'])
    figure = axes.get_figure()
    figure.savefig('static/images/firstimage.png')
    # Release the figure; pyplot keeps every figure alive until it is closed.
    plt.close(figure)
    return render_template("PredictionPage.html",
                           imagename="static/images/firstimage.png")
def _score_regressor(estimator, features, target, pickle_path):
    """Fit ``estimator`` on a random 80/20 split, pickle it and score it.

    Returns:
        ``(mse, r2)`` measured on the held-out 20% of the rows.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        features, target, test_size=0.2, random_state=np.random)
    estimator.fit(X_train, Y_train)
    # Context manager so the pickle file is flushed and closed immediately.
    with open(pickle_path, 'wb') as handle:
        pickle.dump(estimator, handle)
    predicted = estimator.predict(X_test)
    return mean_squared_error(Y_test, predicted), r2_score(Y_test, predicted)


def summary_metrics(compnyname):
    """Compare three models on 5 years of prices for ``compnyname``.

    Fits a linear regression, a random forest and ARIMA, pickles each model
    into the working directory and collects the mean squared error and R2
    score of each.

    Args:
        compnyname: Ticker symbol; also the prefix of the ``_Close`` column.

    Returns:
        DataFrame with columns ``Model_Name``, ``Mean_squared_error`` and
        ``R2_score``, sorted by ``R2_score`` descending with a fresh index.
    """
    from googlefinance.client import get_prices_data

    # Downloaded once and shared by all three models.
    df = get_prices_data([{'q': compnyname}], "5Y")

    model_names = []
    mse_scores = []
    r2_scores = []

    # Positional columns 0, 1, 2 and 4 are the features, column 3 the target.
    # NOTE(review): this assumes the frame is ordered Open, High, Low, Close,
    # Volume; confirm against the data source.
    X = df.iloc[:, [0, 1, 2, 4]].values
    Y = df.iloc[:, 3].values

    mse, r2 = _score_regressor(LinearRegression(n_jobs=-1), X, Y,
                               'Linear_Regression_model.pckl')
    # The leading space in the label is kept as-is; callers may match on it.
    model_names.append(' LinearRegression')
    mse_scores.append(mse)
    r2_scores.append(r2)

    mse, r2 = _score_regressor(RandomForestRegressor(), X, Y,
                               'Random_Forest_Regressor_model.pckl')
    model_names.append('RandomForestRegressor')
    mse_scores.append(mse)
    r2_scores.append(r2)

    # Closing prices as a series indexed by datetime 'Date'.
    prices = df.reset_index(level=0)
    prices = prices.rename(index=str, columns={"index": "Date"})
    prices['Date'] = pd.to_datetime(prices['Date'])
    close = prices.set_index('Date')[compnyname + '_Close']

    arima = ARIMA(close, order=(3, 1, 0))
    arima.fit(disp=0)
    # NOTE(review): the model object is pickled, not the fitted results that
    # fit() returns, and its order (3, 1, 0) differs from the (5, 1, 0) used
    # for the evaluation below. Confirm which one is intended.
    with open('ARIMA_model.pckl', 'wb') as handle:
        pickle.dump(arima, handle)

    # Walk-forward evaluation on the last 20%: refit on everything seen so
    # far, forecast one step, then reveal the true observation.
    series = close.values
    split_at = int(len(series) * 0.80)
    history = list(series[:split_at])
    test = series[split_at:]
    predictions = []
    for observed in test:
        step_fit = ARIMA(history, order=(5, 1, 0)).fit(disp=0)
        predictions.append(step_fit.forecast()[0])
        history.append(observed)

    model_names.append('ARIMA')
    mse_scores.append(mean_squared_error(test, predictions))
    r2_scores.append(r2_score(test, predictions))

    # Column names are given as a list; pandas rejects a set here because a
    # set has no defined order.
    summary = pd.DataFrame(
        {
            "Model_Name": model_names,
            "Mean_squared_error": mse_scores,
            "R2_score": r2_scores,
        },
        columns=["Model_Name", "Mean_squared_error", "R2_score"],
    )
    return summary.sort_values(ascending=False,
                               by="R2_score").reset_index(drop=True)