Exemplo n.º 1
0
def get_stock_data_classification(stock_name, normalize=True, ma=None):
    """Download daily quotes for *stock_name* and add classification columns.

    Quotes are fetched from Yahoo through ``web.DataReader`` for the period
    1950-01-01 .. today.  The raw ``Close`` column is dropped, a ``Pct``
    column with the row-over-row percentage change of ``Adj Close`` is added,
    and ``Pos`` / ``Neg`` columns are derived from the unscaled ``Pct`` values
    with ``buy_sell_hold_pos`` / ``buy_sell_hold_neg``.  Original note on the
    labelling scheme: "Rise = +1, Fall = -1, otherwise = 0".

    Args:
        stock_name: Ticker symbol handed to ``web.DataReader``.
        normalize: When true, every numeric column (including the moving
            averages and, last of all, ``Pct``) is rescaled independently
            with ``preprocessing.MinMaxScaler``.
        ma: Optional iterable of rolling-window sizes.  For each size ``n`` a
            column named ``"<n>ma"`` holding the rolling mean of ``Adj Close``
            is added.  ``None`` (the default) adds no such column.

    Returns:
        The resulting DataFrame, with every row containing a NaN removed.
    """
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)

    # Moving averages are computed on the raw prices, before any scaling.
    windows = [] if ma is None else list(ma)
    ma_columns = ['{}ma'.format(window) for window in windows]
    for window, column in zip(windows, ma_columns):
        df[column] = df['Adj Close'].rolling(window=window).mean()

    # Keyword form: pandas >= 2.0 no longer accepts ``axis`` positionally.
    df.drop(columns=['Close'], inplace=True)
    df['Pct'] = df['Adj Close'].pct_change()
    # Removes the leading rows made NaN by pct_change() and the rolling means.
    df.dropna(inplace=True)

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        # Each column is fitted and transformed on its own.
        for column in ['Open', 'High', 'Low', 'Volume', 'Adj Close'] + ma_columns:
            df[column] = min_max_scaler.fit_transform(
                df[column].values.reshape(-1, 1))

    # Labels are derived from the raw percentage change, so this has to run
    # before ``Pct`` itself is rescaled below.
    df['Pos'] = list(map(buy_sell_hold_pos, df['Pct']))
    df['Neg'] = list(map(buy_sell_hold_neg, df['Pct']))

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        df['Pct'] = min_max_scaler.fit_transform(df.Pct.values.reshape(-1, 1))

    return df
Exemplo n.º 2
0
def get_stock_data(stock_name, normalize=True, ma=None):
    """Download daily quotes for *stock_name*, optionally with moving averages.

    Quotes are fetched from Yahoo through ``web.DataReader`` for the period
    1950-01-01 .. today and the raw ``Close`` column is dropped.

    Args:
        stock_name: Ticker symbol handed to ``web.DataReader``.
        normalize: When true, ``Open``, ``High``, ``Low``, ``Volume``,
            ``Adj Close`` and every moving-average column are rescaled
            independently with ``preprocessing.MinMaxScaler``.
        ma: Optional iterable of rolling-window sizes.  For each size ``n`` a
            column named ``"<n>ma"`` holding the rolling mean of ``Adj Close``
            is added.  ``None`` (the default) adds no such column.

    Returns:
        The resulting DataFrame, with every row containing a NaN removed.
    """
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)

    # Moving averages are computed on the raw prices, before any scaling.
    windows = [] if ma is None else list(ma)
    ma_columns = ['{}ma'.format(window) for window in windows]
    for window, column in zip(windows, ma_columns):
        df[column] = df['Adj Close'].rolling(window=window).mean()

    # Keyword form: pandas >= 2.0 no longer accepts ``axis`` positionally.
    df.drop(columns=['Close'], inplace=True)
    # Removes the leading rows made NaN by the rolling means.
    df.dropna(inplace=True)

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        # Each column is fitted and transformed on its own.
        for column in ['Open', 'High', 'Low', 'Volume', 'Adj Close'] + ma_columns:
            df[column] = min_max_scaler.fit_transform(
                df[column].values.reshape(-1, 1))
    return df
Exemplo n.º 3
0
def denormalize(stock_name, normalized_value):
    """Map min-max scaled ``Adj Close`` values back onto the price scale.

    The ``Adj Close`` series of *stock_name* is downloaded again (Yahoo,
    2000-01-01 .. today) and a fresh ``preprocessing.MinMaxScaler`` is fitted
    on it; that scaler's inverse transform is then applied to
    *normalized_value*.

    Args:
        stock_name: Ticker symbol handed to ``web.DataReader``.
        normalized_value: Array of scaled values; it must provide
            ``reshape`` and is reshaped to a single column.

    Returns:
        A column array (shape ``(n, 1)``) of de-normalized values.
    """
    # NOTE(review): the get_stock_data variants in this file fit their scalers
    # on data starting in 1950, while this range starts in 2000.  If the two
    # ranges have different minima/maxima the result will not match the
    # original prices -- confirm which start date is intended.
    start = datetime.datetime(2000, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)

    adj_close = df['Adj Close'].values.reshape(-1, 1)
    normalized_value = normalized_value.reshape(-1, 1)

    # Only the fitted minimum/maximum are needed; the transformed training
    # data itself is never used.
    min_max_scaler = preprocessing.MinMaxScaler()
    min_max_scaler.fit(adj_close)
    return min_max_scaler.inverse_transform(normalized_value)
def get_stock_data(stock_name, normalize):
    """Download daily quotes for *stock_name* without volume information.

    Quotes are fetched from Yahoo through ``web.DataReader`` for the period
    1950-01-01 .. today; the ``Volume`` and ``Close`` columns are dropped.

    Args:
        stock_name: Ticker symbol handed to ``web.DataReader``.
        normalize: When true, ``Open``, ``High``, ``Low`` and ``Adj Close``
            are rescaled independently with ``preprocessing.MinMaxScaler``.

    Returns:
        The resulting DataFrame.
    """
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    # Keyword form: pandas >= 2.0 no longer accepts ``axis`` positionally.
    df.drop(columns=['Volume', 'Close'], inplace=True)

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        # Each column is fitted and transformed on its own.
        for column in ['Open', 'High', 'Low', 'Adj Close']:
            df[column] = min_max_scaler.fit_transform(
                df[column].values.reshape(-1, 1))
    return df
def creacionElementosDiccionario(intervalo1, intervalo2, hora_del_dia, hora_de_referencia,day,df1_aux,pair):
    """Grid-search an ARIMA order for one tollgate/direction/day and store a forecast.

    Steps:
      1. Read the volumes of the ``intervalo1`` time range of *day* from the
         ``tfgtest1`` database and append them to *df1_aux* to form the
         series the models are fitted on.
      2. Read the volumes of the ``intervalo2`` time range of *day* from the
         ``tfgtraining2`` database; these are the values the forecasts are
         scored against.
      3. Fit ``ARIMA(p, d, q)`` for p in 3..9, d in 0..2, q in 0..4, forecast
         6 steps and keep the order with the lowest mean squared error.
         Orders whose fit or scoring raises are skipped.
      4. Refit with the best order and store the 6 rounded forecasts followed
         by ``str(best_order)`` in the module-level ``predicciones_pair_dia``
         dict under the key ``(pair[0], pair[1], day, hora_del_dia)``.

    Args:
        intervalo1: SQL text placed after ``BETWEEN`` in the first query.
        intervalo2: SQL text placed after ``BETWEEN`` in the second query.
        hora_del_dia: Used only as the last component of the result key.
        hora_de_referencia: ``datetime`` subtracted from each observed row's
            time (re-dated to 2018-01-01) to find its forecast step; one step
            is 1200 seconds.
        day: Date interpolated into ``DATE '...'`` in both queries; also part
            of the result key.
        df1_aux: DataFrame with ``date`` and ``volume`` columns that precedes
            the freshly queried rows in the fitted series.
        pair: Sequence whose first item is the tollgate id and second item is
            the direction.

    Returns:
        None.  The result is written to ``predicciones_pair_dia``.

    Raises:
        psycopg2.Error: If a database connection or query fails.
    """
    # NOTE(review): both statements are built by string concatenation.  That
    # is only safe while every argument comes from trusted code; switch to
    # bound parameters if any of them can carry external input.
    query = "select time_window[1], volume from traffic_volume_tollgates_test1 where tollgate_id = '"+ str(pair[0]) +"' AND direction = " + str(pair[1]) + " AND (time_window[1].time BETWEEN " + intervalo1 + ") AND (time_window[1].date = DATE '"+str(day)+"') order by time_window;"
    rows = _fetch_all("dbname='tfgtest1' user='******' host='localhost' password='******'", query)
    df2 = pd.DataFrame.from_records(rows, columns=['date','volume'])
    result_dataframe = pd.concat([df1_aux,df2])

    # The space in front of "AND" was missing here, which produced
    # "direction = <n>AND (...)" -- rejected as trailing junk after a numeric
    # literal by recent PostgreSQL releases.
    query = "select time_window[1], volume from traffic_volume_tollgates_training2 where tollgate_id = '"+ str(pair[0]) +"' AND direction = " + str(pair[1]) + " AND (time_window[1].date = DATE '"+str(day)+"') AND (time_window[1].time BETWEEN " + intervalo2 + ") order by time_window;"
    rows2 = _fetch_all("dbname='tfgtraining2' user='******' host='localhost' password='******'", query)

    serie = pd.Series(result_dataframe['volume'].values, index=result_dataframe['date'])
    best_score, best_cfg = float("inf"), None
    valores_reales = [element[1] for element in rows2]
    # Forecast step of every observed row: its distance from the reference
    # time in 1200-second units.  Identical for every candidate order, so it
    # is computed once.
    pasos = [
        ((datetime.datetime(2018, 1, 1, element[0].hour, element[0].minute, 0) - hora_de_referencia) / 1200).seconds
        for element in rows2
    ]

    for p in range(3, 10):
        for d in range(3):
            for q in range(5):
                orderr = (p, d, q)
                print("ORDEN : ", orderr)
                try:
                    model = ARIMA(serie, order=orderr)
                    model_fit = model.fit(disp=0)
                    forecast = model_fit.forecast(steps=6)[0]
                    new_forecast = [forecast[paso] for paso in pasos]
                    mse = mean_squared_error(valores_reales, new_forecast)
                    print("MSE : ", mse, " BEST_SCORE: ", best_score)
                    if mse < best_score:
                        print("BEST_SCORE : ", best_score)
                        best_score, best_cfg = mse, orderr
                except Exception:
                    # Orders that cannot be fitted or scored are skipped.
                    # ``Exception`` (not a bare except) keeps Ctrl-C working
                    # during the long search.
                    continue
    print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))

    # NOTE(review): if every candidate failed, best_cfg is still None here and
    # the refit below will raise.
    model = ARIMA(serie, order=best_cfg)
    model_fit = model.fit(disp=0)
    predictions = [round(element) for element in model_fit.forecast(steps=6)[0]]
    # The chosen order is appended as a string after the six forecasts.
    predicciones_pair_dia[pair[0], pair[1], day, hora_del_dia] = np.append(predictions, str(best_cfg))


def _fetch_all(dsn, query):
    """Run *query* on the database described by *dsn* and return all rows.

    The connection is closed before returning.  A connection failure is
    reported with the original message and then re-raised, instead of
    continuing with a missing or stale connection.
    """
    try:
        conn = psycopg2.connect(dsn)
    except psycopg2.Error:
        print("I am unable to connect to the database")
        raise
    try:
        cur = conn.cursor()
        cur.execute(query)
        return cur.fetchall()
    finally:
        conn.close()
   conn = psycopg2.connect("dbname='tfgtraining2' user='******' host='localhost' password='******'")
except:
   print("I am unable to connect to the database")

# Script fragment: collects the routes, days and time slots for which average
# travel time is to be predicted, then starts loading per-route history.
# NOTE(review): `conn` comes from a connect call whose `try:` line is outside
# this fragment.
cur = conn.cursor()
# Rows whose window (time_window[1]) falls in 08:00-09:40 or 17:00-18:40.
cur.execute("""SELECT * FROM travel_time_intersection_to_tollgate_training2 WHERE (time_window[1].time BETWEEN TIME '08:00:00' AND TIME '09:40:00') OR (time_window[1].time BETWEEN TIME '17:00:00' AND TIME '18:40:00') ORDER BY intersection_id, tollgate_id, time_window """)
rows = cur.fetchall()
colnames = ['intersection_id', 'tollgate_id', 'time_window', 'avg_travel_time']
intervals_to_predict_real_avgtraveltime = pd.DataFrame(rows, columns=colnames)
# First two columns: (intersection_id, tollgate_id); third column: time_window.
routes = np.array(intervals_to_predict_real_avgtraveltime.iloc[:,0:2].values.tolist())
time_intervals = np.array(intervals_to_predict_real_avgtraveltime.iloc[:,2].values.tolist())
# Distinct (intersection_id, tollgate_id) rows.
routes = np.unique(routes, axis=0);
# Distinct dates, formatted YYYY-MM-DD, of the first element of each window
# (presumably the window start -- confirm against the table definition).
days = np.unique([time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals], axis=0)
aux = np.array([])
# Re-date every window's first element onto 2016-10-18, keeping only hour and
# minute, so that equal times of day on different dates compare equal.
for time_interval in time_intervals:
        time_interval[0] = datetime.datetime(2016, 10,18, time_interval[0].hour, time_interval[0].minute)
        aux = np.append(aux,time_interval[0]);
# Distinct times of day, in ascending order.
time_intervals = sorted(set(aux))

routes_sum = 0;
for route in routes:
        # A fresh connection is opened for every route.  If connecting fails
        # only a message is printed, so the statements below go on using the
        # previously opened `conn`.
        try:
                conn = psycopg2.connect("dbname='tfgdatosmodificados' user='******' host='localhost' password='******'")
        except:
               print("I am unable to connect to the database")
        cur = conn.cursor()
        # Full travel-time history of this route, ordered by time window.
        query = "select time_window[1], avg_travel_time from travel_time_intersection_to_tollgate_modified  where intersection_id = '" +route[0] +"' AND tollgate_id = " + route[1] + " order by time_window;"
        cur.execute(query)
        rows = cur.fetchall()
        df1 = pd.DataFrame.from_records(rows, columns=['date','avg_travel_time'])
        # Keep only rows with 50 < avg_travel_time < 150 (both bounds exclusive).
        df1 = df1[(df1.avg_travel_time > 50) & (df1.avg_travel_time < 150)]
        # NOTE(review): the fragment ends here; `df1` is not used within it.
   conn = psycopg2.connect("dbname='tfgtraining2' user='******' host='localhost' password='******'")
except:
   print("I am unable to connect to the database")

# Script fragment: collects the tollgate/direction pairs, days and time slots
# for which traffic volume is to be predicted, then starts loading per-pair
# history.
# NOTE(review): `conn` comes from a connect call whose `try:` line is outside
# this fragment.
cur = conn.cursor()
# Rows whose window (time_window[1]) falls in 08:00-09:40 or 17:00-18:40.
cur.execute("""SELECT tollgate_id, direction, time_window, volume FROM traffic_volume_tollgates_training2 WHERE (time_window[1].time BETWEEN TIME '08:00:00' AND TIME '09:40:00') OR (time_window[1].time BETWEEN TIME '17:00:00' AND TIME '18:40:00') ORDER BY tollgate_id, direction, time_window """)
rows = cur.fetchall()
colnames = ['tollgate_id', 'direction', 'time_window', 'volume']
pairs_to_predict_real_trafficvolume = pd.DataFrame(rows, columns=colnames)
# First two columns: (tollgate_id, direction); third column: time_window.
pairs = np.array(pairs_to_predict_real_trafficvolume.iloc[:,0:2].values.tolist())
time_intervals = np.array(pairs_to_predict_real_trafficvolume.iloc[:,2].values.tolist())
# Distinct (tollgate_id, direction) rows.
pairs = np.unique(pairs, axis=0);
# Distinct dates, formatted YYYY-MM-DD, of the first element of each window
# (presumably the window start -- confirm against the table definition).
days = np.unique([time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals], axis=0)
aux = np.array([])
# Re-date every window's first element onto 2016-10-18, keeping only hour and
# minute, so that equal times of day on different dates compare equal.
for time_interval in time_intervals:
        time_interval[0] = datetime.datetime(2016, 10,18, time_interval[0].hour, time_interval[0].minute)
        aux = np.append(aux,time_interval[0]);
# Distinct times of day, in ascending order.
time_intervals = sorted(set(aux))


# Keyed by (tollgate_id, direction, day, hour) in creacionElementosDiccionario.
predicciones_pair_dia = dict()
count = 0;
for pair in pairs:
        # A fresh connection is opened for every pair.  If connecting fails
        # only a message is printed, so the statements below go on using the
        # previously opened `conn`.
        try:
                conn = psycopg2.connect("dbname='tfgdatosmodificados' user='******' host='localhost' password='******'")
        except:
               print("I am unable to connect to the database")
        cur = conn.cursor()
        # Full volume history of this tollgate/direction, ordered by time window.
        query = "select time_window[1], volume from traffic_volume_tollgates_modified where tollgate_id = '" +str(pair[0]) +"' AND direction = " + str(pair[1]) + " order by time_window;"
        cur.execute(query)
        rows = cur.fetchall()
        # NOTE(review): the fragment ends here; `rows` is not used within it.
Exemplo n.º 8
0
# In[73]:


# Lag plot of the opening price against itself five rows earlier.
plt.figure(figsize=(16, 8))
lag_plot(azn_df['open'], lag=5)
plt.title('AZN Stock - Autocorrelation plot with lag = 5')
plt.show()


# In[74]:


# Closing price over time, with one x tick at the start of every year from
# 2010 through 2021.
plt.figure(figsize=(16, 8))
plt.plot(azn_df["date"], azn_df["close"])
xticks = pd.date_range(
    datetime.datetime(2010, 1, 1),
    datetime.datetime(2021, 1, 1),
    freq='YS',
).to_pydatetime()
plt.xticks(xticks)
plt.xlabel("time")
plt.ylabel("price")
plt.title("AZN stock price over time")
plt.show()


# In[116]:


# Positional 80 % / 20 % split of the rows (no shuffling), each part indexed
# by its 'date' column.
X_train = azn_df[:int(len(azn_df) * 0.8)].set_index('date')
X_test = azn_df[int(len(azn_df) * 0.8):].set_index('date')
X_test
import h5py

#tensorflow at backend

#input parameter

# Ticker symbol passed to get_stock_data() further down.
stock_name = '^GSPC'
# Window length; also used as the second entry of `shape`.
seq_len = 22
# NOTE(review): presumably the dropout rate of a model defined outside this
# view -- confirm.
d = 0.2
shape = [4, seq_len, 1] # feature, window, output
# NOTE(review): presumably per-layer unit counts of that model -- confirm.
neurons = [128, 128, 32, 1]
# NOTE(review): presumably the number of training epochs -- confirm.
epochs = 300

#data from 1950

def get_stock_data(stock_name, normalize):
    """Download daily quotes for *stock_name* without volume information.

    Quotes are fetched from Yahoo through ``web.DataReader`` for the period
    1950-01-01 .. today; the ``Volume`` and ``Close`` columns are dropped.

    Args:
        stock_name: Ticker symbol handed to ``web.DataReader``.
        normalize: When true, ``Open``, ``High``, ``Low`` and ``Adj Close``
            are rescaled independently with ``preprocessing.MinMaxScaler``.

    Returns:
        The resulting DataFrame.
    """
    # The ``def`` line of this function was missing, leaving an indented body
    # with a ``return`` at module level; the header is restored to match the
    # ``get_stock_data(stock_name, normalize=True)`` call that follows.
    start = datetime.datetime(1950, 1, 1)
    end = datetime.date.today()
    df = web.DataReader(stock_name, "yahoo", start, end)
    # Keyword form: pandas >= 2.0 no longer accepts ``axis`` positionally.
    df.drop(columns=['Volume', 'Close'], inplace=True)

    if normalize:
        min_max_scaler = preprocessing.MinMaxScaler()
        # Each column is fitted and transformed on its own.
        for column in ['Open', 'High', 'Low', 'Adj Close']:
            df[column] = min_max_scaler.fit_transform(
                df[column].values.reshape(-1, 1))
    return df

df = get_stock_data(stock_name, normalize=True)

#plot normalized closing price
)
# Script fragment: collects the routes, days and time slots for which average
# travel time is to be predicted.
# NOTE(review): the statement that created `cur` and ran the query is cut off
# above this line.
rows = cur.fetchall()
colnames = ['intersection_id', 'tollgate_id', 'time_window', 'avg_travel_time']
intervals_to_predict_real_avgtraveltime = pd.DataFrame(rows, columns=colnames)
# First two columns: (intersection_id, tollgate_id); third column: time_window.
routes = np.array(
    intervals_to_predict_real_avgtraveltime.iloc[:, 0:2].values.tolist())
time_intervals = np.array(
    intervals_to_predict_real_avgtraveltime.iloc[:, 2].values.tolist())
# Distinct (intersection_id, tollgate_id) rows.
routes = np.unique(routes, axis=0)
# Distinct dates, formatted YYYY-MM-DD, of the first element of each window
# (presumably the window start -- confirm against the table definition).
days = np.unique([
    time_interval[0].strftime("%Y-%m-%d") for time_interval in time_intervals
],
                 axis=0)
aux = np.array([])
# Re-date every window's first element onto 2016-10-18, keeping only hour and
# minute, so that equal times of day on different dates compare equal.
for time_interval in time_intervals:
    time_interval[0] = datetime.datetime(2016, 10, 18, time_interval[0].hour,
                                         time_interval[0].minute)
    aux = np.append(aux, time_interval[0])
# Distinct times of day, in ascending order.
time_intervals = sorted(set(aux))

# Result container and counter; neither is updated within this fragment.
predicciones_ruta_dia = dict()
count = 0
for route in routes:
    try:
        conn = psycopg2.connect(
            "dbname='tfgdatosmodificados' user='******' host='localhost' password='******'"
        )
    except:
        print("I am unable to connect to the database")
    cur = conn.cursor()
    query = "select time_window[1], avg_travel_time from travel_time_intersection_to_tollgate_modified  where intersection_id = '" + route[
        0] + "' AND tollgate_id = " + route[1] + " order by time_window;"