def load_csv_timeData(self, file_dir="./storage/"):
    # The file must have the name:
    #  symbolName_TimeScale.csv
    # TODO put periods in loading
    whole_path = file_dir + ul.period_dic[
        self.period] + "/" + self.symbol + "_" + ul.period_dic[
            self.period] + ".csv"
    try:
        dataCSV = pd.read_csv(whole_path,
                              sep=',',
                              index_col=0,
                              dtype={"Date": dt.datetime})

        dataCSV.index = ul.str_to_datetime(dataCSV.index.tolist())

    except IOError:
        error_msg = "File does not exist: " + whole_path
        print error_msg
        dataCSV = ul.empty_df

    except:
        print "Unexpected error in file: " + whole_path
    # We transform the index to the real ones

    return dataCSV
示例#2
0
def download_TD_yahoo(symbol="AAPL",
                      precision="m",
                      start_date=dt.datetime(2016, 1, 1),
                      end_date=dt.datetime(2017, 1, 1)):
    """Download historical prices for *symbol* from the (legacy) Yahoo
    Finance CSV endpoint.

    Parameters
    ----------
    symbol : str
        Ticker symbol to download.
    precision : str
        Sampling frequency code passed as the "g" parameter
        (e.g. "d" daily, "w" weekly, "m" monthly).
    start_date, end_date : datetime.datetime
        Inclusive date range to request.

    Returns a pandas DataFrame indexed by datetime with the columns
    returned by Yahoo ('Open', 'High', 'Low', 'Close', 'Volume',
    'Adj Close').

    NOTE(review): chart.finance.yahoo.com/table.csv was retired by
    Yahoo; this call will fail until pointed at a live endpoint.
    """
    sdate = start_date
    edate = end_date

    # Legacy table.csv query-string convention:
    #   a = start month (0-based), b = start day, c = start year
    #   d = end month   (0-based), e = end day,   f = end year
    # BUGFIX: the original passed the day into a/d and a 1-based month
    # into b/e, silently requesting the wrong date range.
    url_root = "http://chart.finance.yahoo.com/table.csv?"
    url_root += "s=" + symbol
    url_root += "&a=" + str(sdate.month - 1) + "&b=" + str(
        sdate.day) + "&c=" + str(sdate.year)
    url_root += "&d=" + str(edate.month - 1) + "&e=" + str(
        edate.day) + "&f=" + str(edate.year)
    url_root += "&g=" + precision
    url_root += "&ignore=.csv"

    # Fetch the raw CSV body and split it into rows of fields.
    response = urlopen(url_root)
    data = response.read().split('\n')
    for i in range(len(data)):
        data[i] = data[i].split(",")

    df = pd.DataFrame(data)
    # The first CSV row carries the headers:
    # ['Date','Open','High','Low','Close','Volume','Adj Close'].
    # (.ix was removed from pandas; .iloc is the positional accessor.)
    df.columns = df.iloc[0]

    ### REMOVE FIRST ROW (Headers)
    df.drop(0, inplace=True)
    ### REMOVE LAST ROW (empty row produced by the trailing '\n')
    df.drop(df.index.values[len(df) - 1], inplace=True)
    ### CONVERT DATES TO TIMESTAMPS for the index
    df.index = ul.str_to_datetime(df.Date)

    # The Date column is now redundant with the index.
    del df['Date']
    return df
示例#3
0
def download_TD_yahoo(symbol = "AAPL", precision = "m", 
                   start_date = dt.datetime(2016,1,1), end_date = dt.datetime(2017,1,1)):
    """Download historical prices for *symbol* from the legacy Yahoo
    Finance CSV endpoint and return them as a pandas DataFrame indexed
    by datetime.

    NOTE(review): the legacy table.csv convention is a=start month
    (0-based), b=start day, c=start year (and d/e/f for the end date);
    this code passes the day into a/d and a 1-based month into b/e --
    confirm the intended date range. The endpoint itself has been
    retired by Yahoo.
    """
    # data1 = dt.datetime.fromtimestamp(1284101485)
    sdate = start_date
    edate = end_date
    
#    sdate_ts = int(get_timeStamp(sdate))
#    edae_ts = int(get_timeStamp(edate))

#    url_root = "https://finance.yahoo.com/quote/"
#    url_root += symbol
#    url_root += "/history?"
#    url_root += "period1=" + str(sdate_ts)
#    url_root += "&period2=" + str(edate_ts)
#    url_root += "&interval=" + precision
#    url_root += "&filter=history&frequency=" + precision
 
    # Build the query string for the legacy table.csv endpoint.
    url_root = "http://chart.finance.yahoo.com/table.csv?"
    url_root += "s=" + symbol
    url_root += "&a=" +str(sdate.day)+ "&b=" +str(sdate.month)+ "&c=" +str(sdate.year)
    url_root += "&d=" +str(edate.day)+ "&e=" +str(edate.month)+"&f="+str(edate.year)
    url_root += "&g=" + precision
    url_root += "&ignore=.csv"
    
#    print url_root
    # Fetch the raw CSV text and split it into rows of fields.
    response = urlopen(url_root)
    data = response.read().split('\n')
    nlines = len(data)
    for i in range(nlines):
        data[i] = data[i].split(",")
        
#    print data[0:4]
    df = pd.DataFrame(data)
    df.columns = df.ix[0]  #['Date','Open', 'High', 'Low', 'Close', 'Volume', "Adj Close"]
#    print df.columns
 
    
    ### REMOVE FIRST ROW (Headers) 
    df.drop(0, inplace = True)
    ### REMOVE LAST ROW (empty row produced by the trailing '\n')
#    print len(df) - 1
#    print df.ix[len(df) - 1]
    df.drop(df.index.values[len(df) - 1], inplace = True)
    ### CONVERT DATES TO TIMESTAMPS for the index
#    print df.Date
    df.index = ul.str_to_datetime(df.Date)
    
    # The Date column is now redundant with the index.
    del df['Date']
    return df
示例#4
0
def load_csv_timeData(symbol, file_dir = "./storage/"):

    whole_path = file_dir + symbol + ".csv"
    try:
        dataCSV = pd.read_csv(whole_path,
                          sep = ',', index_col = 0, dtype = {"Date":dt.datetime})
    
        dataCSV.index = ul.str_to_datetime (dataCSV.index.tolist())
        
    except IOError:
        error_msg = "File does not exist: " + whole_path 
        print error_msg
    except:
        print "Unexpected error in file: " + whole_path
    # We transform the index to the real ones
    return dataCSV
示例#5
0
plt.close("all")

################# FLAGS to activate ########################
model_OLS = 1      # Use linear model from the OLS library
model_sklearn = 1  # Use linear model from the sklearn library
lag_analysis = 1
################# READ THE DATA FROM DISK ########################
# This way we do not have to write everything again.
# We create the data in the previous file up to some point and
# we read it with this one.

######## PANDAS FORMAT
folder_dataFeatures = "./data/"
data = pd.read_csv(folder_dataFeatures + "dataPandas.csv", sep = ',', index_col = 0, 
                      dtype = {"Date":dt.datetime})
# The CSV stores dates as strings; convert the index to datetimes.
data.index = ul.str_to_datetime (data.index.tolist())
######## NUMPY ARRAYS FORMAT
X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",")
price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",")
# BUGFIX: the original reshaped with Y_data.size, but Y_data is not
# defined until below, raising NameError. Reshape price into the
# column vector it was before being written to disk using its own size.
price = price.reshape(price.size, 1)
dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",")
## Generate the Y variable to estimate: the lag-return of the price,
## shifted back so that Y[t] is the future return seen from t.
lag = 20
Y_data = bMA.get_return(price, lag = lag)
Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN)
if (model_OLS):
    # Makes use of the pandas structures
    ##############################################################################
    # Multilinear regression model, calculating fit, P-values, confidence
    # intervals etc.
示例#6
0
import pandas as pd
plt.close("all")  # Close all previous Windows
import indicators_lib as intl

# Script flags: each section below runs only when its flag is set to 1.
load = 0
plotting_zones = 0
spot_hired_analysis = 0

if (load):
    # CSV exports; file1 is loaded below with TrackingTime as the index.
    file1 = "Michael Schat-Holm - RainMaking output (Final).csv"
    file2 = "DFDS order data.csv"
    df = pd.read_csv("./RainMaking/" + file1,
                     sep=";",
                     index_col=0,
                     dtype={"TrackingTime": dt.datetime})
    # Index strings -> real datetime objects.
    df.index = ul.str_to_datetime(df.index.tolist())
    keys = df.keys()
    # Drop every row that has a missing value in any column.
    df = df.dropna(how='any', thresh=None, subset=keys, inplace=False)

    #    df.reset_index(inplace=True)
    Nsamples, Ndim = df.shape

    # Now we separate the dataframe into different ones, one per Administration
    df_list = []
    orgcol = "OrganizationName"  # "OrganizationCode"
    DifferenOC = np.unique(df[orgcol])

    # Remove the rows belonging to the organizations at these positions
    # in DifferenOC (indices chosen manually), then recompute the set
    # of remaining organizations.
    droplist = [3, 9, 11, 13]
    for dropi in droplist:
        df = df[df[orgcol] != DifferenOC[dropi]]
    DifferenOC = np.unique(df[orgcol])