def load_csv_timeData(self, file_dir="./storage/"):
    # Load the candles of this symbol from disk.
    # The file must have the name: symbolName_TimeScale.csv
    # TODO: put periods in loading
    whole_path = (file_dir + ul.period_dic[self.period] + "/" +
                  self.symbol + "_" + ul.period_dic[self.period] + ".csv")
    try:
        dataCSV = pd.read_csv(whole_path, sep=',', index_col=0,
                              dtype={"Date": dt.datetime})
        # Transform the string index into real datetime objects
        dataCSV.index = ul.str_to_datetime(dataCSV.index.tolist())
    except IOError:
        print("File does not exist: " + whole_path)
        dataCSV = ul.empty_df
    except:
        print("Unexpected error in file: " + whole_path)
        dataCSV = ul.empty_df  # Return an empty frame on failure as well
    return dataCSV
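# Usage sketch. Assumptions: this method lives on a timeData-style class that
# exposes "symbol" and "period" attributes, ul.period_dic maps the period code
# to a folder name (e.g. "D1"), and the class name below is hypothetical.
#
#   timeData = CTimeData(symbol="AAPL", period=1440)   # hypothetical constructor
#   candles = timeData.load_csv_timeData(file_dir="./storage/")
#   if candles.empty:
#       print("No candles loaded for " + timeData.symbol)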
def download_TD_yahoo(symbol="AAPL", precision="m", start_date=dt.datetime(2016, 1, 1), end_date=dt.datetime(2017, 1, 1)): # data1 = dt.datetime.fromtimestamp(1284101485) sdate = start_date edate = end_date # sdate_ts = int(get_timeStamp(sdate)) # edae_ts = int(get_timeStamp(edate)) # url_root = "https://finance.yahoo.com/quote/" # url_root += symbol # url_root += "/history?" # url_root += "period1=" + str(sdate_ts) # url_root += "&period2=" + str(edate_ts) # url_root += "&interval=" + precision # url_root += "&filter=history&frequency=" + precision url_root = "http://chart.finance.yahoo.com/table.csv?" url_root += "s=" + symbol url_root += "&a=" + str(sdate.day) + "&b=" + str( sdate.month) + "&c=" + str(sdate.year) url_root += "&d=" + str(edate.day) + "&e=" + str( edate.month) + "&f=" + str(edate.year) url_root += "&g=" + precision url_root += "&ignore=.csv" # print url_root response = urlopen(url_root) data = response.read().split('\n') nlines = len(data) for i in range(nlines): data[i] = data[i].split(",") # print data[0:4] df = pd.DataFrame(data) df.columns = df.ix[ 0] #['Date','Open', 'High', 'Low', 'Close', 'Volume', "Adj Close"] # print df.columns ### REMOVE FIRST ROW (Headers) df.drop(0, inplace=True) ### REMOVE LAST ROW (Nones) # print len(df) - 1 # print df.ix[len(df) - 1] df.drop(df.index.values[len(df) - 1], inplace=True) ### CONEVERT DATES TO TIMESTAMPS (Nones) # print df.Date df.index = ul.str_to_datetime(df.Date) del df['Date'] ## # We have to return df
def download_TD_yahoo(symbol = "AAPL", precision = "m", start_date = dt.datetime(2016,1,1), end_date = dt.datetime(2017,1,1)): # data1 = dt.datetime.fromtimestamp(1284101485) sdate = start_date edate = end_date # sdate_ts = int(get_timeStamp(sdate)) # edae_ts = int(get_timeStamp(edate)) # url_root = "https://finance.yahoo.com/quote/" # url_root += symbol # url_root += "/history?" # url_root += "period1=" + str(sdate_ts) # url_root += "&period2=" + str(edate_ts) # url_root += "&interval=" + precision # url_root += "&filter=history&frequency=" + precision url_root = "http://chart.finance.yahoo.com/table.csv?" url_root += "s=" + symbol url_root += "&a=" +str(sdate.day)+ "&b=" +str(sdate.month)+ "&c=" +str(sdate.year) url_root += "&d=" +str(edate.day)+ "&e=" +str(edate.month)+"&f="+str(edate.year) url_root += "&g=" + precision url_root += "&ignore=.csv" # print url_root response = urlopen(url_root) data = response.read().split('\n') nlines = len(data) for i in range(nlines): data[i] = data[i].split(",") # print data[0:4] df = pd.DataFrame(data) df.columns = df.ix[0] #['Date','Open', 'High', 'Low', 'Close', 'Volume', "Adj Close"] # print df.columns ### REMOVE FIRST ROW (Headers) df.drop(0, inplace = True) ### REMOVE LAST ROW (Nones) # print len(df) - 1 # print df.ix[len(df) - 1] df.drop(df.index.values[len(df) - 1], inplace = True) ### CONEVERT DATES TO TIMESTAMPS (Nones) # print df.Date df.index = ul.str_to_datetime(df.Date) del df['Date'] ## # We have to return df
def load_csv_timeData(symbol, file_dir="./storage/"):
    # Load the candles of a symbol from a CSV file on disk.
    whole_path = file_dir + symbol + ".csv"
    dataCSV = ul.empty_df  # Fallback so that we always return a DataFrame
    try:
        dataCSV = pd.read_csv(whole_path, sep=',', index_col=0,
                              dtype={"Date": dt.datetime})
        # Transform the string index into real datetime objects
        dataCSV.index = ul.str_to_datetime(dataCSV.index.tolist())
    except IOError:
        print("File does not exist: " + whole_path)
    except:
        print("Unexpected error in file: " + whole_path)
    return dataCSV
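# Example usage, assuming a previous download left "./storage/AAPL.csv" on
# disk (e.g. the download_TD_yahoo + to_csv flow sketched above):
#
#   AAPL = load_csv_timeData("AAPL", file_dir="./storage/")
#   print(AAPL.head())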
plt.close("all") ################# FLAGS to activate ######################## model_OLS = 1 # Use linear model from the OLS library model_sklearn = 1 # Use linear model from the sklearn library lag_analysis = 1 ################# READ THE DATA FROM DISK ######################## # This way we do not have to write everyhing again. # We create the data in the previous file up to some point and # we read it with this one. ######## PANDAS FORMAT folder_dataFeatures = "./data/" data = pd.read_csv(folder_dataFeatures + "dataPandas.csv", sep = ',', index_col = 0, dtype = {"Date":dt.datetime}) data.index = ul.str_to_datetime (data.index.tolist()) ######## NUMPY ARRAYS FORMAT X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",") price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",") price = price.reshape(Y_data.size,1) # TODO: Just to put it in the sahpe as it was before writing it to disk dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",") ## Generate the Y variable to estimate lag = 20 Y_data = bMA.get_return(price, lag = lag) Y_data = bMA.shift(Y_data, lag = -lag, cval = np.NaN) if (model_OLS): # Makes use of the pandas structures ############################################################################## # Multilinear regression model, calculating fit, P-values, confidence # intervals etc.
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import indicators_lib as intl
# "ul" is the project's utilities module (str_to_datetime, ...), imported elsewhere.

plt.close("all")  # Close all previous windows

load = 0
plotting_zones = 0
spot_hired_analysis = 0

if (load):
    file1 = "Michael Schat-Holm - RainMaking output (Final).csv"
    file2 = "DFDS order data.csv"

    df = pd.read_csv("./RainMaking/" + file1, sep=";", index_col=0,
                     dtype={"TrackingTime": dt.datetime})
    df.index = ul.str_to_datetime(df.index.tolist())
    keys = df.keys()
    # Drop rows with any missing value
    df = df.dropna(how='any', thresh=None, subset=keys, inplace=False)
    # df.reset_index(inplace=True)
    Nsamples, Ndim = df.shape

    # Now we separate the dataframe into different ones, one per Administration
    # (a sketch of the split follows below).
    df_list = []
    orgcol = "OrganizationName"  # "OrganizationCode"
    DifferenOC = np.unique(df[orgcol])

    # Remove the organizations we are not interested in
    droplist = [3, 9, 11, 13]
    for dropi in droplist:
        df = df[df[orgcol] != DifferenOC[dropi]]
    DifferenOC = np.unique(df[orgcol])
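    # Sketch (assumption): one straightforward way to populate df_list with one
    # DataFrame per remaining organization; the file's own splitting code is
    # not part of this excerpt.
    for org in DifferenOC:
        df_org = df[df[orgcol] == org].copy()
        df_list.append(df_org)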