def get_timeSeriesReturn(self, transform="log"): # Gets the Return of the Time Series, if it has not been created yet, then it creates it # if (self.timeSeries == []): # Check existence of timeSeries transform = "pene" # We will try as well to get the return of the first datapoint # if we actually have it in the database. For this, we check our mask. # If the first "1" found is not at 0, we can do this self.get_timeSeries(transform="tus muertos") pos1 = (self.time_mask).tolist().index(1) if (pos1 > 0): # If we actually have more signal. ps = self.TD[self.seriesNames].iloc[pos1 - 1] ps = np.array(ps).T ps = ps.reshape(ps.size / len(self.seriesNames), len(self.seriesNames)) # print ps # print self.timeSeries.shape # print ps.shape self.timeSeriesReturn = bMl.get_return( np.concatenate((ps, self.timeSeries), axis=0)) self.timeSeriesReturn = self.timeSeriesReturn[1:, :] else: self.timeSeriesReturn = bMl.get_return(self.timeSeries) if (transform == "log"): ## We perform log of this shit + 1 to get the log returns self.timeSeriesReturn = np.log(self.timeSeriesReturn + 1) return copy.deepcopy(self.timeSeriesReturn)
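# For reference, a minimal sketch of what bMl.get_return is assumed to compute
# here: the column-wise simple return, with the first `lag` rows padded with
# cval. This is an assumption about the library, not its actual API.
import numpy as np

def get_return_sketch(prices, lag=1, cval=0.0):
    # r_t = p_t / p_{t-lag} - 1, computed per column; the first `lag` rows
    # have no previous price, so they are filled with cval.
    prices = np.asarray(prices, dtype=float)
    ret = np.full_like(prices, cval)
    ret[lag:] = prices[lag:] / prices[:-lag] - 1.0
    return ret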
def Chaikin_vol(df, n=14):
    # Chaikin volatility: n-period rate of change of the EMA of the High-Low range.
    HLRange = df['High'] - df['Low']
    EMA = HLRange.ewm(span=n, min_periods=n).mean()
    Chaikin_volat = bMA.get_return(ul.fnp(EMA), lag=n, cval=np.NaN)
    Chaikin_volat = ul.fnp(Chaikin_volat)
    return [ul.fnp(EMA), Chaikin_volat]
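# A hypothetical usage sketch of Chaikin_vol on synthetic OHLC data. The
# 'High'/'Low' column names match what the function expects; the random walk
# below is only illustrative.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
close = 100 + np.cumsum(rng.normal(0, 1, 200))
df_ohlc = pd.DataFrame({"High": close + rng.uniform(0.5, 2.0, 200),
                        "Low": close - rng.uniform(0.5, 2.0, 200)})
EMA_hl, chaikin_volat = Chaikin_vol(df_ohlc, n=14)  # both as column arrays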
def get_timeSeriesReturn(self, seriesNames=[], indexes=[], transform="no"):
    # Gets the return of the time series; if it has not been computed yet, it computes it.
    # if (self.timeSeries == []):   # Check existence of timeSeries
    # We also try to get the return of the first datapoint if we actually have
    # the previous sample in the database. For this we check our mask:
    # if the first "1" found is not at position 0, we can do it.
    self.set_inner_timeSeries(seriesNames, indexes)
    timeSeries = self.get_timeSeries(seriesNames, indexes)
    # Position of the first sample we are using
    # pos1 = self.time_mask[0]
    # TODO: make it work for series names that are not in the dataset.
    # if (pos1 > 0 and self.period >= 1440):  # If we actually have more signal.
    if (0):
        # Disabled: we could compute the real previous return by concatenating
        # the previous sample, computing the return and then removing the first
        # row. For now this only works if the time series is one of the
        # originals, not one of the transformations, because then we would have
        # to apply the transformation as well, which we do not want to do here.
        # ps = self.TD[self.seriesNames].iloc[pos1 - 1]
        # ps = np.array(ps).T
        # ps = ps.reshape(ps.size // len(self.seriesNames), len(self.seriesNames))
        ## We obtain the returns of the signal, adding the previous sample.
        timeSeriesPlus = self.get_timeSeries(
            indexes=np.insert(indexes, 0, pos1 - 1))
        self.timeSeriesReturn = bMl.get_return(timeSeriesPlus)
        self.timeSeriesReturn = self.timeSeriesReturn[1:, :]
    else:
        self.timeSeriesReturn = bMl.get_return(timeSeries)
    if (transform == "log"):
        ## We take log(1 + r) to get the log returns.
        self.timeSeriesReturn = np.log(self.timeSeriesReturn + 1)
    return copy.deepcopy(self.timeSeriesReturn)
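# Quick sanity check (pure NumPy) that the transform == "log" branch is sound:
# log(1 + simple_return) equals the log return log(p_t / p_{t-1}).
import numpy as np

p = np.array([100.0, 102.0, 99.0, 101.5])
simple_ret = p[1:] / p[:-1] - 1.0
assert np.allclose(np.log(1.0 + simple_ret), np.log(p[1:] / p[:-1]))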
RSI_vel = indl.get_SMA(ATR_vel, L=nsmooth_vel)  # NOTE: this smooths ATR_vel;
# if the intent was to smooth the RSI velocity, this input looks like a
# copy-paste slip.

###########################################################
################# PREPARE THE DATA ########################
###########################################################
X_data = np.concatenate((MACD[:, [indx]], MACD_vel[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI[:, [indx]], ATR[:, [indx]]), axis=1)
X_data = np.concatenate((X_data, RSI_vel[:, [indx]], ATR_vel[:, [indx]]), axis=1)

Y_data = bMA.diff(prices[:, indx], lag=lag, cval=np.NaN)
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

### Returns
lag_ret = 20
return_Ydata = bMA.get_return(prices[:, [indx]], lag=lag_ret)
reconstruct_Ydata = bMA.reconstruc_return(prices[:, [indx]], return_Ydata,
                                          lag=lag_ret)

gl.plot([], prices[:, [indx]], legend=["price"])
gl.plot([], reconstruct_Ydata, nf=0, legend=["reconstruction"])
gl.plot([], return_Ydata, nf=0, na=1, legend=["return"])

Y_data = return_Ydata
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

def filter_by_risk():
    # This function will filter the samples used in the analysis.
    # We should also analyse abs(ret) to detect the periods of high risk.
    pass
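# A hypothetical sketch of what filter_by_risk could do, following the idea in
# its comment: build a boolean mask that drops the samples whose absolute
# return falls in the top (1 - q) quantile, i.e. the highest-risk observations.
import numpy as np

def filter_by_risk_sketch(returns, q=0.95):
    # Keep only the samples with |return| below the q-quantile threshold.
    returns = np.asarray(returns, dtype=float).ravel()
    thresh = np.nanquantile(np.abs(returns), q)
    return np.abs(returns) < thresh  # mask of the samples to keep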
# We create the data in the previous file up to some point and
# we read it with this one.

######## PANDAS FORMAT
folder_dataFeatures = "./data/"
data = pd.read_csv(folder_dataFeatures + "dataPandas.csv",
                   sep=',', index_col=0, dtype={"Date": dt.datetime})
data.index = ul.str_to_datetime(data.index.tolist())

######## NUMPY ARRAYS FORMAT
X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",")
price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",")
# TODO: just to put it back in the shape it had before writing it to disk.
# Note: the previous code reshaped with Y_data.size, but Y_data is only
# defined below; price.size is the same count and is available here.
price = price.reshape(price.size, 1)
dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",")

## Generate the Y variable to estimate
lag = 20
Y_data = bMA.get_return(price, lag=lag)
Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN)

if (model_OLS):
    # Makes use of the pandas structures
    ##############################################################################
    # Multilinear regression model, calculating fit, P-values, confidence
    # intervals etc.

    # Fit the model
    model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel",
                data).fit()
    params = model._results.params
    # Print the summary
    print(model.summary())
    print("OLS model Parameters")
    print(params)

    # Perform analysis of variance on the fitted linear model
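# Self-contained sketch of the same formula-API fit on synthetic data, assuming
# `ols` is statsmodels.formula.api.ols. model.params is the public accessor
# equivalent to model._results.params, and anova_lm covers the "analysis of
# variance" step mentioned above. The data below is a stand-in, not the real
# feature set.
import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

rng = np.random.default_rng(0)
df_synth = pd.DataFrame(rng.normal(size=(500, 3)),
                        columns=["MACD", "RSI", "ATR"])
df_synth["Y"] = (0.5 * df_synth["MACD"] - 0.2 * df_synth["RSI"]
                 + rng.normal(0, 0.1, 500))

model_synth = ols("Y ~ MACD + RSI + ATR", df_synth).fit()
print(model_synth.summary())
print(model_synth.params)      # public accessor for the coefficients
print(anova_lm(model_synth))   # analysis of variance on the fitted model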
folder_dataFeatures = "./data/" data = pd.read_csv(folder_dataFeatures + "dataPandas.csv", sep=',', index_col=0, dtype={"Date": dt.datetime}) data.index = ul.str_to_datetime(data.index.tolist()) ######## NUMPY ARRAYS FORMAT X_data = np.loadtxt(folder_dataFeatures + "Xdata.csv", delimiter=",") price = np.loadtxt(folder_dataFeatures + "price.csv", delimiter=",") price = price.reshape( Y_data.size, 1) # TODO: Just to put it in the sahpe as it was before writing it to disk dates = np.loadtxt(folder_dataFeatures + "dates.csv", delimiter=",") ## Generate the Y variable to estimate lag = 20 Y_data = bMA.get_return(price, lag=lag) Y_data = bMA.shift(Y_data, lag=-lag, cval=np.NaN) if (model_OLS): # Makes use of the pandas structures ############################################################################## # Multilinear regression model, calculating fit, P-values, confidence # intervals etc. # Fit the model model = ols("Y ~ MACD + RSI + ATR + MACD_vel + ATR_vel + RSI_vel", data).fit() params = model._results.params # Print the summary print(model.summary()) print("OLS model Parameters") print(params)