def __init__(self, ticker, over=7):
    """Store the ticker settings and build the per-instance transformers.

    ticker -- kept on the instance as ``self.ticker``.
    over   -- kept on the instance as ``self.over`` (defaults to 7).
    """
    super(Stock, self).__init__()

    # Columns handed to the max-rescaler.
    rescaled_columns = [
        'Adjusted Close', 'Volume', 'Open', 'Low', 'High', 'SP Close',
    ]

    self.ticker, self.over = ticker, over
    self.rescale = MaxRescale(rescaled_columns)
    self.bollinger10 = BollingerBand(window=10)
    self.bollinger5 = BollingerBand(window=5)
class Stock(object):
    """Build a feature table and a matching label series for one ticker.

    The instance keeps the ticker, the ``over`` value passed to ``Return``,
    a ``MaxRescale`` transformer and two ``BollingerBand`` helpers.
    ``get_data`` is the public entry point.
    """

    def __init__(self, ticker, over=7):
        """Store the settings and create the transformers.

        ticker -- symbol passed to ``QuandlAPI.get_data`` and used in the
                  cache file names.
        over   -- argument given to ``Return`` when the label is built.
        """
        super(Stock, self).__init__()
        self.ticker = ticker
        self.over = over
        self.rescale = MaxRescale(
            ['Adjusted Close', 'Volume', 'Open', 'Low', 'High', 'SP Close'])
        self.bollinger10 = BollingerBand(window=10)
        # NOTE: bollinger5 is constructed but not applied in get_data.
        self.bollinger5 = BollingerBand(window=5)

    def get_data(self, start_date, end_date, fit=False, cache=True):
        """Return ``(data, label)`` for the requested period.

        start_date, end_date -- bounds forwarded to ``QuandlAPI.get_data``,
            used in the cache file names and used to slice the result.
        fit   -- when true, ``self.rescale.fit(data, start_date, end_date)``
            is called before the rescaling transform is applied.
        cache -- when true, the raw data/label are read from
            ``d<start>_<end>_<ticker>.csv`` / ``l<start>_<end>_<ticker>.csv``
            in the working directory if both exist, and written there
            otherwise.

        Returns a tuple ``(data, label)`` sharing the same index: ``data``
        holds the price columns plus the derived indicators (without
        ``'Close'``), ``label`` the values produced by ``Return(self.over)``.
        Both are empty when no row survives the NaN / date filtering.
        """
        d_file_name = "d{}_{}_{}.csv".format(start_date, end_date, self.ticker)
        l_file_name = "l{}_{}_{}.csv".format(start_date, end_date, self.ticker)

        if cache and os.path.exists(d_file_name) and os.path.exists(l_file_name):
            data, label = self._read_cache(d_file_name, l_file_name)
        else:
            data, label = self._download(start_date, end_date)
            # Cache the raw data
            if cache:
                data.to_csv(d_file_name, index=True)
                # header=False is explicit because the reader uses
                # header=None and the Series.to_csv default differs
                # between pandas versions.
                label.to_csv(l_file_name, index=True, header=False)

        ##########
        # Adjust #
        ##########
        # TODO There may be a problem when subsequent calls are made.
        # The adjustment is always relative to the last date, so two
        # subsequent calls with two different periods will be adjusted
        # differently. Not sure if it matters or not.
        ratio = data['Adjusted Close'] / data['Close']
        data['High'] = data['High'] * ratio
        data['Low'] = data['Low'] * ratio
        data['Open'] = data['Open'] * ratio

        ##################
        # Rescale by max #
        ##################
        if fit:
            self.rescale.fit(data, start_date, end_date)
        data = self.rescale.transform(data)

        data = self._add_indicators(data)

        data, label = self._align(data, label, start_date, end_date)
        del data['Close']
        return data, label

    @staticmethod
    def _read_cache(d_file_name, l_file_name):
        """Load the cached data frame and label series from CSV."""
        # parse_dates=True parses the index column (index_col=0).
        data = pd.read_csv(d_file_name, index_col=0, parse_dates=True)
        label_frame = pd.read_csv(
            l_file_name, index_col=0, parse_dates=True, header=None)
        # Take the single value column as a Series (replaces squeeze=True,
        # which recent pandas no longer accepts).
        return data, label_frame.iloc[:, 0]

    def _download(self, start_date, end_date):
        """Fetch prices, build the label, add the index close, mask outliers."""
        # NOTE(review): the meaning of the trailing numeric arguments
        # (160, 15 / 75, 15) is defined by QuandlAPI.get_data, not here.
        data = QuandlAPI.get_data(self.ticker, start_date, end_date, 160, 15)
        label = Return(self.over).transform(data)

        #######################
        # Augment with index  #
        #######################
        sp500 = QuandlAPI.get_data(
            "INDEX_GSPC", start_date, end_date, 75, 15)
        data = data.join(sp500['Adj Close'], how='left')
        data = data.rename(columns={'Adj Close': 'SP Close'})

        return self._mask_outliers(data), label

    @staticmethod
    def _mask_outliers(data):
        """Replace cells outside median +/- 1.5 * IQR (per column) with NaN."""
        desc = data.describe()
        iqr = desc.loc['75%'] - desc.loc['25%']
        median = desc.loc['50%']
        # Boolean-frame indexing keeps the shape and blanks failing cells.
        data = data[data <= median + 1.5 * iqr]
        data = data[data >= median - 1.5 * iqr]
        return data

    def _add_indicators(self, data):
        """Append the technical-indicator columns to ``data`` and return it."""
        ######################
        # Average True Range #
        ######################
        data['ATR'] = ATR(data, 14)

        ##################
        # Bollinger band #
        ##################
        data = self.bollinger10(data)

        #################
        # Rolling stats #
        #################
        data = high(22, data)  # Chandelier
        data = low(22, data)  # Chandelier

        # Ichimoku Clouds
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:ichimoku_cloud
        data['tenkansen9'] = sen(data, 9)
        data['kijunsen26'] = sen(data, 26)
        # Not sure if redundant: affine transformation of two other features.
        data['senkouA'] = (data['tenkansen9'] + data['kijunsen26']) / 2.
        data['senkouB52'] = sen(data, 52)
        data['chikou26'] = data['Adjusted Close'].shift(26)

        # Moving Average
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:moving_averages
        # Each EMA is computed once; assumes ema() is a pure function of the
        # price column and window -- TODO confirm.
        ema_10 = ema(data, 10)
        ema_100 = ema(data, 100)
        data['ema10'] = ema_10
        data['ema10-100'] = ema_10 / ema_100
        data['ema25-100'] = ema(data, 25) / ema_100
        data['ema50-100'] = ema(data, 50) / ema_100
        data['kama'] = kama(data)
        data['ret_over2'] = return_over(data, 2)

        # RSI
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:relative_strength_index_rsi
        data['rsi'] = rsi(data)

        # Price Volume Oscillator
        # http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:percentage_volume_oscillator_pvo
        ema_vol10 = ema(data, 10, 'Volume')
        ema_vol26 = ema(data, 26, 'Volume')
        # NOTE(review): the linked definition appears to normalise by the
        # slower EMA; this divides by the 10-period one. Left unchanged so
        # existing feature values stay the same -- confirm intent.
        data['pvo'] = (ema_vol10 - ema_vol26) / ema_vol10
        return data

    @staticmethod
    def _align(data, label, start_date, end_date):
        """Drop NaN rows, clip to the date range and keep only shared dates.

        Trimming both objects to a common first/last date is not enough:
        rows removed from the middle of ``data`` (outlier cells become NaN)
        would still be present in ``label``. Each object is therefore
        restricted to the other's index, which also copes with either side
        being empty.
        """
        data = data.dropna().loc[start_date:end_date]
        label = label.dropna().loc[start_date:end_date]
        data = data[data.index.isin(label.index)]
        label = label[label.index.isin(data.index)]
        return data, label