def findEvents(symbols, startday, endday, marketSymbol):
    """Scan closes for $6 downward crossings and write paired orders.

    For each symbol, when the close drops from >= $6.00 to < $6.00, a
    100-share Buy order is written for that day and a matching Sell order
    5 trading days later (clamped to the last available day).  Orders go
    to 'order.csv' as "year,month,day,symbol,action,100" rows.

    @param symbols: list of ticker symbols to scan
    @param startday: start datetime of the scan window
    @param endday: end datetime of the scan window
    @param marketSymbol: unused; kept for interface compatibility
    @return: None (output is the order.csv file)
    """
    # Reading the data for the list of symbols.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, closefield)
    # NOTE: the original also deep-copied `close` into an event matrix and
    # NaN-filled it, but never used or returned it -- dead work, removed.
    totaldays = len(timestamps)
    # Context manager guarantees the file is closed even on error.
    with open('order.csv', 'w') as f:
        for symbol in symbols:
            for i in range(1, totaldays):
                # Event: close crosses below $6.00.
                if close[symbol][i - 1] >= 6. and close[symbol][i] < 6.:
                    soutput = str(timestamps[i].year) + ',' + str(timestamps[i].month) + ',' + str(timestamps[i].day) + ',' + symbol + ',Buy,100\n'
                    f.write(soutput)
                    # Sell 5 trading days later, clamped to the final day.
                    j = i + 5
                    if j >= totaldays:
                        j = totaldays - 1
                    soutput = str(timestamps[j].year) + ',' + str(timestamps[j].month) + ',' + str(timestamps[j].day) + ',' + symbol + ',Sell,100\n'
                    f.write(soutput)
def marketsim(cash, orders_file, data_item):
    """Simulate executing a CSV order list; return daily portfolio values.

    @param cash: starting cash for the portfolio
    @param orders_file: CSV of rows (year, month, day, symbol, action, qty)
    @param data_item: price field to fetch (e.g. 'close')
    @return: list of (year, month, day, portfolio_value) tuples, one per
             NYSE trading day spanned by the orders
    """
    # Read orders, grouped by trade date; 'with' closes the file promptly.
    orders = defaultdict(list)
    symbols = set([])
    with open(orders_file, "rU") as f:
        for year, month, day, sym, action, num in csv.reader(f):
            orders[date(int(year), int(month), int(day))].append((sym, action, int(num)))
            symbols.add(sym)
    # sorted() works on both py2 and py3 dict views.
    days = sorted(orders.keys())
    day, end = days[0], days[-1]
    # Reading the data for the list of symbols.
    # BUG FIX: add one day with timedelta -- 'end.day + 1' raised
    # ValueError whenever the last order fell on the final day of a month.
    timestamps = getNYSEdays(datetime(day.year, day.month, day.day),
                             datetime(end.year, end.month, end.day) + timedelta(days=1),
                             timedelta(hours=16))
    dataobj = DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, data_item)
    values = []
    portfolio = Portfolio(cash)
    for i, t in enumerate(timestamps):
        for sym, action, num in orders[date(t.year, t.month, t.day)]:
            if action == 'Sell':
                num *= -1  # sells are negative share deltas
            portfolio.update(sym, num, close[sym][i])
        entry = (t.year, t.month, t.day, portfolio.value(close, i))
        values.append(entry)
    return values
def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
    ''' Return a quote panel

    @param tickers: iterable of ticker names to load
    @param index: pandas date index; its first/last entries bound the request
    @param fields: quote fields to retrieve (default Fields.QUOTES)
    @param kwargs: reverse (bool) -> build a 'minor'-oriented panel;
                   verbose (bool) -> forwarded to the data access layer
    @return: pandas Panel of cleaned quotes, or None if no database
    '''
    #TODO Replace adj_close with actual_close
    #TODO Add reindex methods, and start, end, delta parameters
    reverse = kwargs.get('reverse', False)
    verbose = kwargs.get('verbose', False)
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    else:
        # BUG FIX: the original 'elif not symbols:' referenced 'symbols'
        # before assignment (UnboundLocalError) whenever the database was
        # not connected; fail cleanly instead.
        self._logger.error('** No database neither informations provided')
        return None
    timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
    csv = da.DataAccess('Yahoo')
    df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
    quotes_dict = dict()
    for ticker in tickers:
        j = 0
        quotes_dict[ticker] = dict()
        for field in fields:
            # Resample to the index frequency using the mean of each bucket.
            serie = df[j][symbols[ticker]].groupby(
                index.freq.rollforward).aggregate(np.mean)
            #TODO add a function parameter to decide what to do about it
            clean_serie = serie.fillna(method='pad')
            quotes_dict[ticker][field] = clean_serie
            j += 1
    if reverse:
        return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
    return Panel.from_dict(quotes_dict, intersect=True)
def _generate_data(self):
    """Build $SPX close prices and a monthly long-1 allocation for 2009."""
    year = 2009
    startday = dt.datetime(year - 1, 12, 1)
    endday = dt.datetime(year + 1, 1, 31)
    l_symbols = ['$SPX']
    # Desired timestamps: NYSE trading days at the 16:00 close.
    timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Norgate')
    self.df_close = dataobj.get_data(
        ldt_timestamps, l_symbols, "close", verbose=True)
    # One allocation row per month of 2009: January seeded, then Feb..Dec.
    self.df_alloc = pand.DataFrame(
        index=[dt.datetime(year, 1, 1)],
        data=[1], columns=l_symbols)
    for i_month in range(2, 13):
        df_row = pand.DataFrame(
            index=[dt.datetime(year, i_month, 1)],
            data=[1], columns=l_symbols)
        self.df_alloc = self.df_alloc.append(df_row)
    self.df_alloc['_CASH'] = 0.0
    # Based on hand calculation using the transaction costs and slippage.
    self.i_open_result = 1.15921341122
def findEvents(symbols, startday, endday, marketSymbol, verbose=False): timeofday = dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday, endday, timeofday) if verbose: print __name__ + " reading data" close = dataobj.get_data(timestamps, symbols, closefield) close = (close.fillna(method="ffill")).fillna(method="backfill") np_eventmat = copy.deepcopy(close) for sym in symbols: for time in timestamps: np_eventmat[sym][time] = np.NAN if verbose: print __name__ + " finding events" price = 7.0 for symbol in symbols: for i in range(1, len(close[symbol])): if close[symbol][i - 1] >= price and close[symbol][i] < price: np_eventmat[symbol][i] = 1.0 return np_eventmat
def alloc_backtest(alloc, start):
    """
    @summary: Back tests an allocation from a pickle file. Uses a starting
              portfolio value of start.
    @param alloc: Name of allocation pickle file. Pickle file contains a
                  DataMatrix with timestamps as indexes and stock symbols as
                  columns, with the last column being the _CASH symbol,
                  indicating how much of the allocation is in cash.
    @param start: integer specifying the starting value of the portfolio
    @return funds: List of fund values indicating the value of the portfolio
                   throughout the back test.
    @rtype timeSeries
    """
    # Read in alloc table from command line arguments.
    # FIX: use a context manager -- the original leaked the file handle.
    with open(alloc, "r") as alloc_input_file:
        alloc = pickle.load(alloc_input_file)
    # Get the data from the data store
    dataobj = da.DataAccess('Norgate')
    # Pad the start back 10 days so the simulator has prior history.
    startday = alloc.index[0] - dt.timedelta(days=10)
    endday = alloc.index[-1]
    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    # Last column is _CASH, so only fetch prices for the stock columns.
    historic = dataobj.get_data(timestamps, list(alloc.columns[0:-1]), "close")
    # Back test
    [fund, leverage, commissions, slippage] = qs.tradesim(alloc, historic,
                                                          int(start), 1, True,
                                                          0.02, 5, 0.02)
    return [fund, leverage, commissions, slippage]
def print_industry_coer(fund_ts, ostream):
    """
    @summary prints correlation and beta of fund daily returns against a
             fixed set of Dow Jones industry indexes
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$DJUSBM', 'Materials'],
                  ['$DJUSNC', 'Goods'],
                  ['$DJUSCY', 'Services'],
                  ['$DJUSFN', 'Financials'],
                  ['$DJUSHC', 'Health'],
                  ['$DJUSIN', 'Industrial'],
                  ['$DJUSEN', 'Oil & Gas'],
                  ['$DJUSTC', 'Technology'],
                  ['$DJUSTL', 'TeleComm'],
                  ['$DJUSUT', 'Utilities']]
    # PERF: the data store and timestamp list do not depend on the industry,
    # so build them once instead of once per loop iteration.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))
    for i in range(0, len(industries)):
        if (i % 2 == 0):
            ostream.write("\n")
        # Load index data and remove NaNs (pad forward, then backfill).
        ldfData = norObj.get_data(ldtTimestamps, [industries[i][0]], ['close'])
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        # Correlation of daily returns, and beta via a degree-1 fit.
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][industries[i][0]]))
        f = np.ravel(tsu.daily(fund_ts))
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " % (industries[i][1],
                      industries[i][0], a[0, 1], fBeta))
def findEvents(symbols, startday,endday, marketSymbol,verbose=False): # Reading the Data for the list of Symbols. timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) dataobj = da.DataAccess(storename) if verbose: print __name__ + " reading data" # Reading the Data close = dataobj.get_data(timestamps, symbols, closefield) # Completing the Data - Removing the NaN values from the Matrix #close = (close.fillna(method='ffill')).fillna(method='backfill') # Calculating the Returns of the Stock Relative to the Market # So if a Stock went up 5% and the Market rose 3%, the return relative to market is 2% np_eventmat = copy.deepcopy(close) for sym in symbols: for time in timestamps: np_eventmat[sym][time]=np.NAN if verbose: print __name__ + " finding events" # Generating the Event Matrix # Event described is : when the actual close of the stock price drops below $5.00 for symbol in symbols: for i in range(2,len(close[symbol])): if close[symbol][i-1] >=7.0 and close[symbol][i] < 7.0 : np_eventmat[symbol][i] = 1.0 #overwriting by the bit, marking the event return np_eventmat
def speedTest(lfcFeature, ldArgs): ''' @Author: Tingyu Zhu @summary: Function to test the runtime for a list of features, and output them by speed @param lfcFeature: a list of features that will be sorted by runtime @param dArgs: Arguments to pass into feature function @return: A list of sorted tuples of format (time, function name/param string) ''' '''pulling out 2 years data to run test''' daData = de.DataAccess('mysql') dtStart = dt.datetime(2010, 1, 1) dtEnd = dt.datetime(2011, 12, 31) dtTimeofday = dt.timedelta(hours=16) lsSym = ['AAPL', 'GOOG', 'XOM', 'AMZN', 'BA', 'GILD', '$SPX'] #print lsSym '''set up variables for applyFeatures''' lsKeys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dtTimeofday) ldfData = daData.get_data(ldtTimestamps, lsSym, lsKeys) dData = dict(zip(lsKeys, ldfData)) '''loop through features''' ltResults = [] for i in range(len(lfcFeature)): dtFuncStart = dt.datetime.now() ldfFeatures = applyFeatures(dData, [lfcFeature[i]], [ldArgs[i]], sMarketRel='$SPX') ltResults.append((dt.datetime.now() - dtFuncStart, lfcFeature[i].__name__ + ' : ' + str(ldArgs[i]))) ltResults.sort() '''print out result''' for tResult in ltResults: print tResult[1], ':', tResult[0] return ltResults
def print_other_coer(fund_ts, ostream):
    """
    @summary prints correlation and beta of fund daily returns against a
             small set of broad market indexes
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$SPX', '    S&P Index'],
                  ['$DJI', '    Dow Jones'],
                  ['$DJUSEN', 'Oil & Gas'],
                  ['$DJGSP', '     Metals']]
    # PERF: the data store and timestamp list do not depend on the index,
    # so build them once instead of once per loop iteration.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))
    for i in range(0, len(industries)):
        if (i % 2 == 0):
            ostream.write("\n")
        # Load index data and remove NaNs (pad forward, then backfill).
        ldfData = norObj.get_data(ldtTimestamps, [industries[i][0]], ['close'])
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        # Correlation of daily returns, and beta via a degree-1 fit.
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][industries[i][0]]))
        f = np.ravel(tsu.daily(fund_ts))
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " % (industries[i][1],
                      industries[i][0], a[0, 1], fBeta))
def load_from_csv(self, tickers, index, fields=Fields.QUOTES, **kwargs):
    ''' Return a quote panel

    @param tickers: iterable of ticker names to load
    @param index: pandas date index; its first/last entries bound the request
    @param fields: quote fields to retrieve (default Fields.QUOTES)
    @param kwargs: reverse (bool) -> build a 'minor'-oriented panel;
                   verbose (bool) -> forwarded to the data access layer
    @return: pandas Panel of cleaned quotes, or None if no database
    '''
    #TODO Replace adj_close with actual_close
    #TODO Add reindex methods, and start, end, delta parameters
    reverse = kwargs.get('reverse', False)
    verbose = kwargs.get('verbose', False)
    if self.connected['database']:
        symbols, markets = self.db.getTickersCodes(tickers)
    else:
        # BUG FIX: the original 'elif not symbols:' referenced 'symbols'
        # before assignment (UnboundLocalError) whenever the database was
        # not connected; fail cleanly instead.
        self._logger.error('** No database neither informations provided')
        return None
    timestamps = du.getNYSEdays(index[0], index[-1], dt.timedelta(hours=16))
    csv = da.DataAccess('Yahoo')
    df = csv.get_data(timestamps, symbols.values(), fields, verbose=verbose)
    quotes_dict = dict()
    for ticker in tickers:
        j = 0
        quotes_dict[ticker] = dict()
        for field in fields:
            # Resample to the index frequency using the mean of each bucket.
            serie = df[j][symbols[ticker]].groupby(index.freq.rollforward).aggregate(np.mean)
            #TODO add a function parameter to decide what to do about it
            clean_serie = serie.fillna(method='pad')
            quotes_dict[ticker][field] = clean_serie
            j += 1
    if reverse:
        return Panel.from_dict(quotes_dict, intersect=True, orient='minor')
    return Panel.from_dict(quotes_dict, intersect=True)
def main(): print "Creating Stock data from Sine Waves" dt_start = dt.datetime(2000, 1, 1) dt_end = dt.datetime(2012, 10, 31) ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16)) x = np.array(range(len(ldt_timestamps))) ls_symbols = ['SINE_FAST', 'SINE_SLOW', 'SINE_FAST_NOISE', 'SINE_SLOW_NOISE'] sine_fast = 10*np.sin(x/10.) + 100 sine_slow = 10*np.sin(x/30.) + 100 sine_fast_noise = 10*(np.sin(x/10.) + np.random.randn(x.size)) + 100 sine_slow_noise = 10*(np.sin(x/30.) + np.random.randn(x.size)) + 100 d_data = dict(zip(ls_symbols, [sine_fast, sine_slow, sine_fast_noise, sine_slow_noise])) write(ls_symbols, d_data, ldt_timestamps) plt.clf() plt.plot(ldt_timestamps, sine_fast) plt.plot(ldt_timestamps, sine_slow) plt.plot(ldt_timestamps, sine_fast_noise) plt.plot(ldt_timestamps, sine_slow_noise) plt.ylim(50,150) plt.xticks(size='xx-small') plt.legend(ls_symbols, loc='best') plt.savefig('test.png',format='png')
def findEvents(symbols, startday,endday, marketSymbol,verbose=False): # Reading the Data for the list of Symbols. timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) dataobj = da.DataAccess('Yahoo') if verbose: print __name__ + " reading data" # Reading the Data close = dataobj.get_data(timestamps, symbols, closefield) # Calculating the Returns of the Stock Relative to the Market # So if a Stock went up 5% and the Market rised 3%. The the return relative to market is 2% #mktneutDM = close - close[marketSymbol] np_eventmat = copy.deepcopy(close) for sym in symbols: for time in timestamps: np_eventmat[sym][time]=np.NAN if verbose: print __name__ + " finding events" # Generating the Event Matrix # Event described is : Market falls more than 3% plus the stock falls 5% more than the Market # Suppose : The market fell 3%, then the stock should fall more than 8% to mark the event. # And if the market falls 5%, then the stock should fall more than 10% to mark the event. for symbol in symbols: for i in range(1,len(close[symbol])): if close[symbol][i] < 25.0 and close[symbol][i-1] >= 30.0 : # When market fall is more than 3% and also the stock compared to market is also fell by more than 5%. np_eventmat[symbol][i] = 1.0 #overwriting by the bit, marking the event return np_eventmat
def strat_backtest2(strat, start, end, diff, dur, startval):
    """
    @summary: Back tests a strategy defined in a python script that takes
              in a start and end date along with a starting value over a
              given period.
    @param strat: filename of python script strategy
    @param start: starting date in a datetime object
    @param end: ending date in a datetime object
    @param diff: offset in days of the tests
    @param dur: length of a test
    @param startval: starting value of fund during back tests
    @return fundsmatrix: Datamatrix of fund values returned from each test
    @rtype datamatrix
    """
    fundsmatrix = []
    tradingdays = du.getNYSEdays(start, end, dt.timedelta(hours=16))
    n_days = len(tradingdays)
    for i_start in range(0, n_days, diff):
        # Clamp each test window to the last available trading day.
        if i_start + dur >= n_days:
            dt_end = tradingdays[-1]
        else:
            dt_end = tradingdays[i_start + dur]
        # Run the strategy script; it writes its allocation to a pickle.
        os.system("python {0} {1} {2} temp_alloc.pkl".format(
            strat,
            tradingdays[i_start].strftime("%m-%d-%Y"),
            dt_end.strftime("%m-%d-%Y")))
        funds = alloc_backtest('temp_alloc.pkl', startval)
        fundsmatrix.append(funds)
    return fundsmatrix
def log500( sLog ):
    '''
    @summary: Loads cached features.
    @param sLog: Filename of features.
    @return: Nothing, logs features to desired location
    '''
    # Hard-coded S&P 500 membership list (a snapshot; not updated live).
    lsSym = ['A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD', 'APH', 'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA', 'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B', 'BHI', 'BIG', 'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY', 'BRCM', 'BRK.B', 'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CAT', 'CB', 'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF', 'CFN', 'CHK', 'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG', 'COH', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CRM', 'CSC', 'CSCO', 'CSX', 'CTAS', 'CTL', 'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS', 'CVX', 'D', 'DD', 'DE', 'DELL', 'DF', 'DFS', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DNB', 'DNR', 'DO', 'DOV', 'DOW', 'DPS', 'DRI', 'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN', 'EBAY', 'ECL', 'ED', 'EFX', 'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG', 'EP', 'EQR', 'EQT', 'ERTS', 'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC', 'EXPD', 'EXPE', 'F', 'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN', 'FII', 'FIS', 'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX', 'FSLR', 'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW', 'GME', 'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL', 'HAR', 'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ', 'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST', 'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP', 'IPG', 'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP', 'JDSU', 'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K', 'KEY', 'KFT', 'KIM', 'KLAC', 'KMB', 'KMX',
             'KO', 'KR', 'KSS', 'L', 'LEG', 'LEN', 'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC', 'LO', 'LOW', 'LSI', 'LTD', 'LUK', 'LUV', 'LXK', 'M', 'MA', 'MAR', 'MAS', 'MAT', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP', 'MHS', 'MJN', 'MKC', 'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS', 'MPC', 'MRK', 'MRO', 'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV', 'MWW', 'MYL', 'NBL', 'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX', 'NFX', 'NI', 'NKE', 'NOC', 'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU', 'NUE', 'NVDA', 'NVLS', 'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC', 'ORCL', 'ORLY', 'OXY', 'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL', 'PCLN', 'PCP', 'PCS', 'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGN', 'PGR', 'PH', 'PHM', 'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW', 'POM', 'PPG', 'PPL', 'PRU', 'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP', 'R', 'RAI', 'RDC', 'RF', 'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST', 'RRC', 'RRD', 'RSG', 'RTN', 'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE', 'SEE', 'SHLD', 'SHW', 'SIAL', 'SJM', 'SLB', 'SLE', 'SLM', 'SNA', 'SNDK', 'SNI', 'SO', 'SPG', 'SPLS', 'SRCL', 'SRE', 'STI', 'STJ', 'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN', 'SWY', 'SYK', 'SYMC', 'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL', 'TER', 'TGT', 'THC', 'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW', 'TRV', 'TSN', 'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH', 'UNM', 'UNP', 'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B', 'VLO', 'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC', 'WFC', 'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI', 'WPO', 'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH']
    # Include the market symbol and keep the universe sorted.
    lsSym.append('$SPX')
    lsSym.sort()
    ''' Max lookback is 6 months '''
    dtEnd = dt.datetime.now()
    # Snap "now" to the 16:00 market close so timestamps line up.
    dtEnd = dtEnd.replace(hour=16, minute=0, second=0, microsecond=0)
    dtStart = dtEnd - relativedelta(months=6)
    ''' Pull in current data '''
    norObj = da.DataAccess('Norgate')
    ''' Get 2 extra months for moving averages and future returns '''
    ldtTimestamps = du.getNYSEdays( dtStart - relativedelta(months=2), \
        dtEnd + relativedelta(months=2), dt.timedelta(hours=16) )
    dfPrice = norObj.get_data( ldtTimestamps, lsSym, 'close' )
    dfVolume = norObj.get_data( ldtTimestamps, lsSym, 'volume' )
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures, ldArgs, lsNames = getFeatureFuncs()
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    applyFeatures( dfPrice, dfVolume, lfcFeatures, ldArgs, sLog=sLog )
def readdata(valuefile, closefield='close', stores='Yahoo'):
    """Read a fund-value CSV and build a matching benchmark value series.

    @param valuefile: CSV of (year, month, day, value) rows
    @param closefield: price field used for the benchmark (default 'close')
    @param stores: data store name (default 'Yahoo')
    @return: tuple (timestamps, fundvalue, benchmark_value)
    """
    funddata = nu.loadtxt(valuefile, delimiter=',', dtype='i4,i4,i4,f8')
    datelist = []
    fundvalue = []
    for record in funddata:
        fundvalue.append(record[3])
        datelist.append(dt.datetime(record[0], record[1], record[2]))
    # Read in the benchmark data over the same span.
    timeofday = dt.timedelta(hours=16)
    startdate = datelist[0]
    enddate = datelist[-1] + dt.timedelta(days=1)  # fix the off-by-1 error
    timestamps = du.getNYSEdays(startdate, enddate, timeofday)
    # Get the value for the benchmark symbol.
    dataobj = da.DataAccess(stores)
    symbols = [bench_symbol]
    close = dataobj.get_data(timestamps, symbols, closefield)
    benchmark_price = [close[bench_symbol][time] for time in timestamps]
    # Buy-and-hold the benchmark with the fund's starting value.
    bench_shares = fundvalue[0] / benchmark_price[0]
    benchmark_value = [bench_shares * price for price in benchmark_price]
    return timestamps, fundvalue, benchmark_value
def alloc_backtest(alloc, start):
    """
    @summary: Back tests an allocation from a pickle file. Uses a starting
              portfolio value of start.
    @param alloc: Name of allocation pickle file. Pickle file contains a
                  DataMatrix with timestamps as indexes and stock symbols as
                  columns, with the last column being the _CASH symbol,
                  indicating how much of the allocation is in cash.
    @param start: integer specifying the starting value of the portfolio
    @return funds: List of fund values indicating the value of the portfolio
                   throughout the back test.
    @rtype timeSeries
    """
    # Read in alloc table from command line arguments.
    # FIX: use a context manager -- the original leaked the file handle.
    with open(alloc, "r") as alloc_input_file:
        alloc = cPickle.load(alloc_input_file)
    # Get the data from the data store
    dataobj = da.DataAccess('Norgate')
    # Pad the start back 10 days so the simulator has prior history.
    startday = alloc.index[0] - dt.timedelta(days=10)
    endday = alloc.index[-1]
    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday, endday, timeofday)
    # Last column is _CASH, so only fetch prices for the stock columns.
    historic = dataobj.get_data(timestamps, list(alloc.columns[0:-1]), "close")
    # Back test
    [fund, leverage, commissions, slippage] = qs.tradesim(alloc, historic,
                                                          int(start), 1, True,
                                                          0.02, 5, 0.02)
    return [fund, leverage, commissions, slippage]
def print_other_coer(fund_ts, ostream):
    """
    @summary prints correlation and beta of fund daily returns against a
             small set of broad market indexes
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    industries = [['$SPX', '    S&P Index'],
                  ['$DJI', '    Dow Jones'],
                  ['$DJUSEN', 'Oil & Gas'],
                  ['$DJGSP', '     Metals']]
    # PERF: the data store and timestamp list do not depend on the index,
    # so build them once instead of once per loop iteration.
    norObj = de.DataAccess('mysql')
    ldtTimestamps = du.getNYSEdays(fund_ts.index[0], fund_ts.index[-1],
                                   dt.timedelta(hours=16))
    for i in range(0, len(industries)):
        if (i % 2 == 0):
            ostream.write("\n")
        # Load index data and remove NaNs (pad forward, then backfill).
        ldfData = norObj.get_data(ldtTimestamps, [industries[i][0]], ['close'])
        ldfData[0] = ldfData[0].fillna(method='pad')
        ldfData[0] = ldfData[0].fillna(method='bfill')
        # Correlation of daily returns, and beta via a degree-1 fit.
        a = np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),
                        np.ravel(tsu.daily(fund_ts.values)))
        b = np.ravel(tsu.daily(ldfData[0][industries[i][0]]))
        f = np.ravel(tsu.daily(fund_ts))
        fBeta, unused = np.polyfit(b, f, 1)
        ostream.write("%10s(%s):%+6.2f,   %+6.2f   " % (industries[i][1],
                      industries[i][0], a[0, 1], fBeta))
def readdata(valuefile, closefield='close', stores='Yahoo'):
    """Load a fund-value CSV and compute a matching benchmark value series.

    @param valuefile: CSV file of (year, month, day, value) rows
    @param closefield: price field for the benchmark (default 'close')
    @param stores: name of the data store (default 'Yahoo')
    @return: tuple (timestamps, fundvalue, benchmark_value)
    """
    funddata = nu.loadtxt(valuefile, delimiter=',', dtype='i4,i4,i4,f8')
    datelist = []
    fundvalue = []
    for rec in funddata:
        fundvalue.append(rec[3])
        datelist.append(dt.datetime(rec[0], rec[1], rec[2]))
    # Benchmark window: same dates, end padded a day (off-by-one fix).
    timeofday = dt.timedelta(hours=16)
    startdate = datelist[0]
    enddate = datelist[-1] + dt.timedelta(days=1)
    timestamps = du.getNYSEdays(startdate, enddate, timeofday)
    # Fetch benchmark prices from the data store.
    dataobj = da.DataAccess(stores)
    symbols = [bench_symbol]
    close = dataobj.get_data(timestamps, symbols, closefield)
    benchmark_price = []
    for ts in timestamps:
        benchmark_price.append(close[bench_symbol][ts])
    # Buy-and-hold the benchmark with the fund's initial value.
    bench_shares = fundvalue[0] / benchmark_price[0]
    benchmark_value = []
    for price in benchmark_price:
        benchmark_value.append(bench_shares * price)
    return timestamps, fundvalue, benchmark_value
def findEvents(symbols, startday,endday, marketSymbol,verbose=False): # Reading the Data for the list of Symbols. timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) dataobj = da.DataAccess(storename) if verbose: print __name__ + " reading data" # Reading the Data close = dataobj.get_data(timestamps, symbols, closefield) # Completing the Data - Removing the NaN values from the Matrix #close = (close.fillna(method='ffill')).fillna(method='backfill') if verbose: print __name__ + " finding events" # Generating the orders # Event described is : when the actual close of the stock price drops below $5.00 f = open('orders.csv', 'wt') writer = csv.writer(f) for symbol in symbols: for i in range(2,len(close[symbol])): if close[symbol][i-1] >=5.0 and close[symbol][i] < 5.0 : writer.writerow( (close.index[i].year, close.index[i].month, close.index[i].day, symbol, 'BUY', 100) ) j = i + 5 if (j > len(close[symbol])) : j = len(close[ysmbol]) writer.writerow( (close.index[j].year, close.index[j].month, close.index[j].day, symbol, 'SELL', 100) ) f.close()
def strat_backtest2(strat, start, end, diff, dur, startval):
    """
    @summary: Back tests a strategy defined in a python script that takes
              in a start and end date along with a starting value over a
              given period.
    @param strat: filename of python script strategy
    @param start: starting date in a datetime object
    @param end: ending date in a datetime object
    @param diff: offset in days of the tests
    @param dur: length of a test
    @param startval: starting value of fund during back tests
    @return fundsmatrix: Datamatrix of fund values returned from each test
    @rtype datamatrix
    """
    fundsmatrix = []
    tradingdays = du.getNYSEdays(start, end, dt.timedelta(hours=16))
    n_days = len(tradingdays)
    for i_start in range(0, n_days, diff):
        # Clamp each test window to the last available trading day.
        if i_start + dur >= n_days:
            dt_end = tradingdays[-1]
        else:
            dt_end = tradingdays[i_start + dur]
        # Run the strategy script; it writes its allocation to a pickle.
        os.system('python ' + strat + ' '
                  + tradingdays[i_start].strftime("%m-%d-%Y")
                  + ' ' + dt_end.strftime("%m-%d-%Y") + ' temp_alloc.pkl')
        funds = alloc_backtest('temp_alloc.pkl', startval)
        fundsmatrix.append(funds)
    return fundsmatrix
def _generate_data(self):
    """Build $SPX close prices and a monthly short-1 allocation for 2009."""
    year = 2009
    startday = dt.datetime(year - 1, 12, 1)
    endday = dt.datetime(year + 1, 1, 31)
    l_symbols = ['$SPX']
    # Desired timestamps: NYSE trading days at the 16:00 close.
    timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startday, endday, timeofday)
    dataobj = da.DataAccess('Norgate')
    self.df_close = dataobj.get_data(
        ldt_timestamps, l_symbols, "close", verbose=True)
    # One allocation row per month of 2009: January seeded, then Feb..Dec.
    # Allocation of -1 means a fully short position in $SPX.
    self.df_alloc = pand.DataFrame(
        index=[dt.datetime(year, 1, 1)],
        data=[-1], columns=l_symbols)
    for i_month in range(2, 13):
        df_row = pand.DataFrame(
            index=[dt.datetime(year, i_month, 1)],
            data=[-1], columns=l_symbols)
        self.df_alloc = self.df_alloc.append(df_row)
    self.df_alloc['_CASH'] = 0.0
    # Based on hand calculation using the transaction costs and slippage.
    self.i_open_result = 0.7541428779600005
def findEvents(symbols,startday,endday,marketSymbol,verbose = False): timeofday = dt.timedelta(hours = 16) timestamps = du.getNYSEdays(startday,endday,timeofday) if verbose: print __name__ + " reading data" close = dataobj.get_data(timestamps,symbols,closefield) close = (close.fillna(method="ffill")).fillna(method="backfill") np_eventmat = copy.deepcopy(close) for sym in symbols: for time in timestamps: np_eventmat[sym][time] = np.NAN if verbose: print __name__ + " finding events" price = 7.0 for symbol in symbols: for i in range(1,len(close[symbol])): if close[symbol][i-1] >= price and close[symbol][i] < price: np_eventmat[symbol][i] = 1.0 return np_eventmat
def marketsim(cash, orders_file, data_item):
    """Simulate executing a CSV order list; return daily portfolio values.

    @param cash: starting cash for the portfolio
    @param orders_file: CSV of rows (year, month, day, symbol, action, qty)
    @param data_item: price field to fetch (e.g. 'close')
    @return: list of (year, month, day, portfolio_value) tuples, one per
             NYSE trading day spanned by the orders
    """
    # Read orders, grouped by trade date; 'with' closes the file promptly.
    orders = defaultdict(list)
    symbols = set([])
    with open(orders_file, "rU") as f:
        for year, month, day, sym, action, num in csv.reader(f):
            orders[date(int(year), int(month), int(day))].append(
                (sym, action, int(num)))
            symbols.add(sym)
    # sorted() works on both py2 and py3 dict views.
    days = sorted(orders.keys())
    day, end = days[0], days[-1]
    # Reading the data for the list of symbols.
    # BUG FIX: add one day with timedelta -- 'end.day + 1' raised
    # ValueError whenever the last order fell on the final day of a month.
    timestamps = getNYSEdays(datetime(day.year, day.month, day.day),
                             datetime(end.year, end.month, end.day)
                             + timedelta(days=1),
                             timedelta(hours=16))
    dataobj = DataAccess('Yahoo')
    close = dataobj.get_data(timestamps, symbols, data_item)
    values = []
    portfolio = Portfolio(cash)
    for i, t in enumerate(timestamps):
        for sym, action, num in orders[date(t.year, t.month, t.day)]:
            if action == 'Sell':
                num *= -1  # sells are negative share deltas
            portfolio.update(sym, num, close[sym][i])
        entry = (t.year, t.month, t.day, portfolio.value(close, i))
        values.append(entry)
    return values
def log500(sLog):
    '''
    @summary: Loads cached features.
    @param sLog: Filename of features.
    @return: Nothing, logs features to desired location
    '''
    # Hard-coded S&P 500 membership list (a snapshot; not updated live).
    lsSym = ['A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV', 'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP', 'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD', 'APH', 'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA', 'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B', 'BHI', 'BIG', 'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY', 'BRCM', 'BRK.B', 'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CAT', 'CB', 'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF', 'CFN', 'CHK', 'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG', 'COH', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CRM', 'CSC', 'CSCO', 'CSX', 'CTAS', 'CTL', 'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS', 'CVX', 'D', 'DD', 'DE', 'DELL', 'DF', 'DFS', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DNB', 'DNR', 'DO', 'DOV', 'DOW', 'DPS', 'DRI', 'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN', 'EBAY', 'ECL', 'ED', 'EFX', 'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG', 'EP', 'EQR', 'EQT', 'ERTS', 'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC', 'EXPD', 'EXPE', 'F', 'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN', 'FII', 'FIS', 'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX', 'FSLR', 'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW', 'GME', 'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL', 'HAR', 'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ', 'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST', 'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP', 'IPG', 'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP', 'JDSU', 'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K', 'KEY', 'KFT', 'KIM', 'KLAC', 'KMB', 'KMX',
             'KO', 'KR', 'KSS', 'L', 'LEG', 'LEN', 'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC', 'LO', 'LOW', 'LSI', 'LTD', 'LUK', 'LUV', 'LXK', 'M', 'MA', 'MAR', 'MAS', 'MAT', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP', 'MHS', 'MJN', 'MKC', 'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS', 'MPC', 'MRK', 'MRO', 'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV', 'MWW', 'MYL', 'NBL', 'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX', 'NFX', 'NI', 'NKE', 'NOC', 'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU', 'NUE', 'NVDA', 'NVLS', 'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC', 'ORCL', 'ORLY', 'OXY', 'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL', 'PCLN', 'PCP', 'PCS', 'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGN', 'PGR', 'PH', 'PHM', 'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW', 'POM', 'PPG', 'PPL', 'PRU', 'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP', 'R', 'RAI', 'RDC', 'RF', 'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST', 'RRC', 'RRD', 'RSG', 'RTN', 'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE', 'SEE', 'SHLD', 'SHW', 'SIAL', 'SJM', 'SLB', 'SLE', 'SLM', 'SNA', 'SNDK', 'SNI', 'SO', 'SPG', 'SPLS', 'SRCL', 'SRE', 'STI', 'STJ', 'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN', 'SWY', 'SYK', 'SYMC', 'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL', 'TER', 'TGT', 'THC', 'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW', 'TRV', 'TSN', 'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH', 'UNM', 'UNP', 'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B', 'VLO', 'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC', 'WFC', 'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI', 'WPO', 'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH']
    # Include the market symbol and keep the universe sorted.
    lsSym.append('$SPX')
    lsSym.sort()
    ''' Max lookback is 6 months '''
    dtEnd = dt.datetime.now()
    # Snap "now" to the 16:00 market close so timestamps line up.
    dtEnd = dtEnd.replace(hour=16, minute=0, second=0, microsecond=0)
    dtStart = dtEnd - relativedelta(months=6)
    ''' Pull in current data '''
    norObj = da.DataAccess('Norgate')
    ''' Get 2 extra months for moving averages and future returns '''
    ldtTimestamps = du.getNYSEdays(dtStart - relativedelta(months=2),
                                   dtEnd + relativedelta(months=2),
                                   dt.timedelta(hours=16))
    dfPrice = norObj.get_data(ldtTimestamps, lsSym, 'close')
    dfVolume = norObj.get_data(ldtTimestamps, lsSym, 'volume')
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures, ldArgs, lsNames = getFeatureFuncs()
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs, sLog=sLog)
def simulate(symbols, allocations, startday, endday):
    """Backtest a fixed-weight portfolio and report summary statistics.

    @param symbols: list of ticker symbols
    @param allocations: list/array of portfolio weights (normalized internally)
    @param startday: first day of the backtest
    @param endday: last day of the backtest
    @return dict with keys 'sharpe', 'cumulative_return', 'average', 'stddev'
    """
    # Closing prices are stamped at the 16:00 NYSE close.
    trading_days = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')
    prices = source.get_data(trading_days, symbols, "close", verbose=False).values

    # Normalize each price series to day one, then weight by allocation.
    weights = allocations / np.sum(allocations)
    port_value = np.dot(prices / prices[0, :], weights)

    # returnize0 converts the value series to 0-centered daily returns in place.
    daily_returns = port_value.copy()
    tsu.returnize0(daily_returns)

    return {"sharpe": tsu.get_sharpe_ratio(daily_returns),
            "cumulative_return": port_value[-1] / port_value[0],
            "average": np.mean(daily_returns),
            "stddev": np.std(daily_returns)}
def print_industry_coer(fund_ts, ostream):
    """
    @summary prints, for each Dow Jones industry index, the correlation of
             daily returns with the fund and the fund's beta to that index
             (two industries per output line)
    @param fund_ts: pandas fund time series
    @param ostream: stream to print to
    """
    # (symbol, label) pairs for the ten DJ US industry indices.
    industries = [['$DJUSBM', 'Materials'], ['$DJUSNC', 'Goods'],
                  ['$DJUSCY', 'Services'], ['$DJUSFN', 'Financials'],
                  ['$DJUSHC', 'Health'], ['$DJUSIN', 'Industrial'],
                  ['$DJUSEN', 'Oil & Gas'], ['$DJUSTC', 'Technology'],
                  ['$DJUSTL', 'TeleComm'], ['$DJUSUT', 'Utilities']]
    for i in range(0, len(industries) ):
        # Wrap output: two industry entries per line.
        if(i%2==0):
            ostream.write("\n")
        #load data
        # NOTE(review): a new DataAccess object is built per iteration;
        # hoisting it out of the loop would be equivalent but this is kept as-is.
        norObj = de.DataAccess('mysql')
        ldtTimestamps = du.getNYSEdays( fund_ts.index[0], fund_ts.index[-1], dt.timedelta(hours=16) )
        ldfData = norObj.get_data( ldtTimestamps, [industries[i][0]], ['close'] )
        #get corelation
        # Pad forward first, then backfill leading NaNs so daily() sees no gaps.
        ldfData[0]=ldfData[0].fillna(method='pad')
        ldfData[0]=ldfData[0].fillna(method='bfill')
        # Correlation matrix between index returns and fund returns.
        a=np.corrcoef(np.ravel(tsu.daily(ldfData[0][industries[i][0]])),np.ravel(tsu.daily(fund_ts.values)))
        b=np.ravel(tsu.daily(ldfData[0][industries[i][0]]))
        f=np.ravel(tsu.daily(fund_ts))
        # Slope of the returns regression = beta of the fund vs. this index.
        fBeta, unused = np.polyfit(b,f,1)
        ostream.write("%10s(%s):%+6.2f,  %+6.2f   " % (industries[i][1], industries[i][0], a[0,1], fBeta))
def get_data(syms, startday, endday):
    """Return cleaned closing prices for *syms* over the date range.

    The end day is exclusive: one calendar day is trimmed before the
    NYSE-day lookup. NaN gaps are forward-filled, then backfilled.
    """
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(startday, endday - dt.timedelta(days=1),
                                  close_time)
    prices = da.DataAccess('Yahoo').get_data(trading_days, syms, 'close')
    return prices.fillna(method='ffill').fillna(method='backfill')
def time_price(startdate, enddate, portsyms):
    """Return (trading timestamps, close-price frame) for the portfolio symbols.

    Relies on the module-level globals *timeofday*, *storename* and
    *closefield*. Note: 'close' is not the same as 'actual close'.
    """
    # set the time boundaries
    trading_days = du.getNYSEdays(startdate, enddate, timeofday)
    # get the close price
    prices = da.DataAccess(storename).get_data(trading_days, portsyms, closefield)
    return (trading_days, prices)
def genData(startday, endday, datadirectory, symbols):
    """Generate the Visualizer's data directory for a date range.

    Writes TimeStamps.txt, Symbols.txt, Features.txt and a pickled list of
    per-feature numpy arrays (ALLDATA.pkl) under
    $QS/Tools/Visualizer/Data/<datadirectory>_<start>_<end>/.

    @param startday: start datetime of the data window
    @param endday: end datetime of the data window
    @param datadirectory: name prefix for the output directory
    @param symbols: list of symbols; invalid ones are dropped with a warning
    """
    # Output directory is keyed by name + ISO dates; created if missing.
    coredirectory = os.environ['QS']+'Tools/Visualizer/Data/'
    directorylocation= coredirectory+datadirectory+'_'+startday.date().isoformat() +'_'+endday.date().isoformat()
    if not os.path.exists(directorylocation):
        os.mkdir(directorylocation)
    directorylocation = directorylocation +'/'
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    #Creating a txt file of timestamps
    file = open(directorylocation +'TimeStamps.txt', 'w')
    for onedate in timestamps:
        stringdate=dt.date.isoformat(onedate)
        file.write(stringdate+'\n')
    file.close()
    # Reading the Stock Price Data
    dataobj = da.DataAccess('Norgate')
    all_symbols = dataobj.get_all_symbols()
    # Drop any requested symbols the data store does not know about.
    badsymbols=set(symbols)-set(all_symbols)
    if len(list(badsymbols))>0:
        print "Some Symbols are not valid" + str(badsymbols)
    symbols=list(set(symbols)-badsymbols)
    lsKeys = ['open', 'high', 'low', 'close', 'volume']
    ldfData = dataobj.get_data( timestamps, symbols, lsKeys )
    # Map each key to its DataFrame for the feature functions.
    dData = dict(zip(lsKeys, ldfData))
    # Creating the 3D Matrix
    # Features are computed relative to SPY via sMarketRel.
    (lfcFeatures, ldArgs, lsNames)= feat.getFeatureFuncs22()
    FinalData = feat.applyFeatures( dData, lfcFeatures, ldArgs, sMarketRel='SPY')
    #Creating a txt file of symbols
    file = open(directorylocation +'Symbols.txt', 'w')
    for sym in symbols:
        file.write(str(sym)+'\n')
    file.close()
    #Creating a txt file of Features
    file = open(directorylocation +'Features.txt', 'w')
    for f in lsNames:
        file.write(f+'\n')
    file.close()
    # Pickle the raw numpy values of every feature frame at highest protocol (-1).
    Numpyarray=[]
    for IndicatorData in FinalData:
        Numpyarray.append(IndicatorData.values)
    pickle.dump(Numpyarray,open(directorylocation +'ALLDATA.pkl', 'wb' ),-1)
def main(): '''Main Function''' # List of symbols ls_symbols = ["AAPL", "GOOG"] # Start and End date of the charts dt_start = dt.datetime(2008, 1, 1) dt_end = dt.datetime(2010, 12, 31) # We need closing prices so the timestamp should be hours=16. dt_timeofday = dt.timedelta(hours=16) # Get a list of trading days between the start and the end. ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) # Creating an object of the dataaccess class with Yahoo as the source. c_dataobj = da.DataAccess('Yahoo') # Reading just the close prices df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close") # Creating the allocation dataframe # We offset the time for the simulator to have atleast one # datavalue before the allocation. df_alloc = pd.DataFrame(np.array([[0.5, 0.5]]), index=[ldt_timestamps[0] + dt.timedelta(hours=5)], columns=ls_symbols) dt_last_date = ldt_timestamps[0] # Looping through all dates and creating monthly allocations for dt_date in ldt_timestamps[1:]: if dt_last_date.month != dt_date.month: # Create allocation na_vals = np.random.randint(0, 1000, len(ls_symbols)) na_vals = na_vals / float(sum(na_vals)) na_vals = na_vals.reshape(1, -1) # Append to the dataframe df_new_row = pd.DataFrame(na_vals, index=[dt_date], columns=ls_symbols) df_alloc = df_alloc.append(df_new_row) dt_last_date = dt_date # Adding cash to the allocation matrix df_alloc['_CASH'] = 0.0 # Running the simulator on the allocation frame (ts_funds, ts_leverage, f_commission, f_slippage, f_borrow_cost) = qstksim.tradesim(df_alloc, df_close, f_start_cash=10000.0, i_leastcount=1, b_followleastcount=True, f_slippage=0.0005, f_minimumcommision=5.0, f_commision_share=0.0035, i_target_leverage=1, f_rate_borrow=3.5, log="transaction.csv") print "Simulated Fund Time Series : " print ts_funds print "Transaction Costs : " print "Commissions : ", f_commission print "Slippage : ", f_slippage print "Borrowing Cost : ", f_borrow_cost
def getData(symbols, startday, endday):
    """Read the *closefield* price series for *symbols* from the Yahoo store.

    Uses the module-level global *closefield* as the field name.
    """
    trading_days = du.getNYSEdays(startday, endday, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')
    return source.get_data(trading_days, symbols, closefield)
def getData(symbols, startday, endday):
    """Fetch daily prices (field given by the module global *closefield*)."""
    nyse_close = dt.timedelta(hours=16)  # 16:00 stamps mark the daily close
    stamps = du.getNYSEdays(startday, endday, nyse_close)
    return da.DataAccess('Yahoo').get_data(stamps, symbols, closefield)
def getTradingDays(orders):
    '''(pandas) -> list of timestamp

    Return list of timestamps for all trading dates between first and
    last day of order (last day inclusive).
    '''
    last = len(orders) - 1
    startday = dt.datetime(orders['year'][0], orders['month'][0],
                           orders['day'][0])
    # BUG FIX: add one day via timedelta so month/year boundaries roll over.
    # The original dt.datetime(..., day + 1) raised ValueError whenever the
    # last order fell on the final day of a month (e.g. Jan 31 -> day=32).
    endday = dt.datetime(orders['year'][last], orders['month'][last],
                         orders['day'][last]) + dt.timedelta(days=1)
    timeofday = dt.timedelta(hours=16)
    return du.getNYSEdays(startday, endday, timeofday)
def previous_nyse_day(self, date):
    """Return the NYSE trading day immediately before *date*.

    *date* itself must be a trading day; a midnight timestamp is first
    shifted to the 16:00 close used by the NYSE-day index. Raises
    Exception if *date* is not a trading day.
    """
    close_time = dt.timedelta(hours=16)
    if date.hour == 0:
        date = date + close_time
    # Ten calendar days of lookback always contain at least two trading days.
    window = du.getNYSEdays(date - dt.timedelta(days=10), date, close_time)
    if window[-1] != date:
        raise Exception('not a trading day')
    return window[-2]
def time_price(startdate, enddate, portsyms):
    """Look up close prices for *portsyms* over the NYSE days in range.

    Depends on module-level globals *timeofday*, *storename*, *closefield*.
    Note: 'close' is not the same as 'actual close'.
    """
    stamps = du.getNYSEdays(startdate, enddate, timeofday)
    store = da.DataAccess(storename)
    quotes = store.get_data(stamps, portsyms, closefield)
    return (stamps, quotes)
def findEvents(symbols, startday, endday, verbose=False, generateOrders=False,targetPrice=target_price):
    """Scan actual-close prices for drops through *targetPrice* and mark them.

    An event is a day whose previous actual close was >= targetPrice and whose
    close is < targetPrice. Optionally writes a Buy order on the event day and
    a Sell order 5 trading days later (clamped to the last day) to the file
    named by the module global *order_filename*.

    @param symbols: list of symbols to scan
    @param startday: start datetime
    @param endday: end datetime
    @param verbose: print progress and each event found
    @param generateOrders: also emit a Buy/Sell order CSV
    @param targetPrice: price threshold (default: module global target_price)
    @return pandas frame with 1.0 on event days, NaN elsewhere
    """
    # Reading data
    timeofday=dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    dataobj = da.DataAccess(storename)
    if verbose:
        print __name__ + " reading data from " + storename
    # Read 'actual close' field data value
    actual_close = dataobj.get_data(timestamps, symbols, actual_close_field)
    # trim the data - removing the NaN values from the matrix
    #actual_close = (actual_close.fillna(method='ffill')).fillna(method='backfill')
    np_eventmat = copy.deepcopy(actual_close) # create event matrix with np value
    for sym in symbols:
        for t in timestamps:
            np_eventmat[sym][t]=np.NAN
    # Create Trading Orders File based on Event
    if generateOrders:
        orderCSV = open(order_filename, "wb")
        write = csv.writer(orderCSV, delimiter=',')
    # fill in event
    event_count=0
    for symbol in symbols:
        for t in range(1, len(actual_close[symbol])):
            # Event: price crosses down through the target threshold.
            if(actual_close[symbol][t-1]>= targetPrice and actual_close[symbol][t]< targetPrice):
                if generateOrders:
                    # generate a order Buy and Sell after 5 trading days
                    write.writerow([timestamps[t].year, timestamps[t].month, timestamps[t].day, symbol, buy, str(shares)])
                    # Clamp the exit to the last available trading day.
                    if t + 5 < len(timestamps):
                        fiveDaysLater = timestamps[t+5]
                    else:
                        fiveDaysLater = timestamps[len(timestamps)-1]
                    write.writerow([fiveDaysLater.year, fiveDaysLater.month, fiveDaysLater.day, symbol, sell, str(shares)])
                if verbose:
                    print __name__ + " found event for symbol: " + symbol, actual_close[symbol][t-1] , actual_close[symbol][t]
                np_eventmat[symbol][t] = 1.0
                event_count = event_count +1
    # print out event matrix for debug
    print __name__ + "Event Matrix"
    for sym in symbols:
        for t in range(1, len(actual_close[sym])):
            if(np_eventmat[sym][t] == 1.0):
                print timestamps[t] , sym , " has event"
    print "####### Found ", event_count, " in total #######"
    return np_eventmat
def timestamps(cls, start_time, end_time):
    """Return 5-minute intraday timestamps between start_time and end_time.

    Trading days come from the NYSE calendar anchored at the 09:30 open;
    79 five-minute marks cover 09:30 through 16:00 inclusive. Only marks
    within [start_time, end_time] are returned.
    """
    first_day = dt.datetime(start_time.year, start_time.month, start_time.day)
    last_day = dt.datetime(end_time.year, end_time.month, end_time.day)
    opens = du.getNYSEdays(first_day, last_day + dt.timedelta(days=1),
                           dt.timedelta(hours=9, minutes=30))
    step_seconds = 300
    return [mark
            for day_open in opens
            for mark in (day_open + dt.timedelta(seconds=k * step_seconds)
                         for k in range(79))
            if start_time <= mark <= end_time]
def getTradingDays(orders):
    '''(pandas) -> list of timestamp

    Return list of timestamps for all trading dates between first and
    last day of order (last day inclusive).
    '''
    last = len(orders) - 1
    startday = dt.datetime(orders['year'][0], orders['month'][0],
                           orders['day'][0])
    # BUG FIX: use timedelta arithmetic instead of day+1. Constructing
    # dt.datetime(..., day + 1) raises ValueError when the last order is on
    # the final day of a month (e.g. day=31 -> day=32).
    endday = dt.datetime(orders['year'][last], orders['month'][last],
                         orders['day'][last]) + dt.timedelta(days=1)
    timeofday = dt.timedelta(hours=16)
    return du.getNYSEdays(startday, endday, timeofday)
def share_table2fund(share_table): """ @summary converts data frame of shares into fund values @param share_table: data frame containing shares on days transactions occured @return fund : time series containing fund value over time @return leverage : time series containing fund value over time """ # Get the data from the data store dataobj = de.DataAccess('mysql') startday = share_table.index[0] endday = share_table.index[-1] symbols = list(share_table.columns) symbols.remove('_CASH') # print symbols # Get desired timestamps timeofday = dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday - dt.timedelta(days=5), endday + dt.timedelta(days=1), timeofday) historic = dataobj.get_data(timestamps, symbols, ["close"])[0] historic["_CASH"] = 1 closest = historic[historic.index <= share_table.index[0]].ix[:] ts_leverage = pandas.Series(0, index=[closest.index[-1]]) # start shares/fund out as 100% cash first_val = closest.ix[-1] * share_table.ix[0] fund_ts = pandas.Series([first_val.sum(axis=1)], index=[closest.index[-1]]) prev_row = share_table.ix[0] for row_index, row in share_table.iterrows(): # print row_index trade_price = historic.ix[row_index:].ix[0:1] trade_date = trade_price.index[0] # print trade_date # get stock prices on all the days up until this trade to_calculate = historic[(historic.index <= trade_date) & (historic.index > fund_ts.index[-1])] # multiply prices by our current shares values_by_stock = to_calculate * prev_row # for date, sym in values_by_stock.iteritems(): # print date,sym # print values_by_stock prev_row = row #update leverage ts_leverage = _calculate_leverage(values_by_stock, ts_leverage) # calculate total value and append to our fund history fund_ts = fund_ts.append([values_by_stock.sum(axis=1)]) return [fund_ts, ts_leverage]
def generate_report(funds_list, graph_names, out_file, i_start_cash=10000):
    """ @summary generates a report given a list of fund time series:
    writes report.html, plots every fund (scaled to i_start_cash) plus the
    $SPX benchmark into funds.png, and prints per-fund statistics.
    @param funds_list: list of fund time series (each either a Series or a
                       list whose first element is the Series)
    @param graph_names: file names used as plot labels, parallel to funds_list
    @param out_file: name passed through to print_header
    @param i_start_cash: starting cash all series are rescaled to
    """
    html_file = open("report.html", "w")
    print_header(html_file, out_file)
    html_file.write("<IMG SRC = \'./funds.png\' width = 400/>\n")
    html_file.write("<BR/>\n\n")
    i = 0
    pyplot.clf()
    #load spx for time frame
    symbol = ["$SPX"]
    # Track the union of all fund date ranges (0 = "not yet set").
    start_date = 0
    end_date = 0
    for fund in funds_list:
        if (type(fund) != type(list())):
            if (start_date == 0 or start_date > fund.index[0]):
                start_date = fund.index[0]
            if (end_date == 0 or end_date < fund.index[-1]):
                end_date = fund.index[-1]
            # Rescale so every fund starts at i_start_cash.
            mult = i_start_cash / fund.values[0]
            pyplot.plot(fund.index, fund.values * mult, label = \
                        path.basename(graph_names[i]))
        else:
            # A list entry: the fund series is its first element.
            if (start_date == 0 or start_date > fund[0].index[0]):
                start_date = fund[0].index[0]
            if (end_date == 0 or end_date < fund[0].index[-1]):
                end_date = fund[0].index[-1]
            mult = i_start_cash / fund[0].values[0]
            pyplot.plot(fund[0].index, fund[0].values * mult, label = \
                        path.basename(graph_names[i]))
        i += 1
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(start_date, end_date, timeofday)
    dataobj = de.DataAccess('mysql')
    benchmark_close = dataobj.get_data(timestamps, symbol, ["close"], \
                                       verbose = False)[0]
    mult = i_start_cash / benchmark_close.values[0]
    i = 0
    for fund in funds_list:
        if (type(fund) != type(list())):
            print_stats(fund, ["$SPX"], graph_names[i])
        else:
            print_stats(fund[0], ["$SPX"], graph_names[i])
        i += 1
    # NOTE(review): label "SSPX" looks like a typo for "$SPX" — confirm
    # before changing, since it is a user-visible legend string.
    pyplot.plot(benchmark_close.index, \
                benchmark_close.values*mult, label = "SSPX")
    pyplot.ylabel('Fund Value')
    pyplot.xlabel('Date')
    pyplot.legend()
    savefig('funds.png', format='png')
    print_footer(html_file)
def testFeature( fcFeature, dArgs ):
    '''
    @summary: Quick function to run a feature on some data and plot it to see if it works.
    @param fcFeature: Feature function to test
    @param dArgs: Arguments to pass into feature function
    @return: Void
    '''
    ''' Get Train data for 2009-2010 '''
    dtStart = dt.datetime(2009, 1, 1)
    dtEnd = dt.datetime(2009, 5, 1)
    ''' Pull in current training data and test data '''
    norObj = da.DataAccess('Norgate')
    ''' Get 2 extra months for moving averages and future returns '''
    ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) )
    # Small fixed test universe; $SPX doubles as the market-relative baseline.
    lsSym = ['GOOG']
    lsSym.append('WMT')
    lsSym.append('$SPX')
    lsSym.append('$VIX')
    lsSym.sort()
    lsKeys = ['open', 'high', 'low', 'close', 'volume']
    ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
    dData = dict(zip(lsKeys, ldfData))
    dfPrice = dData['close']
    #print dfPrice.values
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    # dtStart is reused here as a wall-clock timer, not a data date.
    dtStart = dt.datetime.now()
    ldfFeatures = applyFeatures( dData, [fcFeature], [dArgs], sMarketRel='$SPX' )
    print 'Runtime:', dt.datetime.now() - dtStart
    ''' Use last 3 months of index, to avoid lookback nans '''
    dfPrint = ldfFeatures[0]['GOOG']
    print 'GOOG values:', dfPrint.values
    print 'GOOG Sum:', dfPrint.ix[dfPrint.notnull()].sum()
    # Top pane: symbol price vs. $SPX rescaled to the symbol's level 60
    # days back; bottom pane: the feature values for the same window.
    for sSym in lsSym:
        plt.subplot( 211 )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice[sSym].values[-60:] )
        plt.plot( ldfFeatures[0].index[-60:], dfPrice['$SPX'].values[-60:] * dfPrice[sSym].values[-60] / dfPrice['$SPX'].values[-60] )
        plt.legend((sSym, '$SPX'))
        plt.title(sSym)
        plt.subplot( 212 )
        plt.plot( ldfFeatures[0].index[-60:], ldfFeatures[0][sSym].values[-60:] )
        plt.title( '%s-%s'%(fcFeature.__name__, str(dArgs)) )
        plt.show()
def share_table2fund(share_table): """ @summary converts data frame of shares into fund values @param share_table: data frame containing shares on days transactions occured @return fund : time series containing fund value over time @return leverage : time series containing fund value over time """ # Get the data from the data store dataobj = de.DataAccess('mysql') startday = share_table.index[0] endday = share_table.index[-1] symbols = list(share_table.columns) symbols.remove('_CASH') # print symbols # Get desired timestamps timeofday = dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday - dt.timedelta(days=5), endday + dt.timedelta(days=1), timeofday) historic = dataobj.get_data(timestamps, symbols, ["close"])[0] historic.fillna(method='ffill', inplace=True) historic["_CASH"] = 1 closest = historic[historic.index <= share_table.index[0]].ix[:] ts_leverage = pandas.Series(0, index=[closest.index[-1]]) # start shares/fund out as 100% cash first_val = closest.ix[-1] * share_table.ix[0] fund_ts = pandas.Series([first_val.sum(axis=1)], index=[closest.index[-1]]) prev_row = share_table.ix[0] for row_index, row in share_table.iterrows(): # print row_index trade_price = historic.ix[row_index:].ix[0:1] trade_date = trade_price.index[0] # print trade_date # get stock prices on all the days up until this trade to_calculate = historic[(historic.index <= trade_date) & (historic.index > fund_ts.index[-1])] # multiply prices by our current shares values_by_stock = to_calculate * prev_row # for date, sym in values_by_stock.iteritems(): # print date,sym # print values_by_stock prev_row = row #update leverage ts_leverage = _calculate_leverage(values_by_stock, ts_leverage) # calculate total value and append to our fund history fund_ts = fund_ts.append([values_by_stock.sum(axis=1)]) return [fund_ts, ts_leverage]
def generate_report(funds_list, graph_names, out_file, i_start_cash = 10000):
    """ @summary generates a report given a list of fund time series:
    writes report.html, plots every fund (scaled to i_start_cash) plus the
    $SPX benchmark into funds.png, and prints per-fund statistics.
    @param funds_list: list of fund time series (each either a Series or a
                       list whose first element is the Series)
    @param graph_names: file names used as plot labels, parallel to funds_list
    @param out_file: name passed through to print_header
    @param i_start_cash: starting cash all series are rescaled to
    """
    html_file = open("report.html","w")
    print_header(html_file, out_file)
    html_file.write("<IMG SRC = \'./funds.png\' width = 400/>\n")
    html_file.write("<BR/>\n\n")
    i = 0
    pyplot.clf()
    #load spx for time frame
    symbol = ["$SPX"]
    # Track the union of all fund date ranges (0 = "not yet set").
    start_date = 0
    end_date = 0
    for fund in funds_list:
        if(type(fund)!= type(list())):
            if(start_date == 0 or start_date>fund.index[0]):
                start_date = fund.index[0]
            if(end_date == 0 or end_date<fund.index[-1]):
                end_date = fund.index[-1]
            # Rescale so every fund starts at i_start_cash.
            mult = i_start_cash/fund.values[0]
            pyplot.plot(fund.index, fund.values * mult, label = \
                        path.basename(graph_names[i]))
        else:
            # A list entry: the fund series is its first element.
            if(start_date == 0 or start_date>fund[0].index[0]):
                start_date = fund[0].index[0]
            if(end_date == 0 or end_date<fund[0].index[-1]):
                end_date = fund[0].index[-1]
            mult = i_start_cash/fund[0].values[0]
            pyplot.plot(fund[0].index, fund[0].values * mult, label = \
                        path.basename(graph_names[i]))
        i += 1
    timeofday = dt.timedelta(hours = 16)
    timestamps = du.getNYSEdays(start_date, end_date, timeofday)
    dataobj = de.DataAccess('mysql')
    benchmark_close = dataobj.get_data(timestamps, symbol, ["close"], \
                                       verbose = False)[0]
    mult = i_start_cash/benchmark_close.values[0]
    i = 0
    for fund in funds_list:
        if(type(fund)!= type(list())):
            print_stats(fund, ["$SPX"], graph_names[i])
        else:
            print_stats( fund[0], ["$SPX"], graph_names[i])
        i += 1
    # NOTE(review): label "SSPX" looks like a typo for "$SPX" — confirm
    # before changing, since it is a user-visible legend string.
    pyplot.plot(benchmark_close.index, \
                benchmark_close.values*mult, label = "SSPX")
    pyplot.ylabel('Fund Value')
    pyplot.xlabel('Date')
    pyplot.legend()
    savefig('funds.png', format = 'png')
    print_footer(html_file)
def runOther(funds, symbols):
    """Simulate an equal-weight buy-and-hold of *symbols* over the fund's span.

    Allocates 10% of the book split evenly across every symbol plus one
    cash slot, then runs it through qs.quickSim with 1000 starting cash.
    """
    first = funds.index[0]
    last = funds.index[-1]
    span_start = dt.datetime(first.year, first.month, first.day)
    span_end = dt.datetime(last.year, last.month, last.day)
    stamps = du.getNYSEdays(span_start, span_end, dt.timedelta(hours=16))
    historic = da.DataAccess('Norgate').get_data(stamps, symbols, "close")
    # One equal slice for each symbol and one for cash.
    weight = float(0.1 / (float(len(symbols)) + 1))
    weight_row = weight * ones(len(symbols))
    alloc = DataMatrix(index=[historic.index[0]], data=[weight_row],
                       columns=symbols)
    alloc = alloc.append(DataMatrix(index=[historic.index[-1]],
                                    data=[weight_row], columns=symbols))
    alloc['_CASH'] = weight
    return qs.quickSim(alloc, historic, 1000)
def __init__(self, dataAccess, listOfStocks, startTime, endTime):
    """Cache 'actual_close' prices for *listOfStocks* over [startTime, endTime].

    @param dataAccess: data access object providing get_data()
    @param listOfStocks: symbols whose prices to load
    @param startTime: first datetime of the range
    @param endTime: last datetime of the range
    """
    self.dataAccess = dataAccess
    timeofday=dt.timedelta(hours=16)
    # NYSE trading-day index stamped at the 16:00 close.
    self.timestampIndex = du.getNYSEdays(startTime,endTime,timeofday)
    self.symbolIndex = listOfStocks
    print __name__ + " reading data"
    # Reading the Data
    self.priceArray = dataAccess.get_data(self.timestampIndex, self.symbolIndex, "actual_close")
    # Cursor for the most recently served timestamp index.
    self.prevTsIdx = 0
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """Mark days a close crosses down through the module-global threshold
    *eventThreachold* and write matching Buy/Sell(+5 trading days) order
    rows to hw4.csv.

    @param symbols: list of symbols to scan
    @param startday: start datetime
    @param endday: end datetime
    @param marketSymbol: symbol used only to size the scan (len of its series)
    @param verbose: print progress
    @return pandas frame with 1.0 on event days, NaN elsewhere
    """
    # Reading the Data for the list of Symbols.
    timeofday=dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print __name__ + " reading data"
    # Reading the Data
    close = dataobj.get_data(timestamps, symbols, closefield)
    # Completing the Data - Removing the NaN values from the Matrix
    # close = (close.fillna(method='ffill')).fillna(method='backfill')
    # Calculating Daily Returns for the Market
    #!!! tsu.returnize0(close.values)
    # SPYValues=close[marketSymbol]
    # Calculating the Returns of the Stock Relative to the Market
    # So if a Stock went up 5% and the Market rised 3%. The the return relative to market is 2%
    # mktneutDM = close - close[marketSymbol]
    # Start the event matrix as an all-NaN copy of the price frame.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time]=np.NAN
    if verbose:
        print __name__ + " finding events"
    # Generating the Event Matrix
    # Event described is : Market falls more than 3% plus the stock falls 5% more than the Market
    # Suppose : The market fell 3%, then the stock should fall more than 8% to mark the event.
    # And if the market falls 5%, then the stock should fall more than 10% to mark the event.
    # NOTE(review): the comment above describes an older event definition;
    # the actual event below is a cross down through eventThreachold.
    output = open("hw4.csv", "w")
    totalDays = len(close[marketSymbol])
    for i in range(1, totalDays):
        for symbol in symbols:
            # Event: yesterday at/above threshold, today below it.
            if close[symbol][timestamps[i-1]] >= eventThreachold and close[symbol][timestamps[i]] < eventThreachold :
                moment = timestamps[i]
                output.write('%(year)04d,%(month)02d,%(day)02d,%(symbol)s,Buy, 100 \n' % {"year":moment.year, "month":moment.month, "day":moment.day, "symbol":symbol })
                # Exit 5 trading days later, clamped to the last day.
                sellMomentIndex = min(i + 5, totalDays-1)
                # print "i:", i, ", totalDays: ", totalDays, "===", sellMomentIndex
                moment = timestamps[sellMomentIndex]
                output.write('%(year)04d,%(month)02d,%(day)02d,%(symbol)s,Sell,100 \n' % {"year":moment.year, "month":moment.month, "day":moment.day, "symbol":symbol })
                np_eventmat[symbol][i] = 1.0 #overwriting by the bit, marking the event
    output.close()
    return np_eventmat
def get_price(symbols, start_day, end_day, close_field='close', verbose='False'): ''' return prices in pandas dataframe type ''' time_of_day = datetime.timedelta(hours=16) time_stamps = qsdateutil.getNYSEdays(start_day, end_day, time_of_day) data_obj = DataAccess.DataAccess('Yahoo') if verbose: print __name__ + "reading data ..." price = data_obj.get_data(time_stamps, symbols, close_field) return price
def daily(lfFunds):
    """
    @summary Computes daily returns centered around 0
    @param lfFunds: A time series containing daily fund values
    @return an array of daily returns
    """
    if type(lfFunds) == type(pd.Series()):
        # Reindex onto the NYSE trading-day calendar, forward-filling gaps.
        nyse_days = du.getNYSEdays(lfFunds.index[0], lfFunds.index[-1],
                                   dt.timedelta(hours=16))
        lfFunds = lfFunds.reindex(index=nyse_days, method='ffill')
    values = np.asarray(deepcopy(lfFunds))
    # Promote 1-D input to a column vector so returnize0 sees 2-D data.
    if len(np.shape(values)) == 1:
        values = np.expand_dims(values, 1)
    # returnize0 converts values to 0-centered returns in place.
    returnize0(values)
    return (values)
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """Mark days on which a stock's close crosses down through $7.00.

    @param symbols: list of symbols to scan
    @param startday: start datetime
    @param endday: end datetime
    @param marketSymbol: unused in the active code path (kept for interface
                         parity with the market-relative variant)
    @param verbose: print progress
    @return pandas frame with 1.0 on event days, NaN elsewhere
    """
    # Reading the Data for the list of Symbols.
    timeofday=dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print __name__ + " reading data"
    # Reading the Data
    close = dataobj.get_data(timestamps, symbols, closefield)
    # Completing the Data - Removing the NaN values from the Matrix
    # close = (close.fillna(method='ffill')).fillna(method='backfill')
    # Calculating Daily Returns for the Market
    #!!! tsu.returnize0(close.values)
    # SPYValues=close[marketSymbol]
    # Calculating the Returns of the Stock Relative to the Market
    # So if a Stock went up 5% and the Market rised 3%. The the return relative to market is 2%
    # mktneutDM = close - close[marketSymbol]
    # Start the event matrix as an all-NaN copy of the price frame.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time]=np.NAN
    if verbose:
        print __name__ + " finding events"
    # Generating the Event Matrix
    # Event described is : Market falls more than 3% plus the stock falls 5% more than the Market
    # Suppose : The market fell 3%, then the stock should fall more than 8% to mark the event.
    # And if the market falls 5%, then the stock should fall more than 10% to mark the event.
    # NOTE(review): the comment above describes an older (commented-out)
    # definition; the active event is a cross down through $7.00.
    for symbol in symbols:
        for i in range(1,len(close[symbol])):
            # Event: yesterday's close >= 7.0, today's close < 7.0.
            if close[symbol][timestamps[i-1]] >= 7.0 and close[symbol][timestamps[i]] < 7.0 :
                np_eventmat[symbol][i] = 1.0 #overwriting by the bit, marking the event
            # if SPYValues[i]<-0.03 and mktneutDM[symbol][i] < -0.05 : # When
            #     market fall is more than 3% and also the stock
            #     compared to market is also fell by more than 5%.
    return np_eventmat
def get_price(symbols, start_day, end_day, close_field='close', data_source='Yahoo', verbose='False'): ''' return prices in pandas dataframe type ''' time_of_day = datetime.timedelta(hours=16) time_stamps = qsdateutil.getNYSEdays(start_day, end_day, time_of_day) data_obj = DataAccess.DataAccess(data_source) if verbose: print __name__ + " is reading data from %s..." % data_source market_data = data_obj.get_data(time_stamps, symbols, close_field) if verbose: print __name__ + "finished reading data." return market_data
def findEvents(symbols, startday, endday, verbose=False): timeofday = dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday, endday, timeofday) dataobj = da.DataAccess('Yahoo') if verbose: print __name__ + " reading data" close = dataobj.get_data(timestamps, symbols, closefield) close = (close.fillna()).fillna(method='backfill') if verbose: print __name__ + " finding events" for symbol in symbols: close[symbol][close[symbol] >= 1.0] = np.NAN for i in range(1, len(close[symbol])): if np.isnan( close[symbol][i - 1] ) and close[symbol][i] < 1.0: #(i-1)th was > $1, and (i)th is <$1 close[symbol][i] = 1.0 #overwriting the price by the bit close[symbol][close[symbol] < 1.0] = np.NAN return close
def runOther(funds, symbols):
    """Run an equal-weight buy-and-hold benchmark over the fund's lifetime.

    10% of the book is split evenly across every symbol plus a cash slot;
    the allocation is handed to qs.quickSim with 1000 starting cash.
    """
    begin = funds.index[0]
    finish = funds.index[-1]
    close_time = dt.timedelta(hours=16)
    days = du.getNYSEdays(dt.datetime(begin.year, begin.month, begin.day),
                          dt.datetime(finish.year, finish.month, finish.day),
                          close_time)
    prices = da.DataAccess('Norgate').get_data(days, symbols, "close")
    # One equal slice per symbol, plus one for cash.
    slice_w = float(0.1 / (float(len(symbols)) + 1))
    row = slice_w * ones(len(symbols))
    table = DataMatrix(index=[prices.index[0]], data=[row], columns=symbols)
    table = table.append(
        DataMatrix(index=[prices.index[-1]], data=[row], columns=symbols))
    table['_CASH'] = slice_w
    return qs.quickSim(table, prices, 1000)
def __init__(self,eventMatrix,startday,endday,\ lookback_days = 20, lookforward_days =20,\ verbose=False): """ Event Profiler class construtor Parameters : evenMatrix : startday : endday (optional) : lookback_days ( default = 20) (optional) : lookforward_days( default = 20) eventMatrix is a pandas DataMatrix eventMatrix must have the following structure: |IBM |GOOG|XOM |MSFT| GS | JP | (d1)|nan |nan | 1 |nan |nan | 1 | (d2)|nan | 1 |nan |nan |nan |nan | (d3)| 1 |nan | 1 |nan | 1 |nan | (d4)|nan | 1 |nan | 1 |nan |nan | ................................... ................................... Also, d1 = start date nan = no information about any event. = status bit(positively confirms the event occurence) """ self.eventMatrix = eventMatrix self.startday = startday self.endday = endday self.symbols = eventMatrix.columns self.lookback_days = lookback_days self.lookforward_days = lookforward_days self.total_days = lookback_days + lookforward_days + 1 self.dataobj = da.DataAccess('Yahoo') self.timeofday = dt.timedelta(hours=16) self.timestamps = du.getNYSEdays(startday, endday, self.timeofday) self.verbose = verbose if verbose: print __name__ + " reading historical data" self.close = self.dataobj.get_data(self.timestamps,\ self.symbols, "close", verbose=self.verbose) self.close = (self.close.fillna()).fillna(method='backfill')
def calculate_efficiency(dt_start_date, dt_end_date, s_stock):
    """
    @summary calculates the exit-entry/high-low trade efficiency of a stock
             from historical data
    @param dt_start_date: entry point for the trade
    @param dt_end_date: exit point for the trade
    @param s_stock: stock to compute efficiency for
    @return: float representing efficiency
    """
    store = de.DataAccess('mysql')
    # One extra calendar day so the end date itself is included.
    stamps = du.getNYSEdays(dt_start_date, dt_end_date + dt.timedelta(days=1),
                            dt.timedelta(hours=16))
    series = store.get_data(stamps, [s_stock], ["close"])[0]
    highest = numpy.max(series.values)
    lowest = numpy.min(series.values)
    entry_px = series.values[0]
    exit_px = series.values[-1]
    # Efficiency = fraction of the total high-low range actually captured.
    return (((exit_px - entry_px) / (highest - lowest))[0])
def findEvents(symbols, startday,endday, marketSymbol,verbose=False):
    """Mark $7.00 cross-down events and emit Buy/Sell orders to orders.csv.

    For each event, writeOrder records a Buy on the event day and a Sell
    5 trading days later (clamped to the last available day).

    @param symbols: list of symbols to scan
    @param startday: start datetime
    @param endday: end datetime
    @param marketSymbol: used only to extract SPYValues (unused afterwards)
    @param verbose: print progress
    @return pandas frame with 1.0 on event days, NaN elsewhere
    """
    # Reading the Data for the list of Symbols.
    timeofday=dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday,endday,timeofday)
    dataobj = da.DataAccess('Yahoo')
    if verbose:
        print __name__ + " reading data"
    # Reading the Data
    close = dataobj.get_data(timestamps, symbols, closefield)
    # Completing the Data - Removing the NaN values from the Matrix
    close = (close.fillna(method='ffill')).fillna(method='backfill')
    # Calculating Daily Returns for the Market
    SPYValues=close[marketSymbol]
    # Calculating the Returns of the Stock Relative to the Market
    # So if a Stock went up 5% and the Market rised 3%. The the return relative to market is 2%
    # Start the event matrix as an all-NaN copy of the price frame.
    np_eventmat = copy.deepcopy(close)
    for sym in symbols:
        for time in timestamps:
            np_eventmat[sym][time]=np.NAN
    if verbose:
        print __name__ + " finding events"
    orders = open('orders.csv', 'w')
    # Generating the Event Matrix
    for symbol in symbols:
        # NOTE(review): the scan starts at i=2, so a cross on day 1 is never
        # detected — confirm whether this offset is intentional.
        for i in range(2,len(close[symbol])):
            if close[symbol][i-1]>=7.0 and close[symbol][i]<7.0: #TRUE if price drops below 7.0
                np_eventmat[symbol][i] = 1.0 #overwriting by the bit, marking the event
                j = min([i+5, len(close) -1]) #order reversing day, clamped to last row
                writeOrder(close.index[i], symbol, 'Buy', 100, orders)
                writeOrder(close.index[j], symbol, 'Sell', 100, orders)
    orders.close()
    return np_eventmat