def get_data(dt_start, dt_end, symbols, lookback=0):
    """Return gap-filled 'close' data for ``symbols`` over a date range.

    The range covers the NYSE trading days returned by ``du.getNYSEdays``
    for ``dt_start``..``dt_end`` (16:00 timestamps).  When ``lookback`` is
    positive, up to that many trading days immediately preceding the range
    are prepended so callers can seed rolling computations.

    Args:
        dt_start: first date of the range (datetime).
        dt_end: last date of the range (datetime).
        symbols: symbols forwarded to ``DataAccess.get_data``.
        lookback: number of extra leading trading days; must be >= 0.

    Returns:
        The object returned by ``DataAccess('Yahoo').get_data(..., 'close')``
        with missing values filled forward, then backward, then with 1.0.
    """
    assert(lookback >= 0)
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Only extend the range when a lookback was actually requested; the
    # default of 0 must leave the timestamps untouched.
    if lookback > 0:
        # Over-fetch calendar days: weekends and holidays mean a plain
        # 2x window can hold fewer than `lookback` sessions when it is small.
        dt_window_start = dt_start - dt.timedelta(days=lookback * 2 + 7)
        lookback_timestamps = du.getNYSEdays(dt_window_start, dt_start,
                                             dt_timeofday)
        # Both windows touch dt_start, so that day may appear twice.
        if (ldt_timestamps and lookback_timestamps
                and ldt_timestamps[0] == lookback_timestamps[-1]):
            debug("contains the end time")
            lookback_timestamps.pop()
        # Keep exactly the last `lookback` sessions (the former
        # -(lookback-1) slice degenerated to [-0:] == everything for 1).
        ldt_timestamps = lookback_timestamps[-lookback:] + ldt_timestamps

    dataobj = da.DataAccess('Yahoo')
    debug("getting data from %s to %s"
          % (ldt_timestamps[0].strftime("%Y-%m-%d"),
             ldt_timestamps[-1].strftime("%Y-%m-%d")))
    ldf_data = dataobj.get_data(ldt_timestamps, symbols, 'close')
    ldf_data = ldf_data.fillna(method='ffill')
    ldf_data = ldf_data.fillna(method='bfill')
    ldf_data = ldf_data.fillna(1.0)
    return ldf_data
def totalvalue(cash_ini,orderform,valueform):
    """Replay an orders CSV and write the daily total portfolio value.

    Args:
        cash_ini: starting cash balance.
        orderform: path of a header-less CSV whose columns are
            Year, Month, Day, Symbol, Order, Share.  Any ``Order`` value
            other than ``'Buy'`` is treated as a sale.
        valueform: path the ``('value', 'TOTAL')`` column is written to
            with ``to_csv``.

    Returns:
        None.  The result is the CSV written to ``valueform``.
    """
    trades = pd.read_csv(orderform, header=None, sep=',')
    # Drop columns that are entirely empty (e.g. one produced by a
    # trailing comma) before naming the remaining six.
    trades = trades.dropna(axis=1, how='all')
    trades.columns = ['Year', 'Month', 'Day', 'Symbol', 'Order', 'Share']

    # Every order is stamped at hour 16 so it lines up with the 16:00
    # timestamps requested from getNYSEdays below.
    dateall = pd.to_datetime([
        dt.datetime(int(year), int(month), int(day), 16)
        for year, month, day in zip(trades['Year'], trades['Month'],
                                    trades['Day'])
    ])
    trades = trades.drop(['Year', 'Month', 'Day'], axis=1)
    trades['Date'] = dateall
    trades.set_index('Date', inplace=True)

    # Unique symbols in first-seen order.
    ls_symbols = []
    for symbol in trades.Symbol:
        if symbol not in ls_symbols:
            ls_symbols.append(symbol)

    # Use min/max rather than first/last row so an unsorted order file
    # still spans the full date range.
    startdate = dateall.min()
    enddate = dateall.max()
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startdate, enddate + dt_timeofday,
                                    dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    price = c_dataobj.get_data(ldt_timestamps, ls_symbols, 'close')

    # Zero-filled frame shaped like `price`: signed share count per day.
    orders = (price * 0.0).fillna(0)
    for stamp, symbol, side, shares in zip(trades.index, trades['Symbol'],
                                           trades['Order'], trades['Share']):
        signed_shares = shares if side == 'Buy' else -shares
        orders.loc[stamp, symbol] += signed_shares

    # Cash falls by the cost of each day's net purchases.
    daily_cost = (price * orders).sum(axis=1).values
    cash = np.zeros(len(daily_cost), dtype=float)
    balance = cash_ini
    for i, cost in enumerate(daily_cost):
        balance = balance - cost
        cash[i] = balance

    # Holdings are the running total of orders; value marks them to price.
    ownership = orders.cumsum()
    value = (price * ownership).sum(axis=1).values

    keys = ['price', 'orders', 'ownership']
    trading_table = pd.concat([price, orders, ownership], keys=keys, axis=1)
    trading_table[('value', 'CASH')] = cash
    trading_table[('value', 'STOCK')] = value
    trading_table[('value', 'TOTAL')] = cash + value
    trading_table[('value', 'TOTAL')].to_csv(valueform)
def simulate(startdate, enddate, symbols, alloc):
    """Summarise a fixed-allocation portfolio over a date range.

    Close prices are normalised by their first row, weighted by ``alloc``
    and summed per day; statistics are taken from the daily returns of
    that series.

    Returns:
        Tuple ``(vol, daily_ret, sharpe, cum_ret)``: standard deviation and
        mean of daily returns, sqrt(252)-scaled ratio of the two, and the
        last portfolio value divided by the first.
    """
    close_offset = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(startdate, enddate, close_offset)

    source = da.DataAccess('Yahoo', cachestalltime=0)
    fields = ['open', 'close', 'high', 'low', 'volume']
    frames = dict(zip(fields, source.get_data(trading_days, symbols, fields)))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        filled = frames[field].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        frames[field] = filled.fillna(1.0)

    closes = frames['close'].values
    weighted = (closes / closes[0, :]) * alloc
    port_value = np.sum(weighted, axis = 1)

    # returnize0 is applied to a copy; its return value is not used here.
    rets = port_value.copy()
    tsu.returnize0(rets)

    vol = np.std(rets)
    daily_ret = np.mean(rets)
    sharpe = mt.sqrt(252)*daily_ret/vol
    cum_ret = port_value[-1]/port_value[0]
    return (vol, daily_ret, sharpe, cum_ret)
def simulate(dt_start, dt_end, ls_symbols, ratio):
    """Compute portfolio statistics for ``ls_symbols`` weighted by ``ratio``.

    Close prices are normalised by their first row, multiplied by
    ``ratio`` and summed per day to form the portfolio value series.

    Args:
        dt_start: first date of the range (datetime).
        dt_end: last date of the range (datetime).
        ls_symbols: symbols forwarded to ``DataAccess.get_data``.
        ratio: per-symbol weights, multiplied against the price columns.

    Returns:
        Tuple ``(vol, daily_ret, sharpe, cum_ret)``: standard deviation and
        mean of the daily returns, ``sqrt(252) * daily_ret / vol``, and the
        last portfolio value divided by the first.
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data['close'].values
    na_normalized_price = na_price / na_price[0, :]
    all_price_ratio = np.sum(np.multiply(na_normalized_price, ratio), axis=1)

    # Cumulative return comes from the value series itself, so read it
    # before any return conversion.
    cum_ret = all_price_ratio[-1] / all_price_ratio[0]

    # tsu.returnize0 rewrites its ndarray argument in place, so it must be
    # applied exactly once and to a copy; calling it again on the same
    # array (as before) yields returns of returns.
    na_rets = all_price_ratio.copy()
    tsu.returnize0(na_rets)

    vol = np.std(na_rets)
    daily_ret = np.mean(na_rets)
    sharpe = np.sqrt(252) * daily_ret / vol
    return vol, daily_ret, sharpe, cum_ret
def GetNormalizedReturn(dt_start,dt_end, symbols,c_dataobj):
    """Return close prices for ``symbols`` divided by their first row.

    ``c_dataobj`` supplies the data through its ``get_data`` method; the
    result is the ``.values`` array of the 'close' frame, normalised so
    that the first row is all ones.
    """
    # All keys are requested in a single read; only 'close' is used.
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Closing prices, hence the 16:00 time of day.
    close_offset = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(dt_start, dt_end, close_offset)

    frames = c_dataobj.get_data(trading_days, symbols, fields)
    closes = dict(zip(fields, frames))['close'].copy().values
    return closes / closes[0, :]
def portfolio_by_date(start_date, end_date, codes, trades):
    """Build per-day and cumulative signed trade tables.

    Args:
        start_date: first date of the range (datetime).
        end_date: last date of the range (datetime).
        codes: symbols; each becomes one column.
        trades: iterable of sequences whose first three items are
            (date, symbol, action) and whose last item is the quantity.
            The date must be one of the days returned by
            ``du.getNYSEdays``.

    Returns:
        ``(df_portfolio, df_ptrades)``.  ``df_portfolio`` holds the signed
        quantity on the trade day only; ``df_ptrades`` holds the running
        total from the trade day onward.  Quantities are positive for
        "sell" (case-insensitive) and negative for any other action.
        Columns are sorted by label.

    Raises:
        ValueError: if a trade date is not in the trading-day list.
    """
    ny_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=0))
    n_days = len(ny_days)

    # {"AAPL": [0, 0, 0, ...], "GOOG": [0, 0, ...]}
    portfolio_codes = collections.OrderedDict(
        (code, np.zeros(n_days)) for code in codes)
    portfolio_trades = collections.OrderedDict(
        (code, np.zeros(n_days)) for code in codes)

    for curr_trade in trades:
        quantity = curr_trade[-1]
        value = quantity if curr_trade[2].lower() == "sell" else -quantity
        index = ny_days.index(curr_trade[0])
        stock = curr_trade[1]
        portfolio_codes[stock][index] += value
        # Carry the trade forward to every later day in one slice update.
        portfolio_trades[stock][index:] += value

    # sort_index(axis=1) orders columns by label; DataFrame.sort no longer
    # exists in current pandas.
    df_portfolio = pd.DataFrame(portfolio_codes).sort_index(axis=1)
    df_ptrades = pd.DataFrame(portfolio_trades).sort_index(axis=1)
    return df_portfolio, df_ptrades
def bollinger_band(self, tick, window=20, k=2, nml=False, mi_only=False):
    """Compute Bollinger-band series for ``tick`` over this object's index.

    Data preceding the first index entry is loaded through
    ``ut.get_tickdata`` so the rolling window is already populated at the
    start of the index.

    Args:
        tick: symbol whose earlier data is fetched.
        window: rolling window length.
        k: band width as a multiple of the rolling standard deviation.
        nml: use the 'nml_close' column instead of 'close'.
        mi_only: return only the moving average.

    Returns:
        The moving-average series when ``mi_only`` is true; otherwise a
        dict with 'mi' (moving average), 'up' (upper band), 'lo' (lower
        band) and 'ba' (Bollinger value), each selected at this object's
        index.
    """
    own_stamps = self.index
    close_offset = dt.timedelta(hours=16)

    # Calendar-day span fetched ahead of the first index entry.
    pad = dt.timedelta(days=(np.ceil(window*7/5)+5))
    first_stamp = own_stamps[0]
    earlier_stamps = du.getNYSEdays(first_stamp - pad,
                                    first_stamp - dt.timedelta(days=1),
                                    close_offset)

    # Earlier data, used to fill the first rolling window.
    history = ut.get_tickdata([tick], earlier_stamps)
    column = 'nml_close' if nml else 'close'
    series = pd.concat([history[tick][column], self[column]])

    bands = dict()
    bands['mi'] = pd.rolling_mean(series, window=window)[own_stamps]
    if mi_only:
        return bands['mi']

    sigma = pd.rolling_std(series, window=window)
    width = k * sigma[own_stamps]
    bands['up'] = bands['mi'] + width
    bands['lo'] = bands['mi'] - width
    bands['ba'] = (series[own_stamps] - bands['mi']) / width
    return bands
def marketsim(cash, orders_file, data_item, dataobj):
    """Replay the orders in a CSV file against a ``pf.Portfolio``.

    Args:
        cash: starting cash handed to ``pf.Portfolio``.
        orders_file: path of a CSV whose rows start with
            year, month, day, symbol, action, quantity.
        data_item: data key forwarded to ``dataobj.get_data``.
        dataobj: object providing ``get_data(timestamps, symbols, item)``.

    Returns:
        List of ``(year, month, day, value)`` tuples, one per trading day
        from the first to the last order date, where ``value`` is
        ``portfolio.value(close, i)`` after that day's orders.
    """
    # Read orders, grouped by calendar date.
    orders = defaultdict(list)
    symbols = set()
    with open(orders_file, "rU") as csv_in:
        for row in csv.reader(csv_in):
            if not row:
                continue  # skip blank lines
            # Tolerate trailing empty fields (e.g. a trailing comma).
            year, month, day, sym, action, num = row[:6]
            order_day = dt.date(int(year), int(month), int(day))
            orders[order_day].append((sym, action, int(num)))
            symbols.add(sym)

    days = sorted(orders)
    day, end = days[0], days[-1]

    # Extend the range one day past the last order using timedelta
    # arithmetic; `end.day + 1` is invalid on the last day of a month.
    dt_first = dt.datetime(day.year, day.month, day.day)
    dt_last = dt.datetime(end.year, end.month, end.day) + dt.timedelta(days=1)
    timestamps = du.getNYSEdays(dt_first, dt_last, dt.timedelta(hours=16))

    close = dataobj.get_data(timestamps, symbols, data_item)

    values = []
    portfolio = pf.Portfolio(cash)
    for i, t in enumerate(timestamps):
        # .get avoids growing the defaultdict with empty days.
        for sym, action, num in orders.get(dt.date(t.year, t.month, t.day), ()):
            if action == 'Sell':
                num *= -1
            portfolio.update(sym, num, close[sym][i])
        values.append((t.year, t.month, t.day, portfolio.value(close, i)))
    return values
def simulate(startDate, endDate, symbolsEq, allocationEq) :
    """Compute portfolio statistics for the given symbols and allocation.

    Args:
        startDate: first date of the range (datetime).
        endDate: last date of the range (datetime).
        symbolsEq: symbols forwarded to ``DataAccess.get_data``.
        allocationEq: per-symbol weights applied to the normalised closes.

    Returns:
        Tuple ``(std_dev, avg_daily_return, sharpe_ratio, cumulative)``
        where ``cumulative`` is the last value of the weighted, normalised
        portfolio series.
    """
    dt_timeofday = dt.timedelta(hours=16)
    # Use the function's own arguments; the body previously read
    # dt_start / dt_end / ls_symbols, which are not defined here.
    ldt_timestamps = du.getNYSEdays(startDate, endDate, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbolsEq, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data['close'].values
    na_normalized_price = na_price / na_price[0, :]
    na_sumRows = (na_normalized_price * allocationEq).sum(axis=1)

    # returnize0 is applied to a copy so na_sumRows keeps the value series.
    dailyReturn = na_sumRows.copy()
    tsu.returnize0(dailyReturn)

    avgDailyReturn = np.average(dailyReturn)
    dailyReturnStdDev = np.std(dailyReturn)
    sharpeRatio = np.sqrt(252)*avgDailyReturn/dailyReturnStdDev
    cumulativeReturn = na_sumRows[-1]
    return dailyReturnStdDev, avgDailyReturn, sharpeRatio, cumulativeReturn
def __init__(self,start,end,symbols):
    """Load normalised close data for ``symbols`` and for "$SPX".

    Sets ``start``, ``end``, ``symbols``, ``timestamps`` (trading days at
    16:00), ``close_data_array`` (close values divided by their first row)
    and ``spx_returns`` ("$SPX" close values divided by their first row).
    """
    self.start = start
    self.end = end
    self.symbols = symbols

    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(start, end, timeofday)
    keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # One data-access object serves both queries.
    dataobj = da.DataAccess('Yahoo')
    raw_data = dataobj.get_data(timestamps, symbols, keys)
    close_data = dict(zip(keys, raw_data))["close"]

    # Normalise so every column starts at 1.
    close_data_array = close_data.values
    close_data_array = close_data_array / close_data_array[0,:]

    # Reference data: select "close" by key. Positional index 2 in `keys`
    # is 'low', not 'close'.
    ref_raw = dataobj.get_data(timestamps, ["$SPX"], keys)
    ref_data = dict(zip(keys, ref_raw))["close"]
    ref_data_array = ref_data.values
    ref_data_array = ref_data_array / ref_data_array[0,:]

    self.timestamps = timestamps
    self.close_data_array = close_data_array
    self.spx_returns = ref_data_array
def main(argv):
    """Run the Bollinger-band event study described by ``argv``.

    ``argv`` holds, in order: start date and end date (``YYYY-MM-DD``),
    the name of a symbol list (e.g. sp5002012) and an output directory.
    'SPY' is appended to the symbols and used as the market symbol.
    """
    date_format = "%Y-%m-%d"
    start_date = dt.datetime.strptime(argv[0], date_format)
    end_date = dt.datetime.strptime(argv[1], date_format)
    symbol_list, output_dir = argv[2], argv[3]

    timestamps = du.getNYSEdays(start_date, end_date, dt.timedelta(hours = 16))
    dataobj = da.DataAccess('Yahoo')
    symbols = dataobj.get_symbols_from_list(symbol_list)
    symbols.append('SPY')

    keys = ['close', 'actual_close']
    data_dict = dict(zip(keys, dataobj.get_data(timestamps, symbols, keys)))
    # Fill gaps forward, then backward, then fall back to 1.0.
    for key in keys:
        frame = data_dict[key]
        for how in ('ffill', 'bfill'):
            frame = frame.fillna(method = how)
        data_dict[key] = frame.fillna(1.0)

    bollinger = bollinger_bands(symbols, data_dict['close'], timestamps)
    events = find_events(symbols, bollinger, timestamps)
    report_path = file_name(output_dir, symbol_list, start_date, end_date)
    ep.eventprofiler(
        events,
        data_dict,
        i_lookback = 20,
        i_lookforward = 20,
        s_filename = report_path,
        b_market_neutral = True,
        b_errorbars = True,
        s_market_sym = 'SPY'
    )
def get_data(symbols, dt_start, dt_end, ):
    """Return the ``.values`` of the 'close' data for ``symbols``.

    Data is read for the trading days between ``dt_start`` and ``dt_end``
    (16:00 timestamps); no gap filling is applied.
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    reader = da.DataAccess('Yahoo')
    return reader.get_data(trading_days, symbols, "close").values
def simulate(startdate, enddate, ls_symbols, ls_alloc):
    """Compute statistics for a portfolio with fixed allocations.

    Returns:
        Tuple ``(vol, daily_ret, sharpe_ratio, cum_ret)``: standard
        deviation and average of daily returns, their sqrt(252)-scaled
        ratio, and the final value of the weighted, normalised close
        series.
    """
    trading_days = du.getNYSEdays(startdate, enddate,
                                  dt.timedelta(hours = 16))
    reader = da.DataAccess('Yahoo', cachestalltime = 0)
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, reader.get_data(trading_days, ls_symbols, fields)))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        frame = frames[field]
        for how in ('ffill', 'bfill'):
            frame = frame.fillna(method=how)
        frames[field] = frame.fillna(1.0)

    closes = frames['close'].values
    port_value = np.sum(ls_alloc * (closes / closes[0,:]), 1)

    # returnize0 is applied to a copy so port_value keeps the value series.
    rets = port_value.copy()
    tsu.returnize0(rets)

    vol = np.std(rets)
    daily_ret = np.average(rets)
    sharpe_ratio = np.sqrt(252) * daily_ret / vol
    return vol, daily_ret, sharpe_ratio, port_value[-1]
def read_csv(file_name):
    """Read an orders CSV and return its trading days and symbols.

    Each non-empty row must hold at least four fields: the first three
    are the date parts handed to ``str_dt`` and the fourth is the symbol.
    The first and last rows define the date range.

    Returns:
        ``(ldt_timestamps, ls_symbols)``: the trading days (16:00) from
        the first row's date through one day past the last row's date,
        and the distinct symbols in first-seen order.

    Raises:
        IndexError: if the file has no rows or a row has fewer than four
            fields.
    """
    ls_date = []
    ls_symbols = []
    seen = set()

    with open(file_name, 'rU') as csv_in:
        for row in csv.reader(csv_in, delimiter=','):
            if not row:
                continue  # blank line
            ls_date.append([row[0], row[1], row[2]])
            symbol = row[3]
            # De-duplicate while keeping a deterministic order.
            if symbol not in seen:
                seen.add(symbol)
                ls_symbols.append(symbol)

    # Initial and final date.
    dt_start, dt_end = str_dt(ls_date[0]), str_dt(ls_date[-1])

    # Offset the end by one day when building the trading-day list.
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end + dt.timedelta(days=1),
                                    dt_timeofday)
    return ldt_timestamps, ls_symbols
def get_close_data(start_date, end_date, symbols):
    """
    Return gap-filled close prices for ``symbols`` and the trading days used.

    @param start_date: start date to grab data from
    @param end_date: end date to grab data from
    @param symbols: symbols to get data for
    @return: tuple of (``.values`` array of the filled 'close' data,
             list of trading-day timestamps at 16:00)
    """
    trading_days = du.getNYSEdays(start_date, end_date,
                                  dt.timedelta(hours=16))

    # Yahoo is the data source; every key is read in one call.
    reader = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, reader.get_data(trading_days, symbols, fields)))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        filled = frames[field].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        frames[field] = filled.fillna(1.0)

    return frames['close'].values, trading_days
def load(ls_symbols, ldt_timestamps):
    """Load gap-filled data for ``ls_symbols`` spanning ``ldt_timestamps``.

    ``ldt_timestamps`` is sorted in place; its first and last entries
    bound the trading-day range that is actually read.  The bounds are
    printed between two "---" marker lines.

    Returns:
        Dict mapping each of 'open', 'high', 'low', 'close', 'volume' and
        'actual_close' to its filled data.
    """
    reader = da.DataAccess('Yahoo')

    # NOTE: this sorts the caller's list, not a copy.
    ldt_timestamps.sort()
    print("---")
    first_stamp = ldt_timestamps[0]
    print(first_stamp)
    last_stamp = ldt_timestamps[-1]
    print(last_stamp)
    trading_days = du.getNYSEdays(first_stamp, last_stamp,
                                  dt.timedelta(hours=16))
    print("---")

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, reader.get_data(trading_days, ls_symbols, fields)))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        frame = frames[field]
        for how in ('ffill', 'bfill'):
            frame = frame.fillna(method=how)
        frames[field] = frame.fillna(1.0)
    return frames
def simulate(startdate, enddate, symbols, percentage):
    """Compute portfolio statistics for ``symbols`` weighted by ``percentage``.

    Args:
        startdate: first date of the range (datetime).
        enddate: last date of the range (datetime).
        symbols: symbols forwarded to ``DataAccess.get_data``.
        percentage: per-symbol weights applied to the normalised closes.

    Returns:
        Tuple ``(std, mean, sharpe, cumulative)`` of the daily returns'
        standard deviation and average, ``sqrt(252) * mean / std``, and
        the last portfolio value divided by the first.
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    # Pass the `symbols` argument; `ls_symbols` is not defined in this
    # function.
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_close = d_data['close'].values
    normalized_vals = na_close / na_close[0, :]
    total_dv = (percentage * normalized_vals).sum(axis=1)

    # Daily return: value / previous value - 1, with day 0 fixed at 0.
    dailyret = np.zeros(total_dv.shape[0])
    dailyret[1:] = (total_dv[1:] / total_dv[:-1]) - 1

    avg_ret = np.average(dailyret)
    std_ret = np.std(dailyret)
    sharpe = (math.sqrt(252) * avg_ret) / std_ret
    return (std_ret, avg_ret, sharpe, (total_dv[-1] / total_dv[0]))
def read_data(start_date, end_date, ls_symbols):
    ''' Read gap-filled close prices for the specified symbols.

    Returns a DataFrame of the filled 'close' values with ``ls_symbols``
    as columns and the index of the data that was read.  Progress is
    reported by printing "Reading data" and "Done".
    '''
    # Closing prices, hence the 16:00 time of day.
    trading_days = du.getNYSEdays(start_date, end_date,
                                  dt.timedelta(hours=16))

    print("Reading data")
    reader = da.DataAccess('Yahoo', cachestalltime = 0)
    fields = ['close']
    frames = reader.get_data(trading_days, ls_symbols, fields)
    print("Done")

    by_field = dict(zip(fields, frames))
    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        filled = by_field[field].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        by_field[field] = filled.fillna(1.0)

    close = by_field['close']
    return pd.DataFrame(close.values, columns = ls_symbols,
                        index = close.index)
def bollinger_bands(symbol,dt_start, dt_end,rolling_period):
    """Compute a rolling Bollinger value for the first entry of ``symbol``.

    ``dt_end`` is first replaced by the next trading day returned by
    ``du.getNextNNYSEdays``.  Each computed rolling average is printed.

    Returns:
        ``(bollinger_matrix, ldt_timestamps, actual_close)``.  The matrix
        has one row per timestamp and four columns: price, rolling
        average, rolling standard deviation and
        ``(price - average) / std``.  The last three are NaN for the
        first ``rolling_period - 1`` rows.
    """
    close_offset = dt.timedelta(hours = 16)
    dt_end = du.getNextNNYSEdays(dt_end, 1, close_offset)[0]
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['actual_close','close']
    d_data = dict(zip(ls_keys,
                      c_dataobj.get_data(ldt_timestamps, symbol, ls_keys)))
    actual_close = d_data['actual_close']

    # Columns: price, rolling average, rolling stddev, bollinger_val.
    bollinger_matrix = np.zeros((len(ldt_timestamps), 4))
    # Fixed-size window, initially all zeros.
    window = deque(np.zeros(rolling_period))

    for row, stamp in enumerate(ldt_timestamps):
        bollinger_matrix[row, 0] = actual_close[symbol[0]].ix[stamp]
        # Slide the window: drop the oldest price, add the newest.
        window.popleft()
        window.append(bollinger_matrix[row, 0])

        if row < (rolling_period - 1):
            # Window not yet full of real prices.
            bollinger_matrix[row, 1] = np.nan
            bollinger_matrix[row, 2] = np.nan
            bollinger_matrix[row, 3] = np.nan
            continue

        bollinger_matrix[row, 1] = np.average(window)
        print(bollinger_matrix[row, 1])
        bollinger_matrix[row, 2] = np.std(window)
        # bollinger_val: (price - rolling avg) / rolling stddev
        deviation = bollinger_matrix[row, 0] - bollinger_matrix[row, 1]
        bollinger_matrix[row, 3] = deviation / bollinger_matrix[row, 2]

    return bollinger_matrix, ldt_timestamps,actual_close
def _generate_data(self):
    """Populate ``df_close``, ``df_alloc`` and ``i_open_result``.

    ``df_close`` is '$SPX' close data from the 'Norgate' source between
    1 Dec of the prior year and 31 Jan of the following year.
    ``df_alloc`` has one row for the first of each month of 2009 with
    -1 for '$SPX' and 0.0 for '_CASH'.
    """
    year = 2009
    l_symbols = ['$SPX']
    startday = dt.datetime(year-1, 12, 1)
    endday = dt.datetime(year+1, 1, 31)

    # Desired timestamps.
    ldt_timestamps = du.getNYSEdays(startday, endday,
                                    dt.timedelta(hours = 16))

    dataobj = da.DataAccess('Norgate')
    self.df_close = dataobj.get_data(ldt_timestamps, l_symbols, "close",
                                     verbose=True)

    # January row first, then one appended row for each of months 2..12.
    self.df_alloc = pand.DataFrame(index=[dt.datetime(year, 1, 1)],
                                   data=[-1], columns=l_symbols)
    for month in range(2, 13):
        month_row = pand.DataFrame(index=[dt.datetime(year, month, 1)],
                                   data=[-1], columns=l_symbols)
        self.df_alloc = self.df_alloc.append(month_row)
    self.df_alloc['_CASH'] = 0.0

    # Based on hand calculation using the transaction costs and slippage.
    self.i_open_result = 0.7541428779600005
def portfolio_simulate(start_date, end_date, symbols, allocations):
    """
    Compute statistics for the portfolio ``symbols`` x ``allocations``.

    Returns the inputs followed by the results, as the tuple
    ``(start_date, end_date, symbols, allocations, sharpe_ratio,
    std_deviation, ave_daily_return, cumulative_return)``.
    """
    # Timestamps at the 16:00 close.
    timestamps = date_util.getNYSEdays(start_date, end_date,
                                       dt.timedelta(hours=16))

    stock_dao = data_access.DataAccess('Yahoo')
    closes = stock_dao.get_data(timestamps, symbols, ['close'])[0]

    # Normalise by the first row, weight, and sum across symbols.
    first_row = closes.values[0,:]
    weighted = (closes / first_row) * allocations
    port_values = weighted.sum(axis=1)

    # Read the final value before calling returnize0.
    # NOTE(review): returnize0 may modify its argument in place - confirm
    # before reordering these statements.
    cumulative_return = port_values[-1]
    daily_returns = tsu.returnize0(port_values)

    ave_daily_return = daily_returns.mean()
    std_deviation = daily_returns.std()
    sharpe_ratio = tsu.get_sharpe_ratio(daily_returns, 0.0)[0]
    return (start_date, end_date, symbols, allocations, sharpe_ratio,
            std_deviation, ave_daily_return, cumulative_return)
def getData(dt_start,dt_end,ls_name):
    """Load gap-filled data for the symbol list ``ls_name`` plus 'SPY'.

    Returns:
        ``(d_data, ls_symbols)``: a dict keyed by 'open', 'high', 'low',
        'close', 'volume' and 'actual_close', and the symbols that were
        read (the named list with 'SPY' appended).
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    reader = da.DataAccess('Yahoo')

    ls_symbols = reader.get_symbols_from_list(ls_name)
    ls_symbols.append('SPY')

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    d_data = dict(zip(fields, reader.get_data(trading_days, ls_symbols, fields)))

    # Remove NaN: forward fill, backward fill, then 1.0.
    for field in fields:
        frame = d_data[field]
        for how in ('ffill', 'bfill'):
            frame = frame.fillna(method = how)
        d_data[field] = frame.fillna(1.0)
    return d_data,ls_symbols
def simulate(startdate, enddate, equities, allocations):
    """Compute statistics for ``equities`` held at ``allocations``.

    Returns:
        Tuple ``(std_dev, avg_daily_ret, sharpe, cum_ret)``: standard
        deviation and mean of daily returns, their sqrt(252)-scaled ratio
        and the final value of the normalised portfolio series.
    """
    trading_days = du.getNYSEdays(startdate, enddate,
                                  dt.timedelta(hours = 16))

    reader = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, reader.get_data(trading_days, equities, fields)))

    closes = frames['close'].values
    normalized = closes / closes[0, :]

    # Column vector of weights, so the dot product has one column.
    weights = np.array(allocations).reshape(len(allocations), 1)
    portfolio = np.dot(normalized, weights)

    # Total return: last entry of the per-day row sums.
    cum_ret = np.sum(portfolio.copy(), axis = 1)[-1]

    # returnize0 is applied to a separate copy.
    rets = portfolio.copy()
    tsu.returnize0(rets)
    avg_daily_ret = np.mean(rets)
    std_dev = np.std(rets)
    sharpe = np.sqrt(252) * avg_daily_ret / std_dev
    return std_dev, avg_daily_ret, sharpe, cum_ret
def calculate_returns():
    """Fill the global ``values_matrix`` with price x holdings per day.

    Reads the globals ``uniqueDates``, ``uniqueSymbols``, ``d_data`` and
    ``holding_matrix``.  For each trading day, the 'close' row is
    multiplied element-wise by the ``holding_matrix`` row of the entry of
    ``uniqueDates`` just before the bisect position of that day.
    """
    global values_matrix

    # The range starts at uniqueDates[1], not [0].
    range_start = uniqueDates[1]
    range_end = uniqueDates[-1] + dt.timedelta(days=1)
    ldt_timestamps = du.getNYSEdays(range_start, range_end,
                                    dt.timedelta(hours=16))
    print("calc %s" % (ldt_timestamps[0],))
    print("calc %s" % (ldt_timestamps[-1],))
    print("------ %s" % (uniqueSymbols,))

    values_matrix = pd.DataFrame(index=ldt_timestamps, columns=uniqueSymbols)
    values_matrix = values_matrix.fillna(0)

    for date in ldt_timestamps:
        prices = d_data['close'].loc[date]
        position = bisect.bisect(uniqueDates, date)
        holdings = holding_matrix.loc[uniqueDates[position - 1]]
        values_matrix.loc[date] = prices.mul(holdings)
def perform_step2():
    """Load gap-filled price data into the global ``d_data``.

    Side effects on module globals:
      * ``uniqueDates`` is sorted in place and then rebound to a new list
        with a leading 0.
      * ``uniqueSymbols`` gets '_CASH' appended (it is the same list
        object that is passed to ``get_data``).
      * ``d_data`` becomes a dict keyed by 'open', 'high', 'low', 'close',
        'volume' and 'actual_close', plus a ``'_CASH'`` entry set to 1.0.
    """
    global uniqueDates
    global d_data

    uniqueDates.sort()
    dt_start = uniqueDates[0]
    dt_end = uniqueDates[-1] + dt.timedelta(days=1)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    dataobj = da.DataAccess('Yahoo')
    # NOTE: this aliases the global list, so the append below is visible
    # to every other user of uniqueSymbols.
    ls_symbols = uniqueSymbols
    ls_symbols.append('_CASH')

    # The last key was a second 'close', which fetched close twice and
    # let zip() silently overwrite the first copy.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    # NOTE(review): this adds a scalar entry to the dict rather than a
    # '_CASH' price column - confirm against the readers of d_data.
    d_data['_CASH'] = 1.0
    uniqueDates = [0] + uniqueDates
def getPrices(startDate, endDate, symbols, fields, fillna=True, isSymbolsList=False, includeLastDay=True):
    """
    Read ``fields`` for ``symbols`` from the 'Yahoo' data source.

    When ``includeLastDay`` is true the end date is moved forward one day
    before the trading days are looked up, so the returned data includes
    ``endDate``.

    @param isSymbolsList: whether ``symbols`` names a symbol list
                          (e.g. sp5002012) instead of stock symbols. If
                          true, ``symbols`` must be a string or hold
                          exactly one name.
    @param fillna: pass the data through ``fillNA`` before returning.
    @return the data from ``get_data``, filled when ``fillna`` is true
    """
    is_single_name = isinstance(symbols, str)
    assert not isSymbolsList or is_single_name or len(symbols) == 1, \
        'When isSymbolsList is true, symbols can only contain one symbol.'

    lastDay = endDate + timedelta(days=1) if includeLastDay else endDate

    dataReader = DataAccess('Yahoo')
    timeStamps = getNYSEdays(startDate, lastDay, timedelta(hours=16))

    if isSymbolsList:
        listName = symbols if is_single_name else symbols[0]
        symbols = dataReader.get_symbols_from_list(listName)

    data = dataReader.get_data(timeStamps, symbols, fields)
    return fillNA(data) if fillna else data
def fetchNYSEData(dt_start, dt_end, ls_symbols):
    """Return gap-filled close prices and the trading days they cover.

    Returns:
        ``(na_price, ldt_timestamps)``: the ``.values`` array of the
        filled 'close' data and the list of trading-day timestamps.
    """
    # Closing time is 1600 hrs.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    # Yahoo is the source; every key is read in one go.
    reader = da.DataAccess('Yahoo', cachestalltime=0)
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = dict(zip(fields, reader.get_data(ldt_timestamps, ls_symbols, fields)))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        frame = frames[field]
        for how in ('ffill', 'bfill'):
            frame = frame.fillna(method=how)
        frames[field] = frame.fillna(1.0)

    # Close prices for all the days, as a numpy array.
    return frames['close'].values, ldt_timestamps
def main():
    """Run the price-drop studies for the 2008 and 2012 symbol lists.

    Uses the global ``dataObj`` to resolve the 'sp5002008' and
    'sp5002012' lists (each with 'SPY' appended) and calls
    ``create_study`` for each over the 2008-01-01..2009-12-31 trading
    days.  A failure in either study is reported on stdout instead of
    being raised.
    """
    global dataObj

    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    ls_symbols_2012 = dataObj.get_symbols_from_list('sp5002012')
    ls_symbols_2012.append('SPY')
    ls_symbols_2008 = dataObj.get_symbols_from_list('sp5002008')
    ls_symbols_2008.append('SPY')

    lf_priceDrop2008 = [7.0, 8.0, 10.0]
    lf_priceDrop2012 = [6.0, 7.0, 9.0, 10.0]

    try:
        create_study(ls_symbols_2008, ldt_timestamps, '2008StudyPriceDrop', lf_priceDrop2008)
        create_study(ls_symbols_2012, ldt_timestamps, '2012StudyPriceDrop', lf_priceDrop2012)
    except Exception as err:
        # Still best-effort, but no longer swallows KeyboardInterrupt /
        # SystemExit, and reports the real cause (no threads are started
        # here, so the old message was misleading).
        print("Error: unable to create study: %s" % (err,))
def simulate(startdate, enddate, ls_symbols, allocation):
    """Compute portfolio statistics for ``ls_symbols`` at ``allocation``.

    Args:
        startdate: first date of the range (datetime).
        enddate: last date of the range (datetime).
        ls_symbols: symbols forwarded to ``DataAccess.get_data``.
        allocation: per-symbol weights applied to the normalised closes.

    Returns:
        Tuple ``(std, avg_daily_return, sharpe_ratio, cul_return)``:
        standard deviation and average of daily returns, their
        sqrt(252)-scaled ratio, and the last portfolio value divided by
        the first.
    """
    # Closing prices, hence the 16:00 time of day.
    dt_timeofday = dt.timedelta(hours=16)
    # Use the function's own arguments; dt_start / dt_end are not defined
    # in this scope.
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)

    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Normalise by the first row, weight, and sum across symbols.
    na_close = d_data['close'].values
    na_price = na_close / na_close[0]
    na_price *= allocation
    na_price = na_price.sum(axis=1)

    # returnize0 gets a copy so na_price keeps the value series.
    r = tsu.returnize0(na_price.copy())
    avg_daily_return = np.average(r)
    std = np.std(r)
    sharpe_ratio = avg_daily_return / std * math.sqrt(252)
    cul_return = na_price[-1] / na_price[0]
    return std, avg_daily_return, sharpe_ratio, cul_return
def run(dt_start, dt_end, ls_symbols, alloc):
    """Print and return statistics of allocation-weighted daily returns.

    Per-symbol daily returns of the normalised closes are multiplied by
    ``alloc`` and summed per day; the statistics describe that series.
    Each intermediate result is printed.

    Returns:
        Tuple ``(std_dev, avg_daily_ret, sharpe, cum_ret)`` where the
        last two come from ``calc_sharpe_ratio`` and ``calc_cum_return``.
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    reader = da.DataAccess('Yahoo')
    frames = dict(zip(fields, reader.get_data(trading_days, ls_symbols, fields)))

    closes = frames['close'].values
    normalized = closes / closes[0, :]
    symbol_rets = tsu.returnize0(normalized.copy())

    weighted_rets = symbol_rets * alloc
    print('Allocated-Adjusted Daily Returns')
    print(weighted_rets)

    total_rets = np.sum(weighted_rets, 1)
    print('Total Daily Returns: ')
    print(total_rets)

    std_dev = np.std(total_rets)
    print('Standard Deviation (Vol): ' + str(std_dev))

    avg_daily_ret = np.average(total_rets)
    print('Average Daily Return: ' + str(avg_daily_ret))

    sharpe = calc_sharpe_ratio(avg_daily_ret, std_dev)
    print('Sharpe: ' + str(sharpe))

    cum_ret = calc_cum_return(total_rets)
    print('Cumulative Return: ' + str(cum_ret))

    return std_dev, avg_daily_ret, sharpe, cum_ret
def simulate(start_date, end_date, ls_symbols):
    """Grid-search the allocation with the highest Sharpe ratio.

    Every allocation whose per-symbol shares are multiples of 0.1 and add
    up to 1.0 is evaluated on the forward/backward-filled close prices of
    ls_symbols between start_date and end_date.

    Args:
        start_date: First day of the period (datetime).
        end_date: Last day of the period (datetime).
        ls_symbols: Symbols to read from the 'Yahoo' data source.

    Returns:
        The best allocation as a list of floats, one per symbol.

    Raises:
        ValueError: If no allocation yields a comparable Sharpe ratio
            (e.g. every ratio is NaN).
    """
    print("Start Date: %s" % start_date)
    print("End Date: %s" % end_date)
    print("Symbols: %s" % ls_symbols)

    dt_timeofday = dt.timedelta(hours=16)
    # Fix: the original read the undefined names dt_start/dt_end here.
    ldt_timestamps = du.getNYSEdays(start_date, end_date, dt_timeofday)

    # get data
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # fill forward, then backward
    df_close = d_data['close'].copy()
    df_close = df_close.fillna(method='ffill')
    df_close = df_close.fillna(method='bfill')

    # normalized prices, i.e. cumulative return per symbol
    na_price = df_close.values
    na_normalized_price = na_price / na_price[0, :]

    # Enumerate shares as integer tenths so the "sums to 100%" test is
    # exact. The original compared a float sum with 1.0, which silently
    # drops valid allocations because of rounding error.
    legal_ports = [
        [i_tenth / 10.0 for i_tenth in t_tenths]
        for t_tenths in itertools.product(range(11), repeat=len(ls_symbols))
        if sum(t_tenths) == 10
    ]

    # Start below any real value so an all-negative market still yields an
    # answer; the original started at 0 and left opt_alloc unbound then.
    highest_sharpe = float('-inf')
    opt_alloc = None
    opt_std = None
    opt_mean = None
    opt_cum_ret = None

    # NOTE(review): mean/std/sqrt are used unqualified exactly as in the
    # original; presumably they come from a star-import -- confirm.
    for comb in legal_ports:
        na_port_rets = np.sum(na_normalized_price * comb, axis=1)
        port_daily_rets = tsu.returnize0(na_port_rets.copy())

        port_mean = mean(port_daily_rets)
        port_std = std(port_daily_rets)
        sharpe_port = sqrt(252) * (port_mean / port_std)

        if sharpe_port > highest_sharpe:
            # Remember the metrics of the winning allocation; the original
            # reported those of whichever combination was evaluated last.
            highest_sharpe = sharpe_port
            opt_alloc = comb
            opt_std = port_std
            opt_mean = port_mean
            opt_cum_ret = na_port_rets[-1]

    if opt_alloc is None:
        raise ValueError("no allocation produced a valid Sharpe ratio")

    print("Volatility (stdev of daily returns): %f" % opt_std)
    print("Sharpe Ratio: %f" % highest_sharpe)
    print("Optimal Alloc: %s" % opt_alloc)
    print("Average daily return: %f" % opt_mean)
    print("Cumulative Return: %f" % opt_cum_ret)
    return opt_alloc
orders_csv_pd = orders_csv_pd.sort(['Date']) orders_csv_pd = orders_csv_pd.reset_index(drop=True) orders_csv_np = orders_csv_pd.values[:, 1:] syms = set(orders_csv_np[:, 1]) for i in range(len(orders_csv_pd['Date'])): orders_csv_pd['Date'][i] = parser.parse(orders_csv_pd['Date'][i]) orders_csv_pd['Date'] = pd.DatetimeIndex(orders_csv_pd['Date']) start_date = orders_csv_pd['Date'].min() - dt.timedelta(hours=16) end_date = orders_csv_pd['Date'].max() keys = ['actual_close', 'close'] database = da.DataAccess('Yahoo') closetime = dt.timedelta(hours=16) opentimes = du.getNYSEdays(start_date, end_date, closetime) prices = database.get_data(opentimes, syms, keys) prices = dict(zip(keys, prices)) prices_close = prices['close'] prices_close = prices_close.fillna(method='ffill') prices_close = prices_close.fillna(method='bfill') cash = 50000 casharray = copy.deepcopy(prices_close) * 0 casharray = casharray[list(syms)[0]] own = copy.deepcopy(prices_close) * 0 valuearray = casharray.copy() for row in range(len(orders_csv_pd)): if orders_csv_pd.ix[row]['Order'] == 'Buy': own[orders_csv_pd.ix[row]['Sym']][
@author: Glacier ''' import QSTK.qstkutil.qsdateutil as du import QSTK.qstkutil.DataAccess as da import datetime as dt import matplotlib.pyplot as plt import pandas import numpy as np dataobj=da.DataAccess('ML4Trading') ls_symbols=dataobj.get_all_symbols() #print "All symbols: ",ls_symbols symbols_toread=['ML4T-000'] ''' ldt_timestamps=[] ldt_timestamps.append(dt.datetime(2012,9,12,16)) ''' dtstart=dt.datetime(2012,8,01) dtend=dt.datetime(2012,9,13) ldttimestamps=du.getNYSEdays(dtstart,dtend,dt.timedelta(hours=16)) lsKeys = ['open', 'high', 'low', 'close', 'volume'] # square bracket will be wrong ldfdata=dataobj.get_data(ldttimestamps,symbols_toread,lsKeys) #df_close = dataobj.get_data(ldttimestamps, symbols_toread, lsKeys) ldfdata=np.array(ldfdata[0]) print ldfdata
delimiter=',', skipinitialspace=True) for row in order_reader: date = dt.datetime(int(row[0]), int(row[1]), int(row[2]), 16) o = Order(action=row[4], date=date, tick=row[3], shares=row[5]) order_list.append(o) # order_list needs to be sorted. Otherwise the algorithm won't work. date_list = [x.date for x in order_list] date_list.sort() dt_start = date_list[0] dt_end = date_list[-1] tick_set = sets.Set([x.tick for x in order_list]) ls_symbols = ['$SPX'] while (tick_set): ls_symbols.append(tick_set.pop()) ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday) all_stocks = get_tickdata(ls_symbols=ls_symbols, ldt_timestamps=ldt_timestamps) pf = Portfolio(equities=all_stocks, cash=cash, dates=ldt_timestamps, order_list=order_list) pf.sim() equity_col = ['buy', 'sell', 'close'] pf.csvwriter(csv_file=value_file, d=',', cash=False) print "Details of the Performance of the portfolio :" print "Data Range :", ldt_timestamps[0], "to", ldt_timestamps[-1] print "Sharpe Ratio of Fund :", pf.sharpe() print "Sortino Ratio of Fund :", pf.sortino() print "Sharpe Ratio of $SPX :", pf.equities['$SPX'].sharpe() print "Total Return of Fund :", pf.totalrtn()
def calculateBBands(startdate="January 1,2010", enddate="December 31, 2010", ls_symbols = ["GOOG"], lookBack = 20, plot=False):
    """Compute Bollinger values for the close prices of ls_symbols.

    The value for each day is (close - rolling mean) / rolling std over a
    window of lookBack rows. The last five rows are printed. When plot is
    True the bands are also drawn and saved to 'bbands.pdf'.

    Returns:
        DataFrame of Bollinger values indexed by trading day, with one
        column per symbol.
    """
    first_day = dtParser.parse(startdate)
    last_day = dtParser.parse(enddate)

    # Closing prices are stamped at 16:00.
    trading_days = du.getNYSEdays(first_day, last_day, dt.timedelta(hours=16))
    trading_days.sort()

    yahoo = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = yahoo.get_data(trading_days, ls_symbols, fields)
    by_field = dict(zip(fields, frames))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        filled = by_field[field].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        by_field[field] = filled.fillna(1.0)

    closes = by_field['close'].values
    centre = pd.rolling_mean(closes, lookBack)
    spread = pd.rolling_std(closes, lookBack)
    upper = centre + spread
    lower = centre - spread

    dt_boll_val = pd.DataFrame((closes - centre) / spread)
    dt_boll_val.index = trading_days
    dt_boll_val.columns = ls_symbols
    print(dt_boll_val.tail(5))

    if plot == True:
        plt.subplots(nrows=2)
        plt.clf()
        plt.figure(1)
        # (subplot position, middle series, legend labels, y label)
        panels = (
            (211, closes, ['up', 'Prices', 'low'], 'Rolling Mean'),
            (212, centre, ['up', 'rolling Mean', 'low'], 'BBands'),
        )
        for position, middle, labels, y_label in panels:
            plt.subplot(position)
            for series in (upper, middle, lower):
                plt.plot(trading_days, series)
            plt.axhline(y=0, color='r')
            plt.legend(labels, loc=4)
            plt.ylabel(y_label)
            plt.xlabel('Dates')
        plt.savefig('bbands.pdf', format='pdf')

    return dt_boll_val
import pandas as pd import time, sys import numpy as np import argparse def datetimeNumpy(array): return dt.datetime(int(float(array[0])), int(float(array[1])), int(float(array[2])), 16) if __name__ == '__main__': inputFile = sys.argv[1] symbol = sys.argv[2] # Year, month, Symbol, Order, amount value = np.loadtxt(inputFile, delimiter=',', dtype='str' ) dates = np.apply_along_axis( datetimeNumpy, axis=1, arr=value ) start, end = min(dates), max(dates) ldt=du.getNYSEdays(start, end, dt.timedelta(hours=16)) c_dataobj = da.DataAccess('Yahoo') ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldf_data = c_dataobj.get_data(ldt, [symbol], ls_keys) d_data = dict(zip(ls_keys, ldf_data)) for s_key in ls_keys: d_data[s_key] = d_data[s_key].fillna(method='ffill') d_data[s_key] = d_data[s_key].fillna(method='bfill') d_data[s_key] = d_data[s_key].fillna(1.0) na_price = d_data['close'] na_price['Custom'] = value[:,3] na_price=na_price.values.astype(float) na_normalized_price = na_price / na_price[0, :] # Plotting the prices with x-axis=timestamps plt.clf()
def main():
    '''Plot cumulative returns of a CSV-defined portfolio and its components.

    Reads symbol/allocation pairs from 'tutorial3portfolio.csv', drops the
    symbols the 'Yahoo' data source does not know, loads three years of
    close prices ending 2011-01-01 and saves the chart to 'tutorial3.pdf'.
    '''
    # Read the portfolio and order it by symbol name.
    portfolio_rows = np.loadtxt('tutorial3portfolio.csv', dtype='S5,f4',
                                delimiter=',', comments="#", skiprows=1)
    print(portfolio_rows)
    portfolio_rows = sorted(portfolio_rows, key=lambda x: x[0])
    print(portfolio_rows)

    # Parallel lists: symbol names and their allocations.
    symbols = [entry[0] for entry in portfolio_rows]
    weights = [entry[1] for entry in portfolio_rows]

    yahoo = da.DataAccess('Yahoo')
    known_symbols = yahoo.get_all_symbols()

    # Symbols in the portfolio that the data source does not provide.
    unknown = list(set(symbols) - set(known_symbols))
    if unknown:
        print("Portfolio contains bad symbols : ", unknown)
    for bad_symbol in unknown:
        position = symbols.index(bad_symbol)
        del symbols[position]
        del weights[position]

    # Three years of history, stamped at the 16:00 close.
    period_end = dt.datetime(2011, 1, 1)
    period_start = period_end - dt.timedelta(days=1095)
    trading_days = du.getNYSEdays(period_start, period_end,
                                  dt.timedelta(hours=16))

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = yahoo.get_data(trading_days, symbols, fields)
    by_field = dict(zip(fields, frames))

    # Work on a filled copy of the close prices.
    closes = by_field['close'].copy()
    for fill_kwargs in ({'method': 'ffill'}, {'method': 'bfill'},
                        {'value': 1.0}):
        closes = closes.fillna(**fill_kwargs)

    # returnize0 works in place on the ndarray, not on the DataFrame.
    returns = closes.values
    tsu.returnize0(returns)

    portfolio_returns = np.sum(returns * weights, axis=1)
    portfolio_total = np.cumprod(portfolio_returns + 1)
    component_total = np.cumprod(returns + 1, axis=0)

    plt.clf()
    figure = plt.figure()
    figure.add_subplot(111)
    plt.plot(trading_days, component_total, alpha=0.4)
    plt.plot(trading_days, portfolio_total)
    plt.legend(symbols + ['Portfolio'])
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    figure.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')
def main():
    """Back-test a Bollinger-band event strategy and compare it to SPY.

    Steps, in order:
      1. Load 2008-2009 data for the symbols listed in 'sp5002012.txt'
         plus SPY from the 'Yahoo' data source and fill missing values.
      2. Flag an event when a symbol's Bollinger value drops to <= -2.0
         after being >= -2.0 the day before while SPY's value is >= 1.0.
      3. Turn events into share holdings, then into per-day position
         values and cash flows.
      4. Write intermediate series to CSV files under c:/, print summary
         statistics for the fund and the market, and show a plot.

    Takes no arguments and returns nothing.
    """
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    sym_list = 'sp5002012.txt'
    market_sym = 'SPY'
    starting_cash = 100000
    bol_period = 20  # rolling window length, in rows
    print "Setting Up ..."

    # Obtaining data from Yahoo
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = load_symlists(sym_list)
    ls_symbols.append(market_sym)

    """ key values. Creating a dictionary. """
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    """ fill out N/A values """
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    """ df_close contains only a close column. """
    df_close = d_data['close']
    df_volume = d_data['volume']
    print "Finding Events ..."

    ''' Finding the event dataframe '''
    ts_market = df_close['SPY']

    # Creating an empty dataframe
    df_events = copy.deepcopy(df_close) * 0

    # Time stamps for the event range
    ldt_timestamps = df_close.index
    rolling_mean = pd.rolling_mean(df_close, window=bol_period)
    rolling_std = pd.rolling_std(df_close, window=bol_period)
    # NOTE(review): the two volume statistics below are never read again
    # in this function.
    rolling_mean_vol = pd.rolling_mean(df_volume, window=bol_period)
    rolling_std_vol = pd.rolling_std(df_volume, window=bol_period)

    ''' finding_events starts here '''
    # Bollinger value: distance from the rolling mean in rolling std units.
    bol_clo = (df_close - rolling_mean) / rolling_std
    for s_sym in ls_symbols:
        # Stop 5 rows early so the look-ahead i + delay stays in range.
        for i in range(1, len(ldt_timestamps) - 5):
            bol_tod = bol_clo[s_sym].loc[ldt_timestamps[i]]
            bol_yes = bol_clo[s_sym].loc[ldt_timestamps[i - 1]]
            bol_tod_mark = bol_clo["SPY"].loc[ldt_timestamps[i]]
            if (bol_tod <= -2.0 and bol_yes >= -2.0 and bol_tod_mark >= 1.0):
                # Record 10000 / event-day close as the share count on the
                # event day and up to four following days; the loop ends
                # after the first day that closes above the event-day close.
                for delay in range(5):
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        10000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    if df_close[s_sym].loc[ldt_timestamps[
                            i + delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break
    print "Starting Simulation ..."

    # Keep only the symbols that have at least one positive event entry.
    ls_symbols_red = []
    for sym in ls_symbols:
        for i in range(len(ldt_timestamps)):
            if df_events[sym].loc[ldt_timestamps[i]] > 0:
                ls_symbols_red.append(sym)
                break

    ''' value and cash are zero arrays '''
    # df_orders = copy.deepcopy(df_events)
    print "ls_symbols_red", ls_symbols_red
    df_orders = df_events[ls_symbols_red]
    value = copy.deepcopy(df_events) * 0
    # The zeroed SPY column serves as the template for the cash series.
    cash = copy.deepcopy(value[market_sym])

    ''' Update value '''
    print "Updating Value and Cash Array..."
    # At this stage cash holds per-day cash *changes*; they are turned
    # into a running balance further below.
    for s_sym in ls_symbols_red:
        for i in range(len(ldt_timestamps)):
            ind_time = ldt_timestamps[i]
            if i == 0:
                # First row: there is no previous day to compare with.
                if df_orders[s_sym].loc[ind_time] > 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
            else:
                ind_time_yest = ldt_timestamps[i - 1]
                if df_orders[s_sym].loc[ind_time] > 0 and df_orders[s_sym].loc[
                        ind_time_yest] == 0:
                    # Holding appears today: pay today's close for it.
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
                elif df_orders[s_sym].loc[ind_time_yest] > 0:
                    # Holding existed yesterday: value it at today's close,
                    # but price the change in share count at yesterday's
                    # close.
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= (df_orders[s_sym].loc[ind_time] -
                                       df_orders[s_sym].loc[ind_time_yest]
                                       ) * df_close[s_sym].loc[ind_time_yest]

    ''' Update cash '''
    print "Modifying Cash Array..."
    # Seed the first day with the starting cash, then accumulate the daily
    # changes into a running balance.
    cash[ldt_timestamps[0]] += starting_cash
    for i in range(1, len(ldt_timestamps)):
        ind_prev = cash[ldt_timestamps[i - 1]]
        ind_curr = cash[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] = ind_curr + ind_prev
    cash.to_csv("c:/cash.csv", sep=",", mode="w")
    value.to_csv("c:/value.csv", sep=",", mode="w")
    print "Updating Total..."

    # From here on cash holds the total fund value: cash balance plus the
    # value of every held position.
    for i in range(len(ldt_timestamps)):
        sym_sum = 0
        for s_sym in ls_symbols_red:
            sym_sum += value[s_sym].ix[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] += sym_sum
    cash_raw = copy.deepcopy(cash)  # NOTE(review): not read again below
    cash.to_csv("c:/total.csv", sep=",", mode="w")
    ts_market.to_csv("c:/ts_market.csv", sep=",", mode="w")

    # Normalize both series by their first value.
    cash /= cash[0]
    ts_market /= ts_market[0]
    print "Calculating Total Return..."
    tot_ret_fund = cash[-1]
    tot_ret_mark = ts_market[-1]
    print "Calculating Volatility..."

    ''' Create new array for fund and market '''
    # Row 0 stays at zero because the loop starts at 1.
    daily_ret_fund = np.zeros((len(ldt_timestamps), 1))
    daily_ret_mark = copy.deepcopy(daily_ret_fund)
    for i in range(1, len(ldt_timestamps)):
        daily_ret_fund[
            i] = cash[ldt_timestamps[i]] / cash[ldt_timestamps[i - 1]] - 1
        daily_ret_mark[i] = ts_market[ldt_timestamps[i]] / ts_market[
            ldt_timestamps[i - 1]] - 1
    vol_fund = np.std(daily_ret_fund)
    vol_mark = np.std(daily_ret_mark)
    print "Calculating Average Daily Return..."
    avg_ret_fund = np.average(daily_ret_fund)
    avg_ret_mark = np.average(daily_ret_mark)
    print "Calculating Sharpe Ratio..."
    sharpe_fund = np.sqrt(252) * avg_ret_fund / vol_fund
    sharpe_mark = np.sqrt(252) * avg_ret_mark / vol_mark

    # NOTE(review): the labels below say $SPX, but the market series used
    # throughout this function is the 'SPY' column.
    print "Start Date:", dt_start
    print "End Date :", dt_end
    print " "
    print "Sharpe Ratio of Fund: ", sharpe_fund
    print "Sharpe Ratio of $SPX: ", sharpe_mark
    print " "
    print "Total Return of Fund: ", tot_ret_fund
    print "Total Return of $SPX: ", tot_ret_mark
    print " "
    print "Standard Deviation of Fund: ", vol_fund
    print "Standard Deviation of $SPX: ", vol_mark
    print " "
    print "Average Daily Return of Fund: ", avg_ret_fund
    print "Average Daily Return of $SPX: ", avg_ret_mark
    plt.plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    plt.show()
def main():
    '''Plot prices, normalized prices and daily returns for five symbols.

    Loads 2010-01-01 .. 2010-01-15 data from the 'Yahoo' data source and
    writes five PDF charts into the current directory.
    '''
    ls_symbols = ["AAPL", "GLD", "GOOG", "$SPX", "XOM"]
    # Column positions of the symbols compared in the return charts.
    spx_col = ls_symbols.index("$SPX")
    xom_col = ls_symbols.index("XOM")
    gld_col = ls_symbols.index("GLD")

    def finish_chart(y_label, x_label, file_name, legend_labels=None):
        # Shared tail of every chart: optional legend, axis labels, save.
        if legend_labels is not None:
            plt.legend(legend_labels)
        plt.ylabel(y_label)
        plt.xlabel(x_label)
        plt.savefig(file_name, format='pdf')

    period_start = dt.datetime(2010, 1, 1)
    period_end = dt.datetime(2010, 1, 15)

    # Closing prices are stamped at 16:00.
    trading_days = du.getNYSEdays(period_start, period_end,
                                  dt.timedelta(hours=16))

    yahoo = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = yahoo.get_data(trading_days, ls_symbols, fields)
    by_field = dict(zip(fields, frames))

    # Fill gaps forward, then backward, then fall back to 1.0.
    for field in fields:
        frame = by_field[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        by_field[field] = frame.fillna(1.0)

    closes = by_field['close'].values

    # Adjusted close against time.
    plt.clf()
    plt.plot(trading_days, closes)
    finish_chart('Adjusted Close', 'Date', 'adjustedclose.pdf', ls_symbols)

    # Prices rescaled so every symbol starts at 1.
    normalized = closes / closes[0, :]
    plt.clf()
    plt.plot(trading_days, normalized)
    finish_chart('Normalized Close', 'Date', 'normalized.pdf', ls_symbols)

    # returnize0 works in place on an ndarray, so hand it a copy.
    daily_returns = normalized.copy()
    tsu.returnize0(daily_returns)

    # Daily returns of $SPX and XOM over (at most) the first 50 days.
    plt.clf()
    for column in (spx_col, xom_col):
        plt.plot(trading_days[0:50], daily_returns[0:50, column])
    plt.axhline(y=0, color='r')
    finish_chart('Daily Returns', 'Date', 'rets.pdf', ['$SPX', 'XOM'])

    # Scatter of $SPX returns against XOM returns.
    plt.clf()
    plt.scatter(daily_returns[:, spx_col], daily_returns[:, xom_col],
                c='blue')
    finish_chart('XOM', '$SPX', 'scatterSPXvXOM.pdf')

    # Scatter of $SPX returns against GLD returns.
    plt.clf()
    plt.scatter(daily_returns[:, spx_col], daily_returns[:, gld_col],
                c='blue')
    finish_chart('GLD', '$SPX', 'scatterSPXvGLD.pdf')
def main(argv):
    """Simulate an order file and write the daily portfolio value as CSV.

    Args:
        argv: Sequence whose first item is the orders CSV path and whose
            second item is the output CSV path. Each order row is read as
            year, month, day, symbol, "Buy"-or-other, quantity.

    The portfolio starts with 1000000 in cash. Orders are applied in file
    order at the 16:00 close of their date, so the file is expected to be
    sorted by date (the first and last rows define the simulated period).
    """
    orders_file = argv[0]
    values_file = argv[1]

    symbols = []
    dates = []
    order_rows = []
    # 'with' guarantees the handle is closed; the original never closed it.
    with open(orders_file, 'rU') as orders_handle:
        for row in csv.reader(orders_handle, delimiter=","):
            if not row:
                # csv yields [] for blank lines; they carry no order.
                continue
            order_rows.append(row)
            symbols.append(row[3])
            # A list (not a lazy map object) so it can be indexed below.
            dates.append([int(field) for field in row[:3]])

    symbols.append("_CASH")
    uniq_sym = sorted(set(symbols))

    dt_start = dt.datetime(dates[0][0], dates[0][1], dates[0][2])
    dt_end = dt.datetime(dates[-1][0], dates[-1][1], dates[-1][2])
    # One extra day so the last order date itself is inside the range.
    dt_end_read = dt_end + dt.timedelta(days=1)

    data_obj = da.DataAccess('Yahoo')
    ls_keys = ['close', 'actual_close']
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end_read,
                                    dt.timedelta(hours=16))
    ldf_data = data_obj.get_data(ldt_timestamps, uniq_sym, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Holdings per symbol (plus the "_CASH" column) per trading day; only
    # rows where something changes are written, the rest is padded later.
    trades_data = pd.DataFrame(index=list(ldt_timestamps),
                               columns=list(uniq_sym))
    curr_stocks = dict()
    for sym in uniq_sym:
        curr_stocks[sym] = 0
        trades_data[sym][ldt_timestamps[0]] = 0
    curr_cash = 1000000
    trades_data["_CASH"][ldt_timestamps[0]] = curr_cash

    for index, row in enumerate(order_rows):
        curr_date = dt.datetime(dates[index][0], dates[index][1],
                                dates[index][2], 16)
        sym = row[3]
        # NOTE(review): int() truncates the close price to whole units
        # before it is used -- kept as in the original; confirm intent.
        stock_value = int(d_data['close'][sym][curr_date])
        quantity = int(row[5])
        position = row[4]
        if position == "Buy":
            curr_cash -= stock_value * quantity
            curr_stocks[sym] += quantity
        else:
            curr_cash += stock_value * quantity
            curr_stocks[sym] -= quantity
        trades_data["_CASH"][curr_date] = curr_cash
        trades_data[sym][curr_date] = curr_stocks[sym]

    # Carry holdings and cash forward over days without orders.
    trades_data = trades_data.fillna(method="pad")

    # Closing the output file flushes every row to disk; the original
    # left the handle open.
    with open(values_file, 'wb') as values_handle:
        writer = csv.writer(values_handle, delimiter=',')
        for curr_date in trades_data.index:
            value_of_portfolio = 0
            for sym in uniq_sym:
                if sym == "_CASH":
                    value_of_portfolio += trades_data[sym][curr_date]
                else:
                    value_of_portfolio += trades_data[sym][curr_date] * int(
                        d_data['close'][sym][curr_date])
            writer.writerow([curr_date, value_of_portfolio])
values_fund, stddev_fund, avgret_fund, sharpe_fund, return_fund = calculate_for( fund_values) print "The final value of the portfolio using the sample file is -- %s" % last_line print "" print "Details of the Performance of the portfolio" print "" print "Data Range : %s to %s" % (start_date, end_date) print "" print "Sharpe Ratio of Fund : %f" % sharpe_fund print "Sharpe Ratio of %s : %f" % (comparison[0], sharpe_comp) print "" print "Total Return of Fund : %f" % return_fund print "Total Return of %s : %f" % (comparison[0], return_comp) print "" print "Standard Deviation of Fund : %f" % stddev_fund print "Standard Deviation of %s : %f" % (comparison[0], stddev_comp) print "" print "Average Daily Return of Fund : %f" % avgret_fund print "Average Daily Return of %s : %f" % (comparison[0], avgret_comp) ldt_timestamps = du.getNYSEdays(start_date, end_date + dt.timedelta(1), dt.timedelta(hours=16)) plt.plot(ldt_timestamps, values_fund, label='Portfolio') plt.plot(ldt_timestamps, values_comp, label=comparison[0]) plt.legend() plt.ylabel('Returns') plt.xlabel('Date') plt.savefig('homework3.pdf', format='pdf')
f = open('2008Dow30.txt') lsSymTrain = f.read().splitlines() + ['$SPX'] f.close() f = open('2010Dow30.txt') lsSymTest = f.read().splitlines() + ['$SPX'] f.close() lsSym = list(set(lsSymTrain).union(set(lsSymTest))) dtStart = dt.datetime(2008,0o1,0o1) dtEnd = dt.datetime(2010,12,31) norObj = da.DataAccess('Norgate') ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) ) lsKeys = ['open', 'high', 'low', 'close', 'volume'] ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys ) #this line is important even though the ret value is not used for temp in ldfData: temp.fillna(method="ffill").fillna(method="bfill") ldfDataTrain = norObj.get_data( ldtTimestamps, lsSymTrain, lsKeys ) ldfDataTest = norObj.get_data( ldtTimestamps, lsSymTest, lsKeys) for temp in ldfDataTrain: temp.fillna(method="ffill").fillna(method="bfill") for temp in ldfDataTest:
import QSTK.qstkutil.qsdateutil as du import QSTK.qstkutil.tsutil as tsu import QSTK.qstkutil.DataAccess as da import datetime as dt import matplotlib.pyplot as plt import pandas from pylab import * # # Prepare to read the data # symbols = ["AAPL","GOOG","IBM","MSFT"] startday = dt.datetime(2010,1,1) endday = dt.datetime(2010,12,31) timeofday=dt.timedelta(hours=16) timestamps = du.getNYSEdays(startday,endday,timeofday) dataobj = da.DataAccess('Yahoo') voldata = dataobj.get_data(timestamps, symbols, "volume") adjcloses = dataobj.get_data(timestamps, symbols, "close") actualclose = dataobj.get_data(timestamps, symbols, "actual_close") #adjcloses = adjcloses.fillna() adjcloses = adjcloses.fillna(method='backfill') rolling_mean = pandas.rolling_mean(adjcloses,20,min_periods=20) rolling_std = pandas.rolling_std(adjcloses,20,min_periods=20) bollinger_val = pandas.DataFrame(index=timestamps,columns=symbols) for i in range(len(rolling_std[symbols[0]])): if rolling_std[symbols[0]][i] > 0:
def yahoo_read_data(comparison):
    """Return the close prices of the comparison symbols as an ndarray.

    The period comes from the module-level names start_date and end_date;
    one day is added to end_date before the trading days are looked up.
    """
    one_day = dt.timedelta(1)
    closing_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(start_date, end_date + one_day,
                                  closing_time)
    source = da.DataAccess('Yahoo')
    frames = source.get_data(trading_days, comparison, ["close"])
    close_frame = frames[0]
    return close_frame.values
def get_nyse_days_of_market_open_between(start_of_period, end_of_period):
    """Return du.getNYSEdays for the period, using a 16-hour time of day."""
    return du.getNYSEdays(start_of_period, end_of_period,
                          dt.timedelta(hours=16))
def log500(sLog):
    '''
    @summary: Applies the feature functions to recent price/volume data
              for a fixed symbol list plus $SPX.
    @param sLog: Passed through to applyFeatures as its sLog argument.
    @return: Nothing
    '''
    lsBase = [
        'A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM',
        'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG',
        'AIV', 'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN',
        'AMP', 'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD',
        'APH', 'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA',
        'BAC', 'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B',
        'BHI', 'BIG', 'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY',
        'BRCM', 'BRK.B', 'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM',
        'CAT', 'CB', 'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF',
        'CFN', 'CHK', 'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA',
        'CMCSA', 'CME', 'CMG', 'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG',
        'COH', 'COL', 'COP', 'COST', 'COV', 'CPB', 'CPWR', 'CRM', 'CSC',
        'CSCO', 'CSX', 'CTAS', 'CTL', 'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS',
        'CVX', 'D', 'DD', 'DE', 'DELL', 'DF', 'DFS', 'DGX', 'DHI', 'DHR',
        'DIS', 'DISCA', 'DNB', 'DNR', 'DO', 'DOV', 'DOW', 'DPS', 'DRI',
        'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN', 'EBAY', 'ECL', 'ED', 'EFX',
        'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG', 'EP', 'EQR', 'EQT', 'ERTS',
        'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC', 'EXPD', 'EXPE', 'F',
        'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN', 'FII', 'FIS',
        'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX', 'FSLR',
        'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW', 'GME',
        'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL', 'HAR',
        'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ',
        'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST',
        'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP',
        'IPG', 'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP',
        'JDSU', 'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K',
        'KEY', 'KFT', 'KIM', 'KLAC', 'KMB', 'KMX', 'KO', 'KR', 'KSS', 'L',
        'LEG', 'LEN', 'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC',
        'LO', 'LOW', 'LSI', 'LTD', 'LUK', 'LUV', 'LXK', 'M', 'MA', 'MAR',
        'MAS', 'MAT', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP',
        'MHS', 'MJN', 'MKC', 'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS',
        'MPC', 'MRK', 'MRO', 'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV',
        'MWW', 'MYL', 'NBL', 'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX',
        'NFX', 'NI', 'NKE', 'NOC', 'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU',
        'NUE', 'NVDA', 'NVLS', 'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC',
        'ORCL', 'ORLY', 'OXY', 'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL',
        'PCLN', 'PCP', 'PCS', 'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG',
        'PGN', 'PGR', 'PH', 'PHM', 'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW',
        'POM', 'PPG', 'PPL', 'PRU', 'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP',
        'R', 'RAI', 'RDC', 'RF', 'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST',
        'RRC', 'RRD', 'RSG', 'RTN', 'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE',
        'SEE', 'SHLD', 'SHW', 'SIAL', 'SJM', 'SLB', 'SLE', 'SLM', 'SNA',
        'SNDK', 'SNI', 'SO', 'SPG', 'SPLS', 'SRCL', 'SRE', 'STI', 'STJ',
        'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN', 'SWY', 'SYK', 'SYMC',
        'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL', 'TER', 'TGT', 'THC',
        'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW', 'TRV', 'TSN',
        'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH', 'UNM', 'UNP',
        'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B', 'VLO',
        'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC', 'WFC',
        'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI', 'WPO',
        'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM', 'XRAY',
        'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH',
    ]
    # Add the index symbol and keep the list in sorted order.
    lsSym = sorted(lsBase + ['$SPX'])

    # Max lookback is 6 months, counted back from today's 16:00.
    dtNow = dt.datetime.now()
    dtEnd = dtNow.replace(hour=16, minute=0, second=0, microsecond=0)
    dtStart = dtEnd - relativedelta(months=6)

    # Pull in current data.
    norObj = da.DataAccess('Norgate')

    # Get 2 extra months on each side for moving averages and future
    # returns.
    dtFirst = dtStart - relativedelta(months=2)
    dtLast = dtEnd + relativedelta(months=2)
    ldtTimestamps = du.getNYSEdays(dtFirst, dtLast, dt.timedelta(hours=16))

    dfPrice = norObj.get_data(ldtTimestamps, lsSym, 'close')
    dfVolume = norObj.get_data(ldtTimestamps, lsSym, 'volume')

    # Imported functions from qstkfeat.features, NOTE: last function is
    # classification. The third returned item is not needed here.
    lfcFeatures, ldArgs, _lsNames = getFeatureFuncs()

    # Generate a list of DataFrames, one for each feature, with the same
    # index/column structure as price data.
    applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs, sLog=sLog)
data = np.loadtxt(inp, delimiter=',', dtype={'names': ('year', 'month','day','ticker','action','number'), 'formats': ('I2','I1','I1','S6','S4','I2')}) sym = data['ticker'] vol = data['number'] act = data['action'] day = [] hour = 16 for i in range(len(data)): day.append(dt.datetime(data['year'][i],data['month'][i],data['day'][i],hour)) ls_symbols = list(set(sym)) #ls_symbols.append('_CASH') dt_timeofday = dt.timedelta(hours=hour) ldt_timestamps = du.getNYSEdays(day[0], day[-1], dt_timeofday) c_dataobj = da.DataAccess('Yahoo') df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close") exist={} for sn in range(len(data)): exist[day[sn]] = exist.get(day[sn], 0) + 0 dt_action=day[sn] if sn>1: na_old = df_alloc.xs(day[sn-1]).values print sn, na_old if exist[dt_action]<1: exist[dt_action] = exist.get(dt_action, 0) + 1 na_vals=na_old for stk in range(len(ls_symbols)):
def main():
    '''Plot the efficient frontier of a symbol list for two adjacent years.

    The frontier is computed for the 365 days before 2013-01-01 and for the
    365 days after it. The portfolios found on the first frontier are then
    re-evaluated on the second period's returns. The chart is saved to
    'tutorial8.pdf'.
    '''
    # Symbols to analyse; the ones the data source lacks are dropped below.
    ls_symbols = ['TEF.MC', 'IBCX.MU', 'IEGA.L', 'IDYV.L', 'IWDA.L']

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')
    ls_all_syms = c_dataobj.get_all_symbols()

    # Bad symbols are symbols present in portfolio but not in all syms
    ls_bad_syms = list(set(ls_symbols) - set(ls_all_syms))
    for s_sym in ls_bad_syms:
        ls_symbols.remove(s_sym)

    # Start and End date of the charts
    dt_end = dt.datetime(2013, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    ldt_timestamps_test = du.getNYSEdays(dt_end, dt_test, dt_timeofday)

    # Reading just the close prices
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close_test = c_dataobj.get_data(ldt_timestamps_test, ls_symbols,
                                       "close")

    # Filling the data for missing NAN values
    df_close = df_close.fillna(method='ffill')
    df_close = df_close.fillna(method='bfill')
    df_close_test = df_close_test.fillna(method='ffill')
    df_close_test = df_close_test.fillna(method='bfill')

    # Copying the data values to a numpy array to get returns
    na_data = df_close.values.copy()
    na_data_test = df_close_test.values.copy()

    # Getting the daily returns (in place)
    tsu.returnize0(na_data)
    tsu.returnize0(na_data_test)

    # Calculating the frontier.
    (lf_returns, lf_std, lna_portfolios, na_avgrets,
     na_std) = getFrontier(na_data)
    # Only the first two results are used for the second period.
    (lf_returns_test, lf_std_test) = getFrontier(na_data_test)[:2]

    # Plotting the efficient frontier
    plt.clf()
    plt.plot(lf_std, lf_returns, 'b')
    plt.plot(lf_std_test, lf_returns_test, 'r')

    # Plot where the efficient frontier would be the following year
    lf_ret_port_test = []
    lf_std_port_test = []
    for na_portfolio in lna_portfolios:
        na_port_rets = np.dot(na_data_test, na_portfolio)
        lf_std_port_test.append(np.std(na_port_rets))
        lf_ret_port_test.append(np.average(na_port_rets))
    plt.plot(lf_std_port_test, lf_ret_port_test, 'k')

    # Plot individual stock risk/return as green +
    for i, f_ret in enumerate(na_avgrets):
        plt.plot(na_std[i], f_ret, 'g+')

    # Labels and Axis
    # Fix: the legend used to hard-code 2009/2010 although the data above
    # covers other years; derive the labels from the actual date range.
    i_train_year = dt_start.year
    i_test_year = dt_end.year
    plt.legend([
        '%d Frontier' % i_train_year,
        '%d Frontier' % i_test_year,
        "Performance of '%02d Frontier in %d" % (i_train_year % 100,
                                                 i_test_year),
    ], loc='lower right')
    # NOTE(review): the title still says "S&P 100" although ls_symbols is a
    # custom list -- confirm the wording wanted on the chart.
    plt.title('Efficient Frontier For S&P 100 ')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
def analyzeTrades(tradeFile, benchmark):
    '''Simulate the orders in ``tradeFile`` and compare against ``benchmark``.

    The orders are replayed against close prices starting from 1,000,000 in
    cash.  Each processed order is printed, followed by summary statistics
    for the fund and the benchmark, and a comparison plot is saved to
    ``tradeFile + '_marketSim.pdf'``.  Nothing is returned.

    tradeFile -- path handed to ``readCSV``; each row is indexed as
                 year, month, day, symbol, order type, share count, with a
                 seventh column that this function overwrites.
    benchmark -- passed to ``getYahooData`` as the symbol argument
                 (presumably a list of symbols such as ['$SPX'] -- TODO
                 confirm against getYahooData).

    Sample input and expected results:

    #orders.csv
    2011,1,10,AAPL,Buy,1500,
    2011,1,13,AAPL,Sell,1500,
    2011,1,13,IBM,Buy,4000,
    2011,1,26,GOOG,Buy,1000,
    2011,2,2,XOM,Sell,4000,
    2011,2,10,XOM,Buy,4000,
    2011,3,3,GOOG,Sell,1000,
    2011,3,3,IBM,Sell,2200,
    2011,5,3,IBM,Buy,1500,
    2011,6,3,IBM,Sell,3300,
    2011,6,10,AAPL,Buy,1200,
    2011,8,1,GOOG,Buy,55,
    2011,8,1,GOOG,Sell,55,
    2011,12,20,AAPL,Sell,1200,

    #The final value of the portfolio using the sample file is --
    2011,12,20,1133860
    #Details of the Performance of the portfolio :
    #Data Range : 2011-01-10 16:00:00 to 2011-12-20 16:00:00
    #Sharpe Ratio of Fund : 1.21540462111
    #Sharpe Ratio of $SPX : 0.0183391412227
    #Total Return of Fund : 1.13386
    #Total Return of $SPX : 0.97759401457
    #Standard Deviation of Fund : 0.00717514512699
    #Standard Deviation of $SPX : 0.0149090969828
    #Average Daily Return of Fund : 0.000549352749569
    #Average Daily Return of $SPX : 1.72238432443e-05

    #The other sample file is orders2.csv that you can use to test your
    #code, and compare with others.
    2011,1,14,AAPL,Buy,1500,
    2011,1,19,AAPL,Sell,1500,
    2011,1,19,IBM,Buy,4000,
    2011,1,31,GOOG,Buy,1000,
    2011,2,4,XOM,Sell,4000,
    2011,2,11,XOM,Buy,4000,
    2011,3,2,GOOG,Sell,1000,
    2011,3,2,IBM,Sell,2200,
    2011,5,23,IBM,Buy,1500,
    2011,6,2,IBM,Sell,3300,
    2011,6,10,AAPL,Buy,1200,
    2011,8,9,GOOG,Buy,55,
    2011,8,11,GOOG,Sell,55,
    2011,12,14,AAPL,Sell,1200,

    #The final value of the portfolio using the sample file is --
    2011,12,14, 1078753
    #Data Range : 2011-01-14 16:00:00 to 2011-12-14 16:00:00
    #Sharpe Ratio of Fund : 0.788988545538
    #Sharpe Ratio of $SPX :-0.177204632551
    #Total Return of Fund : 1.078753
    #Total Return of $SPX : 0.937041848381
    #Standard Deviation of Fund : 0.00708034656073
    #Standard Deviation of $SPX : 0.0149914504972
    #Average Daily Return of Fund : 0.000351904599618
    #Average Daily Return of $SPX :-0.000167347202139
    '''
    np_transactions = readCSV(tradeFile)
    # Column 3 holds the symbol; de-duplicate and sort for a stable order.
    ls_symbols = sorted(set(np_transactions[:, 3]))
    ls_dates = list()
    for a in np_transactions:
        ls_dates.append(dt.datetime(int(a[0]), int(a[1]), int(a[2])))
    dt_min = min(ls_dates)
    # One extra day is added to the latest order date before fetching data.
    dt_max = max(ls_dates) + dt.timedelta(1)
    ldf_data = getYahooData(dt_min, dt_max, ls_symbols)
    ldf_benchmark = getYahooData(dt_min, dt_max, benchmark)
    ldt_timestamps = du.getNYSEdays(dt_min, dt_max, dt.timedelta(hours=16))
    ldf_close = ldf_data['close']
    ldf_benchmark_close = copy.deepcopy(ldf_benchmark['close'])
    # Share holdings per date/symbol: same shape as the close prices, zeroed
    # below.
    df_alloc = copy.deepcopy(ldf_data['close'])
    # The row sum is only used to get a per-date series; its values are
    # immediately replaced by the starting cash.
    df_cash = np.sum(ldf_close, axis=1)
    df_cash.fill(1000000)
    for keys in df_alloc:
        df_alloc[keys] = 0
    # We need closing prices so the timestamp should be hours=16.
    # NOTE(review): dt_cob is not used anywhere below.
    dt_cob = dt.timedelta(hours=16)
    # Store the 16:00 timestamp of each order in column 6 of its row; the
    # stored value is not read again in this function.
    for a in np_transactions:
        trans_dt = dt.datetime(int(a[0]), int(a[1]), int(a[2]), 16)
        a[6] = trans_dt
    for a in np_transactions:
        trans_dt = dt.datetime(int(a[0]), int(a[1]), int(a[2]), 16)
        lot = int(a[5])
        sym = a[3]
        transType = str(a[4]).lower()
        price = ldf_close.ix[trans_dt, sym]
        costOfTransaction = lot * price
        cashBefore = df_cash.ix[trans_dt]
        # Any order type containing 'buy' is a buy; everything else is
        # treated as a sell.
        if (transType.find('buy') > -1):
            # Holdings change from the trade date onwards; cash is reduced
            # on the trade date only and propagated forward below.
            df_alloc.ix[trans_dt:, sym] += lot
            df_cash.ix[trans_dt] -= costOfTransaction
        else:
            df_alloc.ix[trans_dt:, sym] -= lot
            df_cash.ix[trans_dt:] += costOfTransaction
        cashAfter = df_cash.ix[trans_dt]
        # Carry the post-trade cash balance forward to every later date.
        # NOTE(review): this overwrites all later dates, so it looks like
        # the rows must be processed in chronological order -- confirm.
        df_cash.ix[trans_dt:] = cashAfter
        print trans_dt, transType, lot, sym, price, costOfTransaction, cashBefore, cashAfter
    # Net liquidation value = market value of holdings + cash, per date.
    df_portfoliovalue = df_alloc * ldf_close
    df_netliquidation = np.sum(df_portfoliovalue, axis=1)
    df_netliquidation += df_cash
    cum_ret, vol, daily_ret, sharpe, na_normalized_price = getPortfolioStats(
        df_netliquidation, [1])
    cum_ret1, vol1, daily_ret1, sharpe1, na_normalized_price1 = getPortfolioStats(
        ldf_benchmark_close, [1])
    print "Final Value of the portfolio = ", df_netliquidation.ix[max(
        ldt_timestamps)]
    print "Sharpe Ratio of Fund = ", sharpe
    print "Sharpe Ratio of benchmark = ", sharpe1
    print "Total Return of Fund = ", cum_ret
    print "Total Return of benchmark = ", cum_ret1
    print "Standard Deviation of Fund = ", vol
    print "Standard Deviation of benchmark = ", vol1
    print "Average Daily Return of Fund = ", daily_ret
    print "Average Daily Return of benchmark = ", daily_ret1
    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price, label='portfolio')
    plt.plot(ldt_timestamps, na_normalized_price1, label='benchmark')
    plt.legend()
    plt.ylabel('Returns')
    plt.xlabel('Date')
    fileName = tradeFile + '_marketSim.pdf'
    plt.savefig(fileName, format='pdf')
def main(): initial_cash = 1000000 orders_file = "../data/orders.csv" values_file = "../data/values.csv" netCash = initial_cash netValue = [] resultFile = open(values_file, 'wb') writer = csv.writer(resultFile, dialect='excel') print "***************Market Simulator*********************************" dates, symbols, orders, volume, tradeCount, symbolList, dateList = _csv_read_trades( orders_file) df = pd.read_csv( orders_file, parse_dates=True, names=['year', 'month', 'day', 'symbol', 'order', 'size', 'empty'], header=0) del df['empty'] df = df.sort(columns=['year', 'month', 'day'], ascending=1) #print df startdate = dateList[0] enddate = dateList[-1] dt_timeofday = dt.timedelta(hours=16) print "Fetching Data..." ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday) c_dataobj = da.DataAccess('Yahoo') #, cachestalltime=0) ls_keys = ['close'] ldf_data = c_dataobj.get_data(ldt_timestamps, symbolList, ls_keys) d_data = dict(zip(ls_keys, ldf_data)) for s_key in ls_keys: d_data[s_key] = d_data[s_key].fillna(method='ffill') d_data[s_key] = d_data[s_key].fillna(method='bfill') d_data[s_key] = d_data[s_key].fillna(1.0) na_price = d_data['close'].values #print na_price.shape ownedStocks = np.zeros(na_price.shape[1]) date_index = 0 print "Processing Orders..." 
for ldt_ts in ldt_timestamps: order_count = 0 for order_date in dates: if ldt_ts == order_date: #print "New Comp:" #print symbols[order_count] #print order_date,ldt_ts symbol_index = 0 for order_symbols in symbolList: if order_symbols == symbols[order_count]: cash = na_price[date_index][symbol_index] * volume[ order_count] if orders[order_count] == "Buy": ownedStocks[symbol_index] = ownedStocks[ symbol_index] + volume[order_count] # print ownedStocks cash = -cash else: ownedStocks[symbol_index] = ownedStocks[ symbol_index] - volume[order_count] # print ownedStocks #ownedValue netCash = netCash + cash symbol_index = symbol_index + 1 order_count = order_count + 1 # sym_idx = 0 owned_value = 0 for volume_own in ownedStocks: #print volume_own #print ownedStocks #print na_price[date_index][sym_idx] owned_value = owned_value + volume_own * na_price[date_index][ sym_idx] #print owned_value sym_idx = sym_idx + 1 #print netCash #print owned_value+netCash append1 = owned_value + netCash netValue.append(append1) results = [ str(ldt_ts.year), str(ldt_ts.month), str(ldt_ts.day), str(int(append1)) ] writer.writerow(results) date_index = date_index + 1 #print netValue print "Generating Output file..." print "Done." print "****************************************************************"
def main(t_val):
    '''Generate buy/sell orders for price-drop events and save them as CSV.

    For every symbol in the 'sp5002012' list plus 'SPY', over 2008-01-01 to
    2009-12-31, an event occurs on a day when the actual close falls below
    ``t_val`` after having been at or above it on the previous trading day.
    Each event emits a 100-share "Buy" on the event day and a 100-share
    "Sell" five trading days later (clamped to the last available day).

    t_val -- price threshold; converted with ``float`` for the comparison
             and interpolated as given into the output file name
             ``orders_<t_val>.csv``.

    The CSV has no header and no index; columns are year, month, day,
    equity, order, shares.
    '''
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    drops_below = float(t_val)

    dataobj = da.DataAccess('Yahoo')
    ls_symbols = dataobj.get_symbols_from_list('sp5002012')
    ls_symbols.append('SPY')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    df_close = d_data['actual_close']

    # Time stamps for the event range
    ldt_timestamps = df_close.index
    i_last_day = len(ldt_timestamps) - 1

    df_columns = ['year', 'month', 'day', 'equity', 'order', 'shares']

    def _order_row(dt_day, s_sym, s_order):
        # One output row for a 100-share order on the given day.
        return {
            'year': dt_day.year,
            'month': dt_day.month,
            'day': dt_day.day,
            'equity': s_sym,
            'order': s_order,
            'shares': 100
        }

    # Rows are collected in a list and turned into a frame once; appending
    # to a DataFrame inside the loop copies the whole frame every time.
    ls_rows = []
    for s_sym in ls_symbols:
        # Positional access to this symbol's prices, aligned with
        # ldt_timestamps because both come from df_close.
        na_sym_price = df_close[s_sym].values
        for i in range(1, len(ldt_timestamps)):
            f_symprice_today = na_sym_price[i]
            f_symprice_yest = na_sym_price[i - 1]
            if f_symprice_yest >= drops_below and f_symprice_today < drops_below:
                ls_rows.append(_order_row(ldt_timestamps[i], s_sym, 'Buy'))
                # Sell five trading days later, or on the last day if the
                # data ends sooner.
                sell_day = min(i + 5, i_last_day)
                ls_rows.append(
                    _order_row(ldt_timestamps[sell_day], s_sym, 'Sell'))

    df = pd.DataFrame(ls_rows, columns=df_columns)
    df.to_csv('orders_%s.csv' % (t_val),
              header=None,
              index=None,
              columns=df_columns)
def getNyseDaysOfMarketOpenBetween(startOfPeriod, endOfPeriod):
    '''Return ``du.getNYSEdays`` for the period, stamped at 16:00.

    The end of the range handed to ``du.getNYSEdays`` is ``endOfPeriod``
    plus one day.
    '''
    closingTime = dt.timedelta(hours=16)
    dayAfterEnd = endOfPeriod + dt.timedelta(days=1)
    return du.getNYSEdays(startOfPeriod, dayAfterEnd, closingTime)
import QSTK.qstkutil.DataAccess as da import datetime as dt import matplotlib.pyplot as plt import pandas as pd import numpy as np import QSTK.qstkstudy.EventProfiler as ep import copy yahoodatabase = da.DataAccess('Yahoo') syms = yahoodatabase.get_symbols_from_list('sp5002012') syms.append('SPY') keys = ['actual_close', 'close'] start_date = dt.datetime(2008, 01, 01) end_date = dt.datetime(2009, 12, 31) delta_time = dt.timedelta(hours=16) opentimes = du.getNYSEdays(start_date, end_date, delta_time) prices = yahoodatabase.get_data(opentimes, syms, keys) prices_dic = dict(zip(keys, prices)) for key in keys: prices_dic[key] = prices_dic[key].fillna(method='ffill') prices_dic[key] = prices_dic[key].fillna(method='bfill') prices_dic[key] = prices_dic[key].fillna(1.0) prices_actclose_all = prices_dic['actual_close'] prices_actclose_SPY = prices_actclose_all['SPY'] events = copy.deepcopy(prices_actclose_all) events = events * np.NAN for sym in syms:
dt_start = min(df_orders.index) dt_end = max(df_orders.index) ls_dt_all_from_orders = df_orders.index.tolist() sym_all_from_orders = df_orders['sym'].tolist() #remove duplicate syms, dates ls_sym_unique = list(set(sym_all_from_orders)) ls_dt_unique = list(set(ls_dt_all_from_orders)) ls_dt_unique.sort() ## STEP 2 -- put this into a function! #read in the data from Yahoo dataobj = da.DataAccess('Yahoo') dt_start_read = dt_start dt_end_read = dt_end + dt.timedelta( days=1) #end date needs to be offset by one ldt_timestamps = du.getNYSEdays(dt_start_read, dt_end_read, dt.timedelta(hours=16)) ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldf_data = dataobj.get_data(ldt_timestamps, ls_sym_unique, ls_keys) d_data = dict(zip( ls_keys, ldf_data)) #this is the data for the symbols we're interested in #remove the NaNs from the price data for s_key in ls_keys: d_data[s_key] = d_data[s_key].fillna(method='ffill') d_data[s_key] = d_data[s_key].fillna(method='bfill') d_data[s_key] = d_data[s_key].fillna(1.0) ## STEP 3 #dataframe for SHARES of each symbol that you are CURRENTLY HOLDING- make sure they are floating point numbers! df_trade_matrix = np.zeros((len(ldt_timestamps), len(ls_sym_unique))) df_trade_matrix = pd.DataFrame(df_trade_matrix, index=ldt_timestamps,
def tutorial01():
    '''Fetch 2006-2010 prices for five symbols and save a set of PDF charts.

    Writes, in order: ``adjustedclose.pdf``, ``normalizedclose.pdf``,
    ``dailyreturns.pdf``, ``correlationscatter.pdf`` ($SPX vs XOM daily
    returns) and ``cumulativereturns.pdf``.

    Relies on the module-level names ``dt``, ``du``, ``da``, ``tsu``,
    ``np`` and ``plt``.
    '''
    # Define the company list to get the stock prices.
    ls_symbols = ["AAPL", "GLD", "GOOG", "$SPX", "XOM"]

    # Define the date range of stock data.
    dt_start = dt.datetime(2006, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # Define the time of date as 4:00 PM, when it is the close of the day.
    dt_timeofday = dt.timedelta(hours=16)

    # Create a timestamp list object for QSTK.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Define the data repository as Yahoo Finance.
    c_dataobj = da.DataAccess("Yahoo")

    # Define data keys.
    ls_keys = ["open", "high", "low", "close", "volume", "actual_close"]

    # Retrieve data.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)

    # Create a dictionary (like std::map in C++).
    d_data = dict(zip(ls_keys, ldf_data))

    # Choose a set of data to plot.
    na_price = d_data["close"].values

    # Plot the raw prices and save the figure in PDF format.
    plt.clf()
    plt.plot(ldt_timestamps, na_price)
    plt.legend(ls_symbols)
    plt.ylabel("Adjusted Close")
    plt.xlabel("Date")
    plt.savefig("adjustedclose.pdf", format="pdf")

    # Normalize the price data so that data.begin = 1.0.
    na_normalized_price = na_price / na_price[0, :]

    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel("Normalized Close")
    plt.xlabel("Date")
    plt.savefig("normalizedclose.pdf", format="pdf")

    # Calculate daily returns (returnize0 works in place).
    na_rets = na_normalized_price.copy()
    tsu.returnize0(na_rets)

    plt.clf()
    plt.plot(ldt_timestamps, na_rets)
    plt.legend(ls_symbols)
    plt.ylabel("Daily Returns")
    plt.xlabel("Date")
    plt.savefig("dailyreturns.pdf", format="pdf")

    # Check the correlation between '$SPX' and 'XOM' using scatter plots.
    # Columns are looked up by symbol so the plotted series always match
    # the axis labels (a hard-coded column 1 would plot GLD, not XOM).
    i_spx = ls_symbols.index("$SPX")
    i_xom = ls_symbols.index("XOM")
    plt.clf()
    plt.scatter(na_rets[:, i_spx], na_rets[:, i_xom], c='blue')
    plt.xlabel("$SPX")
    plt.ylabel("XOM")
    plt.savefig("correlationscatter.pdf", format="pdf")

    # Calculate cumulative returns: start at 1.0 and compound day by day.
    daily_cum_ret = np.empty(na_rets.shape)
    daily_cum_ret[0, :] = 1.0
    for t in range(1, na_rets.shape[0]):
        daily_cum_ret[t] = daily_cum_ret[t - 1] * (1.0 + na_rets[t, :])

    plt.clf()
    plt.plot(ldt_timestamps, daily_cum_ret)
    plt.legend(ls_symbols)
    plt.ylabel("Cumulative Returns")
    plt.xlabel("Date")
    plt.savefig("cumulativereturns.pdf", format="pdf")
def main():
    '''Plot the 2009 and 2010 efficient frontiers for the S&P 100 list.

    The frontier fitted on the year before 2010-01-01 is drawn in blue, the
    frontier fitted on the following year in red, and the first frontier's
    portfolios evaluated on the second year's returns in black.  Each
    symbol is drawn as a green cross.  Output goes to ``tutorial8.pdf``.
    '''
    # S&P 100
    ls_symbols = [
        'AAPL', 'ABT', 'ACN', 'AEP', 'ALL', 'AMGN', 'AMZN', 'APC', 'AXP',
        'BA', 'BAC', 'BAX', 'BHI', 'BK', 'BMY', 'BRK.B', 'CAT', 'C', 'CL',
        'CMCSA', 'COF', 'COP', 'COST', 'CPB', 'CSCO', 'CVS', 'CVX', 'DD',
        'DELL', 'DIS', 'DOW', 'DVN', 'EBAY', 'EMC', 'EXC', 'F', 'FCX', 'FDX',
        'GD', 'GE', 'GILD', 'GOOG', 'GS', 'HAL', 'HD', 'HNZ', 'HON', 'HPQ',
        'IBM', 'INTC', 'JNJ', 'JPM', 'KFT', 'KO', 'LLY', 'LMT', 'LOW', 'MA',
        'MCD', 'MDT', 'MET', 'MMM', 'MO', 'MON', 'MRK', 'MS', 'MSFT', 'NKE',
        'NOV', 'NSC', 'NWSA', 'NYX', 'ORCL', 'OXY', 'PEP', 'PFE', 'PG', 'PM',
        'QCOM', 'RF', 'RTN', 'SBUX', 'SLB', 'HSH', 'SO', 'SPG', 'T', 'TGT',
        'TWX', 'TXN', 'UNH', 'UPS', 'USB', 'UTX', 'VZ', 'WAG', 'WFC', 'WMB',
        'WMT', 'XOM'
    ]

    # Data source: Yahoo.
    c_dataobj = da.DataAccess('Yahoo')

    # Drop portfolio symbols the data source does not know about.
    set_known_syms = set(c_dataobj.get_all_symbols())
    ls_symbols = [s_sym for s_sym in ls_symbols if s_sym in set_known_syms]

    # Fit window is the 365 days before dt_end, test window the 365 after.
    dt_end = dt.datetime(2010, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # Closing prices, hence 16:00 timestamps.
    dt_timeofday = dt.timedelta(hours=16)

    def _daily_returns(dt_from, dt_to):
        # Close prices for the window, gaps filled forward then backward,
        # converted in place to daily returns on a private copy.
        ldt_days = du.getNYSEdays(dt_from, dt_to, dt_timeofday)
        df_prices = c_dataobj.get_data(ldt_days, ls_symbols, "close")
        df_prices = df_prices.fillna(method='ffill')
        df_prices = df_prices.fillna(method='bfill')
        na_values = df_prices.values.copy()
        tsu.returnize0(na_values)
        return na_values

    na_data = _daily_returns(dt_start, dt_end)
    na_data_test = _daily_returns(dt_end, dt_test)

    # Frontier for each window; only the curve is needed for the test year.
    (lf_returns, lf_std, lna_portfolios, na_avgrets,
     na_std) = getFrontier(na_data)
    lf_returns_test, lf_std_test, _, _, _ = getFrontier(na_data_test)

    plt.clf()
    plt.plot(lf_std, lf_returns, 'b')
    plt.plot(lf_std_test, lf_returns_test, 'r')

    # How the fit-year frontier portfolios did in the test year.
    lna_port_rets = [
        np.dot(na_data_test, na_portfolio) for na_portfolio in lna_portfolios
    ]
    lf_std_port_test = [np.std(na_rets) for na_rets in lna_port_rets]
    lf_ret_port_test = [np.average(na_rets) for na_rets in lna_port_rets]
    plt.plot(lf_std_port_test, lf_ret_port_test, 'k')

    # One green + per symbol at (risk, return).
    for i_sym, f_avg_ret in enumerate(na_avgrets):
        plt.plot(na_std[i_sym], f_avg_ret, 'g+')

    # Labels and axes.
    ls_legend = [
        '2009 Frontier', '2010 Frontier',
        'Performance of \'09 Frontier in 2010'
    ]
    plt.legend(ls_legend, loc='lower right')
    plt.title('Efficient Frontier For S&P 100 ')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
# Prepare the inputs for the price download: unique symbols, sorted trades
# and sorted unique dates.
symbol_list = list(set(symbol_list))
date_trade_list = sorted(date_trade_list)
date_list = sorted(list(set(date_list)))
# End of the read window: first field of the last sorted trade plus one day
# (presumably the trade date -- TODO confirm the tuple layout).
dt_end_read = date_trade_list[-1][0] + dt.timedelta(days=1)
dataobj = da.DataAccess('Yahoo')
symbols = symbol_list
startdate = date_trade_list[0][0]
enddate = dt_end_read
dt_timeofday = dt.timedelta(hours=16)
# Aliases of the values above under other names.
dt_start = startdate
dt_end = enddate
ls_symbols = symbols
ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
# Remove NaNs: fill forward, then backward, then fall back to 1.0.
for s_key in ls_keys:
    d_data[s_key] = d_data[s_key].fillna(method='ffill')
    d_data[s_key] = d_data[s_key].fillna(method='bfill')
    d_data[s_key] = d_data[s_key].fillna(1.0)
df_close = d_data['close']
# Two zero-filled frames, one row per timestamp and one column per symbol.
df = pd.DataFrame(np.zeros((len(ldt_timestamps), len(symbols))),
                  index=ldt_timestamps,
                  columns=symbols)
df_price = pd.DataFrame(np.zeros((len(ldt_timestamps), len(symbols))),
                        index=ldt_timestamps,
                        columns=symbols)
def simulate(start_date, end_date, symbols, allocations):
    '''Compute daily-return statistics for a fixed-allocation portfolio.

    start_date, end_date -- bounds handed to ``du.getNYSEdays``.
    symbols              -- equities to read from the Yahoo data source.
    allocations          -- one weight per symbol, in the same order as
                            ``symbols`` (presumably summing to 1.0 --
                            verify against callers).

    Returns the tuple ``(std_daily_ret, average_daily_ret, sharpe_ratio,
    cum_return)`` where

    * the daily returns are those of the portfolio value obtained by
      weighting each symbol's normalized close price by its allocation,
    * ``sharpe_ratio`` is annualized with sqrt(252) and assumes a zero
      risk-free rate,
    * ``cum_return`` is the final portfolio value divided by the initial
      one.
    '''
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(start_date, end_date, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)

    # Keys to be read from the data, it is good to read everything in one go.
    keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, keys)
    d_data = dict(zip(keys, ldf_data))

    # Getting the numpy ndarray of close prices.
    na_price = d_data['close'].values

    # Normalizing the prices to start at 1 and see relative returns.
    na_normalized_price = na_price / na_price[0, :]

    # Daily value of the portfolio: allocation-weighted normalized prices.
    # The statistics must be taken on this value series, not on the
    # per-symbol returns.
    na_alloc = np.asarray(allocations, dtype=float)
    na_port_value = np.sum(na_normalized_price * na_alloc, axis=1)

    # Daily returns of the whole portfolio; the first day has no previous
    # value and is defined as 0.
    total_daily_ret = np.zeros(na_port_value.shape[0])
    total_daily_ret[1:] = na_port_value[1:] / na_port_value[:-1] - 1.0

    average_daily_ret = np.mean(total_daily_ret)
    std_daily_ret = np.std(total_daily_ret)
    cum_return = na_port_value[-1] / na_port_value[0]

    # Annualize with the square root of the number of trading days per year.
    trading_days_per_year = 252
    sharpe_ratio = (np.sqrt(trading_days_per_year) * average_daily_ret /
                    std_daily_ret)

    return std_daily_ret, average_daily_ret, sharpe_ratio, cum_return
# Sort the traded dates by date order traded_dates.sort(key=lambda trade: trade[0]) traded_dates = np.array(traded_dates) # Add a day to the datetime period_end = period_end + dt.timedelta(days=1) # print traded_dates print "Trading symbols:" + str(symbols) print "From:" + str(period_start) print "To:" + str(period_end) # 3 - Read in data # - Read in adjusted close ldt_timestamps = du.getNYSEdays(period_start, period_end, dt.timedelta(hours=16)) dataobj = da.DataAccess('Yahoo') ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close'] ldf_data = dataobj.get_data(ldt_timestamps, symbols, ls_keys) d_data = dict(zip(ls_keys, ldf_data)) # Read in adjusted closing prices for the equities. adjusted_close_price = d_data['close'].values timestamps = np.array(ldt_timestamps) timestamps = timestamps.reshape((timestamps.size, 1)) adjusted_close_price = np.concatenate((adjusted_close_price, timestamps), axis=1) # 4 - Scan trades to update cash # - BUY is a cash reduction
def analysis(mkt, dt_date):
    '''Write technical indicators for every symbol as of ``dt_date``.

    Prices are read for the 200 trading days ending at ``dt_date`` (the
    start is ``qdu.getNYSEoffset(dt_date, -199, mkt)``).  For each symbol
    one CSV row is written with: close, SMA50, SMA200, SMA50/SMA200, upper
    and lower Bollinger bands (20 days, 2 standard deviations), the
    Bollinger value, MACD (EMA12 - EMA26), its 9-span EMA and their
    difference.

    mkt     -- market identifier; passed to the ``qdu`` date helpers and
               prefixed to 'Yahoo' to select the data source.
    dt_date -- timestamp used both as end of the window and as the row
               that is reported; it must be one of the returned trading
               timestamps.

    Output goes to ``analysisPath + 'analysis-<YYYYMMDD>.csv'``.  The
    symbol list ``ls_symbols`` and ``analysisPath`` are read from module
    scope.
    '''
    # Set to True to also save a price/Bollinger chart per oversold symbol.
    plot_chart = False

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Window: the 200 trading days ending at dt_end.
    dt_end = dt_date
    dt_start = qdu.getNYSEoffset(dt_end, -199, mkt)
    ldt_timestamps = qdu.getNYSEdays(dt_start, dt_end, dt_timeofday, mkt)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess(mkt + 'Yahoo')

    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    df_close = d_data['close']
    df_actual_close = d_data['actual_close']

    # SMA
    df_sma50 = pd.rolling_mean(df_actual_close, 50)
    df_sma200 = pd.rolling_mean(df_actual_close, 200)

    # Bollinger Bands
    df_mean = pd.rolling_mean(df_actual_close, 20)
    df_std = pd.rolling_std(df_actual_close, 20)
    upper_bband = df_mean + (2 * df_std)
    lower_bband = df_mean - (2 * df_std)
    df_bollinger = (df_actual_close - df_mean) / (2 * df_std)

    # MACD
    emaslow = pd.ewma(df_actual_close, span=26)
    emafast = pd.ewma(df_actual_close, span=12)
    macd = emafast - emaslow
    ema9 = pd.ewma(macd, span=9)

    str_date = dt.datetime.strftime(dt_end, "%Y%m%d")
    # The with-block closes the report even if a symbol lookup fails.
    with open(analysisPath + 'analysis-' + str_date + '.csv', 'w') as f:
        f.write(
            'symbol,close,sma50,sma200,sma50/sma200,upper_bband,lower_bband,bollinger,macd,ema9,macd-ema9\n'
        )

        for ls_symbol in ls_symbols:
            f.write(ls_symbol + ",{},{},{},{},{},{},{},{},{},{}".format(
                df_close[ls_symbol][dt_end],
                df_sma50[ls_symbol][dt_end],
                df_sma200[ls_symbol][dt_end],
                df_sma50[ls_symbol][dt_end] / df_sma200[ls_symbol][dt_end],
                upper_bband[ls_symbol][dt_end],
                lower_bband[ls_symbol][dt_end],
                df_bollinger[ls_symbol][dt_end],
                macd[ls_symbol][dt_end],
                ema9[ls_symbol][dt_end],
                macd[ls_symbol][dt_end] - ema9[ls_symbol][dt_end]) + '\n')

            if plot_chart:
                # Chart only symbols whose latest Bollinger value is at or
                # below -0.75.
                if df_bollinger[ls_symbol].tail(1) <= -0.75:
                    plt.clf()
                    plt.subplot(211)
                    plt.plot(ldt_timestamps,
                             df_close[ls_symbol],
                             label=ls_symbol)
                    plt.legend()
                    plt.ylabel('Price')
                    plt.xlabel('Date')
                    plt.xticks(size='xx-small')
                    plt.xlim(ldt_timestamps[0], ldt_timestamps[-1])
                    plt.subplot(212)
                    plt.plot(ldt_timestamps,
                             df_bollinger[ls_symbol],
                             label=ls_symbol + '-Bollinger')
                    plt.axhline(1.0, color='r')
                    plt.axhline(-1.0, color='r')
                    plt.legend()
                    plt.ylabel('Bollinger')
                    plt.xlabel('Date')
                    plt.xticks(size='xx-small')
                    plt.xlim(ldt_timestamps[0], ldt_timestamps[-1])
                    plt.savefig(ls_symbol + '.pdf', format='pdf')