def test_dot_real(data_dict):
    """Dot operator testing with real datasets"""
    data_dir = os.path.join(os.getcwd(), 'data')
    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_data(data_dir,
                 data_dict['data_name'],
                 data_dict['url'],
                 data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']
    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, data_dict['feature_dim'])
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        print("Running Benchmarking on %r data" % data_dict['data_mini'])
    print('{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'.format(
        'density(%)', 'n', 'm', 'k', 't_dense/t_sparse', 't_dense(ms)',
        't_sparse(ms)', 'is_transpose', 'rhs_rsp'))

    # Sweep the output dimension at the default batch size.
    for output_dim in m:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, output_dim, density,
                              batch_size_list[default_batch_size_index], num_batches,
                              rsp=True)

    # Sweep the batch size at the default output dimension.
    for batch_size in batch_size_list:
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches,
                              transpose=True)
        _compare_sparse_dense(data_dir, data_dict['data_name'], data_dict['data_mini'],
                              k, m[default_output_index], density, batch_size, num_batches,
                              rsp=True)
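# A hedged usage sketch for test_dot_real above: every key mirrors a lookup in
# the function body, but the file names, URL, and sizes are hypothetical.
example_data_dict = {
    'data_name': 'example.t',                    # local file under ./data
    'data_origin_name': 'example.t.bz2',         # name of the downloaded archive
    'data_mini': 'example-mini',                 # label used in verbose output
    'url': 'https://example.com/example.t.bz2',  # hypothetical download URL
    'feature_dim': 1000000,                      # k, the input feature dimension
    'm': [256, 512, 1024],                       # candidate output dimensions
    'batch_size': [64, 128],                     # candidate batch sizes
    'default_index': {'output_dim': 1, 'batch_size': 0},
    'num_batches': 50,
}
# test_dot_real(example_data_dict)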
def compute_portvals(start_date, end_date, orders, startval):
    # get the trading days using SPY as reference
    dates = pd.date_range(start_date, end_date)
    df = get_data(['SPY'], dates)

    # Make the sell orders a negative value
    orders['Shares'][orders['Order'].str.upper() == 'SELL'] = \
        -orders['Shares'][orders['Order'].str.upper() == 'SELL']

    # Create a data frame to hold a matrix of all the stocks
    symbols = np.unique(orders['Symbol'].values.ravel())
    for stock in symbols:
        df[stock] = 0

    # Get the prices for each day in the index.
    # Front-fill the prices where we have an NA, then back-fill.
    prices = get_data(symbols, df.index, False)
    prices = prices.fillna(method='ffill', axis=0)
    prices = prices.fillna(method='bfill', axis=0)

    # Add the starting value and a cash value
    df['Cash'] = startval + 0.0
    prices['Cash'] = 1
    orders['Prices'] = 0

    for ind, row in orders.iterrows():
        # calculate leverage:
        # leverage = (sum(longs) + sum(abs(shorts))) / (sum(longs) - sum(abs(shorts)) + cash)
        # get temporary tables after the transaction is made, and before it is made
        df_chk, df_chk_b4 = df.ix[ind, 1:], df.ix[ind, 1:]
        df_chk[row['Symbol']] = df[row['Symbol']][ind] + row['Shares']
        df_chk['Cash'] = df['Cash'][ind] - prices[row['Symbol']][ind] * row['Shares']
        df_chk = prices.ix[ind] * df_chk
        df_chk_b4 = prices.ix[ind] * df_chk_b4

        # calculate the leverage after and before
        lev_after = sum(abs(df_chk[:-1])) / sum(df_chk)
        lev_before = sum(abs(df_chk_b4[:-1])) / sum(df_chk_b4)
        # print lev_after, lev_before, ind

        # if lev_after < 1000.0 or lev_after < lev_before:
        df[row['Symbol']][ind:end_date] = df[row['Symbol']][ind:end_date] + row['Shares']
        df['Cash'][ind:end_date] = df['Cash'][ind:end_date] - \
            prices[row['Symbol']][ind] * row['Shares']
        # else:
        #     print "Cancel the order", ind, row['Symbol'], row['Shares'], \
        #         "Lev before", lev_before, "Lev after", lev_after

    df = df.iloc[:, 1:] * prices
    portvals = df.sum(axis=1)
    # print portvals
    return portvals


def test_run():
    """Driver function."""
    # Define input parameters
    start_date = '2011-01-05'
    end_date = '2011-01-20'
    orders_file = os.path.join("orders", "orders-short.csv")
    start_val = 1000000
def test_run():
    symbols = ['IBM']
    train_dates = pd.date_range('2008-1-1', '2010-12-31')
    test_dates = pd.date_range('2011-1-1', '2011-12-31')
    training = get_data(symbols, train_dates)
    testing = get_data(symbols, test_dates)
    trainingIBM = training[symbols]
    testingIBM = testing[symbols]

    testing_result = train_model(trainingIBM, testingIBM)

    ax = testing_result[['IBM', 'pred']].plot(title='Predicted market')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    fig = ax.get_figure()
    fig.savefig("output/predicted_market.png")

    generate_orders(testing_result)
    orders_file = os.path.join("orders", "orders.csv")
    start_val = 10000

    # Process orders
    portvals = compute_portvals('2011-1-1', '2011-12-31', orders_file, start_val)
    if isinstance(portvals, pd.DataFrame):
        # if a DataFrame is returned, select the first column to get a Series
        portvals = portvals[portvals.columns[0]]
    # print portvals

    # Get portfolio stats
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(portvals)

    # Simulate a $SPX-only reference portfolio to get stats
    prices_SPX = get_data(['$SPX'], test_dates)
    prices_SPX = prices_SPX[['$SPX']]  # remove SPY
    portvals_SPX = get_portfolio_value(prices_SPX, [1.0])
    cum_ret_SPX, avg_daily_ret_SPX, std_daily_ret_SPX, sharpe_ratio_SPX = \
        get_portfolio_stats(portvals_SPX)

    print "Data Range: {} to {}".format('2011-1-1', '2011-12-31')
    print
    print "Sharpe Ratio of Fund: {}".format(sharpe_ratio)
    print "Sharpe Ratio of $SPX: {}".format(sharpe_ratio_SPX)
    print
    print "Cumulative Return of Fund: {}".format(cum_ret)
    print "Cumulative Return of $SPX: {}".format(cum_ret_SPX)
    print
    print "Standard Deviation of Fund: {}".format(std_daily_ret)
    print "Standard Deviation of $SPX: {}".format(std_daily_ret_SPX)
    print
    print "Average Daily Return of Fund: {}".format(avg_daily_ret)
    print "Average Daily Return of $SPX: {}".format(avg_daily_ret_SPX)
    print
    print "Final Portfolio Value: {}".format(portvals[-1])

    # Plot computed daily portfolio value
    df_temp = pd.concat([portvals, prices_SPX['$SPX']],
                        keys=['Portfolio', '$SPX'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and $SPX")
def assess_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                     syms=['GOOG', 'AAPL', 'GLD', 'XOM'],
                     allocs=[0.1, 0.2, 0.3, 0.4],
                     sv=1000000, rfr=0.0, sf=252.0,
                     gen_plot=False):
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later
    prices_SPY = (prices_SPY / prices_SPY.iloc[0]) * sv

    # Get daily portfolio value
    port_val = get_portfolio_value(prices, allocs, sv)

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cr, adr, sddr, sr = get_portfolio_stats(port_val, rfr, sf)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        # Plot normalized portfolio value.
        df_temp = pd.concat([port_val / sv, prices_SPY / sv],
                            keys=['Portfolio', 'SPY'], axis=1)
        plot_data(df_temp, title="Daily portfolio value and SPY",
                  ylabel="Normalized price")

    # Compute end value
    ev = port_val[-1]

    return cr, adr, sddr, sr, ev
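# A minimal usage sketch for assess_portfolio above, assuming the same helpers
# (get_data, get_portfolio_value, get_portfolio_stats) are importable; the
# symbols, allocations, and dates are illustrative only.
cr, adr, sddr, sr, ev = assess_portfolio(sd=dt.datetime(2010, 1, 1),
                                         ed=dt.datetime(2010, 12, 31),
                                         syms=['GOOG', 'AAPL'],
                                         allocs=[0.5, 0.5],
                                         sv=1000000, gen_plot=False)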
def run_strategy():
    symbol = 'IBM'
    start_date = '2007-12-31'
    end_date = '2009-12-31'
    prices_IBM = get_data([symbol], pd.date_range(start_date, end_date))

    bollinger_df = calc_bollinger_bands(prices_IBM[symbol], 20)
    order_df = build_orders(bollinger_df)
    order_df.index.name = 'Date'
    order_df.to_csv("bollinger_order.csv")

    # Build plot
    plt.style.use('ggplot')
    ax = bollinger_df.plot()

    # Add lines showing buys/sells
    for index, row in order_df.iterrows():
        if row['Order'] == 'BUY':
            ax.axvline(x=index, color='g')
        elif row['Order'] == 'SELL':
            ax.axvline(x=index, color='r')
    plt.show()
def compute_insample_data_ML4T(start_date, end_date):
    dates = pd.date_range(start_date, end_date)
    prices = get_data(['ML4T-399'], dates, True)
    prices = prices.drop('SPY', axis=1)

    # calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:] / prices[:-1].values) - 1.0
    daily_returns.ix[0, :] = 0
    vol = pd.rolling_std(daily_returns, window=10)
    vol1 = vol[9:-5]

    # calculating momentum
    momentum = prices.copy()
    momentum[9:] = (prices[9:] / prices[:-9].values) - 1.0
    momentum1 = momentum[9:-5]

    # calculating bollinger values
    sma = pd.rolling_mean(prices, window=10)
    sma1 = sma.dropna()
    std = pd.rolling_std(prices, window=10)
    std1 = std[9:]
    bb_prices = prices[9:]
    bb_value = (bb_prices - sma1) / (2 * std1)
    bb_value1 = bb_value[0:-5]

    # shifting prices to get the 5-day future return
    shifted_prices = prices.shift(-5)
    future_prices = prices.copy()
    prices1 = prices[9:-5]
    future_return = (shifted_prices / prices) - 1.0
    future_return1 = future_return[9:-5]

    vol_array = vol1.values
    momentum_array = momentum1.values
    bb_value_array = bb_value1.values

    data = np.concatenate((vol_array, momentum_array, bb_value_array, future_return1), axis=1)
    predY = knn_learner(data)
    predY_df_return = pd.DataFrame(predY, index=future_return1.index, columns=['predY'])
    predY_df_price = prices1 * (predY_df_return.values + 1)
    future_prices = prices1 * (future_return1.values + 1)

    predY_df_price = predY_df_price.rename(columns={'ML4T-399': 'Predicted Y'})
    ax = predY_df_price.plot(title="Sine Data Training Y/Price/Predicted Y [2008-2009]")
    future_prices = future_prices.rename(columns={'ML4T-399': 'Training Y'})
    prices1 = prices1.rename(columns={'ML4T-399': 'Price'})
    future_prices.plot(ax=ax)
    prices1.plot(ax=ax)
    plt.ylim((0, 100))

    start_date = '2008-01-15'
    end_date = '2009-12-23'
    print
    print "Sine Data In Sample Statistics"
    print
    my_strategy_ML4T(prices1, predY_df_price, start_date, end_date,
                     "ML4T_insample_orders", "Sine Data In Sample Entries/Exits",
                     "Sine Data In Sample Backtest")

    start_date = '2010-01-01'
    end_date = '2010-12-31'
    compute_outsample_data_ML4T(data, start_date, end_date)
def get_sequence_list_and_phyche_value_pseknc(input_data, extra_phyche_index=None):
    """For PseDNC and PseKNC, make sequence_list and phyche_value.

    :param input_data: file type or handle.
    :param extra_phyche_index: dict, the key is the dinucleotide (string),
        the value is its physicochemical property value (list).
        It means the user-defined physicochemical indices.
    """
    if extra_phyche_index is None:
        extra_phyche_index = {}

    original_phyche_value = {
        'AA': [0.06, 0.5, 0.09, 1.59, 0.11, -0.11],
        'AC': [1.5, 0.5, 1.19, 0.13, 1.29, 1.04],
        'GT': [1.5, 0.5, 1.19, 0.13, 1.29, 1.04],
        'AG': [0.78, 0.36, -0.28, 0.68, -0.24, -0.62],
        'CC': [0.06, 1.08, -0.28, 0.56, -0.82, 0.24],
        'CA': [-1.38, -1.36, -1.01, -0.86, -0.62, -1.25],
        'CG': [-1.66, -1.22, -1.38, -0.82, -0.29, -1.39],
        'TT': [0.06, 0.5, 0.09, 1.59, 0.11, -0.11],
        'GG': [0.06, 1.08, -0.28, 0.56, -0.82, 0.24],
        'GC': [-0.08, 0.22, 2.3, -0.35, 0.65, 1.59],
        'AT': [1.07, 0.22, 0.83, -1.02, 2.51, 1.17],
        'GA': [-0.08, 0.5, 0.09, 0.13, -0.39, 0.71],
        'TG': [-1.38, -1.36, -1.01, -0.86, -0.62, -1.25],
        'TA': [-1.23, -2.37, -1.38, -2.24, -1.51, -1.39],
        'TC': [-0.08, 0.5, 0.09, 0.13, -0.39, 0.71],
        'CT': [0.78, 0.36, -0.28, 0.68, -0.24, -0.62]}

    sequence_list = get_data(input_data)
    phyche_value = extend_phyche_index(original_phyche_value, extra_phyche_index)

    return sequence_list, phyche_value
def optimize_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                       syms=['GOOG', 'AAPL', 'GLD', 'XOM'], gen_plot=False):
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Find the allocations for the optimal portfolio.
    # Note that the values here ARE NOT meant to be correct for a test case.
    allocs = optimize_allocs(prices, min_sharpe_fun)
    cr, adr, sddr, sr = compute_portfolio_stats(get_port_val(prices, allocs), allocs)

    # Get daily portfolio value
    port_val = get_port_val(prices, allocs)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp = df_temp / df_temp.iloc[0]
        plot_stock_data(df_temp.ix[sd:ed, ['Portfolio', 'SPY']])

    return allocs, cr, adr, sddr, sr
def test_run():
    # Read data
    dates = pd.date_range('2012-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)

    # Compute Bollinger Bands
    # 1. Compute rolling mean
    rm_SPY = get_rolling_mean(df['SPY'], window=20)

    # 2. Compute rolling standard deviation
    rstd_SPY = get_rolling_std(df['SPY'], window=20)

    # 3. Compute upper and lower bands
    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

    # Plot raw SPY values, rolling mean and Bollinger Bands
    ax = df['SPY'].plot(title="Bollinger Bands", label='SPY')
    rm_SPY.plot(label='Rolling mean', ax=ax)
    upper_band.plot(label='upper band', ax=ax)
    lower_band.plot(label='lower band', ax=ax)

    # Add axis labels and legend
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
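# The three helpers used above are not shown in this snippet. A minimal sketch,
# assuming the conventional definition of Bollinger Bands as the rolling mean
# plus/minus two rolling standard deviations:
def get_rolling_mean(values, window):
    """Return the rolling mean of the given values, using the specified window size."""
    return values.rolling(window=window).mean()


def get_rolling_std(values, window):
    """Return the rolling standard deviation of the given values."""
    return values.rolling(window=window).std()


def get_bollinger_bands(rm, rstd):
    """Return upper and lower bands: rolling mean +/- two rolling standard deviations."""
    return rm + 2 * rstd, rm - 2 * rstd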
def ipseknc(input_data, k, w, lamada, phyche_list, alphabet,
            extra_index_file=None, all_prop=False):
    """This is a complete process in iPseKNC; k is the k-mer, but the index
    is just for dinucleotides.

    :param k: int, the value of k-tuple.
    :param phyche_list: list, the input physicochemical properties list.
    :param extra_index_file: a file path that includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    """
    phyche_list = get_phyche_list(k=2, phyche_list=phyche_list,
                                  extra_index_file=extra_index_file,
                                  alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if extra_index_file is not None:
        extra_phyche_index = get_extra_index(extra_index_file)
        from util import normalize_index
        phyche_vals = get_phyche_value(
            k=2, phyche_list=phyche_list, alphabet=alphabet,
            extra_phyche_index=normalize_index(extra_phyche_index, alphabet,
                                               is_convert_dict=True))
    else:
        phyche_vals = get_phyche_value(k=2, phyche_list=phyche_list, alphabet=alphabet)

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type=3)
def main():
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    plot_data(df)

    daily_returns = compute_daily_returns(df)

    # histogram
    daily_returns.hist(bins=20)
    # To plot two or more histograms on the same chart, call hist() once per
    # symbol, e.g.:
    #   daily_returns['SPY'].hist(bins=20, label="SPY")
    #   daily_returns['XOM'].hist(bins=20, label="XOM")

    mean = daily_returns['SPY'].mean()
    std = daily_returns['SPY'].std()

    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    print daily_returns.kurtosis()
def generate_orders(start_date, end_date, symbols):
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)
    prices = prices_all[symbols]
    prices_bands = rollinger_bands(prices)
    long_entry, long_exit, short_entry, short_exit = \
        calculate_entries(start_date, prices_bands)

    # save to orders.csv
    concatenated_entries = long_entry + long_exit + short_entry + short_exit
    concatenated_entries = sorted(concatenated_entries, key=lambda x: x[0])
    df_entries = pd.DataFrame(concatenated_entries,
                              columns=['Date', 'Symbol', 'Order', 'Shares'],
                              index=None)
    df_entries = df_entries.set_index('Date')
    df_entries.to_csv("orders/orders.csv")

    # plot orders
    ax = prices_bands.plot(title="Bollinger Bands")
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    for entry in long_entry:
        ax.axvline(entry[0], c='green')
    for entry in long_exit:
        ax.axvline(entry[0], c='black')
    for entry in short_entry:
        ax.axvline(entry[0], c='red')
    for entry in short_exit:
        ax.axvline(entry[0], c='black')
    # plt.show()
    fig = ax.get_figure()
    fig.savefig("output/entries.png")
def optimize_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                       syms=['GOOG', 'AAPL', 'GLD', 'XOM'], gen_plot=False):
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates, sd, ed)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # find the allocations for the optimal portfolio
    sv = 1000000
    normalized_prices = prices / prices.ix[0, :]
    # seed the optimizer with a uniform allocation, one weight per symbol
    x0 = np.full(len(syms), 1.0 / len(syms))
    optimal_allocs = spo.minimize(f, x0, args=(normalized_prices, sv),
                                  method='SLSQP', options={'disp': True},
                                  bounds=tuple((0, 1) for i in range(x0.size)),
                                  constraints=({'type': 'eq',
                                                'fun': lambda inputs: 1.0 - np.sum(inputs)}))
    allocs = optimal_allocs.x
    port_val, cr, adr, sddr, sr = calc_portfolio_stats(allocs, normalized_prices, sv)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp = df_temp / df_temp.ix[0, :]
        df_temp.plot()
        plt.show()

    return allocs, cr, adr, sddr, sr
def test_run():
    # Read data
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

    # Plot a histogram with 20 bins
    daily_returns.hist(bins=20)

    # Get mean and standard deviation
    mean = daily_returns['SPY'].mean()
    print "mean =", mean
    std = daily_returns['SPY'].std()
    print "std =", std

    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Compute kurtosis
    print daily_returns.kurtosis()
def get_vals(start_date, end_date, symbol):
    # Read in adjusted closing prices for the given symbols and date range.
    # To allow getting the SMA from day 1 on start_date, we would initially
    # read in an earlier date to compute the SMA.
    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbol, dates)  # automatically adds SPY
    prices = prices_all[symbol]  # only portfolio symbols
    return prices
def define_y(symbol, startdate_string='12/31/07', enddate_string='12/31/09', window=5):
    """
    :param symbol: STRING
    :param startdate_string: STRING 'MM/DD/YY'
    :param enddate_string: STRING 'MM/DD/YY'
    :param window: size of the rolling average for the 5-day forecast
    :return: y, y_np, prices. The 5-day future return in both pandas and
        NumPy formats, plus the underlying prices.
    """
    # Build the date range (currently has ALL dates, including non-trading days)
    start_date = pd.to_datetime(startdate_string)  # start date per instructions
    end_date = pd.to_datetime(enddate_string)      # end date per instructions
    dates = pd.date_range(start_date, end_date)
    symbols = [symbol, '$SPX']

    # Read in adjusted closing prices for given symbols, date range
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[[symbol]]  # only portfolio symbols

    # Compute SMA
    sma = pd.rolling_mean(prices, window)
    sma.columns = prices.columns

    # y is the 5-day future return
    y = (prices.shift(-5) / prices) - 1
    # Transpose y: as a 1-d output variable it needs to be a series.
    y_np = y.as_matrix().transpose()

    return y, y_np, prices
def get_indicators(start_date, end_date, symbols):
    """Compute discretized technical indicators for a stock portfolio."""
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols
    # prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    sym = symbols[1]
    # x1: Bollinger Band value, x2: 20-day momentum, x3: 20-day volatility
    x1 = (prices[sym] - pd.rolling_mean(prices[sym], 20)) / (2 * pd.rolling_std(prices[sym], 20))
    x1_dis = pd.cut(x1, 10, labels=False)
    x2 = prices[sym].pct_change(20)
    x2_dis = pd.cut(x2, 10, labels=False)
    x3 = pd.rolling_std(prices[sym].pct_change(1), 20)
    x3_dis = pd.cut(x3, 10, labels=False)

    tempdf = pd.concat([x1_dis, x2_dis, x3_dis], axis=1).dropna()
    tempdf.columns = ["x1", "x2", "x3"]
    print tempdf.dtypes
    # 0 = no position, 1 = negative position, 2 = holding long
    tempdf["holding"] = np.random.randint(0, 3, size=len(tempdf))
    tempdf["s"] = 1000 * tempdf["holding"] + 100 * tempdf["x3"] + 10 * tempdf["x2"] + 1 * tempdf["x1"]
    print tempdf.head(50)
    return tempdf, prices
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Parameters
    ----------
    start_date: first date to track
    end_date: last date to track
    orders_file: CSV file to read orders from
    start_val: total starting cash available

    Returns
    -------
    portvals: portfolio value for each trading day from start_date to
        end_date (inclusive)
    """
    dates = pd.date_range(start_date, end_date)
    orders = construct_orders(orders_file, dates)
    # print orders

    symbols = list(set(orders.Symbol))
    prices_all = get_data(symbols, dates)
    prices = prices_all[symbols]

    trades = calculate_trades(prices.index, orders, symbols)
    # print trades

    holdings = pd.DataFrame(index=prices.index)
    holdings['cash'] = start_val + (-1.0 * (prices * trades).sum(axis=1)).cumsum()
    holdings['stock'] = (prices * trades.cumsum()).sum(axis=1)

    portvals = holdings.cash + holdings.stock
    return portvals
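# A hedged usage sketch for compute_portvals above; the orders file path and
# the get_portfolio_stats helper are assumptions borrowed from neighboring snippets.
portvals = compute_portvals('2011-01-05', '2011-01-20',
                            os.path.join("orders", "orders-short.csv"), 1000000)
cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(portvals)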
def test_run():
    # Read data
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY', 'XOM', 'GLD']
    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    # plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

    # Scatterplot SPY vs XOM
    daily_returns.plot(kind='scatter', x='SPY', y='XOM')
    beta_XOM, alpha_XOM = np.polyfit(daily_returns['SPY'], daily_returns['XOM'], 1)
    print "beta_XOM= ", beta_XOM
    print "alpha_XOM= ", alpha_XOM
    plt.plot(daily_returns['SPY'], beta_XOM * daily_returns['SPY'] + alpha_XOM,
             '-', color='r')
    plt.grid()
    plt.show()

    # Scatterplot SPY vs GLD
    daily_returns.plot(kind='scatter', x='SPY', y='GLD')
    beta_GLD, alpha_GLD = np.polyfit(daily_returns['SPY'], daily_returns['GLD'], 1)
    print "beta_GLD= ", beta_GLD
    print "alpha_GLD= ", alpha_GLD
    plt.plot(daily_returns['SPY'], beta_GLD * daily_returns['SPY'] + alpha_GLD,
             '-', color='r')
    plt.grid()
    plt.show()

    # Calculate correlation coefficient
    print daily_returns.corr(method='pearson')
def optimize_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                       syms=['GOOG', 'AAPL', 'GLD', 'XOM'], gen_plot=False):
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Find the allocations for the optimal portfolio by maximizing the Sharpe
    # ratio, subject to non-negative allocations that sum to 1.
    x0 = np.random.random(len(syms))
    x0 /= x0.sum()
    # x0 = np.asarray([0.2, 0.2, 0.3, 0.3, 0.0])
    fun = lambda x: -sharp_ratio(prices.values, x)
    cons = ({'type': 'eq', 'fun': lambda inputs: 1 - np.sum(inputs)})
    bnds = tuple((0, None) for i in range(len(syms)))
    res = minimize(fun, x0, method='SLSQP', bounds=bnds, constraints=cons)
    allocs = res.x

    # Compute daily portfolio value, normalized to 1.0 on the first day
    priceSPY = prices_SPY.values
    priceSPY /= priceSPY[0]
    price_stocks = prices.values
    price_stocks /= price_stocks[0]
    price_stocks *= allocs
    port_val = pd.DataFrame(price_stocks.sum(axis=1), index=prices.index)
    prices_SPY = pd.DataFrame(priceSPY, index=prices.index)

    # Compute portfolio statistics
    cr = port_val.values[-1] - 1
    dr = port_val.values
    drShift = np.vstack([dr[0], dr[0:(len(dr) - 1)]])
    dr = dr / drShift - 1
    adr = dr.mean()
    sddr = dr.std()
    k = math.sqrt(252)
    sr = k * np.mean(dr) / np.std(dr)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp.columns = df_temp.columns.get_level_values(0)
        plot_data(df_temp, "Daily portfolio value and SPY", "Date", "Normalized prices")

    return allocs, cr, adr, sddr, sr
def optimize_portfolio(start_date, end_date, symbols):
    """Simulate and optimize portfolio allocations."""
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Get optimal allocations
    allocs = find_optimal_allocations(prices)
    allocs = allocs / np.sum(allocs)  # normalize allocations, if they don't sum to 1.0

    # Get daily portfolio value (already normalized since we use default start_val=1.0)
    port_val = get_portfolio_value(prices, allocs)

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

    # Print statistics
    print "Start Date:", start_date
    print "End Date:", end_date
    print "Symbols:", symbols
    print "Optimal allocations:", allocs
    print "Sharpe Ratio:", sharpe_ratio
    print "Volatility (stdev of daily returns):", std_daily_ret
    print "Average Daily Return:", avg_daily_ret
    print "Cumulative Return:", cum_ret

    # Compare daily portfolio value with normalized SPY
    normed_SPY = prices_SPY / prices_SPY.ix[0, :]
    df_temp = pd.concat([port_val, normed_SPY], keys=['Portfolio', 'SPY'], axis=1)
    plot_data(df_temp, title="Daily Portfolio Value and SPY")
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Parameters
    ----------
    start_date: first date to track
    end_date: last date to track
    orders_file: CSV file to read orders from
    start_val: total starting cash available

    Returns
    -------
    portvals: portfolio value for each trading day from start_date to
        end_date (inclusive)
    """
    # Create df_prices
    df_temp = pd.read_csv(orders_file, index_col='Date', parse_dates=True)
    symbols = []
    for index, row in df_temp.iterrows():
        symbols.append(row['Symbol'])
    symbols = list(set(symbols))
    dates = pd.date_range(start_date, end_date)
    df_prices = get_data(symbols, dates)
    df_prices = df_prices.drop('SPY', 1)
    df_prices['CASH'] = 1.0

    # Create df_trade.
    # Check for leverage by keeping a curr_list that stores the cumulative holding.
    # When a new order comes, create a temp_list with the updated holding, multiply
    # it with current prices, then check whether leverage exceeds 2. If it doesn't,
    # process the order: set curr_list to temp_list and update df_trade. If it
    # exceeds 2, don't process the order and do nothing.
    df_trade = df_prices.copy()
    df_trade[df_trade != 0] = 0
    df_trade.ix[start_date, 'CASH'] = start_val
    curr_list = df_trade.ix[start_date].copy()
    for index, row in df_temp.iterrows():
        temp_list = curr_list.copy()
        temp_list.ix[row['Symbol']] += (1 if row['Order'] == 'BUY' else -1) * float(row['Shares'])
        temp_list.ix['CASH'] += (-1 if row['Order'] == 'BUY' else 1) * \
            float(row['Shares']) * df_prices.ix[index, row['Symbol']]
        sum_abs_all = abs(temp_list).dot(df_prices.ix[index])
        sum_cash = abs(temp_list['CASH'])
        sum_all = temp_list.dot(df_prices.ix[index])
        leverage = (sum_abs_all - sum_cash) / sum_all
        if leverage <= 2.0:
            curr_list = temp_list.copy()
            df_trade.ix[index, row['Symbol']] += (1 if row['Order'] == 'BUY' else -1) * \
                float(row['Shares'])
            df_trade.ix[index, 'CASH'] += (-1 if row['Order'] == 'BUY' else 1) * \
                float(row['Shares']) * df_prices.ix[index, row['Symbol']]

    # Calculate holdings from df_trade and the portfolio values
    portvals = pd.Series(index=df_prices.index)
    portvals.ix[0] = df_prices.ix[0].dot(df_trade.ix[0])
    for i in range(1, df_trade.shape[0]):
        df_trade.ix[i] += df_trade.ix[i - 1]
        portvals.ix[i] = df_prices.ix[i].dot(df_trade.ix[i])
    print portvals
    return portvals
def pseknc(input_data, k, w, lamada, phyche_list, alphabet,
           extra_index_file=None, all_prop=False, theta_type=1):
    """This is a complete process in PseKNC.

    :param k: int, the value of k-tuple.
    :param phyche_list: list, the input physicochemical properties list.
    :param extra_index_file: a file path that includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file,
                                  alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet == index_list.DNA or alphabet == index_list.RNA:
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            from util import normalize_index
            phyche_vals = get_phyche_value(
                k, phyche_list, alphabet,
                normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type)
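# A hedged usage sketch for pseknc above: building PseKNC vectors for DNA
# sequences from a FASTA handle. The property names, parameter values, and
# file name are illustrative assumptions, not values taken from this snippet.
with open('test_dna.fasta') as f:
    vectors = pseknc(f, k=2, w=0.05, lamada=1,
                     phyche_list=['Twist', 'Tilt'],
                     alphabet=index_list.DNA)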
def assess_portfolio(start_date, end_date, symbols, allocs, start_val=1):
    """Simulate and assess the performance of a stock portfolio."""
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Get daily portfolio value
    port_val = get_portfolio_value(prices, allocs, start_val)
    plot_data(port_val, title="Daily Portfolio Value")

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

    # Print statistics
    print "Start Date:", start_date
    print "End Date:", end_date
    print "Symbols:", symbols
    print "Allocations:", allocs
    print "Sharpe Ratio:", sharpe_ratio
    print "Volatility (stdev of daily returns):", std_daily_ret
    print "Average Daily Return:", avg_daily_ret
    print "Cumulative Return:", cum_ret

    # Compare daily portfolio value with SPY using a normalized plot
    df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and SPY")
def assess_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                     syms=['GOOG', 'AAPL', 'GLD', 'XOM'],
                     allocs=[0.1, 0.2, 0.3, 0.4],
                     sv=1000000, rfr=0.0, sf=252.0,
                     gen_plot=False):
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Get daily portfolio value, normalized to 1.0 on the first day
    priceSPY = prices_SPY.values
    priceSPY /= priceSPY[0]
    price_stocks = prices.values
    price_stocks /= price_stocks[0]
    price_stocks *= allocs
    port_val = pd.DataFrame(price_stocks.sum(axis=1), index=prices.index)
    prices_SPY = pd.DataFrame(priceSPY, index=prices.index)

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cr = port_val.values[-1] - 1
    dr = port_val.values
    drShift = np.vstack([dr[0], dr[0:(len(dr) - 1)]])
    dr = dr / drShift - 1
    adr = dr.mean()
    sddr = dr.std()
    k = math.sqrt(sf)
    sr = k * np.mean(dr - rfr) / np.std(dr - rfr)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp.columns = df_temp.columns.get_level_values(0)
        plot_data(df_temp, "Daily portfolio value and SPY", "Date", "Normalized prices")

    # Compute end value
    ev = sv * port_val.values[-1]

    return cr, adr, sddr, sr, ev
def compute_insample_data_IBM(start_date, end_date):
    dates = pd.date_range(start_date, end_date)
    prices = get_data(["IBM"], dates, True)
    prices = prices.drop("SPY", axis=1)

    # calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:] / prices[:-1].values) - 1.0
    daily_returns.ix[0, :] = 0
    vol = pd.rolling_std(daily_returns, window=10)
    vol1 = vol[9:-5]

    # calculating momentum
    momentum = prices.copy()
    momentum[9:] = (prices[9:] / prices[:-9].values) - 1.0
    momentum1 = momentum[9:-5]

    # calculating bollinger values
    sma = pd.rolling_mean(prices, window=10)
    sma1 = sma.dropna()
    std = pd.rolling_std(prices, window=10)
    std1 = std[9:]
    bb_prices = prices[9:]
    bb_value = (bb_prices - sma1) / (2 * std1)
    bb_value1 = bb_value[0:-5]

    # shifting prices to get the 5-day future return
    shifted_prices = prices.shift(-5)
    future_prices = prices.copy()
    prices1 = prices[9:-5]
    future_return = (shifted_prices / prices) - 1.0
    future_return1 = future_return[9:-5]

    vol_array = vol1.values
    momentum_array = momentum1.values
    bb_value_array = bb_value1.values

    data = np.concatenate((vol_array, momentum_array, bb_value_array, future_return1), axis=1)
    predY = knn_learner(data)
    predY_df_return = pd.DataFrame(predY, index=future_return1.index, columns=["predY"])
    predY_df_price = prices1 * (predY_df_return.values + 1)
    future_prices = prices1 * (future_return1.values + 1)

    predY_df_price = predY_df_price.rename(columns={"IBM": "Predicted Y"})
    ax = predY_df_price.plot(title="Training Y/Price/Predicted Y: 2008-2009")
    future_prices = future_prices.rename(columns={"IBM": "Training Y"})
    prices1 = prices1.rename(columns={"IBM": "Price"})
    future_prices.plot(ax=ax)
    prices1.plot(ax=ax)

    start_date = "2008-01-15"
    end_date = "2009-12-23"
    my_strategy_IBM(prices1, predY_df_price, start_date, end_date,
                    "IBM_insample_orders", "Strategy 2008-2009")

    start_date = "2010-01-01"
    end_date = "2010-12-31"
    compute_outsample_data(data, start_date, end_date)
def test_run():
    dates = pd.date_range('2010-01-01', '2010-12-31')
    symbols = ['GOOG', 'IBM', 'GLD']
    df1 = get_data(symbols, dates)
    # print df1.ix['2010-03-01':'2010-04-01', ['SPY', 'IBM']]
    # print df1
    # plot_data(df1)
    plot_selected(df1, ['SPY', 'IBM', 'GOOG'], '2010-03-01', '2010-04-01')
def compute_outsample_data_IBM(traindata, start_date, end_date):
    dates = pd.date_range(start_date, end_date)
    prices = get_data(['IBM'], dates, True)
    prices = prices.drop('SPY', axis=1)

    # calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:] / prices[:-1].values) - 1.0
    daily_returns.ix[0, :] = 0
    vol = pd.rolling_std(daily_returns, window=10)
    vol1 = vol[9:-5]

    # calculating momentum
    momentum = prices.copy()
    momentum[9:] = (prices[9:] / prices[:-9].values) - 1.0
    momentum1 = momentum[9:-5]

    # calculating bollinger bands value
    sma = pd.rolling_mean(prices, window=10)
    sma1 = sma.dropna()
    std = pd.rolling_std(prices, window=10)
    std1 = std[9:]
    bb_prices = prices[9:]
    bb_value = (bb_prices - sma1) / (2 * std1)
    bb_value1 = bb_value[0:-5]

    # calculating 5-day shifted prices
    shifted_prices = prices.shift(-5)
    future_prices = prices.copy()
    prices1 = prices[9:-5]
    future_return = (shifted_prices / prices) - 1.0
    future_return1 = future_return[9:-5]

    vol_array = vol1.values
    momentum_array = momentum1.values
    bb_value_array = bb_value1.values

    data = np.concatenate((vol_array, momentum_array, bb_value_array, future_return1), axis=1)

    # getting predicted Y from knn
    print
    print "KNN Learner Statistics"
    print
    predY = knn_learner_test(traindata, data)
    predY_df_return = pd.DataFrame(predY, index=future_return1.index, columns=['predY'])
    predY_df_price = prices1 * (predY_df_return.values + 1)
    future_prices = prices1 * (future_return1.values + 1)
    predY_df_price = predY_df_price.rename(columns={'IBM': 'Predicted Y'})
    future_prices = future_prices.rename(columns={'IBM': 'Training Y'})
    prices1 = prices1.rename(columns={'IBM': 'Price'})

    start_date = '2010-01-15'
    end_date = '2010-12-23'
    print
    print "IBM Out of Sample Statistics"
    print
    my_strategy_IBM(prices1, predY_df_price, start_date, end_date,
                    "IBM_outsample_orders", "IBM Data Out of Sample Entries/Exits",
                    "IBM Data Out of Sample Backtest")
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Parameters
    ----------
    start_date: first date to track
    end_date: last date to track
    orders_file: CSV file to read orders from
    start_val: total starting cash available

    Returns
    -------
    portvals: portfolio value for each trading day from start_date to
        end_date (inclusive)
    """
    df_orders = pd.read_csv(orders_file, index_col='Date', parse_dates=True,
                            usecols=['Date', 'Symbol', 'Order', 'Shares'])
    symbols = list(set(df_orders['Symbol']))
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]

    cash = start_val
    holdings = {symbol: 0 for symbol in symbols}
    portvals = pd.Series(index=prices.index)
    for date in prices.index:
        if date in df_orders.index:
            df_orders2 = df_orders.ix[date:date]
            if len(df_orders2.shape) == 1:
                # a single order on this date
                symbol = df_orders2.ix['Symbol']
                order = df_orders2.ix['Order']
                shares = df_orders2.ix['Shares']
                if order == 'SELL':
                    shares = -shares
                if symbol in holdings.keys():
                    holdings[symbol] += shares
                else:
                    holdings[symbol] = shares
                cash -= prices.ix[date, symbol] * shares
            else:
                # several orders on this date
                for i in range(len(df_orders2)):
                    symbol = df_orders2.ix[i, 'Symbol']
                    order = df_orders2.ix[i, 'Order']
                    shares = df_orders2.ix[i, 'Shares']
                    if order == 'SELL':
                        shares = -shares
                    if symbol in holdings.keys():
                        holdings[symbol] += shares
                    else:
                        holdings[symbol] = shares
                    cash -= prices.ix[date, symbol] * shares
        stocksval = 0
        for k, v in holdings.iteritems():
            stocksval += prices.ix[date, k] * v
        portvals.ix[date] = cash + stocksval

    return portvals
def test_run():
    """Driver function."""
    # Define input parameters
    # Test 1
    # start_date = '2011-01-05'
    # end_date = '2011-01-20'
    # orders_file = os.path.join(".\orders", "orders-short.csv")
    # start_val = 1000000

    # Test 2
    # start_date = '2011-01-10'
    # end_date = '2011-12-20'
    # orders_file = os.path.join(".\orders", "orders.csv")
    # start_val = 1000000

    # Test 3
    start_date = '2011-01-14'
    end_date = '2011-12-14'
    orders_file = os.path.join(".\orders", "orders2.csv")
    start_val = 1000000

    # Process orders
    portvals = compute_portvals(start_date, end_date, orders_file, start_val)
    if isinstance(portvals, pd.DataFrame):
        # if a DataFrame is returned, select the first column to get a Series
        portvals = portvals[portvals.columns[0]]

    # Get portfolio stats
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(portvals)

    # Simulate a $SPX-only reference portfolio to get stats
    prices_SPX = get_data(['$SPX'], pd.date_range(start_date, end_date))
    prices_SPX = prices_SPX[['$SPX']]  # remove SPY
    portvals_SPX = get_portfolio_value(prices_SPX, [1.0])
    cum_ret_SPX, avg_daily_ret_SPX, std_daily_ret_SPX, sharpe_ratio_SPX = \
        get_portfolio_stats(portvals_SPX)

    # Compare portfolio against $SPX
    print "Data Range: {} to {}".format(start_date, end_date)
    print
    print "Sharpe Ratio of Fund: {}".format(sharpe_ratio)
    print "Sharpe Ratio of $SPX: {}".format(sharpe_ratio_SPX)
    print
    print "Cumulative Return of Fund: {}".format(cum_ret)
    print "Cumulative Return of $SPX: {}".format(cum_ret_SPX)
    print
    print "Standard Deviation of Fund: {}".format(std_daily_ret)
    print "Standard Deviation of $SPX: {}".format(std_daily_ret_SPX)
    print
    print "Average Daily Return of Fund: {}".format(avg_daily_ret)
    print "Average Daily Return of $SPX: {}".format(avg_daily_ret_SPX)
    print
    print "Final Portfolio Value: {}".format(portvals[-1])

    # Plot computed daily portfolio value
    df_temp = pd.concat([portvals, prices_SPX['$SPX']],
                        keys=['Portfolio', '$SPX'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and $SPX")
def run_simulations(symbol="IBM",
                    sd_train=dt.datetime(2007, 12, 31),
                    ed_train=dt.datetime(2009, 12, 31),
                    sd_test=dt.datetime(2009, 12, 31),
                    ed_test=dt.datetime(2011, 12, 31),
                    sv=10000,
                    alpha=0.2,
                    rar=0.98,
                    radr=0.99,
                    window=15,
                    num_simulation=10,
                    plot_results=False,
                    verbose=False):
    syms = [symbol]

    # read in training data
    train_dates = pd.date_range(sd_train, ed_train)
    train_prices_all = ut.get_data(syms, train_dates)
    train_prices = train_prices_all[syms]

    # read in testing data
    dates = pd.date_range(sd_test, ed_test)
    prices_all = ut.get_data(syms, dates)
    prices = prices_all[syms]

    # compute the buy-and-hold benchmark
    train_cumulative_return_buy_hold_strategy = \
        ((train_prices.ix[-1, :][0] - train_prices.ix[0, :][0]) * 100 + sv) / sv - 1
    if verbose:
        print "cumulative return of buy-and-hold strategy on training:", \
            train_cumulative_return_buy_hold_strategy
    cumulative_return_buy_hold_strategy = \
        ((prices.ix[-1, :][0] - prices.ix[0, :][0]) * 100 + sv) / sv - 1
    if verbose:
        print "cumulative return of buy-and-hold strategy on testing:", \
            cumulative_return_buy_hold_strategy

    cumulative_returns_train = np.zeros(num_simulation)
    cumulative_returns_test = np.zeros(num_simulation)
    for i in range(0, num_simulation):
        # instantiate the strategy learner
        learner = sl.StrategyLearner(alpha=alpha, rar=rar, radr=radr, verbose=False)

        # learning
        cumulative_return = learner.addEvidence(symbol=symbol, sd=sd_train,
                                                ed=ed_train, sv=10000)
        if plot_results:
            plt.plot(cumulative_return)

        # save the final result
        cumulative_returns_train[i] = cumulative_return[-1]
        if verbose:
            print "cumulative_return of training:", cumulative_returns_train[i]

        # test the learner
        df_trades, cumulative_returns_test[i] = learner.testPolicy(
            symbol=symbol, sd=sd_test, ed=ed_test, sv=10000)

    if plot_results:
        plt.title("Cumulative return on training set of ten simulations")
        plt.ylabel("Cumulative return")
        plt.xlabel("Trials")
        plt.show()

    if verbose:
        print "cumulative_returns_train", cumulative_returns_train
        print "cumulative_returns_test", cumulative_returns_test

    avg_cumulative_returns_train = np.mean(cumulative_returns_train)
    avg_cumulative_returns_test = np.mean(cumulative_returns_test)

    return (avg_cumulative_returns_train, avg_cumulative_returns_test)
            max_votes = 0
            max_votes_class = -1
            for v, count in votes.items():  # loop through the votes
                # if this count is greater than our max_votes, make it the
                # new max_votes and remember the class it belongs to
                if count > max_votes:
                    max_votes = count
                    max_votes_class = v
            y[i] = max_votes_class  # set y[i] to the winning class
        return y

    def score(self, X, Y):
        P = self.predict(X)
        return np.mean(P == Y)


if __name__ == '__main__':
    X, y = get_data(2000)
    # X, y = get_xor()
    # X, y = get_donut()
    Ntrain = 1000
    X_train, y_train = X[:Ntrain], y[:Ntrain]
    X_test, y_test = X[Ntrain:], y[Ntrain:]

    for k in (1, 2, 3, 4, 5):
        knn = KNN(k)
        print(f"\nThis is for K = {k}\n")

        t0 = datetime.now()
        knn.fit(X_train, y_train)
        print(f'Training time: {datetime.now() - t0}')

        knn.predict(X_test)

        t0 = datetime.now()
        print(f"Train accuracy: {knn.score(X_train, y_train)}")
        print(f"Time to compute train accuracy: {datetime.now() - t0}")
def addEvidence(self, symbol="IBM",
                sd=dt.datetime(2008, 1, 1),
                ed=dt.datetime(2009, 1, 1),
                sv=10000):
    # add your code to do learning here

    # compute the technical indicators
    sym = [symbol]
    momentum3, sma_ratio3, bbp3 = ind.indicators(sd, ed, sym, 70, False)
    momentum14, sma_ratio14, bbp14 = ind.indicators(sd, ed, sym, 14, False)

    # create feature array and discretize the values of the features
    row = momentum3.values[:, 0].size - 70
    features = np.zeros((row, 7))
    features[:, 0] = momentum3.ix[70:, symbol].values
    features[:, 1] = sma_ratio3.ix[70:, symbol].values
    features[:, 2] = bbp3.ix[70:, symbol].values
    features[:, 3] = momentum14.ix[70:, symbol].values
    features[:, 4] = sma_ratio14.ix[70:, symbol].values
    features[:, 5] = bbp14.ix[70:, symbol].values

    fmin = features.min(axis=0)
    fmax = features.max(axis=0)
    for i in range(6):
        bins = np.linspace(fmin[i], fmax[i], 9)
        features[:, i] = np.digitize(features[:, i], bins)
    # print features[0:30, ]

    # combine the six discretized indicators into a single state number
    for i in range(row):
        features[i, 6] = int(str(int(features[i, 0])) + str(int(features[i, 1])) +
                             str(int(features[i, 2])) + str(int(features[i, 3])) +
                             str(int(features[i, 4])) + str(int(features[i, 5])))
    # print features[0:30, ]

    # Read in the SPY & symbol data (adj_close) using util.py
    dates = pd.date_range(sd, ed)
    prices_all = get_data(sym, dates)  # automatically adds SPY
    price = prices_all / prices_all.ix[0, :]
    price = price.ix[70:, symbol].values
    # print(price[0:9])

    ## initiate the qlearner
    pre_action = 0  ## track the previous action
    cur_action = self.learner.querysetstate(int(features[0, 6]))
    ## print("state: ", features[0, 6], "action", cur_action)

    ## update the qlearner until it converges
    i = 1
    total_reward = 0
    last_reward = 0
    while i < row:
        cur_state = int(str(int(features[i, 6])) + str(int(pre_action)))
        ## print("days of ", i, "state: ", cur_state, "action: ", cur_action)
        if cur_action == 1:  ## buy and long 1000
            if pre_action == 0:
                cur_reward = (price[i] - price[i - 1]) * 1000 - price[i - 1] * 1000 * self.impact
            elif pre_action == 1:
                cur_reward = (price[i] - price[i - 1]) * 1000
            elif pre_action == 2:
                cur_reward = (price[i] - price[i - 1]) * 1000 - price[i - 1] * 2000 * self.impact
        elif cur_action == 2:  ## sell and short 1000
            if pre_action == 0:
                cur_reward = (price[i - 1] - price[i]) * 1000 - price[i - 1] * 1000 * self.impact
            elif pre_action == 1:
                cur_reward = (price[i - 1] - price[i]) * 1000 - price[i - 1] * 2000 * self.impact
            elif pre_action == 2:
                cur_reward = (price[i - 1] - price[i]) * 1000
        else:  ## no holding
            if pre_action == 0:
                cur_reward = 0
            elif pre_action == 1:
                cur_reward = -price[i - 1] * 1000 * self.impact
            elif pre_action == 2:
                cur_reward = -price[i - 1] * 1000 * self.impact
        total_reward = total_reward + cur_reward
        action = self.learner.query(cur_state, cur_reward)
        ## print("current reward: ", cur_reward, "next_action: ", action)
        pre_action = cur_action
        cur_action = action
        i = i + 1

    j = 0
    while total_reward != last_reward:
        ## re-initiate the qlearner
        pre_action = 0  ## track the previous action
        cur_action = self.learner.querysetstate(
            int(str(int(features[0, 6])) + str(int(pre_action))))
        ## print "total reward is ", total_reward, " last_reward is ", last_reward
        i = 1
        last_reward = total_reward
        total_reward = 0
        while i < row:
            cur_state = int(str(int(features[i, 6])) + str(int(pre_action)))
            if cur_action == 1:  ## buy and long 1000
                if pre_action == 0:
                    cur_reward = (price[i] - price[i - 1]) * 1000 - price[i - 1] * 1000 * self.impact
                elif pre_action == 1:
                    cur_reward = (price[i] - price[i - 1]) * 1000
                elif pre_action == 2:
                    cur_reward = (price[i] - price[i - 1]) * 1000 - price[i - 1] * 2000 * self.impact
            elif cur_action == 2:  ## sell and short 1000
                if pre_action == 0:
                    cur_reward = (price[i - 1] - price[i]) * 1000 - price[i - 1] * 1000 * self.impact
                elif pre_action == 1:
                    cur_reward = (price[i - 1] - price[i]) * 1000 - price[i - 1] * 2000 * self.impact
                elif pre_action == 2:
                    cur_reward = (price[i - 1] - price[i]) * 1000
            else:  ## no holding
                if pre_action == 0:
                    cur_reward = 0
                elif pre_action == 1:
                    cur_reward = -price[i - 1] * 1000 * self.impact
                elif pre_action == 2:
                    cur_reward = -price[i - 1] * 1000 * self.impact
            total_reward = total_reward + cur_reward
            action = self.learner.query(cur_state, cur_reward)
            '''
            if (cur_reward != 0):
                print "\ndays of ", i, "state: ", cur_state, "action: ", cur_action, "pre-action: ", pre_action
                print "price[i] ", price[i], " price[i-1] ", price[i - 1], " impact ", self.impact
                print "current reward: ", cur_reward, "next_action: ", action
            '''
            pre_action = cur_action
            cur_action = action
            i = i + 1
        j = j + 1
    def score(self, X, Y):
        P = self.predict(X)
        return np.mean(P == Y)

    def predict(self, X):
        N, D = X.shape
        K = len(self.gaussians)
        P = np.zeros((N, K))
        for c, g in self.gaussians.iteritems():
            # print "c:", c
            mean, var = g['mean'], g['var']
            P[:, c] = mvn.logpdf(X, mean=mean, cov=var) + np.log(self.priors[c])
        return np.argmax(P, axis=1)


if __name__ == '__main__':
    X, Y = get_data(10000)
    Ntrain = len(Y) / 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayes()
    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print "Training time:", (datetime.now() - t0)

    t0 = datetime.now()
    print "Train accuracy:", model.score(Xtrain, Ytrain)
    print "Time to compute train accuracy:", (datetime.now() - t0), "Train size:", len(Ytrain)

    t0 = datetime.now()
    print "Test accuracy:", model.score(Xtest, Ytest)
def compute_portvals(order, start_val=100000, commission=0, impact=0):
    # this is the function the autograder will call to test your code
    # NOTE: orders_file may be a string, or it may be a file object. Your
    # code should work correctly with either input
    order = order.sort_index()
    sym = "JPM"
    start_date = order.index.values[0]
    end_date = order.index.values[-1]
    date_range = pd.date_range(start_date, end_date)

    prices = get_data([sym], date_range)
    prices['Cash'] = 1.00

    trade = pd.DataFrame(index=prices.index, columns=prices.columns)
    trade = trade.fillna(0)
    trade['Cash'].iloc[0] = start_val

    # apply each order: signed share count, cash outflow, commission, and impact
    for idx, row in order.iterrows():
        order_price = prices[sym].loc[idx]
        order_units = row[0]
        trade.loc[idx, sym] += order_units
        trade.loc[idx, "Cash"] += order_units * order_price * -1
        trade.loc[idx, "Cash"] -= commission
        share_impact = abs(order_units) * order_price * impact
        trade.loc[idx, "Cash"] -= share_impact

    # accumulate daily trades into holdings
    for i in range(1, trade.shape[0]):
        for j in range(0, trade.shape[1]):
            trade.iloc[i, j] += trade.iloc[i - 1, j]

    portvals = prices * trade
    portvals = portvals.sum(axis=1)

    # Optional diagnostics, left commented out:
    # cum_ret = (portvals[-1] / portvals[0]) - 1
    # daily_ret = (portvals / portvals.shift(1)) - 1
    # avg_daily_ret = daily_ret.mean()
    # std_daily_ret = daily_ret.std()
    # sharpe_ratio = np.sqrt(252) * daily_ret.mean() / std_daily_ret
    # print(f"Date Range: {start_date} to {end_date}")
    # print(f"Sharpe Ratio of Fund: {sharpe_ratio}")
    # print(f"Cumulative Return of Fund: {cum_ret}")
    # print(f"Standard Deviation of Fund: {std_daily_ret}")
    # print(f"Average Daily Return of Fund: {avg_daily_ret}")
    # print(f"Final Portfolio Value: {portvals[-1]}")

    return portvals
    ax2 = ax1.twinx()
    normed = prices / prices.ix[0]
    normed_spy = spy_prices / spy_prices.ix[0]
    normed.plot(ax=ax1, color='orange', lw=1.2, legend=False)
    normed_spy.plot(ax=ax1, color='green', lw=1.2, legend=False)
    ratio.plot(ax=ax2, color='blue', lw=1.2)
    ax1.set_ylabel('Normalized Price')
    ax2.set_ylabel('Ratio of SPY to JPM')
    ax1.set_xlabel('Date')
    plt.grid(True)
    or_patch = mpatches.Patch(color='orange', label='JPM')
    green_patch = mpatches.Patch(color='green', label='SPY')
    blue_patch = mpatches.Patch(color='blue', label='Ratio of SPY to JPM')
    plt.legend(handles=[or_patch, green_patch, blue_patch], loc='lower left')
    plt.title('SPY-to-JPM Normalized Ratio Indicator')
    # plt.show()
    plt.savefig('spy_jpm_ratio.pdf')


if __name__ == "__main__":
    start = '01-01-2008'
    end = '12-31-2009'
    dates = pd.date_range(start, end)
    prices = get_data(['JPM'], dates).drop(['SPY'], axis=1)
    spy = get_data(['SPY'], dates, addSPY=False)
    sma = get_sma(prices, 50)[1]
    print(sma.join(sma, lsuffix='_sma', rsuffix='_sma').join(sma).as_matrix())
    # sma_plot = plot_sma(prices, 50)
    # bb_plot = plot_bb(prices, 50)
    # ratio_plot = plot_spy_ratio(prices, spy)
    df[['MACD', 'MACD_SIGNAL']].plot(kind='bar', ax=ax)
    # ax2 = ax.twinx()
    # ax2.plot(ax.get_xticks(), df[['MACD', 'MACD_SIGNAL']].values)
    plt.grid()
    plt.savefig(filename + '.png')


if __name__ == "__main__":
    syms = ['JPM']
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    dates = pd.date_range(sd, ed + dt.timedelta(days=1))
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices = prices.fillna(method='ffill', inplace=False)
    prices = prices.fillna(method='bfill', inplace=False)

    priceOverSMAValues = priceOverSMA(prices)
    # use a distinct name so the priceOverEMAClubbed function is not shadowed
    priceOverEMAClubbedValues = priceOverEMAClubbed(prices)
    bband, bbp = bbands(prices)
    macdVal = macd(prices)

    ema_fast = ema(prices, window=12)
    ema_fast.rename(columns={'JPM': 'EMA Fast (12)'}, inplace=True)
    ema_slow = ema(prices, window=26)
    ema_slow.rename(columns={'JPM': 'EMA Slow (26)'}, inplace=True)
    # use a distinct name so the ema function is not shadowed
    ema_df = pd.concat([prices, ema_fast, ema_slow], axis=1)
    rsiV = rsi(prices)
def experiment_2_helper(symbol, start_train, end_train, start_test, end_test,
                        sv=100000, commission=0.0, impact=0.0):
    np.random.seed(1000)
    random.seed(1000)

    slearner = sl.StrategyLearner(impact=impact)
    slearner.addEvidence(symbol=symbol, sd=start_train, ed=end_train, sv=sv)

    # Calculate manual strategy portfolio values
    manual_trades = man.testPolicy(symbol=symbol, sd=start_test, ed=end_test, sv=sv)
    manual_port_vals = ms.compute_portvals(manual_trades, start_val=sv,
                                           commission=commission, impact=impact)
    manual_port_vals = manual_port_vals / manual_port_vals.iloc[0, :]
    manual_port_vals.rename(columns={"Portfolio Value": "Manual"}, inplace=True)

    # Calculate benchmark portfolio values
    dates = pd.date_range(start_test, end_test)
    syms = [symbol]
    price_range = get_data(syms, dates)  # automatically adds SPY
    benchmark_trades = pd.DataFrame(
        data=[[symbol, "BUY", 1000]],
        index=[price_range.index[0], price_range.index[-1]],
        columns=["Symbol", "Order", "Shares"])
    bench_port_vals = ms.compute_portvals(benchmark_trades, start_val=sv,
                                          commission=commission, impact=impact)
    bench_port_vals = bench_port_vals / bench_port_vals.iloc[0, :]
    bench_port_vals.rename(columns={"Portfolio Value": "Benchmark"}, inplace=True)

    # Calculate strategy learner portfolio values
    temp_strategylearner_trades = slearner.testPolicy(symbol=symbol, sd=start_test,
                                                      ed=end_test, sv=sv)
    strategylearner_trades = pd.DataFrame(columns=['Order', 'Symbol', 'Shares'])
    for row_idx in temp_strategylearner_trades.index:
        nshares = temp_strategylearner_trades.loc[row_idx][0]
        if nshares == 0:
            continue
        order = 'SELL' if nshares < 0 else 'BUY'
        new_row = pd.DataFrame([[order, symbol, abs(nshares)]],
                               columns=['Order', 'Symbol', 'Shares'],
                               index=[row_idx])
        strategylearner_trades = strategylearner_trades.append(new_row)
    strategylearner_port_vals = ms.compute_portvals(strategylearner_trades, start_val=sv,
                                                    commission=commission, impact=impact)
    strategylearner_port_vals = strategylearner_port_vals / strategylearner_port_vals.iloc[0, :]
    strategylearner_port_vals.rename(columns={"Portfolio Value": "Strategy"}, inplace=True)

    port_vals = pd.DataFrame(bench_port_vals["Benchmark"], index=bench_port_vals.index)
    port_vals["Manual"] = manual_port_vals["Manual"]
    port_vals["Strategy"] = strategylearner_port_vals["Strategy"]
    port_vals.fillna(method='ffill', inplace=True)

    mcr, madr, msddr, msr = id.calculate_portfolio_metrics(manual_port_vals)
    bcr, badr, bsddr, bsr = id.calculate_portfolio_metrics(bench_port_vals)
    scr, sadr, ssddr, ssr = id.calculate_portfolio_metrics(strategylearner_port_vals)

    return mcr, madr, msddr, msr, bcr, badr, bsddr, bsr, scr, sadr, ssddr, ssr
def test_run():
    # Set up
    start_date = dt.datetime(2009, 1, 1)
    end_date = dt.datetime(2011, 1, 1)
    symbols = ['GOOG', 'AAPL', 'GLD', 'XOM']
    allocations = [0.2, 0.3, 0.4, 0.1]
    start_val = 1000000
    risk_free_rate = 0.0
    sample_freq = 252

    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbols, dates)
    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all["SPY"]  # only SPY, for comparison later

    # Get daily portfolio value
    prices_SPY = prices_SPY / prices_SPY.ix[0, :]  # normalizes prices_SPY
    normed_prices = prices / prices.ix[0, :]
    alloced = normed_prices * allocations
    port_vals = alloced.sum(axis=1)
    daily_returns = compute_daily_returns(port_vals)

    # Get portfolio statistics
    cum_ret = (port_vals[-1] / port_vals[0]) - 1  # cumulative return
    avg_daily_ret = daily_returns.mean()
    std_daily_ret = daily_returns.std()

    # The Sharpe ratio depends on the sampling frequency; the annual risk-free
    # rate must first be converted to a per-sample rate. For other frequencies:
    # Daily:   rfr_daily   = (1.0 + yearly_rfr)**(1. / 252) - 1, K = sqrt(252)
    # Weekly:  rfr_weekly  = (1.0 + yearly_rfr)**(1. / 52) - 1,  K = sqrt(52)
    # Monthly: rfr_monthly = (1.0 + yearly_rfr)**(1. / 12) - 1,  K = sqrt(12)
    # Yearly:  SR = (avg_yearly_return - yearly_rfr) / std_yearly_return, K = 1
    rfr_freq_calc = ((1.0 + float(risk_free_rate)) ** (1. / sample_freq)) - 1
    SR = (float(avg_daily_ret) - float(rfr_freq_calc)) / float(std_daily_ret)
    K = np.sqrt(sample_freq)
    SRannualized = K * SR

    end_value = start_val * (cum_ret + 1)

    # Compare daily portfolio value with SPY using a normalized plot
    df_temp = pd.concat([port_vals, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
    # util.plot_data(df_temp, ylabel="Normalized Price")
    # plt.show()

    # Print statistics
    print "Start Date:", start_date
    print "End Date:", end_date
    print "Symbols:", symbols
    print "Allocations:", allocations
    print "Sharpe Ratio:", SRannualized
    print "Volatility (stdev of daily returns):", std_daily_ret
    print "Average Daily Return:", avg_daily_ret
    print "Cumulative Return:", cum_ret
def getPrices(self, startDate, endDate, symbolList): dateRange = pd.date_range(startDate, endDate) prices = util.get_data(symbolList, dateRange) self.prices = prices[symbolList] self.normalizedPrices = self.prices / self.prices.ix[0] self.prevTransaction = 0.0
def main(args):
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all', 'cw-lid'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2', 'all' or 'cw-lid' for attacking LID detector"
    #model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
    model_file = "../model/densenet_cifar10.h5df"
    print(model_file)
    assert os.path.isfile(model_file), \
        'model file not found... must first train model using train_model.py.'
    if args.dataset == 'svhn' and args.attack == 'cw-l2':
        assert args.batch_size == 16, \
            "svhn has 26032 test images, the batch_size for the cw-l2 attack should be 16, " \
            "otherwise there will be an error at the last batch -- needs to be fixed."
    print('Dataset: %s. Attack: %s' % (args.dataset, args.attack))

    # Create TF session, set it as Keras backend
    # (tf.initialize_all_variables() is deprecated and redundant here:
    # global_variables_initializer already covers it)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    tf.keras.backend.set_session(sess)

    if args.attack == 'cw-l2' or args.attack == 'cw-lid':
        warnings.warn("Important: remove the softmax layer for cw attacks!")
        # use softmax=False to load without softmax layer
        if args.model == 'dense':
            model = densenet.create_dense_net(10, False, (32, 32, 3), 40, 3, 12, 16,
                                              dropout_rate=0)
            optimizer = Adam(lr=1e-4)  # Adam instead of SGD to speed up training
            model.compile(loss='categorical_crossentropy',
                          optimizer=optimizer,
                          metrics=["accuracy"])
        elif args.dataset == 'mnist':
            model = get_model(args.dataset, softmax=False)
            model.compile(loss=cross_entropy,
                          optimizer='adadelta',
                          metrics=['accuracy'])
        elif args.dataset == 'svhn':
            model = RCNN.get_model(False)
            model.load_weights(model_file)
    else:
        if args.model == 'dense':
            model = densenet.create_dense_net(10, True, (32, 32, 3), 40, 3, 12, 16,
                                              dropout_rate=0)
            optimizer = Adam(lr=1e-4)  # Adam instead of SGD to speed up training
            model.compile(loss='categorical_crossentropy',
                          optimizer=optimizer,
                          metrics=["accuracy"])
            model.load_weights(model_file)
        elif args.dataset == 'svhn':
            model = RCNN.get_model(True)
            model.load_weights(model_file)
        else:
            model = load_model(model_file)

    _, _, X_test, Y_test = get_data(args.dataset)
    score = model.evaluate(X_test, Y_test, batch_size=args.batch_size, verbose=0)
    print("Accuracy on the test set: %0.2f%%" % (100 * score[1]))

    if args.attack == 'cw-lid':  # white-box attack on the LID detector - an example
        X_test = X_test[:1000]
        Y_test = Y_test[:1000]

    if args.attack == 'all':
        # Cycle through all attacks
        for attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2']:
            craft_one_type(sess, model, X_test, Y_test, args.dataset, attack,
                           args.batch_size)
    else:
        # Craft one specific attack type
        craft_one_type(sess, model, X_test, Y_test, args.dataset, args.attack,
                       args.batch_size)
    print('Adversarial samples crafted and saved to %s ' % PATH_DATA)
    _, acc = model.evaluate(X_test, Y_test, batch_size=args.batch_size, verbose=0)
    print("After crafting, accuracy on the test set: %0.2f%%" % (100 * acc))
    sess.close()
nnodes = [256, 128, 64] nmessage = 3 # make the model mus = np.linspace(0.8, 5.0, 43) etas = np.array([-100.0] * 43) model = util.get_model(mus, etas, pad_dim, nelem, nembed, nnodes, nmessage) model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=['mse', util.mse_mp, util.mse_mp, util.mse_mp], loss_weights=[1.0, 1.0, 1.0, 1.0], metrics=[util.mae_mp]) print(model.summary()) # load data RT, ZT, yT = util.get_data(f'data/{args.dataset_train}.pkl', pad_dim) RV, ZV, yV = util.get_data(f'data/{args.dataset_val}.pkl', pad_dim) #RT, ZT, yT = RT[:800], ZT[:800], yT[:800] #RV, ZV, yV = RV[:800], ZV[:800], yV[:800] # monopole yV_ = yV[:, :, 0] # dipole (mu_x, mu_y, mu_z) yV_i_ = yV[:, :, 1:4] # quadrupole diagonal (Q_xx, Q_yy, Q_zz) yV_ii_ = yV[:, :, [4, 7, 9]] # quadrupole off-diagonal (Q_xy, Q_xz, Q_yz)
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import cluster
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from munkres import Munkres, print_matrix
from util import get_data
from util import prepare_data

df = get_data()
cat_df_list = list(df.select_dtypes(include=['object']))
num_df_list = list(df.select_dtypes(include=['float64', 'int64']))

km_scores = []
inertia = []
km_silhouette = []
vmeasure_score = []
db_score = []

y = df["readmitted"]
X = df[num_df_list]
#X.drop("readmitted", inplace=True, axis=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)
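# --- Added sketch: the loop that fills the metric lists above is not shown
# in this excerpt. A plausible version is sketched below; the cluster-count
# range is an assumption.
from sklearn.metrics import silhouette_score, davies_bouldin_score, v_measure_score

for k in range(2, 11):  # assumed range of cluster counts
    km = KMeans(n_clusters=k, random_state=0).fit(X)
    labels = km.labels_
    km_scores.append(km.score(X))                  # negative inertia
    inertia.append(km.inertia_)
    km_silhouette.append(silhouette_score(X, labels))
    vmeasure_score.append(v_measure_score(y, labels))
    db_score.append(davies_bouldin_score(X, labels))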
        self.max_depth = max_depth

    def fit(self, X, Y):
        self.root = TreeNode(max_depth=self.max_depth)
        self.root.fit(X, Y)

    def predict(self, X):
        return self.root.predict(X)

    def score(self, X, Y):
        P = self.predict(X)
        return np.mean(P == Y)


if __name__ == '__main__':
    X, Y = get_data()

    # try donut and xor
    # from sklearn.utils import shuffle
    # X, Y = get_xor()
    # # X, Y = get_donut()
    # X, Y = shuffle(X, Y)

    # only take 0s and 1s since we're doing binary classification
    idx = np.logical_or(Y == 0, Y == 1)
    X = X[idx]
    Y = Y[idx]

    # split the data (integer division keeps a valid slice index)
    Ntrain = len(Y) // 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
def optimize_portfolio( sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), syms=["GOOG", "AAPL", "GLD", "XOM"], gen_plot=False, ): """ This function should find the optimal allocations for a given set of stocks. You should optimize for maximum Sharpe Ratio. The function should accept as input a list of symbols as well as start and end dates and return a list of floats (as a one-dimensional numpy array) that represents the allocations to each of the equities. You can take advantage of routines developed in the optional assess portfolio project to compute daily portfolio value and statistics. :param sd: A datetime object that represents the start date, defaults to 1/1/2008 :type sd: datetime :param ed: A datetime object that represents the end date, defaults to 1/1/2009 :type ed: datetime :param syms: A list of symbols that make up the portfolio (note that your code should support any symbol in the data directory) :type syms: list :param gen_plot: If True, optionally create a plot named plot.png. The autograder will always call your code with gen_plot = False. :type gen_plot: bool :return: A tuple containing the portfolio allocations, cumulative return, average daily returns, standard deviation of daily returns, and Sharpe ratio :rtype: tuple """ # Read in adjusted closing prices for given symbols, date range dates = pd.date_range(sd, ed) prices_all = get_data(syms, dates) # automatically adds SPY prices = prices_all[syms] # only portfolio symbols prices_SPY = prices_all["SPY"] # only SPY, for comparison later # find the allocations for the optimal portfolio n = len(syms) allocs = [1 / n] * n result = spo.minimize(f, allocs, args=prices, method='SLSQP', bounds=[(0, 1)] * n, constraints=({ 'type': 'eq', 'fun': lambda x: 1.0 - np.sum(x) })) optimum_allocs = result.x # note that the values here ARE NOT meant to be correct for a test case cr, adr, sddr, sr = assess_portfolio( optimum_allocs, prices) # add code here to compute stats # Get daily portfolio value port_val = get_port_val( optimum_allocs, prices) # add code here to compute daily portfolio values # Compare daily portfolio value with SPY using a normalized plot if gen_plot: # add code to plot here df_temp = pd.concat([port_val, prices_SPY], keys=["Portfolio", "SPY"], axis=1) # df_temp = np.divide(df_temp, df_temp.iloc[0].values) # df_temp.iloc[0, :] = 0 plt.figure(1) ax = df_temp.plot(title='Portfolio Value ' + str(syms) + ' and SPY') ax.set_ylabel('Normalized Prices') ax.set_xlabel('Dates') plt.savefig('plot.png') pass return optimum_allocs, cr, adr, sddr, sr
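# --- Added sketch: the SLSQP call above minimizes an objective `f` that is
# defined elsewhere in this file. The version below is a plausible stand-in
# (negative Sharpe ratio, so minimizing f maximizes Sharpe), written as an
# assumption rather than the original implementation.
import numpy as np

def f(allocs, prices):
    normed = prices / prices.iloc[0]            # normalize prices
    port_val = (normed * allocs).sum(axis=1)    # daily portfolio value
    daily_rets = port_val.pct_change().dropna()
    sharpe = np.sqrt(252) * daily_rets.mean() / daily_rets.std()
    return -sharpe                              # minimize -> maximize Sharpe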
def _get_data(symbol, dates, column): data = get_data([symbol], dates, colname=column) data.fillna(method='ffill', inplace=True) data.fillna(method='bfill', inplace=True) return data
def test_code(): startDate = dt.datetime(2008, 1, 1) endDate = dt.datetime(2009, 12, 31) dateRange = pd.date_range(startDate, endDate) symbol = 'JPM' prices = get_data([symbol], dateRange) indicators = get_indicators(prices, symbol) """ Graph for SMA """ sma = indicators[['price', 'SMA', 'price_SMA']] fig, ax = plt.subplots() ax.plot(sma['price'], label="Price") ax.plot(sma['SMA'], label="20-Day SMA", linewidth=2) ax.plot(sma['price_SMA'], label="Price/SMA", linewidth=0.85) ax.set(xlabel="Jan. 1, 2008 - Dec. 31, 2009", ylabel="Value (Normalized)", title="20-Day Simple Moving Average for JPM") ax.set_xlim(startDate, endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show() """ Graph for Bollinger Bands""" bands = indicators[['price', 'upper band', 'lower band']] fig, ax = plt.subplots() ax.plot(bands['price'], label="Price") ax.plot(bands['upper band'], label="Upper Band", linewidth=0.85, linestyle='dashed') ax.plot(bands['lower band'], label="Lower Band", linewidth=0.85, linestyle='dashed') ax.set(xlabel="Jan. 1, 2008 - Dec. 31, 2009", ylabel="Value (Normalized)", title="Bollinger Bands for JPM Based on 20-Day Moving Average") ax.set_xlim(startDate, endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show() """ Graph for Bollinger Band Value """ bb_value = indicators[['price', 'bb value']] fig, ax = plt.subplots() #ax.plot(bb_value['price'], label="Price") ax.plot(bb_value['bb value'], label="Bollinger Band Value") ax.set(xlabel="Jan. 1, 2008 - Dec. 31, 2009", ylabel="Value (Normalized)", title="Bollinger Band Value for JPM Based on 20-Day Moving Average") ax.set_xlim(startDate, endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) ax.axhline(y=0, linewidth=0.85, linestyle='dashed', color='0.5') ax.axhline(y=1, linewidth=0.75, linestyle='dashed', color='0.5') ax.axhline(y=-1, linewidth=0.75, linestyle='dashed', color='0.5') handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show() """ Graph for Momentum """ momentum = indicators[['price', 'momentum']] fig, ax = plt.subplots() ax.plot(momentum['price'], label="Price") ax.plot(momentum['momentum'], label="momentum") ax.set(xlabel="Jan. 1, 2008 - Dec. 31, 2009", ylabel="Stock Price (Normalized)", title="Momentum for JPM Over a 20-Day Period") ax.set_xlim(startDate, endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show() """ Graph for Volatility """ vol = indicators[['price', 'volatility']] fig, ax = plt.subplots() ax.plot(vol['price'], label="Price") ax.plot(vol['volatility'], label="Volatility") ax.set(xlabel="Jan. 1, 2008 - Dec. 
31, 2009", ylabel="Value (Normalized)", title="Volatility for JPM Based on 20-Day Moving Average") ax.set_xlim(startDate, endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) y = np.arange(0, 1.3, 0.1) plt.yticks(y) plt.grid() handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show() """ Graph for RSI w/ 20 day rolling mean""" rsi = indicators[['price', 'RSI_SMA', 'RSI_EMWA', 'prices_unnormed']] fig, ax = plt.subplots() ax.plot(rsi['prices_unnormed'], label="Price") ax.plot(rsi['RSI_SMA'], label="RSI Simple Moving Average", linewidth=0.85) ax.plot(rsi['RSI_EMWA'], label="RSI Exponential Moving Average", linewidth=0.85) ax.set( xlabel="Jan. 1, 2008 - Dec. 31, 2009", ylabel="Value", title= "RSI for JPM Based on Exponential Moving Average and \n 20-Day Simple Moving Average" ) ax.set_xlim(dt.datetime(2008, 1, 30), endDate) ax.title.set_fontsize(14) ax.xaxis.label.set_fontsize(14) ax.yaxis.label.set_fontsize(14) y = np.arange(0, 90, 10) plt.yticks(y) ax.axhline(y=50, linewidth=0.85, linestyle='dashed', color='0.5') ax.axhline(y=30, linewidth=0.75, color='0.5') ax.axhline(y=70, linewidth=0.75, color='0.5') handles, labels = ax.get_legend_handles_labels() ax.legend(handles, labels) fig.autofmt_xdate() plt.show()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence", steps=SEQUENCE_LENGTH),
                         Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)
#pdb.set_trace() return weighted_population[-1] if __name__ == "__main__": #TODO: Fix main method so we can use it to test the GA start_val = 100000 symbol = "GOOG" #In-sample period dates = [dt.datetime(2011, 1, 1), dt.datetime(2011, 12, 31)] #Benchmark benchmark_df = util.get_data([symbol], pd.date_range(dates[0], dates[1]), addSPY=False).dropna() #Benchmark trades benchmark_trades_df = pd.DataFrame( data=[(benchmark_df.index.min(), symbol, "BUY", 1000), (benchmark_df.index.max(), symbol, "SELL", 1000)], columns=['Date', 'Symbol', 'Order', 'Shares']) benchmark_trades_df.set_index('Date', inplace=True) gen_alg = GeneticAlgorithm(symbol=symbol, dates=dates, start_val=start_val) params, sharpe_ratio = gen_alg.start_ga() pdb.set_trace() manual_strat = manstrat.ManualStrategy() trades_df = manual_strat.testPolicy(symbol,
def compute_portvals(orders_file="./orders/orders.csv", start_val=1000000): # this is the function the autograder will call to test your code # TODO: Your code here orders_df = pd.read_csv(orders_file, index_col='Date', parse_dates=True, na_values=['nan']) #sort by index to get order right orders_df = orders_df.sort_index() print "ORDER BOOK", orders_df #convert index to datetime orders_df.index = pd.to_datetime(orders_df.index) #get start date, end date of order book sd = orders_df.index.values[0] ed = orders_df.index.values[-1] #get all symbols in order book def scrape_symbols(df): symbol_list = [] for i in range(0, (df.shape[0])): symbol = df.iloc[i, 0] if symbol in symbol_list: pass else: symbol_list.append(symbol) return symbol_list syms = scrape_symbols(orders_df) #create a dataframe based on the order book that contains a column for each stock listed, plus SPY, and cash column #dummy values for now #sd = dt.datetime(2010, 1, 1) #ed= dt.datetime(2010, 12, 31) dates = pd.date_range(sd, ed) prices_all = get_data(syms, dates) # automatically adds SPY # add cash column for later prices_all['Cash'] = np.ones(prices_all.shape[0]) #duplicate price df into a units df and intialize to zero units_all = prices_all * 0.0 #initialize starting cash position units_all.iloc[0, -1] = start_val order = orders_df.iloc[0] #adjust units_all to show how stock units and cash are changing over time w/orders for index2, row2 in orders_df.iterrows(): stock_name = row2[0] order_price = prices_all[stock_name].ix[index2] order_units = row2[2] if row2[1] == "BUY": pos_multplr = -1 else: pos_multplr = 1 #update units_all with order units_all.loc[index2, stock_name] += order_units * pos_multplr * -1 units_all.loc[index2, "Cash"] += order_units * order_price * pos_multplr print units_all.head() #now update units_all to be full accounting table of units over time for i in range(1, units_all.shape[0]): for j in range(0, units_all.shape[1]): new_val = units_all.iloc[i, j] + units_all.iloc[i - 1, j] units_all.iloc[i, j] = new_val #finally get port_vals port_vals = prices_all * units_all port_vals["port_val"] = port_vals.sum(axis=1) port_vals["daily_returns"] = (port_vals["port_val"][1:] / port_vals["port_val"][:-1].values) - 1 port_vals["daily_returns"][0] = 0 #now we have the port_val by day so can calculate common statistics def compute_pf_stats(df, rfr, sf): # Get portfolio statistics (note: std_daily_ret = volatility) # code for stats cr = (df.ix[-1, -2] - df.ix[0, -2]) / df.ix[0, -2] # adr adr = df["daily_returns"][1:].mean() # sddr, std deviation of daily returns sddr = df["daily_returns"][1:].std() # Sharpe Ratio sr = (sf**(1.0 / 2.0) * (adr - rfr)) / sddr # Compare daily portfolio value with SPY using a normalized plot return cr, adr, sddr, sr cr, adr, sddr, sr = compute_pf_stats(port_vals, rfr=0, sf=252) #update row based on orders that day #update port_vals to only be one column of values port_val = port_vals.iloc[:, -2:-1] print "cr, adr, sddr, sr", cr, adr, sddr, sr return port_val
def testPolicy(self, symbol = "IBM", \ sd=dt.datetime(2009,1,1), \ ed=dt.datetime(2010,1,1), \ sv = 10000): # compute the technical indicators sym = [symbol] momentum3, sma_ratio3, bbp3 = ind.indicators(sd, ed, sym, 70, False) momentum14, sma_ratio14, bbp14 = ind.indicators(sd, ed, sym, 14, False) # create feature array and discretize the values of the features row = momentum3.values[:, 0].size - 70 features = np.zeros((row, 7)) features[:, 0] = momentum3.ix[70:, symbol].values features[:, 1] = sma_ratio3.ix[70:, symbol].values features[:, 2] = bbp3.ix[70:, symbol].values features[:, 3] = momentum14.ix[70:, symbol].values features[:, 4] = sma_ratio14.ix[70:, symbol].values features[:, 5] = bbp14.ix[70:, symbol].values fmin = features.min(axis=0) fmax = features.max(axis=0) for i in range(6): bins = np.linspace(fmin[i], fmax[i], 9) features[:, i] = np.digitize(features[:, i], bins) # print features[0:30, ] for i in range(row): features[i, 6] = int( str(int(features[i, 0])) + str(int(features[i, 1])) + str(int(features[i, 2])) + str(int(features[i, 3])) + str(int(features[i, 4])) + str(int(features[i, 5]))) # print features[0:30, ] # build a set of trades dates = pd.date_range(sd, ed) prices_all = ut.get_data([symbol], dates) # automatically adds SPY holdings = prices_all[[ symbol, ]] # only portfolio symbols holdings.values[:, :] = 0 # set them all to nothing pre_action = 0 for i in (range(row)): cur_action = self.learner.querysetstate( int(str(int(features[i, 6])) + str(int(pre_action)))) ##print "row is ", i, " cur_action is ", cur_action if cur_action == 1: ## buy and long 1000 holdings.values[70 + i, :] = 1000 elif cur_action == 2: ## sell and short 1000 holdings.values[70 + i, :] = -1000 else: ## no holding holdings.values[70 + i, :] = 0 pre_action = cur_action trades = holdings.copy() trades[1:] = trades.diff() if self.verbose: print type(trades) # it better be a DataFrame! if self.verbose: print trades if self.verbose: print prices_all return trades
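# --- Added worked example: the discretization used in testPolicy above.
# np.digitize against 9 bin edges yields integer bins, and one digit per
# indicator is concatenated into a composite state id. Values below are
# illustrative only.
import numpy as np

feature = np.array([0.1, 0.35, 0.8])                 # e.g. one indicator column
bins = np.linspace(feature.min(), feature.max(), 9)  # 9 edges spanning the range
digits = np.digitize(feature, bins)                  # -> array([1, 3, 9]) here
# one digit per indicator is concatenated into a composite state id,
# e.g. digits (1, 3, 9) for three indicators -> "139" -> 139
state = int("".join(str(d) for d in (1, 3, 9)))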
verbose = False
start_val = 100000
benchmarkSymbol = "JPM"
commission = 0.00
impact = 0.0
num_shares = 1000

print "In-sample training period"
start_date = dt.datetime(2008, 1, 1, 0, 0)
end_date = dt.datetime(2009, 12, 31, 0, 0)

# Create benchmark data series. Benchmark is a portfolio starting with
# $100,000, investing in 1000 shares of symbol and holding that position
dates = pd.date_range(start_date, end_date)
prices_all = get_data([benchmarkSymbol], dates).dropna()
indexDates = prices_all.index
zeroes = [0.0] * len(prices_all)
benchmarkTrades = pd.DataFrame({"Date": indexDates, benchmarkSymbol: zeroes})
benchmarkTrades = benchmarkTrades.set_index('Date')
# use .iloc[row, col] rather than chained indexing so the assignment
# writes through to the frame
benchmarkTrades.iloc[0, 0] = 1000                      # buy LONG on day 1
benchmarkTrades.iloc[len(prices_all) - 1, 0] = -1000   # sell all on the last day

benchmarkOrders = pd.Series(index=indexDates, data=zeroes)
benchmarkOrders.iloc[0] = 1.0                          # buy LONG on day 1
benchmarkOrders.iloc[len(prices_all) - 1] = -1.0       # sell all on the last day

# Train and test a StrategyLearner
stl = StrategyLearner(num_shares=num_shares, impact=impact,
                      commission=commission, verbose=True,
                      num_states=3000, num_actions=3)
def test_dot_real(data_dict): def get_iter(path, data_shape, batch_size): data_train = mx.io.LibSVMIter(data_libsvm=path, data_shape=data_shape, batch_size=batch_size) data_iter = iter(data_train) return data_iter data_dir = os.path.join(os.getcwd(), 'data') path = os.path.join(data_dir, data_dict['data_name']) if not os.path.exists(path): get_data(data_dir, data_dict['data_name'], data_dict['url'], data_dict['data_origin_name']) assert os.path.exists(path) k = data_dict['feature_dim'] m = data_dict['m'] density = estimate_density(path, data_dict['feature_dim']) mini_path = os.path.join(data_dir, data_dict['data_mini']) if not os.path.exists(mini_path): os.system("head -n 2000 %r > %r" % (path, mini_path)) assert os.path.exists(mini_path) print "Running Benchmarking on %r data" % data_dict['data_mini'] for batch_size in data_dict[ 'batch_size']: # iterator through different batch size of choice print "batch_size is %d" % batch_size # model data_shape = (k, ) train_iter = get_iter(mini_path, data_shape, batch_size) weight = mx.nd.random_uniform(low=0, high=1, shape=(k, m)) csr_data = [] dns_data = [] num_batch = 0 for batch in train_iter: data = train_iter.getdata() csr_data.append(data) dns_data.append(data.tostype('default')) num_batch += 1 bag_of_data = [csr_data, dns_data] num_repeat = 5 costs = [] for d in bag_of_data: weight.wait_to_read() cost = 0. count = 0 for d_batch in d: d_batch.wait_to_read() cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight) count += 1 costs.append(cost / count) t_sparse = costs[0] t_dense = costs[1] ratio = t_dense / t_sparse print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse') fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f" print(fmt % (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
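# --- Added sketch: `measure_cost` is called above but defined elsewhere in
# the benchmark. A minimal stand-in consistent with the call site
# measure_cost(num_repeat, mx.nd.dot, d_batch, weight) is shown below; the
# real helper may differ in detail.
import time
import mxnet as mx

def measure_cost(repeat, f, *args, **kwargs):
    # run the op `repeat` times; waitall() flushes MXNet's async engine so
    # the wall-clock time actually covers the computation
    start = time.time()
    for _ in range(repeat):
        f(*args, **kwargs)
    mx.nd.waitall()
    return (time.time() - start) / repeat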
def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 12, 31), sv=10000): """Creates a QLearner, and trains it for trading. Inputs / Parameters: symbol: The stock symbol to act on sd: A datetime object that represents the start date ed: A datetime object that represents the end date sv: Start value of the portfolio which contains only the one symbol """ # Get adjusted close prices for the given symbol on the given date range dates = pd.date_range(sd, ed) prices_all = get_data([symbol], dates) #includes SPY due to util function pricesDF = prices_all[[symbol]] # only the symbol # Get features and thresholds indicatorsDF = self.getIndicators(pricesDF[symbol]) thresholds = self.setThresholds(indicatorsDF, self.num_steps) cum_returns = [] for epoch in range(1, self.epochs + 1): # Initial position is holding nothing position = self.CASH # Create a series that captures order signals based on actions taken orders = pd.Series(index=indicatorsDF.index) # Iterate over the data by date for day, date in enumerate(indicatorsDF.index): # Get a state state = self.getState(indicatorsDF.loc[date], thresholds) # On the first day, get an action without updating the Q-table if date == indicatorsDF.index[0]: action = self.QLearner.querysetstate(state) newPos = float(action - 1) # On other days, calculate the reward and update the Q-table else: prev_price = pricesDF[symbol].iloc[day - 1] curr_price = pricesDF[symbol].loc[date] reward = self.calcDailyReward(prev_price,curr_price, position) action = self.QLearner.query(state, reward) newPos = float(action - 1) # Add new_pos to orders, update current position orders.loc[date] = newPos position = newPos #get the portfolio values (which also creates the tradesDF and pricesDF, in the background portvals, tradesDF, holdingsDF, pricesDF = marketsimcode.compute_portvals_single_stock(ordersDF=orders, symbol=symbol, start_val=sv, commission=self.commission, impact=self.impact, num_shares = self.num_shares) cum_return = marketsimcode.compute_portfolio_stats(portvals)[0] cum_returns.append(cum_return) # Check for convergence after running for at least 30 epochs if epoch > 20: # Stop if the cum_return doesn't improve for 10 epochs if self.checkConvergence(cum_returns): break #print "orders series from learner", orders #print "tradesDF from learner: ", tradesDF if self.verbose: plt.plot(cum_returns) plt.xlabel("Epoch") plt.ylabel("Cumulative return (%)") # plt.show() plt.savefig('result.png') plt.switch_backend('Agg')
def dU(beta): return mp.dot(X.T, (mp.exp(mp.dot(X,beta))/(1+mp.exp(mp.dot(X,beta))) - y)) + beta/alpha D = X.shape[1] q = mp.zeros((D, 1), dtype=mp.float32) out = mp.zeros((n_iter, D), dtype=mp.float32) for i in range(n_iter): q = hmc(U, dU, epsilon, L, q) out[i,:] = mp.ravel(q) return out with cpu() if args.mode == 'cpu' else gpu(0): with open('params.json') as params_file: out = {} params = json.load(params_file) X_train, y_train, X_test, y_test = get_data() X_train = mp.array(X_train) y_train = mp.array(y_train) X_test = mp.array(X_test) y_test = mp.array(y_test) y_train = mp.expand_dims(y_train, 1) z = lr_hmc(y_train, X_train, params['epsilon'], params['n_leaps'], params['alpha'], 1) # Warm-up t = time.perf_counter() z = lr_hmc(y_train, X_train, params['epsilon'], params['n_leaps'], params['alpha'], params['n_iter']) t = time.perf_counter() - t out[f'minpy-{args.mode}'] = t coef_ = mp.mean(z[params['burn_in']:], 0) acc = mp.mean((sigmoid(mp.dot(X_test, coef_)) > 0.5) == y_test)[0] assert acc > 0.8 print(json.dumps(out))
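# --- Added sketch: the `hmc` transition used above is defined elsewhere. A
# standard leapfrog step with a Metropolis accept test, matching the
# (U, dU, epsilon, L, q) signature, is sketched below in plain NumPy as an
# assumption, not the original implementation.
import numpy as np

def hmc(U, dU, epsilon, L, q):
    q = q.copy()
    p = np.random.randn(*q.shape)          # resample momentum
    current_q = q
    current_p = p
    p = p - 0.5 * epsilon * dU(q)          # half step for momentum
    for step in range(L):                  # L leapfrog steps for position
        q = q + epsilon * p
        if step != L - 1:
            p = p - epsilon * dU(q)        # full step for momentum
    p = p - 0.5 * epsilon * dU(q)          # final half step for momentum
    current_H = U(current_q) + 0.5 * np.sum(current_p ** 2)
    proposed_H = U(q) + 0.5 * np.sum(p ** 2)
    # Metropolis accept/reject on the change in Hamiltonian
    if np.log(np.random.rand()) < current_H - proposed_H:
        return q
    return current_q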
import os
import math
import datetime as dt

import pandas as pd
import matplotlib.pyplot as plt

from util import get_data, plot_data


def tech_indicators(syms=['JPM'], s_date=dt.datetime(2008, 1, 1),
                    e_date=dt.datetime(2009, 12, 31)):
    start_date = s_date
    end_date = e_date
    symbols = syms
    lookback = 7

    dfprices = get_data(symbols, pd.date_range(start_date, end_date))
    dfprices = dfprices.drop('SPY', axis=1)
    dfprices = dfprices / dfprices.iloc[0]

    # price / SMA ratio over the lookback window
    dfsma_prices = dfprices.rolling(window=lookback, min_periods=lookback).mean()
    dfsma = dfprices / dfsma_prices

    # SMA crossings: nonzero on the day price crosses its SMA
    dfsma_cross = pd.DataFrame(0, index=dfsma.index, columns=dfsma.columns)
    dfsma_cross[dfsma >= 1.0] = 1
    dfsma_cross[1:] = dfsma_cross.diff()
    dfsma_cross.iloc[0] = 0

    # momentum over the lookback window
    dfmomentum = (dfprices / dfprices.shift(lookback - 1)) - 1
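# --- Added sketch (illustrative, not part of the original function): a
# Bollinger %B indicator built in the same rolling style, assuming dfprices,
# dfsma_prices and lookback as defined in tech_indicators above.
rolling_std = dfprices.rolling(window=lookback, min_periods=lookback).std()
upper_band = dfsma_prices + 2 * rolling_std
lower_band = dfsma_prices - 2 * rolling_std
dfbbp = (dfprices - lower_band) / (upper_band - lower_band)  # 0 = lower band, 1 = upper band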
def compute_portvals(orders_df, start_val=1000000, commission=9.95, impact=0.005):
    # this is the function the autograder will call to test your code
    # NOTE: orders_df is a DataFrame of orders indexed by date

    # Load in orders, dates and the list of stocks to call get_data on
    orders = orders_df.sort_index()
    stocks = orders['Symbol'].unique().tolist()
    start_date = orders.index[0]
    end_date = orders.index[-1]
    dates = pd.date_range(start_date, end_date)
    orders.fillna(method='ffill', inplace=True)
    orders.fillna(method='backfill', inplace=True)

    # get price data and fill NA values forward, then backward
    data = get_data(stocks, dates)
    data.fillna(method='ffill', inplace=True)
    data.fillna(method='backfill', inplace=True)
    data["cash_change"] = 1.0

    # share_chg holds the change in number of shares by day for each asset,
    # plus the change in cash; port aggregates the portfolio over time
    share_chg = data.copy()
    port = data.copy()
    for col in share_chg.columns:
        share_chg[col].values[:] = 0
        port[col].values[:] = 0

    for idx, row in orders.iterrows():
        ticker = row[0]
        ord_type = row[1]
        shares = row[2]
        value = data.loc[idx, ticker] * shares
        cost = value * impact + commission
        curr_shares = share_chg.loc[idx, ticker]
        curr_cash = share_chg.loc[idx, "cash_change"]
        if ord_type == "SELL":
            share_chg.loc[idx, ticker] = curr_shares - shares
            share_chg.loc[idx, "cash_change"] = curr_cash + (value - cost)
        elif ord_type == "BUY":
            share_chg.loc[idx, ticker] = curr_shares + shares
            share_chg.loc[idx, "cash_change"] = curr_cash - (value + cost)

    # accumulate the daily changes into a full accounting table of units
    port.iloc[0, :-1] = share_chg.iloc[0, :-1]
    port.iloc[0, -1] = share_chg.iloc[0, -1] + start_val
    for count in range(1, len(port.index)):
        port.iloc[count] = port.iloc[count - 1] + share_chg.iloc[count]

    # finally get the portfolio value per day
    port = (data * port).sum(axis=1)
    portvals = pd.DataFrame(port, index=port.index, columns=["portfolio_totals"])
    return portvals
def compute_portvals(
    symbol,
    orders,
    start_val=1000000,
    commission=9.95,
    impact=0.005,
):
    """
    Computes the portfolio values.

    :param symbol: The single stock symbol the orders refer to
    :type symbol: str
    :param orders: A dataframe of orders indexed by date; positive share
        counts are buys, negative share counts are sells
    :type orders: pandas.DataFrame
    :param start_val: The starting value of the portfolio
    :type start_val: int
    :param commission: The fixed amount in dollars charged for each transaction (both entry and exit)
    :type commission: float
    :param impact: The amount the price moves against the trader compared to the historical data at each transaction
    :type impact: float
    :return: the result (portvals) as a single-column dataframe, containing the value of the portfolio for each trading day in the first column from start_date to end_date, inclusive.
    :rtype: pandas.DataFrame
    """
    # this is the function the autograder will call to test your code

    # Read orders
    orders.sort_index(ascending=True, inplace=True)
    start_date = orders.index.min()
    end_date = orders.index.max()
    symbols = [symbol]
    columns = np.append(symbols, 'Cash')

    # Prices - [Date, Symbol1, Symbol2, ..., Cash]
    dates = pd.date_range(start_date, end_date)
    prices = get_data(symbols, dates)
    prices = pd.DataFrame(prices[symbols])
    prices['Cash'] = 1

    # Trades - [Date, Symbol1, Symbol2, ..., Cash] - captures changes for every day
    trades = pd.DataFrame(index=prices.index, columns=columns, data=np.zeros(prices.shape))

    # Populate "trades" dataframe
    for date, row in orders.iterrows():
        order = row[0]
        if order == 0:
            continue  # no trade, so no commission or impact either
        shares = abs(order)
        price = prices.loc[date, symbol]
        if order > 0:
            trades.loc[date, symbol] += shares
            trades.loc[date, 'Cash'] -= price * shares
        else:
            trades.loc[date, symbol] -= shares
            trades.loc[date, 'Cash'] += price * shares
        # subtract transaction costs from cash
        trades.loc[date, 'Cash'] -= (commission + impact * price * shares)

    # populate 'Holdings' - [Date, Symbol1, Symbol2, ..., Cash] - captures everyday holdings
    holdings = trades.copy()
    holdings.iloc[0, -1] += start_val
    holdings = holdings.cumsum()

    # populate 'Values' - Prices * Holdings
    values = holdings * prices

    # portfolio value for each day = sum over (prices * holdings), including cash
    portvals = values.sum(axis=1)
    return portvals
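# --- Added usage sketch (hypothetical dates and share counts): driving the
# compute_portvals above with a tiny single-symbol trades frame, where
# positive share counts are buys and negative are sells.
import datetime as dt
import pandas as pd

trades = pd.DataFrame(
    data=[1000, -1000],
    index=[dt.datetime(2008, 1, 2), dt.datetime(2008, 6, 2)],
    columns=['Shares'])
portvals = compute_portvals('JPM', trades, start_val=100000,
                            commission=9.95, impact=0.005)
print(portvals.head())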
def createIndicators(self):
    ##mycode
    dates = pd.date_range(self.start_date, self.end_date)
    price = ut.get_data(self.symbol, dates, addSPY=True)
    price_SPY = price['SPY']
    price = price.drop(['SPY'], axis=1)

    ##get data for all the indicators, including T - 30 trading days of data
    ##(to account for creation of historical indicators)
    sma_ind = ind.get_price_sma_ind(price, 20)
    momentum_ind = ind.getMomentumInd(price, 14)
    bb_ind = ind.getBBInd(price, 14)
    macd_ind = ind.getMACDInd(price)
    vol_ind = ind.volatility(price, 14)

    #we now remove the first 30 days of data (December 2007), used only to
    #warm up the indicators, so that we keep training-period data only
    price = price.loc[price.index >= self.start_date + dt.timedelta(days=30)]
    sma_ind = sma_ind.loc[sma_ind.index >= self.start_date + dt.timedelta(days=30)]
    vol_ind = vol_ind.loc[vol_ind.index >= self.start_date + dt.timedelta(days=30)]
    momentum_ind = momentum_ind.loc[momentum_ind.index >= self.start_date + dt.timedelta(days=30)]
    bb_ind = bb_ind.loc[bb_ind.index >= self.start_date + dt.timedelta(days=30)]
    macd_ind = macd_ind.loc[macd_ind.index >= self.start_date + dt.timedelta(days=30)]

    ##create crossover signals for each day
    price_sma_crossover = pd.DataFrame(0, index=sma_ind.index, columns=sma_ind.columns)
    price_sma_crossover[sma_ind > 0] = 1
    price_sma_crossover = price_sma_crossover.diff()
    price_sma_crossover[price_sma_crossover != 0] = 1

    macd_signal_diff = ind.getMACDHistogramInd(price)

    #MACD crossing below its signal line = sell
    macd_cross_below_signal = pd.DataFrame(0, index=macd_ind.index, columns=macd_ind.columns)
    macd_cross_below_signal[macd_signal_diff < 0] = 1
    macd_cross_below_signal[1:] = macd_cross_below_signal.diff()
    macd_cross_below_signal.ix[0] = 0

    #MACD crossing above its signal line = buy
    macd_cross_above_signal = pd.DataFrame(0, index=macd_ind.index, columns=macd_ind.columns)
    macd_cross_above_signal[macd_signal_diff > 0] = 1
    macd_cross_above_signal[1:] = macd_cross_above_signal.diff()
    macd_cross_above_signal.ix[0] = 0

    #bollinger crossovers: upper cross is a sell signal, lower cross a buy signal
    bb_upper_cross_signal = ind.getBBUpperCross(price, 20)
    bb_lower_cross_signal = ind.getBBLowerCross(price, 20)

    ##create and discretize states for Q Learner
    #5-day forward returns, used to label actions
    daily_rets = ((price.shift(-5) / price) - 1)
    daily_rets.ix[-1] = 0

    price['Price_Sma'] = sma_ind
    price['Volatility'] = vol_ind
    price['Momentum'] = momentum_ind
    price['BB_Ind'] = bb_ind
    #price['MACD_Ind'] = macd_ind
    #price['BB_Upper_Cross'] = bb_upper_cross_signal
    #price['BB_Lower_Cross'] = bb_lower_cross_signal
    price['MACD_Cross_Below'] = macd_cross_below_signal
    price['MACD_Cross_Above'] = macd_cross_above_signal
    #price['Price_SMA_Crossover'] = price_sma_crossover

    daily_ret_classes = pd.DataFrame(0, index=daily_rets.index, columns=daily_rets.columns, dtype=int)
    Y_buy = self.threshold
    Y_sell = -1 * self.threshold
    daily_ret_classes[daily_rets > Y_buy] = 1
    daily_ret_classes[daily_rets < Y_sell] = -1
    price['Action'] = daily_ret_classes

    return price
def make_idkmer_vec(self, data, hs, non_hs): """Make IDKmer vector. :param data: Need to processed FASTA file. :param hs: Positive FASTA file. :param non_hs: Negative FASTA file. """ from nacutil import make_kmer_list from nacutil import diversity from nacutil import id_x_s rev_kmer_list, upto, revcomp, normalize = [], False, False, False pos_s_list = get_data(hs) neg_s_list = get_data(non_hs) # print self.k if self.upto is False: k_list = [self.k] else: k_list = list(range(1, self.k+1)) # print 'k_list =', k_list # Get all kmer ID from 1-kmer to 6-kmer. # Calculate standard source S vector. pos_s_vec, neg_s_vec = [], [] diversity_pos_s, diversity_neg_s = [], [] for k in k_list: kmer_list = make_kmer_list(k, self.alphabet) temp_pos_s_vec = make_kmer_vector(pos_s_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize) temp_neg_s_vec = make_kmer_vector(neg_s_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize) temp_pos_s_vec = [sum(e) for e in zip(*[e for e in temp_pos_s_vec])] temp_neg_s_vec = [sum(e) for e in zip(*[e for e in temp_neg_s_vec])] pos_s_vec.append(temp_pos_s_vec) neg_s_vec.append(temp_neg_s_vec) diversity_pos_s.append(diversity(temp_pos_s_vec)) diversity_neg_s.append(diversity(temp_neg_s_vec)) # Calculate Diversity(X) and ID(X, S). sequence_list = get_data(data) vec = [] for seq in sequence_list: # print seq temp_vec = [] for k in k_list: kmer_list = make_kmer_list(k, self.alphabet) seq_list = [seq] kmer_vec = make_kmer_vector(seq_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize) # print 'k', k # print 'kmer_vec', kmer_vec # print diversity_pos_s if upto is False: k = 1 # print 'pos_vec', pos_s_vec # print 'neg_vec', neg_s_vec # print 'diversity_pos_s', diversity_pos_s temp_vec.append(round(id_x_s(kmer_vec[0], pos_s_vec[k-1], diversity_pos_s[k-1]), 3)) temp_vec.append(round(id_x_s(kmer_vec[0], neg_s_vec[k-1], diversity_neg_s[k-1]), 3)) vec.append(temp_vec) return vec